genode/repos/dde_linux/src/lib/lxip/lxcc_emul.cc

760 lines
15 KiB
C++
Raw Normal View History

/*
 * \brief  Linux emulation code
 * \author Sebastian Sumpf
 * \author Emery Hemingway
 * \author Christian Helmuth
 * \date   2013-08-28
 */

/*
 * Copyright (C) 2013-2017 Genode Labs GmbH
 *
 * This file is distributed under the terms of the GNU General Public License
 * version 2.
 */
/* Genode includes */
2013-08-26 13:58:58 +02:00
#include <base/allocator_avl.h>
#include <base/object_pool.h>
#include <base/sleep.h>
2013-08-26 13:58:58 +02:00
#include <base/snprintf.h>
#include <dataspace/client.h>
#include <region_map/client.h>
#include <timer_session/connection.h>
#include <trace/timestamp.h>
2013-08-26 13:58:58 +02:00
/* local includes */
2013-08-26 13:58:58 +02:00
#include <lx_emul.h>
#include <lx.h>
2013-08-26 13:58:58 +02:00
/* Lx_kit */
#include <lx_kit/env.h>
/*********************************
** Lx::Backend_alloc interface **
*********************************/
2013-08-26 13:58:58 +02:00
#include <lx_kit/backend_alloc.h>
2013-08-26 13:58:58 +02:00
/* Lx_kit environment, provided once via 'lxcc_emul_init' */
static Lx_kit::Env *lx_env;


void Lx::lxcc_emul_init(Lx_kit::Env &env) { lx_env = &env; }
/**
 * Pool entry associating a RAM dataspace capability with its allocation
 *
 * Enables 'backend_free' to look up and release an allocation by its
 * capability.
 */
struct Memory_object_base : Genode::Object_pool<Memory_object_base>::Entry
{
	Memory_object_base(Genode::Ram_dataspace_capability cap)
	: Genode::Object_pool<Memory_object_base>::Entry(cap) { }

	Genode::Ram_dataspace_capability ram_cap()
	{
		return Genode::reinterpret_cap_cast<Genode::Ram_dataspace>(cap());
	}

	/* hand the backing RAM dataspace back to the environment */
	void free() { lx_env->ram().free(ram_cap()); }
};


static Genode::Object_pool<Memory_object_base> memory_pool;
/*
 * Allocate backend RAM of 'size' bytes and track it in the memory pool
 * so it can later be released via 'backend_free'.
 */
Genode::Ram_dataspace_capability
Lx::backend_alloc(Genode::addr_t size, Genode::Cache_attribute cached)
{
	Genode::Ram_dataspace_capability ds = lx_env->ram().alloc(size);
	memory_pool.insert(new (lx_env->heap()) Memory_object_base(ds));
	return ds;
}
/*
 * Release a backend allocation previously obtained via 'backend_alloc'
 *
 * Fix: 'object' was uninitialized — when the capability is not found in
 * the pool, the lambda returns early and the old code passed an
 * indeterminate pointer to 'destroy' (undefined behavior).
 */
void Lx::backend_free(Genode::Ram_dataspace_capability cap)
{
	using namespace Genode;

	Memory_object_base *object = nullptr;
	memory_pool.apply(cap, [&] (Memory_object_base *o) {
		if (!o) return;

		o->free();
		memory_pool.remove(o);

		object = o; /* save for destroy */
	});

	/* destroy only a successfully looked-up object, outside 'apply' */
	if (object)
		destroy(lx_env->heap(), object);
}
/*************************************
** Memory allocation, linux/slab.h **
*************************************/
2013-08-26 13:58:58 +02:00
#include <lx_emul/impl/slab.h>
2013-08-26 13:58:58 +02:00
2015-05-19 11:33:29 +02:00
/**
 * Allocate a large system hash table (linux/bootmem.h emulation)
 *
 * \param tablename    table name used by Linux for diagnostics (unused here)
 * \param bucketsize   size of one bucket in bytes
 * \param numentries   requested entry count, 0 means derive from 'high_limit'
 * \param _hash_shift  if non-NULL, receives log2 of the table size
 * \param _hash_mask   if non-NULL, receives the hash bit mask
 *
 * NOTE(review): the heap allocation result is not checked, 'table' is
 * returned as filled in by the heap. Also, 'nlog2 <<= 1' doubles the
 * shift when 2^nlog2 < elements; rounding up by one bit would be
 * 'nlog2 += 1' — verify against callers that pass non-power-of-two
 * element counts, since the resulting mask may exceed the allocated
 * bucket count.
 */
void *alloc_large_system_hash(const char *tablename,
                              unsigned long bucketsize,
                              unsigned long numentries,
                              int scale,
                              int flags,
                              unsigned int *_hash_shift,
                              unsigned int *_hash_mask,
                              unsigned long low_limit,
                              unsigned long high_limit)
{
	unsigned long elements = numentries ? numentries : high_limit;
	unsigned long nlog2 = ilog2(elements);
	nlog2 <<= (1 << nlog2) < elements ? 1 : 0;

	void *table;
	lx_env->heap().alloc(elements * bucketsize, &table);

	if (_hash_mask)
		*_hash_mask = (1 << nlog2) - 1;

	if (_hash_shift)
		*_hash_shift = nlog2;

	return table;
}
/* allocate an array of 'n' elements of 'size' bytes, guarding the product */
void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
	bool const overflow = (size != 0) && (n > SIZE_MAX / size);
	return overflow ? NULL : kmalloc(n * size, flags);
}
/********************
** linux/slab.h **
********************/
/* NUMA-node-aware allocation degenerates to a plain cache allocation here */
void *kmem_cache_alloc_node(struct kmem_cache *cache, gfp_t flags, int node)
{
	return (void*)cache->alloc();
}
/* allocate an object from 'cache' and zero it (linux/slab.h) */
void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags)
{
	void *obj = (void *)cache->alloc();
	if (!obj)
		return obj;

	memset(obj, 0, cache->size());
	return obj;
}
/*********************
** linux/vmalloc.h **
*********************/
/* vmalloc/vfree are backed by the slab allocator — no vmalloc area exists */
void *vmalloc(unsigned long size)
{
	return kmalloc(size, 0);
}

void vfree(void const *addr)
{
	kfree(addr);
}
2015-05-19 11:33:29 +02:00
2013-08-26 13:58:58 +02:00
/********************
** linux/string.h **
********************/
/* copy the NUL-terminated string 'from' to 'to', returning 'to' */
char *strcpy(char *to, const char *from)
{
	char *dst = to;
	while ((*dst++ = *from++))
		;
	return to;
}
/*
 * Bounded string copy, delegated to Genode::strncpy
 *
 * NOTE(review): Genode::strncpy's termination/padding semantics may
 * differ from the libc contract — confirm callers' expectations.
 */
char *strncpy(char *dst, const char* src, size_t n)
{
	return Genode::strncpy(dst, src, n);
}
2013-08-26 13:58:58 +02:00
/* locate the first occurrence of 'ch' in 'p'; the terminating NUL counts */
char *strchr(const char *p, int ch)
{
	char const c = (char)ch;

	do {
		if (*p == c)
			return (char *)p;
	} while (*p++ != '\0');

	return 0;
}
2013-08-26 13:58:58 +02:00
/* locate 'ch' within the first 'count' characters of 'p' */
char *strnchr(const char *p, size_t count, int ch)
{
	char const c = (char)ch;

	for (size_t i = 0; i < count; i++, p++) {
		if (*p == c)
			return (char *)p;
		if (*p == '\0')
			break;
	}
	return 0;
}
2013-08-26 13:58:58 +02:00
/* length of 's', capped at 'maxlen' ('s' need not be NUL-terminated) */
size_t strnlen(const char *s, size_t maxlen)
{
	size_t n = 0;
	while (n < maxlen && s[n])
		n++;
	return n;
}
2013-08-26 13:58:58 +02:00
/* plain delegations to the corresponding Genode string utilities */
size_t strlen(const char *s) { return Genode::strlen(s); }
int strcmp(const char *s1, const char *s2) { return Genode::strcmp(s1, s2); }
/* Genode::strcmp with a length argument implements the strncmp contract */
int strncmp(const char *s1, const char *s2, size_t len) {
	return Genode::strcmp(s1, s2, len); }
int memcmp(const void *p0, const void *p1, size_t size) {
	return Genode::memcmp(p0, p1, size); }
2013-08-26 13:58:58 +02:00
/*
 * Bounded formatted print into 'str' via Genode's String_console
 *
 * NOTE(review): the return value is the length reported by
 * String_console — on truncation this may be the truncated length, not
 * the would-be length mandated by C99 snprintf; confirm that no caller
 * relies on the C99 return-value semantics.
 */
int snprintf(char *str, size_t size, const char *format, ...)
{
	va_list list;
	va_start(list, format);
	Genode::String_console sc(str, size);
	sc.vprintf(format, list);
	va_end(list);

	return sc.len();
}
/*
 * Copy 'src' into 'dest' of capacity 'size', always NUL-terminating the
 * destination (if size > 0) and returning the full length of 'src'.
 */
size_t strlcpy(char *dest, const char *src, size_t size)
{
	size_t src_len = 0;
	while (src[src_len])
		src_len++;

	if (size) {
		size_t const copy = (src_len >= size) ? size - 1 : src_len;
		for (size_t i = 0; i < copy; i++)
			dest[i] = src[i];
		dest[copy] = '\0';
	}
	return src_len;
}
/* find the first occurrence of 's2' in 's1' (from linux/lib/string.c) */
char *strstr(char const *s1, char const *s2)
{
	size_t needle_len = 0;
	while (s2[needle_len])
		needle_len++;

	/* an empty needle matches at the very beginning */
	if (!needle_len)
		return (char *)s1;

	size_t hay_len = 0;
	while (s1[hay_len])
		hay_len++;

	for (; hay_len >= needle_len; hay_len--, s1++) {
		size_t match = 0;
		while (match < needle_len && s1[match] == s2[match])
			match++;
		if (match == needle_len)
			return (char *)s1;
	}
	return 0;
}
/* byte-wise memory operations, delegated to the Genode utilities */
void *memset(void *s, int c, size_t n)
{
	return Genode::memset(s, c, n);
}

void *memcpy(void *d, const void *s, size_t n)
{
	return Genode::memcpy(d, s, n);
}

/* memmove must cope with overlapping regions — delegated to Genode::memmove */
void *memmove(void *d, const void *s, size_t n)
{
	return Genode::memmove(d, s, n);
}
2013-08-26 13:58:58 +02:00
/*******************
** linux/sched.h **
*******************/
/**
 * Timer-driven tick source for the protocol stack
 *
 * The handler is registered as an I/O signal handler at the given
 * entrypoint. Each timer signal updates the jiffies counter and invokes
 * the 'tick' callback of the higher layer.
 */
struct Timeout : Genode::Io_signal_handler<Timeout>
{
	Genode::Entrypoint &ep;
	Timer::Connection   timer;
	void              (*tick)();

	void handle()
	{
		update_jiffies();

		/* tick the higher layer of the component */
		tick();
	}

	Timeout(Genode::Env &env, Genode::Entrypoint &ep, void (*ticker)())
	:
		Io_signal_handler<Timeout>(ep, *this, &Timeout::handle),
		ep(ep), timer(env), tick(ticker)
	{
		timer.sigh(*this);
	}

	/* program a one-shot timeout 'msec' milliseconds from now */
	void schedule(signed long msec)
	{
		timer.trigger_once(msec * 1000);
	}

	/* block until the next I/O signal (typically the timer) is dispatched */
	void wait()
	{
		ep.wait_and_dispatch_one_io_signal();
	}
};
2013-08-26 13:58:58 +02:00
static Timeout *_timeout;
static Genode::Signal_context_capability tick_sig_cap;
2013-08-26 13:58:58 +02:00
void Lx::event_init(Genode::Env &env, Genode::Entrypoint &ep, void (*ticker)())
2013-08-26 13:58:58 +02:00
{
os/timer: interpolate time via timestamps Previously, the Genode::Timer::curr_time always used the Timer_session::elapsed_ms RPC as back end. Now, Genode::Timer reads this remote time only in a periodic fashion independently from the calls to Genode::Timer::curr_time. If now one calls Genode::Timer::curr_time, the function takes the last read remote time value and adapts it using the timestamp difference since the remote-time read. The conversion factor from timestamps to time is estimated on every remote-time read using the last read remote-time value and the timestamp difference since the last remote time read. This commit also re-works the timeout test. The test now has two stages. In the first stage, it tests fast polling of the Genode::Timer::curr_time. This stage checks the error between locally interpolated and timer-driver time as well as wether the locally interpolated time is monotone and sufficiently homogeneous. In the second stage several periodic and one-shot timeouts are scheduled at once. This stage checks if the timeouts trigger sufficiently precise. This commit adds the new Kernel::time syscall to base-hw. The syscall is solely used by the Genode::Timer on base-hw as substitute for the timestamp. This is because on ARM, the timestamp function uses the ARM performance counter that stops counting when the WFI (wait for interrupt) instruction is active. This instruction, however is used by the base-hw idle contexts that get active when no user thread needs to be scheduled. Thus, the ARM performance counter is not a good choice for time interpolation and we use the kernel internal time instead. With this commit, the timeout library becomes a basic library. That means that it is linked against the LDSO which then provides it to the program it serves. Furthermore, you can't use the timeout library anymore without the LDSO because through the kernel-dependent LDSO make-files we can achieve a kernel-dependent timeout implementation. 
This commit introduces a structured Duration type that shall successively replace the use of Microseconds, Milliseconds, and integer types for duration values. Open issues: * The timeout test fails on Raspberry PI because of precision errors in the first stage. However, this does not render the framework unusable in general on the RPI but merely is an issue when speaking of microseconds precision. * If we run on ARM with another Kernel than HW the timestamp speed may continuously vary from almost 0 up to CPU speed. The Timer, however, only uses interpolation if the timestamp speed remained stable (12.5% tolerance) for at least 3 observation periods. Currently, one period is 100ms, so its 300ms. As long as this is not the case, Timer_session::elapsed_ms is called instead. Anyway, it might happen that the CPU load was stable for some time so interpolation becomes active and now the timestamp speed drops. In the worst case, we would now have 100ms of slowed down time. The bad thing about it would be, that this also affects the timeout of the period. Thus, it might "freeze" the local time for more than 100ms. On the other hand, if the timestamp speed suddenly raises after some stable time, interpolated time can get too fast. This would shorten the period but nonetheless may result in drifting away into the far future. Now we would have the problem that we can't deliver the real time anymore until it has caught up because the output of Timer::curr_time shall be monotone. So, effectively local time might "freeze" again for more than 100ms. It would be a solution to not use the Trace::timestamp on ARM w/o HW but a function whose return value causes the Timer to never use interpolation because of its stability policy. Fixes #2400
2017-04-22 00:52:23 +02:00
static ::Timeout handler(env, ep, ticker);
_timeout = &handler;
2013-08-26 13:58:58 +02:00
}
/* sleep for 'timeout' jiffies, returning the remaining time (0 if elapsed) */
signed long schedule_timeout(signed long timeout)
{
	long const start = jiffies;

	_timeout->schedule(timeout);
	_timeout->wait();

	long const remaining = timeout - (jiffies - start);
	return remaining < 0 ? 0 : remaining;
}
/* identical to schedule_timeout — this emulation has no interruptible sleep */
long schedule_timeout_uninterruptible(signed long timeout)
{
	return schedule_timeout(timeout);
}
2013-08-26 13:58:58 +02:00
/*
 * Block until the next I/O signal is dispatched; the file, wait-queue,
 * and poll-table arguments are unused in this emulation.
 */
void poll_wait(struct file * filp, wait_queue_head_t * wait_address, poll_table *p)
{
	_timeout->wait();
}
/* a NULL poll table signals that the caller does not intend to wait */
bool poll_does_not_wait(const poll_table *p)
{
	return p == nullptr;
}
2013-08-26 13:58:58 +02:00
/******************
** linux/time.h **
******************/
/* seconds derived from the jiffies counter (linux/time.h) */
unsigned long get_seconds(void)
{
	return jiffies / HZ;
}
2013-08-26 13:58:58 +02:00
/*****************
** linux/gfp.h **
*****************/
/**
 * AVL-tree node representing one allocated page range
 *
 * Bundles the backing memory with an emulated 'struct page' descriptor
 * and supports address-based lookup (used by virt_to_head_page).
 */
class Avl_page : public Genode::Avl_node<Avl_page>
{
	private:

		Genode::addr_t  _addr;  /* start address of the backing memory */
		Genode::size_t  _size;  /* size of the range in bytes */
		struct page    *_page;  /* emulated Linux page descriptor */

	public:

		/* throws a plain int on failure: -1 (data), -2 (descriptor) */
		Avl_page(Genode::size_t size) : _size(size)
		{
			_addr =(Genode::addr_t)kmalloc(size, 0);
			if (!_addr)
				throw -1;

			_page = (struct page *) kzalloc(sizeof(struct page), 0);
			if (!_page) {
				kfree((void *)_addr);
				throw -2;
			}

			_page->addr = (void *)_addr;
			/* pages start out with one reference (see put_page) */
			atomic_set(&_page->_count, 1);

			lx_log(DEBUG_SLAB, "alloc page: %p addr: %lx-%lx", _page, _addr, _addr + _size);
		}

		virtual ~Avl_page()
		{
			lx_log(DEBUG_SLAB, "free page: %p addr: %lx-%lx", _page, _addr, _addr + _size);

			kfree((void *)_addr);
			kfree((void *)_page);
		}

		struct page* page() { return _page; }

		/* Avl_node interface: order nodes by ascending start address */
		bool higher(Avl_page *c)
		{
			return c->_addr > _addr;
		}

		/* return the node whose range [addr, addr+size) contains 'addr' */
		Avl_page *find_by_address(Genode::addr_t addr)
		{
			if (addr >= _addr && addr < _addr + _size)
				return this;

			bool side = addr > _addr;
			Avl_page *c = Avl_node<Avl_page>::child(side);
			return c ? c->find_by_address(addr) : 0;
		}
};
2013-08-26 13:58:58 +02:00
static Genode::Avl_tree<Avl_page> tree;
2013-08-26 13:58:58 +02:00
/* allocate 2^order pages and register them for address-based lookup */
struct page *alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	try {
		Avl_page *avl_page = new (lx_env->heap()) Avl_page(PAGE_SIZE << order);
		tree.insert(avl_page);
		return avl_page->page();
	} catch (...) { return 0; }
}
/*
 * Allocate a page fragment of at least 'fragsz' bytes
 *
 * Fix: the original passed 'fragsz / PAGE_SIZE' — a page COUNT — as the
 * 'order' argument of alloc_pages, which interprets it as a log2 order.
 * For large fragments this over-allocated exponentially (e.g., a 64 KiB
 * request became PAGE_SIZE << 16). Compute the proper order instead.
 */
void *__alloc_page_frag(struct page_frag_cache *nc,
                        unsigned int fragsz, gfp_t gfp_mask)
{
	/* smallest order such that PAGE_SIZE << order >= fragsz */
	unsigned int order = 0;
	while ((PAGE_SIZE << order) < fragsz)
		order++;

	struct page *page = alloc_pages(gfp_mask, order);
	if (!page) return nullptr;

	return page->addr;
}
void __free_page_frag(void *addr)
{
Avl_page *p = tree.first()->find_by_address((Genode::addr_t)addr);
tree.remove(p);
destroy(lx_env->heap(), p);
}
2013-08-26 13:58:58 +02:00
/****************
** linux/mm.h **
****************/
struct page *virt_to_head_page(const void *x)
{
Avl_page *p = tree.first()->find_by_address((Genode::addr_t)x);
2015-05-19 11:33:29 +02:00
lx_log(DEBUG_SLAB, "virt_to_head_page: %p page %p\n", x,p ? p->page() : 0);
2013-08-26 13:58:58 +02:00
return p ? p->page() : 0;
}
void put_page(struct page *page)
{
if (!atomic_dec_and_test(&page->_count))
return;
2015-05-19 11:33:29 +02:00
lx_log(DEBUG_SLAB, "put_page: %p", page);
2013-08-26 13:58:58 +02:00
Avl_page *p = tree.first()->find_by_address((Genode::addr_t)page->addr);
tree.remove(p);
destroy(lx_env->heap(), p);
2013-08-26 13:58:58 +02:00
}
/*
 * Format a trace event, prefix it with the time delta to the previous
 * event, and emit it via the thread's trace mechanism.
 *
 * NOTE(review): the divisor 2260000 converts timestamp ticks to
 * milliseconds and appears hard-coded for a ~2.26 GHz timestamp source —
 * verify for the target platform.
 */
static void create_event(char const *fmt, va_list list)
{
	enum { BUFFER_LEN = 64, EVENT_LEN = BUFFER_LEN + 32 };
	char buf[BUFFER_LEN];

	using namespace Genode;

	String_console sc(buf, BUFFER_LEN);
	sc.vprintf(fmt, list);

	char event[EVENT_LEN];

	/* timestamp of the previous event, persists across calls */
	static Trace::Timestamp last = 0;
	Trace::Timestamp now = Trace::timestamp();
	Genode::snprintf(event, sizeof(event), "delta = %llu ms %s",
	                 (now - last) / 2260000, buf);
	Thread::trace(event);
	last = now;
}
/* C-linkage varargs front end for create_event */
extern "C" void lx_trace_event(char const *fmt, ...)
{
	va_list list;
	va_start(list, fmt);
	create_event(fmt, list);
	va_end(list);
}
/*****************
** linux/uio.h **
*****************/
2017-05-18 16:22:46 +02:00
/*
 * Copy up to 'bytes' between a linear buffer and an iov_iter
 *
 * \param addr     linear buffer
 * \param bytes    number of bytes to transfer
 * \param i        iterator describing the other side of the copy
 * \param to_iter  true: copy addr -> iter, false: copy iter -> addr
 *
 * \return number of bytes copied, 0 on any unsupported or empty case
 *
 * Only single-segment iterators are supported; multi-segment iterators
 * and iterators whose first segment cannot hold 'count' are rejected.
 */
static inline size_t _copy_iter(void *addr, size_t bytes,
                                struct iov_iter *i, bool to_iter)
{
	if (addr == nullptr) { return 0; }

	/* nothing to do for an empty or invalid iterator */
	if (i->count == 0 ||
	    i->iov == nullptr ||
	    i->iov->iov_len == 0) {
		return 0;
	}

	if (i->nr_segs > 1) {
		Genode::error(__func__, ": too many segments ", i->nr_segs);
		return 0;
	}

	/* make sure the whole iter fits as there is only 1 iovec */
	if (i->iov->iov_len < i->count) {
		Genode::error(__func__, ": "
		              "iov->iov_len: ", i->iov->iov_len, " < "
		              "i->count: ", i->count);
		return 0;
	}

	struct iovec const * const iov     = i->iov;
	size_t               const iov_len = iov->iov_len;
	void               * const base    = (iov->iov_base + i->iov_offset);

	/* never transfer more than the iterator has left */
	if (bytes > i->count) { bytes = i->count; }

	/* clamp to the segment length, then pick the copy direction */
	size_t       const len = (size_t)(bytes < iov_len ? bytes : iov_len);
	void       * const dst = to_iter ? base : addr;
	void const * const src = to_iter ? addr : base;

	/* actual function body */
	{
		Genode::memcpy(dst, src, len);
	}

	/* advance the iterator by the amount copied */
	i->iov_offset += len;
	i->count      -= len;

	return len;
}
2017-05-18 16:22:46 +02:00
/* copy from the iterator into the linear buffer 'addr' */
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	return _copy_iter(addr, bytes, i, false);
}


/* copy from the linear buffer 'addr' into the iterator */
size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	return _copy_iter(addr, bytes, i, true);
}


/* copy page content starting at 'offset' into the iterator */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
	return copy_to_iter(reinterpret_cast<unsigned char*>(page->addr) + offset, bytes, i);
}


/* fill page content starting at 'offset' from the iterator */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                           struct iov_iter *i)
{
	return copy_from_iter(reinterpret_cast<unsigned char*>(page->addr) + offset, bytes, i);
}
2017-05-18 16:22:46 +02:00
/*
 * Copy between a linear buffer and an iov_iter while folding the data
 * into the checksum '*csum'
 *
 * Mirrors _copy_iter's constraints: only single-segment iterators whose
 * first segment covers 'count' are supported.
 *
 * NOTE(review): both directions use csum_and_copy_from_user — presumably
 * equivalent here because this emulation has no user/kernel split;
 * confirm against the csum helper's implementation.
 */
static size_t _csum_and_copy_iter(void *addr, size_t bytes, __wsum *csum,
                                  struct iov_iter *i, bool to_iter)
{
	if (addr == nullptr) { return 0; }

	/* nothing to do for an empty or invalid iterator */
	if (i->count == 0 ||
	    i->iov == nullptr ||
	    i->iov->iov_len == 0) {
		return 0;
	}

	if (i->nr_segs > 1) {
		Genode::error(__func__, ": too many segments ", i->nr_segs);
		return 0;
	}

	/* make sure the whole iter fits as there is only 1 iovec */
	if (i->iov->iov_len < i->count) {
		Genode::error(__func__, ": "
		              "iov->iov_len: ", i->iov->iov_len, " < "
		              "i->count: ", i->count);
		return 0;
	}

	struct iovec const * const iov     = i->iov;
	size_t               const iov_len = iov->iov_len;
	void               * const base    = (iov->iov_base + i->iov_offset);

	/* never transfer more than the iterator has left */
	if (bytes > i->count) { bytes = i->count; }

	/* clamp to the segment length, then pick the copy direction */
	size_t       const len = (size_t)(bytes < iov_len ? bytes : iov_len);
	void       * const dst = to_iter ? base : addr;
	void const * const src = to_iter ? addr : base;

	/* actual function body */
	{
		int err = 0;
		__wsum next = csum_and_copy_from_user(src, dst, len, 0, &err);

		/* a checksum/copy failure is fatal in this emulation */
		if (err) {
			Genode::error(__func__, ": err: ", err, " - sleeping");
			Genode::sleep_forever();
		}

		*csum = csum_block_add(*csum, next, 0);
	}

	/* advance the iterator by the amount copied */
	i->iov_offset += len;
	i->count      -= len;

	return len;
}
2017-05-18 16:22:46 +02:00
/* checksum and copy from the iterator into the linear buffer 'addr' */
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
{
	return _csum_and_copy_iter(addr, bytes, csum, i, false);
}


/* checksum and copy from the linear buffer 'addr' into the iterator */
size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i)
{
	return _csum_and_copy_iter(addr, bytes, csum, i, true);
}
/******************
 ** linux/wait.h **
 ******************/

/* wait queues are not emulated, so waking one up is a no-op */
void __wake_up(wait_queue_head_t *q, bool all) { }
/***********************
** linux/workqueue.h **
***********************/
/* timer callback: run the work function of the given delayed_work object */
static void execute_delayed_work(unsigned long dwork)
{
	delayed_work *d = (delayed_work *)dwork;
	d->work.func(&d->work);
}
/*
 * Schedule or re-arm delayed work
 *
 * Zero-delay work is executed immediately in the caller's context; the
 * workqueue argument is ignored in this emulation.
 */
bool mod_delayed_work(struct workqueue_struct *wq, struct delayed_work *dwork,
                      unsigned long delay)
{
	/* treat delayed work without delay like any other work */
	if (delay == 0) {
		execute_delayed_work((unsigned long)dwork);
	} else {
		/* lazily initialize the timer on first use */
		if (!dwork->timer.function) {
			setup_timer(&dwork->timer, execute_delayed_work,
			            (unsigned long)dwork);
		}
		mod_timer(&dwork->timer, jiffies + delay);
	}
	return true;
}
/* schedule delayed work on the (ignored) system workqueue */
int schedule_delayed_work(struct delayed_work *dwork, unsigned long delay)
{
	return mod_delayed_work(0, dwork, delay);
}
/*******************
** linux/timer.h **
*******************/
/*
 * Round 'j' to a full-second jiffies boundary
 *
 * Rounds down by default; rounds up when the remainder is at least a
 * quarter second or when 'force_up' is set.
 */
static unsigned long round_jiffies(unsigned long j, bool force_up)
{
	unsigned const remainder = j % HZ;

	/*
	 * from timer.c
	 *
	 * If the target jiffie is just after a whole second (which can happen
	 * due to delays of the timer irq, long irq off times etc etc) then
	 * we should round down to the whole second, not up. Use 1/4th second
	 * as cutoff for this rounding as an extreme upper bound for this.
	 * But never round down if @force_up is set.
	 */

	/* per default round down */
	unsigned long rounded = j - remainder;

	/* round up if remainder more than 1/4 second (or if we're forced to) */
	if (remainder >= HZ/4 || force_up)
		rounded += HZ;

	return rounded;
}
/* round 'j' to a whole second, preferring to round down */
unsigned long round_jiffies(unsigned long j)
{
	return round_jiffies(j, false);
}

/* round 'j' to a whole second, always upwards */
unsigned long round_jiffies_up(unsigned long j)
{
	return round_jiffies(j, true);
}

/* round a relative delta so the absolute target falls on a whole second */
unsigned long round_jiffies_relative(unsigned long j)
{
	return round_jiffies(j + jiffies, false) - jiffies;
}