diff --git a/repos/base-hw/include/kernel/interface.h b/repos/base-hw/include/kernel/interface.h index 1e2e881c6..02decc116 100644 --- a/repos/base-hw/include/kernel/interface.h +++ b/repos/base-hw/include/kernel/interface.h @@ -36,13 +36,12 @@ namespace Kernel constexpr Call_arg call_id_cancel_next_await_signal() { return 10; } constexpr Call_arg call_id_ack_signal() { return 11; } constexpr Call_arg call_id_print_char() { return 12; } - constexpr Call_arg call_id_update_data_region() { return 13; } - constexpr Call_arg call_id_update_instr_region() { return 14; } - constexpr Call_arg call_id_ack_cap() { return 15; } - constexpr Call_arg call_id_delete_cap() { return 16; } - constexpr Call_arg call_id_timeout() { return 17; } - constexpr Call_arg call_id_timeout_max_us() { return 18; } - constexpr Call_arg call_id_time() { return 19; } + constexpr Call_arg call_id_cache_coherent_region() { return 13; } + constexpr Call_arg call_id_ack_cap() { return 14; } + constexpr Call_arg call_id_delete_cap() { return 15; } + constexpr Call_arg call_id_timeout() { return 16; } + constexpr Call_arg call_id_timeout_max_us() { return 17; } + constexpr Call_arg call_id_time() { return 18; } /***************************************************************** @@ -176,26 +175,16 @@ namespace Kernel } /** - * Globally apply writes to a data region in the current domain + * Enforce coherent view (I-/D-Caches) on memory region * * \param base base of the region within the current domain * \param size size of the region */ - inline void update_data_region(addr_t const base, size_t const size) + inline void cache_coherent_region(addr_t const base, size_t const size) { - call(call_id_update_data_region(), (Call_arg)base, (Call_arg)size); + call(call_id_cache_coherent_region(), (Call_arg)base, (Call_arg)size); } - /** - * Globally apply writes to an instruction region in the current domain - * - * \param base base of the region within the current domain - * \param size size of the region - 
*/ - inline void update_instr_region(addr_t const base, size_t const size) - { - call(call_id_update_instr_region(), (Call_arg)base, (Call_arg)size); - } /** * Send request message and await receipt of corresponding reply message diff --git a/repos/base-hw/lib/mk/spec/cortex_a8/core-hw.inc b/repos/base-hw/lib/mk/spec/cortex_a8/core-hw.inc index ce5f31f91..c5d7f1550 100644 --- a/repos/base-hw/lib/mk/spec/cortex_a8/core-hw.inc +++ b/repos/base-hw/lib/mk/spec/cortex_a8/core-hw.inc @@ -8,7 +8,6 @@ INC_DIR += $(BASE_DIR)/../base-hw/src/core/spec/cortex_a8 # add C++ sources -SRC_CC += spec/cortex_a8/cpu.cc SRC_CC += kernel/cpu_up.cc SRC_CC += kernel/lock.cc diff --git a/repos/base-hw/src/bootstrap/spec/riscv/cpu.cc b/repos/base-hw/src/bootstrap/spec/riscv/cpu.cc deleted file mode 100644 index ca079ad11..000000000 --- a/repos/base-hw/src/bootstrap/spec/riscv/cpu.cc +++ /dev/null @@ -1,18 +0,0 @@ -/* - * \brief CPU core implementation - * \author Sebastian Sumpf - * \author Stefan Kalkowski - * \date 2016-02-10 - */ - -/* - * Copyright (C) 2016-2017 Genode Labs GmbH - * - * This file is part of the Genode OS framework, which is distributed - * under the terms of the GNU Affero General Public License version 3. 
- */ - -#include - -void Genode::Cpu::translation_added(addr_t const addr, size_t const size) { - Genode::Cpu::sfence(); } diff --git a/repos/base-hw/src/core/kernel/thread.cc b/repos/base-hw/src/core/kernel/thread.cc index c3466504c..32aa476f0 100644 --- a/repos/base-hw/src/core/kernel/thread.cc +++ b/repos/base-hw/src/core/kernel/thread.cc @@ -727,8 +727,7 @@ void Thread::_call() /* switch over unrestricted kernel calls */ unsigned const call_id = user_arg_0(); switch (call_id) { - case call_id_update_data_region(): _call_update_data_region(); return; - case call_id_update_instr_region(): _call_update_instr_region(); return; + case call_id_cache_coherent_region(): _call_cache_coherent_region(); return; case call_id_stop_thread(): _call_stop_thread(); return; case call_id_restart_thread(): _call_restart_thread(); return; case call_id_yield_thread(): _call_yield_thread(); return; diff --git a/repos/base-hw/src/core/kernel/thread.h b/repos/base-hw/src/core/kernel/thread.h index ae088d66e..5554067be 100644 --- a/repos/base-hw/src/core/kernel/thread.h +++ b/repos/base-hw/src/core/kernel/thread.h @@ -228,8 +228,7 @@ class Kernel::Thread : private Kernel::Object, public Cpu_job, private Timeout void _call_send_request_msg(); void _call_send_reply_msg(); void _call_invalidate_tlb(); - void _call_update_data_region(); - void _call_update_instr_region(); + void _call_cache_coherent_region(); void _call_print_char(); void _call_await_signal(); void _call_pending_signal(); diff --git a/repos/base-hw/src/core/platform_pd.cc b/repos/base-hw/src/core/platform_pd.cc index ed025d0b3..b28ae2348 100644 --- a/repos/base-hw/src/core/platform_pd.cc +++ b/repos/base-hw/src/core/platform_pd.cc @@ -63,6 +63,17 @@ bool Hw::Address_space::insert_translation(addr_t virt, addr_t phys, } +bool Hw::Address_space::lookup_translation(addr_t const virt, addr_t & phys) +{ + /** FIXME: for the time-being we use it without lock, + * because it is used directly by the kernel when 
cache_coherent_region + * gets called. In the future it would be better if core provided an API + * for it and did the lookup while holding the lock + */ + return _tt.lookup_translation(virt, phys, _tt_alloc); +} + + void Hw::Address_space::flush(addr_t virt, size_t size, Core_local_addr) { Lock::Guard guard(_lock); diff --git a/repos/base-hw/src/core/platform_pd.h b/repos/base-hw/src/core/platform_pd.h index fb0c67909..ceb680d81 100644 --- a/repos/base-hw/src/core/platform_pd.h +++ b/repos/base-hw/src/core/platform_pd.h @@ -119,6 +119,8 @@ class Hw::Address_space : public Genode::Address_space bool insert_translation(Genode::addr_t virt, Genode::addr_t phys, Genode::size_t size, Genode::Page_flags flags); + bool lookup_translation(Genode::addr_t const virt, + Genode::addr_t & phys); /***************************** ** Address-space interface ** diff --git a/repos/base-hw/src/core/ram_dataspace_support.cc b/repos/base-hw/src/core/ram_dataspace_support.cc index cabb03b98..fd06127ac 100644 --- a/repos/base-hw/src/core/ram_dataspace_support.cc +++ b/repos/base-hw/src/core/ram_dataspace_support.cc @@ -44,15 +44,9 @@ void Ram_dataspace_factory::_clear_ds (Dataspace_component &ds) return; } - /* clear dataspace */ - memset(virt_addr, 0, page_rounded_size); - - /* uncached dataspaces need to be flushed from the data cache */ - if (ds.cacheability() != CACHED) - Kernel::update_data_region((addr_t)virt_addr, page_rounded_size); - - /* invalidate the dataspace memory from instruction cache */ - Kernel::update_instr_region((addr_t)virt_addr, page_rounded_size); + /* depending on the architecture, cache maintenance might be necessary */ + Cpu::clear_memory_region((addr_t)virt_addr, page_rounded_size, + ds.cacheability() != CACHED); /* unmap dataspace from core */ if (!unmap_local((addr_t)virt_addr, num_pages)) diff --git a/repos/base-hw/src/core/spec/arm/cpu.cc b/repos/base-hw/src/core/spec/arm/cpu.cc index 4d2201961..379260e87 100644 --- a/repos/base-hw/src/core/spec/arm/cpu.cc 
+++ b/repos/base-hw/src/core/spec/arm/cpu.cc @@ -12,6 +12,7 @@ */ #include +#include #include #include @@ -104,3 +105,104 @@ void Arm_cpu::switch_to(Arm_cpu::Context&, Arm_cpu::Mmu_context & o) Cpu::synchronization_barrier(); } } + + +template +static inline void cache_maintainance(addr_t const base, + size_t const size, + size_t const cache_line_size, + FUNC & func) +{ + /** + * Although the ARMv7 reference manual states that addresses do not + * need to be cacheline aligned, we observed problems when not doing so + * on i.MX6 Quad Sabrelite (maybe Cortex A9 generic issue?). + * Therefore, we align it here. + */ + addr_t start = base & ~(cache_line_size-1); + addr_t const end = base + size; + + /* iterate over all cachelines of the given region and apply the functor */ + for (; start < end; start += cache_line_size) func(start); +} + + +void Arm_cpu::cache_coherent_region(addr_t const base, + size_t const size) +{ + Genode::memory_barrier(); + + /** + * according to ARMv7 ref. manual: clean lines from data-cache, + * invalidate line in instruction-cache and branch-predictor + */ + auto lambda = [] (addr_t const base) { + Cpu::Dccmvac::write(base); + Cpu::synchronization_barrier(); + Cpu::Icimvau::write(base); + Cpu::Bpimva::write(base); + Cpu::synchronization_barrier(); + }; + + /* take the smallest cacheline, either from I-, or D-cache */ + size_t const cache_line_size = Genode::min(Cpu::instruction_cache_line_size(), + Cpu::data_cache_line_size()); + cache_maintainance(base, size, cache_line_size, lambda); +} + + +void Arm_cpu::clean_data_cache_by_virt_region(addr_t const base, + size_t const size) +{ + auto lambda = [] (addr_t const base) { Dccmvac::write(base); }; + cache_maintainance(base, size, Cpu::data_cache_line_size(), lambda); +} + + +void Arm_cpu::clean_invalidate_data_cache_by_virt_region(addr_t const base, + size_t const size) +{ + auto lambda = [] (addr_t const base) { Dccimvac::write(base); }; + cache_maintainance(base, size, 
Cpu::data_cache_line_size(), lambda); +} + + +/** + * Slightly more efficient method than Genode::memset, + * using word-wise assignment + */ +static inline void memzero(addr_t const addr, size_t const size) +{ + if (align_addr(addr, 2) == addr && align_addr(size, 2) == size) { + char * base = (char*) addr; + unsigned count = size/4; + for (; count--; base += 4) *((int*)base) = 0; + } else { + memset((void*)addr, 0, size); + } +} + + +void Arm_cpu::clear_memory_region(addr_t const addr, + size_t const size, + bool changed_cache_properties) +{ + Genode::memory_barrier(); + + memzero(addr, size); + + /** + * if the cache properties changed, this means we potentially zeroed + * DMA memory, which needs to be evicted from the D-cache + */ + if (changed_cache_properties) { + Cpu::clean_invalidate_data_cache_by_virt_region(addr, size); + } + + /** + * Invalidate the instruction cache, maybe lines from this memory are + * still within it. + */ + invalidate_instr_cache(); + Cpu::synchronization_barrier(); +} diff --git a/repos/base-hw/src/core/spec/arm/cpu_support.h b/repos/base-hw/src/core/spec/arm/cpu_support.h index 6aad17e86..13a35d996 100644 --- a/repos/base-hw/src/core/spec/arm/cpu_support.h +++ b/repos/base-hw/src/core/spec/arm/cpu_support.h @@ -69,37 +69,24 @@ struct Genode::Arm_cpu : public Hw::Arm_cpu asm volatile ("mcr p15, 0, %0, c7, c5, 0" :: "r" (0) : ); } /** - * Invalidate all branch predictions + * Clean data-cache for virtual region 'base' - 'base + size' */ - static void invalidate_branch_predicts() { - asm volatile ("mcr p15, 0, r0, c7, c5, 6" ::: "r0"); }; - - static constexpr addr_t line_size = 1 << Board::CACHE_LINE_SIZE_LOG2; - static constexpr addr_t line_align_mask = ~(line_size - 1); + static void clean_data_cache_by_virt_region(addr_t const base, + size_t const size); /** * Clean and invalidate data-cache for virtual region * 'base' - 'base + size' */ - static void clean_invalidate_data_cache_by_virt_region(addr_t base, - size_t const size) - { - 
addr_t const top = base + size; - base &= line_align_mask; - for (; base < top; base += line_size) { Dccimvac::write(base); } - } + static void clean_invalidate_data_cache_by_virt_region(addr_t const base, + size_t const size); - /** - * Invalidate instruction-cache for virtual region - * 'base' - 'base + size' - */ - void invalidate_instr_cache_by_virt_region(addr_t base, - size_t const size) - { - addr_t const top = base + size; - base &= line_align_mask; - for (; base < top; base += line_size) { Icimvau::write(base); } - } + static void clear_memory_region(addr_t const addr, + size_t const size, + bool changed_cache_properties); + + static void cache_coherent_region(addr_t const addr, + size_t const size); /** * Invalidate TLB regarding the given address space id diff --git a/repos/base-hw/src/core/spec/arm/kernel/thread.cc b/repos/base-hw/src/core/spec/arm/kernel/thread.cc index dd6cb5ac5..ad9ba1ee1 100644 --- a/repos/base-hw/src/core/spec/arm/kernel/thread.cc +++ b/repos/base-hw/src/core/spec/arm/kernel/thread.cc @@ -12,6 +12,9 @@ * under the terms of the GNU Affero General Public License version 3. 
*/ +#include + +#include #include #include #include @@ -52,25 +55,33 @@ void Thread::exception(Cpu & cpu) } -void Kernel::Thread::_call_update_data_region() +void Kernel::Thread::_call_cache_coherent_region() { - Cpu &cpu = cpu_pool().cpu(Cpu::executing_id()); + addr_t base = (addr_t) user_arg_1(); + size_t const size = (size_t) user_arg_2(); - auto base = (addr_t)user_arg_1(); - auto const size = (size_t)user_arg_2(); - cpu.clean_invalidate_data_cache_by_virt_region(base, size); - cpu.invalidate_instr_cache(); -} + /** + * sanity check that only one small page is affected, + * because we only want to lookup one page in the page tables + * to limit execution time within the kernel + */ + if (Hw::trunc_page(base) != Hw::trunc_page(base+size-1)) { + Genode::raw(*this, " tried to make cross-page region cache coherent ", + (void*)base, " ", size); + return; + } - -void Kernel::Thread::_call_update_instr_region() -{ - Cpu &cpu = cpu_pool().cpu(Cpu::executing_id()); - - auto base = (addr_t)user_arg_1(); - auto const size = (size_t)user_arg_2(); - cpu.clean_invalidate_data_cache_by_virt_region(base, size); - cpu.invalidate_instr_cache_by_virt_region(base, size); + /** + * Lookup whether the page is backed, and if so make the memory coherent + * in between I-, and D-cache + */ + addr_t phys = 0; + if (pd().platform_pd().lookup_translation(base, phys)) { + Cpu::cache_coherent_region(base, size); + } else { + Genode::raw(*this, " tried to make invalid address ", + base, " cache coherent"); + } } diff --git a/repos/base-hw/src/core/spec/arm_v6/cpu.h b/repos/base-hw/src/core/spec/arm_v6/cpu.h index dfa3377c0..eeb0f9187 100644 --- a/repos/base-hw/src/core/spec/arm_v6/cpu.h +++ b/repos/base-hw/src/core/spec/arm_v6/cpu.h @@ -23,6 +23,38 @@ namespace Genode { struct Cpu; } struct Genode::Cpu : Arm_cpu { static inline void synchronization_barrier() {} + + static inline size_t data_cache_line_size() + { + struct Ctr : Genode::Register<32> { + struct D_min_line : Bitfield<12,2> {}; + 
}; + + static size_t data_cache_line_size = 0; + + if (!data_cache_line_size) { + data_cache_line_size = + (1 << (Ctr::D_min_line::get(Arm_cpu::Ctr::read())+1)) * sizeof(addr_t); + } + + return data_cache_line_size; + } + + static inline size_t instruction_cache_line_size() + { + struct Ctr : Genode::Register<32> { + struct I_min_line : Bitfield<0,2> {}; + }; + + static size_t instruction_cache_line_size = 0; + + if (!instruction_cache_line_size) { + instruction_cache_line_size = + (1 << (Ctr::I_min_line::get(Arm_cpu::Ctr::read())+1)) * sizeof(addr_t); + } + + return instruction_cache_line_size; + } }; #endif /* _CORE__SPEC__ARM_V6__CPU_H_ */ diff --git a/repos/base-hw/src/core/spec/arm_v6/translation_table.h b/repos/base-hw/src/core/spec/arm_v6/translation_table.h index 4da0d2656..bb283c584 100644 --- a/repos/base-hw/src/core/spec/arm_v6/translation_table.h +++ b/repos/base-hw/src/core/spec/arm_v6/translation_table.h @@ -26,7 +26,7 @@ constexpr bool Hw::Page_table::Descriptor_base::_smp() { return false; } void Hw::Page_table::_translation_added(unsigned long addr, unsigned long size) { - Genode::Arm_cpu::clean_invalidate_data_cache_by_virt_region(addr, size); + Genode::Arm_cpu::clean_data_cache_by_virt_region(addr, size); } #endif /* _CORE__SPEC__ARM_V6__TRANSLATION_TABLE_H_ */ diff --git a/repos/base-hw/src/core/spec/arm_v7/cpu_support.h b/repos/base-hw/src/core/spec/arm_v7/cpu_support.h index a08a515c1..4085c7506 100644 --- a/repos/base-hw/src/core/spec/arm_v7/cpu_support.h +++ b/repos/base-hw/src/core/spec/arm_v7/cpu_support.h @@ -44,8 +44,40 @@ struct Genode::Arm_v7_cpu : Arm_cpu static inline void synchronization_barrier() { - asm volatile("dsb sy\n" - "isb sy\n" ::: "memory"); + asm volatile("dsb\n" + "isb\n" ::: "memory"); + } + + static inline size_t data_cache_line_size() + { + struct Ctr : Genode::Register<32> { + struct D_min_line : Bitfield<16,4> {}; + }; + + static size_t data_cache_line_size = 0; + + if (!data_cache_line_size) { + 
data_cache_line_size = + (1 << Ctr::D_min_line::get(Arm_cpu::Ctr::read())) * sizeof(addr_t); + } + + return data_cache_line_size; + } + + static inline size_t instruction_cache_line_size() + { + struct Ctr : Genode::Register<32> { + struct I_min_line : Bitfield<0,4> {}; + }; + + static size_t instruction_cache_line_size = 0; + + if (!instruction_cache_line_size) { + instruction_cache_line_size = + (1 << Ctr::I_min_line::get(Arm_cpu::Ctr::read())) * sizeof(addr_t); + } + + return instruction_cache_line_size; } }; diff --git a/repos/base-hw/src/core/spec/arm_v8/cpu.cc b/repos/base-hw/src/core/spec/arm_v8/cpu.cc index bb4b526ae..58c641fc9 100644 --- a/repos/base-hw/src/core/spec/arm_v8/cpu.cc +++ b/repos/base-hw/src/core/spec/arm_v8/cpu.cc @@ -14,6 +14,7 @@ #include #include #include +#include #include @@ -73,33 +74,72 @@ Genode::Cpu::Mmu_context::~Mmu_context() } -static constexpr Genode::addr_t line_size = 1 << Board::CACHE_LINE_SIZE_LOG2; -static constexpr Genode::addr_t line_align_mask = ~(line_size - 1); - - -void Genode::Cpu::clean_data_cache_by_virt_region(addr_t base, size_t sz) +Genode::size_t Genode::Cpu::cache_line_size() { - addr_t const top = base + sz; - base &= line_align_mask; - for (; base < top; base += line_size) { - asm volatile("dc cvau, %0" :: "r" (base)); } + static Genode::size_t cache_line_size = 0; + + if (!cache_line_size) { + Genode::size_t i = 1 << Ctr_el0::I_min_line::get(Ctr_el0::read()); + Genode::size_t d = 1 << Ctr_el0::D_min_line::get(Ctr_el0::read()); + cache_line_size = Genode::min(i,d) * 4; /* word size is fixed in ARM */ + } + return cache_line_size; } -void Genode::Cpu::clean_invalidate_data_cache_by_virt_region(addr_t base, size_t sz) +template +static inline void cache_maintainance(Genode::addr_t const base, + Genode::size_t const size, + FUNC & func) { - addr_t const top = base + sz; - base &= line_align_mask; - for (; base < top; base += line_size) { - asm volatile("dc civac, %0" :: "r" (base)); } + Genode::addr_t start = 
(Genode::addr_t) base; + Genode::addr_t const end = base + size; + for (; start < end; start += Genode::Cpu::cache_line_size()) func(start); } -void Genode::Cpu::invalidate_instr_cache_by_virt_region(addr_t base, - size_t size) +void Genode::Cpu::cache_coherent_region(addr_t const base, + size_t const size) { - addr_t const top = base + size; - base &= line_align_mask; - for (; base < top; base += line_size) { - asm volatile("ic ivau, %0" :: "r" (base)); } + Genode::memory_barrier(); + + auto lambda = [] (addr_t const base) { + asm volatile("dc cvau, %0" :: "r" (base)); + asm volatile("dsb ish"); + asm volatile("ic ivau, %0" :: "r" (base)); + asm volatile("dsb ish"); + asm volatile("isb"); + }; + + cache_maintainance(base, size, lambda); +} + + +void Genode::Cpu::clear_memory_region(addr_t const addr, + size_t const size, + bool changed_cache_properties) +{ + Genode::memory_barrier(); + + /* normal memory is cleared by D-cache zeroing */ + auto normal = [] (addr_t const base) { + asm volatile("dc zva, %0" :: "r" (base)); + asm volatile("ic ivau, %0" :: "r" (base)); + }; + + /* DMA memory gets additionally evicted from the D-cache */ + auto dma = [] (addr_t const base) { + asm volatile("dc zva, %0" :: "r" (base)); + asm volatile("dc civac, %0" :: "r" (base)); + asm volatile("ic ivau, %0" :: "r" (base)); + }; + + if (changed_cache_properties) { + cache_maintainance(addr, size, dma); + } else { + cache_maintainance(addr, size, normal); + } + + asm volatile("dsb ish"); + asm volatile("isb"); } diff --git a/repos/base-hw/src/core/spec/arm_v8/cpu.h b/repos/base-hw/src/core/spec/arm_v8/cpu.h index 73505d8c9..e3b114ccc 100644 --- a/repos/base-hw/src/core/spec/arm_v8/cpu.h +++ b/repos/base-hw/src/core/spec/arm_v8/cpu.h @@ -85,10 +85,13 @@ struct Genode::Cpu : Hw::Arm_64_cpu */ static unsigned executing_id() { return Cpu::Mpidr::read() & 0xff; } + static size_t cache_line_size(); + static void clear_memory_region(addr_t const addr, + size_t const size, + bool 
changed_cache_properties); - static void clean_data_cache_by_virt_region(addr_t, size_t); - static void clean_invalidate_data_cache_by_virt_region(addr_t, size_t); - static void invalidate_instr_cache_by_virt_region(addr_t, size_t); + static void cache_coherent_region(addr_t const addr, + size_t const size); }; #endif /* _CORE__SPEC__ARM_V8__CPU_H_ */ diff --git a/repos/base-hw/src/core/spec/arm_v8/kernel/thread.cc b/repos/base-hw/src/core/spec/arm_v8/kernel/thread.cc index 196bf10e4..b81bb78b0 100644 --- a/repos/base-hw/src/core/spec/arm_v8/kernel/thread.cc +++ b/repos/base-hw/src/core/spec/arm_v8/kernel/thread.cc @@ -11,6 +11,8 @@ * under the terms of the GNU Affero General Public License version 3. */ +#include + #include #include #include @@ -104,21 +106,33 @@ bool Kernel::Pd::invalidate_tlb(Cpu &, addr_t addr, size_t size) } -void Kernel::Thread::_call_update_data_region() +void Kernel::Thread::_call_cache_coherent_region() { - addr_t const base = (addr_t)user_arg_1(); - size_t const size = (size_t)user_arg_2(); - Cpu::clean_invalidate_data_cache_by_virt_region(base, size); - Cpu::invalidate_instr_cache_by_virt_region(base, size); -} + addr_t base = (addr_t) user_arg_1(); + size_t const size = (size_t) user_arg_2(); + /** + * sanity check that only one small page is affected, + * because we only want to lookup one page in the page tables + * to limit execution time within the kernel + */ + if (Hw::trunc_page(base) != Hw::trunc_page(base+size-1)) { + Genode::raw(*this, " tried to make cross-page region cache coherent ", + (void*)base, " ", size); + return; + } -void Kernel::Thread::_call_update_instr_region() -{ - addr_t const base = (addr_t)user_arg_1(); - size_t const size = (size_t)user_arg_2(); - Cpu::clean_data_cache_by_virt_region(base, size); - Cpu::invalidate_instr_cache_by_virt_region(base, size); + /** + * Lookup whether the page is backed, and if so make the memory coherent + * in between I-, and D-cache + */ + addr_t phys = 0; + if 
(pd().platform_pd().lookup_translation(base, phys)) { + Cpu::cache_coherent_region(base, size); + } else { + Genode::raw(*this, " tried to make invalid address ", + base, " cache coherent"); + } } diff --git a/repos/base-hw/src/core/spec/cortex_a8/cpu.cc b/repos/base-hw/src/core/spec/cortex_a8/cpu.cc deleted file mode 100644 index a265f82ae..000000000 --- a/repos/base-hw/src/core/spec/cortex_a8/cpu.cc +++ /dev/null @@ -1,33 +0,0 @@ -/* - * \brief CPU driver for core Arm Cortex A8 specific implementation - * \author Martin stein - * \author Stefan Kalkowski - * \date 2015-12-14 - */ - -/* - * Copyright (C) 2015-2017 Genode Labs GmbH - * - * This file is part of the Genode OS framework, which is distributed - * under the terms of the GNU Affero General Public License version 3. - */ - -#include -#include -#include - - -void Genode::Cpu::translation_added(Genode::addr_t const base, - Genode::size_t const size) -{ - using namespace Kernel; - - /* - * The Cortex-A8 CPU can't use the L1 cache on page-table - * walks. Therefore, as the page-tables lie in write-back cacheable - * memory we've to clean the corresponding cache-lines even when a - * page table entry is added. We only do this as core as the kernel - * adds translations solely before MMU and caches are enabled. 
- */ - Cpu::clean_invalidate_data_cache_by_virt_region(base, size); -} diff --git a/repos/base-hw/src/core/spec/cortex_a8/cpu.h b/repos/base-hw/src/core/spec/cortex_a8/cpu.h index 7631f5432..84c22142f 100644 --- a/repos/base-hw/src/core/spec/cortex_a8/cpu.h +++ b/repos/base-hw/src/core/spec/cortex_a8/cpu.h @@ -18,18 +18,6 @@ /* core includes */ #include -namespace Genode { struct Cpu; } - - -struct Genode::Cpu : Arm_v7_cpu -{ - /** - * Post processing after a translation was added to a translation table - * - * \param addr virtual address of the translation - * \param size size of the translation - */ - static void translation_added(addr_t const addr, size_t const size); -}; +namespace Genode { using Cpu = Arm_v7_cpu; } #endif /* _CORE__SPEC__CORTEX_A8__CPU_H_ */ diff --git a/repos/base-hw/src/core/spec/cortex_a8/translation_table.h b/repos/base-hw/src/core/spec/cortex_a8/translation_table.h index 71a580176..2bb87e730 100644 --- a/repos/base-hw/src/core/spec/cortex_a8/translation_table.h +++ b/repos/base-hw/src/core/spec/cortex_a8/translation_table.h @@ -34,7 +34,7 @@ void Hw::Page_table::_translation_added(unsigned long addr, unsigned long size) * page table entry is added. We only do this as core as the kernel * adds translations solely before MMU and caches are enabled. 
*/ - Genode::Cpu::clean_invalidate_data_cache_by_virt_region(addr, size); + Genode::Cpu::clean_data_cache_by_virt_region(addr, size); } #endif /* _CORE__SPEC__CORTEX_A8__TRANSLATION_TABLE_H_ */ diff --git a/repos/base-hw/src/core/spec/cortex_a9/cpu.h b/repos/base-hw/src/core/spec/cortex_a9/cpu.h index 4ac18546b..ec508b6ef 100644 --- a/repos/base-hw/src/core/spec/cortex_a9/cpu.h +++ b/repos/base-hw/src/core/spec/cortex_a9/cpu.h @@ -27,8 +27,8 @@ struct Genode::Cpu : Arm_v7_cpu * Clean and invalidate data-cache for virtual region * 'base' - 'base + size' */ - void clean_invalidate_data_cache_by_virt_region(addr_t base, - size_t const size) + static void clean_invalidate_data_cache_by_virt_region(addr_t const base, + size_t const size) { Arm_cpu::clean_invalidate_data_cache_by_virt_region(base, size); Board::l2_cache().clean_invalidate(); diff --git a/repos/base-hw/src/core/spec/riscv/cpu.cc b/repos/base-hw/src/core/spec/riscv/cpu.cc index 5a019a05b..d7d4e48d7 100644 --- a/repos/base-hw/src/core/spec/riscv/cpu.cc +++ b/repos/base-hw/src/core/spec/riscv/cpu.cc @@ -74,3 +74,13 @@ void Genode::Cpu::mmu_fault(Context &, Kernel::Thread_fault & f) f.addr = Genode::Cpu::Sbadaddr::read(); f.type = Kernel::Thread_fault::PAGE_MISSING; } + + +void Genode::Cpu::clear_memory_region(addr_t const addr, + size_t const size, bool) +{ + memset((void*)addr, 0, size); + + /* FIXME: is this really necessary? 
*/ + Genode::Cpu::sfence(); +} diff --git a/repos/base-hw/src/core/spec/riscv/cpu.h b/repos/base-hw/src/core/spec/riscv/cpu.h index 98691a3c6..d85ce509d 100644 --- a/repos/base-hw/src/core/spec/riscv/cpu.h +++ b/repos/base-hw/src/core/spec/riscv/cpu.h @@ -81,6 +81,10 @@ class Genode::Cpu : public Hw::Riscv_cpu static void mmu_fault(Context & c, Kernel::Thread_fault & f); static unsigned executing_id() { return 0; } + + static void clear_memory_region(addr_t const addr, + size_t const size, + bool changed_cache_properties); }; #endif /* _CORE__SPEC__RISCV__CPU_H_ */ diff --git a/repos/base-hw/src/core/spec/riscv/kernel/thread.cc b/repos/base-hw/src/core/spec/riscv/kernel/thread.cc index fd81e095d..6024a7bb1 100644 --- a/repos/base-hw/src/core/spec/riscv/kernel/thread.cc +++ b/repos/base-hw/src/core/spec/riscv/kernel/thread.cc @@ -51,13 +51,7 @@ void Thread::exception(Cpu & cpu) } -void Thread::_call_update_data_region() -{ - Genode::Cpu::sfence(); -} - - -void Thread::_call_update_instr_region() { } +void Thread::_call_cache_coherent_region() { } void Kernel::Thread::proceed(Cpu & cpu) diff --git a/repos/base-hw/src/core/spec/x86_64/cpu.cc b/repos/base-hw/src/core/spec/x86_64/cpu.cc index 54a2db435..667a604a3 100644 --- a/repos/base-hw/src/core/spec/x86_64/cpu.cc +++ b/repos/base-hw/src/core/spec/x86_64/cpu.cc @@ -131,3 +131,17 @@ unsigned Genode::Cpu::executing_id() unsigned const cpu_id = (stack_addr - stack_base) / kernel_stack_size; return cpu_id; } + + +void Genode::Cpu::clear_memory_region(Genode::addr_t const addr, + Genode::size_t const size, bool) +{ + if (align_addr(addr, 3) == addr && align_addr(size, 3) == size) { + Genode::addr_t start = addr; + Genode::size_t count = size / 8; + asm volatile ("rep stosq" : "+D" (start), "+c" (count) + : "a" (0) : "memory"); + } else { + Genode::memset((void*)addr, 0, size); + } +} diff --git a/repos/base-hw/src/core/spec/x86_64/cpu.h b/repos/base-hw/src/core/spec/x86_64/cpu.h index de5fd8273..62e645117 100644 --- 
a/repos/base-hw/src/core/spec/x86_64/cpu.h +++ b/repos/base-hw/src/core/spec/x86_64/cpu.h @@ -130,6 +130,11 @@ class Genode::Cpu : public Hw::X86_64_cpu */ static void invalidate_tlb() { Genode::Cpu::Cr3::write(Genode::Cpu::Cr3::read()); } + + + static void clear_memory_region(addr_t const addr, + size_t const size, + bool changed_cache_properties); }; #endif /* _CORE__SPEC__X86_64__CPU_H_ */ diff --git a/repos/base-hw/src/core/spec/x86_64/kernel/thread.cc b/repos/base-hw/src/core/spec/x86_64/kernel/thread.cc index d6aab3aa1..ab03d55af 100644 --- a/repos/base-hw/src/core/spec/x86_64/kernel/thread.cc +++ b/repos/base-hw/src/core/spec/x86_64/kernel/thread.cc @@ -31,10 +31,7 @@ void Kernel::Thread::Tlb_invalidation::execute() }; -void Kernel::Thread::_call_update_data_region() { } - - -void Kernel::Thread::_call_update_instr_region() { } +void Kernel::Thread::_call_cache_coherent_region() { } void Kernel::Thread::proceed(Cpu & cpu) diff --git a/repos/base-hw/src/include/hw/spec/arm/cpu.h b/repos/base-hw/src/include/hw/spec/arm/cpu.h index 55d897a7b..95508266e 100644 --- a/repos/base-hw/src/include/hw/spec/arm/cpu.h +++ b/repos/base-hw/src/include/hw/spec/arm/cpu.h @@ -199,6 +199,12 @@ struct Hw::Arm_cpu /* Branch predictor invalidate all */ ARM_CP15_REGISTER_32BIT(Bpiall, c7, c5, 0, 6); + /* Branch predictor invalidate by MVA */ + ARM_CP15_REGISTER_32BIT(Bpimva, c7, c5, 0, 7); + + /* Data Cache Clean by MVA to PoC */ + ARM_CP15_REGISTER_32BIT(Dccmvac, c7, c10, 0, 1); + /* Data Cache Clean and Invalidate by MVA to PoC */ ARM_CP15_REGISTER_32BIT(Dccimvac, c7, c14, 0, 1); diff --git a/repos/base-hw/src/include/hw/spec/arm/lpae.h b/repos/base-hw/src/include/hw/spec/arm/lpae.h index 7e78eecfd..ef5671a16 100644 --- a/repos/base-hw/src/include/hw/spec/arm/lpae.h +++ b/repos/base-hw/src/include/hw/spec/arm/lpae.h @@ -370,6 +370,26 @@ class Hw::Level_3_translation_table : desc = 0; } }; + struct Lookup_func + { + bool found { false }; + addr_t phys { 0 }; + + Lookup_func() { } + 
+ void operator () (addr_t const, + addr_t const, + size_t const, + Descriptor::access_t &desc) + { + using Base = Long_translation_table; + using Block_descriptor = typename Stage_trait::Type; + phys = Block_descriptor::Output_address::masked(desc); + found = true; + } + }; + public: using Allocator = Hw::Page_table_allocator<1 << SIZE_LOG2_4KB>; @@ -381,8 +401,20 @@ class Hw::Level_3_translation_table : Allocator &) { _range_op(vo, pa, size, Insert_func(flags)); } - void remove_translation(addr_t vo, size_t size, Allocator&) { - _range_op(vo, 0, size, Remove_func()); } + void remove_translation(addr_t vo, size_t size, Allocator&) + { + addr_t pa = 0; + _range_op(vo, pa, size, Remove_func()); + } + + bool lookup_translation(addr_t vo, addr_t & pa, Allocator&) + { + size_t page_size = 1 << SIZE_LOG2_4KB; + Lookup_func functor {}; + _range_op(vo, 0, page_size, functor); + pa = functor.phys; + return functor.found; + } }; @@ -487,6 +519,43 @@ class Hw::Level_x_translation_table : } }; + + template + struct Lookup_func + { + Allocator & alloc; + bool found { false }; + addr_t phys { 0 }; + + Lookup_func(Allocator & alloc) : alloc(alloc) { } + + void operator () (addr_t const vo, + addr_t const, + size_t const, + typename Descriptor::access_t &desc) + { + using Nt = typename Table_descriptor::Next_table; + + switch (Descriptor::type(desc)) { + case Descriptor::BLOCK: + { + phys = Block_descriptor::Output_address::masked(desc); + found = true; + return; + }; + case Descriptor::TABLE: + { + /* use allocator to retrieve virt address of table */ + E & table = alloc.virt_addr(Nt::masked(desc)); + found = table.lookup_translation(vo - (vo & Base::BLOCK_MASK), + phys, alloc); + return; + }; + case Descriptor::INVALID: return; + } + } + }; + public: static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB; @@ -518,8 +587,28 @@ class Hw::Level_x_translation_table : * \param alloc second level translation table allocator */ void remove_translation(addr_t vo, size_t size, - 
Allocator & alloc) { - this->_range_op(vo, 0, size, Remove_func(alloc)); } + Allocator & alloc) + { + addr_t pa = 0; + this->_range_op(vo, pa, size, Remove_func(alloc)); + } + + /** + * Lookup translation + * + * \param virt region offset within the tables virtual region + * \param phys physical address to return + * \param alloc second level translation table allocator + */ + bool lookup_translation(addr_t const virt, addr_t & phys, + Allocator & alloc) + { + size_t page_size = 1 << SIZE_LOG2_4KB; + Lookup_func functor(alloc); + this->_range_op(virt, phys, page_size, functor); + phys = functor.phys; + return functor.found; + } }; diff --git a/repos/base-hw/src/include/hw/spec/arm/page_table.h b/repos/base-hw/src/include/hw/spec/arm/page_table.h index 8f1a39c9d..9ed857977 100644 --- a/repos/base-hw/src/include/hw/spec/arm/page_table.h +++ b/repos/base-hw/src/include/hw/spec/arm/page_table.h @@ -259,6 +259,28 @@ class Hw::Page_table } } + /** + * Lookup translation + * + * \param virt virtual address offset to look for + * \param phys physical address to return + * \returns true if lookup was successful, otherwise false + */ + bool lookup_translation(addr_t const virt, addr_t & phys) + { + unsigned idx = 0; + if (!_index_by_vo(idx, virt)) return false; + + switch (Descriptor::type(_entries[idx])) { + case Descriptor::SMALL_PAGE: + { + phys = Small_page::Pa::masked(_entries[idx]); + return true; + } + default: return false; + } + } + /** * Does this table solely contain invalid entries */ @@ -576,6 +598,42 @@ class Hw::Page_table vo += sz; } } + + /** + * Lookup translation + * + * \param virt virtual address to look at + * \param phys physical address to return + * \param alloc second level translation table allocator + * \returns true if a translation was found, otherwise false + */ + bool lookup_translation(addr_t const virt, addr_t & phys, Allocator & alloc) + { + unsigned idx = 0; + if (!_index_by_vo(idx, virt)) return false; + + switch (Descriptor::type(_entries[idx])) { + +
case Descriptor::SECTION: + { + phys = Section::Pa::masked(_entries[idx]); + return true; + } + + case Descriptor::PAGE_TABLE: + { + using Pt = Page_table_level_2; + using Ptd = Page_table_descriptor; + + Pt & pt = + alloc.virt_addr(Ptd::Pa::masked(_entries[idx])); + + addr_t const offset = virt - Section::Pa::masked(virt); + return pt.lookup_translation(offset, phys); + } + default: return false; + }; + } } __attribute__((aligned(1< {}; ); + SYSTEM_REGISTER(32, Ctr_el0, ctr_el0, + struct I_min_line : Bitfield<0, 4> {}; + struct D_min_line : Bitfield<16, 4> {}; + ); + SYSTEM_REGISTER(64, Current_el, currentel, enum Level { EL0, EL1, EL2, EL3 }; struct El : Bitfield<2, 2> {}; diff --git a/repos/base-hw/src/include/hw/spec/arm_64/imx8q_evk_board.h b/repos/base-hw/src/include/hw/spec/arm_64/imx8q_evk_board.h index 055465106..73cb1cf32 100644 --- a/repos/base-hw/src/include/hw/spec/arm_64/imx8q_evk_board.h +++ b/repos/base-hw/src/include/hw/spec/arm_64/imx8q_evk_board.h @@ -27,8 +27,6 @@ namespace Hw::Imx8q_evk_board { UART_BASE = 0x30860000, UART_SIZE = 0x1000, UART_CLOCK = 250000000, - - CACHE_LINE_SIZE_LOG2 = 6, }; namespace Cpu_mmio { diff --git a/repos/base-hw/src/include/hw/spec/arm_64/rpi3_board.h b/repos/base-hw/src/include/hw/spec/arm_64/rpi3_board.h index 9d8d841a8..bdd280852 100644 --- a/repos/base-hw/src/include/hw/spec/arm_64/rpi3_board.h +++ b/repos/base-hw/src/include/hw/spec/arm_64/rpi3_board.h @@ -33,8 +33,6 @@ namespace Hw::Rpi3_board { LOCAL_IRQ_CONTROLLER_BASE = 0x40000000, LOCAL_IRQ_CONTROLLER_SIZE = 0x1000, - - CACHE_LINE_SIZE_LOG2 = 6, }; }; diff --git a/repos/base-hw/src/include/hw/spec/riscv/page_table.h b/repos/base-hw/src/include/hw/spec/riscv/page_table.h index 6e60acb98..ae013d6e4 100644 --- a/repos/base-hw/src/include/hw/spec/riscv/page_table.h +++ b/repos/base-hw/src/include/hw/spec/riscv/page_table.h @@ -335,6 +335,12 @@ class Sv39::Level_x_translation_table { _range_op(vo, 0, size, Remove_func(alloc)); } + + bool 
lookup_translation(addr_t, addr_t &, Allocator &) + { + Genode::raw(__func__, " not implemented yet"); + return false; + } } __attribute__((aligned(1 << ALIGNM_LOG2))); namespace Sv39 { diff --git a/repos/base-hw/src/include/hw/spec/x86_64/page_table.h b/repos/base-hw/src/include/hw/spec/x86_64/page_table.h index ee7fc455d..539fd10d7 100644 --- a/repos/base-hw/src/include/hw/spec/x86_64/page_table.h +++ b/repos/base-hw/src/include/hw/spec/x86_64/page_table.h @@ -14,6 +14,7 @@ #ifndef _SRC__LIB__HW__SPEC__X86_64__PAGE_TABLE_H_ #define _SRC__LIB__HW__SPEC__X86_64__PAGE_TABLE_H_ +#include #include #include #include @@ -680,6 +681,12 @@ class Hw::Pml4_table { _range_op(vo, 0, size, Remove_func(alloc)); } + + bool lookup_translation(addr_t const, addr_t &, Allocator &) + { + Genode::raw(__func__, " not implemented yet"); + return false; + } } __attribute__((aligned(1 << ALIGNM_LOG2))); diff --git a/repos/base-hw/src/lib/base/cache.cc b/repos/base-hw/src/lib/base/cache.cc index ffe8a65c9..1dd639a37 100644 --- a/repos/base-hw/src/lib/base/cache.cc +++ b/repos/base-hw/src/lib/base/cache.cc @@ -13,9 +13,25 @@ #include +#include #include +#include void Genode::cache_coherent(Genode::addr_t addr, Genode::size_t size) { - Kernel::update_instr_region(addr, size); + using namespace Genode; + + /** + * The kernel accepts the 'cache_coherent_region' call for one designated + * page only. Otherwise, it just ignores the call to limit the time being + * uninterruptible in the kernel. Therefore, we have to loop if more than + * one page is affected by the given region.
+ */ + while (size) { + addr_t next_page = align_addr(addr+1, get_page_size_log2()); + size_t s = min(size, next_page - addr); + Kernel::cache_coherent_region(addr, s); + addr += s; + size -= s; + } } diff --git a/repos/base/include/drivers/defs/exynos5.h b/repos/base/include/drivers/defs/exynos5.h index a2cc79f8a..41e6fa541 100644 --- a/repos/base/include/drivers/defs/exynos5.h +++ b/repos/base/include/drivers/defs/exynos5.h @@ -55,9 +55,6 @@ namespace Exynos5 { MCT_IRQ_L0 = 152, MCT_IRQ_L1 = 153, - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 6, - /* IRAM */ IRAM_BASE = 0x02020000, diff --git a/repos/base/include/drivers/defs/imx53.h b/repos/base/include/drivers/defs/imx53.h index 36980d4b4..45595d79e 100644 --- a/repos/base/include/drivers/defs/imx53.h +++ b/repos/base/include/drivers/defs/imx53.h @@ -106,9 +106,6 @@ namespace Imx53 { M4IF_BASE = 0x63fd8000, M4IF_SIZE = 0x00001000, - - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 6, }; }; diff --git a/repos/base/include/drivers/defs/imx6.h b/repos/base/include/drivers/defs/imx6.h index a34a768a3..3d60934c7 100644 --- a/repos/base/include/drivers/defs/imx6.h +++ b/repos/base/include/drivers/defs/imx6.h @@ -52,9 +52,6 @@ namespace Imx6 { /* System reset controller */ SRC_MMIO_BASE = 0x20d8000, - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 5, - /* SD host controller */ SDHC_1_IRQ = 54, SDHC_1_MMIO_BASE = 0x02190000, diff --git a/repos/base/include/drivers/defs/imx7d_sabre.h b/repos/base/include/drivers/defs/imx7d_sabre.h index 570cb91ce..6a572bf3e 100644 --- a/repos/base/include/drivers/defs/imx7d_sabre.h +++ b/repos/base/include/drivers/defs/imx7d_sabre.h @@ -37,8 +37,6 @@ namespace Imx7d_sabre { UART_1_MMIO_SIZE = 0x10000UL, TIMER_CLOCK = 1000000000UL, - - CACHE_LINE_SIZE_LOG2 = 6, }; } diff --git a/repos/base/include/drivers/defs/panda.h b/repos/base/include/drivers/defs/panda.h index e6a41cddf..450712c91 100644 --- a/repos/base/include/drivers/defs/panda.h +++ b/repos/base/include/drivers/defs/panda.h @@ -90,9 +90,6 @@ 
namespace Panda { /* SD card */ HSMMC_IRQ = 115, - - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 2, /* FIXME get correct value from board spec */ }; }; diff --git a/repos/base/include/drivers/defs/pbxa9.h b/repos/base/include/drivers/defs/pbxa9.h index 87ca2365c..6b5dd3060 100644 --- a/repos/base/include/drivers/defs/pbxa9.h +++ b/repos/base/include/drivers/defs/pbxa9.h @@ -74,9 +74,6 @@ namespace Pbxa9 { /* SD card */ PL180_IRQ_0 = 49, PL180_IRQ_1 = 50, - - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 2, /* FIXME get correct value from board spec */ }; }; diff --git a/repos/base/include/drivers/defs/rpi.h b/repos/base/include/drivers/defs/rpi.h index 3374a734e..5bce75a48 100644 --- a/repos/base/include/drivers/defs/rpi.h +++ b/repos/base/include/drivers/defs/rpi.h @@ -62,9 +62,6 @@ namespace Rpi { /* USB host controller */ DWC_IRQ = 17, - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 5, - /* SD card */ SDHCI_BASE = MMIO_0_BASE + 0x300000, SDHCI_SIZE = 0x100, diff --git a/repos/base/include/drivers/defs/zynq.h b/repos/base/include/drivers/defs/zynq.h index c6e4565e8..d524f1fe6 100644 --- a/repos/base/include/drivers/defs/zynq.h +++ b/repos/base/include/drivers/defs/zynq.h @@ -52,7 +52,6 @@ namespace Zynq { /* CPU cache */ PL310_MMIO_BASE = MMIO_1_BASE + 0xF02000, PL310_MMIO_SIZE = 0x1000, - CACHE_LINE_SIZE_LOG2 = 2, /* FIXME get correct value from board spec */ /* TTC (triple timer counter) */ TTC0_MMIO_BASE = MMIO_1_BASE + 0x1000, diff --git a/repos/os/src/server/vmm/spec/arm_v7/board.h b/repos/os/src/server/vmm/spec/arm_v7/board.h index bf2ca2468..eeea70fc3 100644 --- a/repos/os/src/server/vmm/spec/arm_v7/board.h +++ b/repos/os/src/server/vmm/spec/arm_v7/board.h @@ -70,9 +70,6 @@ struct Vea9x4::Board CORTEX_A9_PRIVATE_MEM_BASE = 0x1e000000, CORTEX_A9_PRIVATE_MEM_SIZE = 0x2000, CORTEX_A9_PRIVATE_TIMER_CLK = 200010000, - - /* CPU cache */ - CACHE_LINE_SIZE_LOG2 = 2, /* FIXME get correct value from board spec */ }; };