hw_x86_64: Implementation of IA-32e paging

IA-32e paging translates 48-bit linear addresses to 52-bit physical
addresses. Translation structures are hierarchical and four levels deep.
The current implementation supports regular 4 KB pages as well as 2 MB and
1 GB large page mappings.

Memory typing is not yet implemented since the encoded type bits depend
on the active page attribute table (PAT)*.

For detailed information refer to Intel SDM Vol. 3A, section 4.5.

* The default PAT after power up does not allow the encoding of the
  write-combining memory type, see Intel SDM Vol. 3A, section 11.12.4.
* Add common IA-32e paging descriptor type:
    The type represents a table entry and encompasses all fields shared by
    paging structure entries of all four levels (PML4, PDPT, PD and PT).
* Simplify PT entry type by using common descriptor:
    Differing fields are the physical address, the global flag and the memory
    type flags.
* Simplify directory entry type by using common descriptor:
    Page directory entries (PDPT and PD) have an additional 'page size' field
    that specifies if the entry references a next level paging structure or
    represents a large page mapping.
* Simplify PML4 entry type by using common descriptor
    Top-level paging structure entries (PML4) do not have a 'pat' flag and the
    memory type is specified by the 'pwt' and 'pcd' fields only.
* Implement access right merging for directory paging entries
    The access rights for translations are determined by the U/S, R/W and XD
    flags. Paging structure entries that reference other tables must provide
    the superset of rights required for all entries of the referenced table.
    Thus merge access rights of new mappings into existing directory entries to
    grant additional rights if needed.
* Add cr3 register definition:
    The control register 3 is used to set the current page-directory base
    register.
* Add cr3 variable to x86_64 Cpu Context
    The variable designates the address of the top-level paging structure.
* Return current cr3 value as translation table base
* Set context cr3 value on translation table assignment
* Implement switch to virtual mode in kernel
    Activate translation table in init_virt_kernel function by updating the
    cr3 register.
* Ignore accessed and dirty flags when comparing existing table entries
    These flags can be set by the MMU and must be disregarded.
This commit is contained in:
Adrian-Ken Rueegsegger 2015-02-16 15:20:06 +01:00 committed by Christian Helmuth
parent c7cadf52a6
commit 4417fe6201
3 changed files with 703 additions and 36 deletions

View File

@ -51,20 +51,56 @@ class Genode::Cpu
static constexpr addr_t exception_entry = 0x0; /* XXX */
static constexpr addr_t mtc_size = 1 << 13;
/**
 * Control register 3: Page-Directory base register
 *
 * See Intel SDM Vol. 3A, section 2.5.
 */
struct Cr3 : Register<64>
{
	struct Pwt : Bitfield<3,1> { };    /* Page-level write-through */
	struct Pcd : Bitfield<4,1> { };    /* Page-level cache disable */
	struct Pdb : Bitfield<12, 36> { }; /* Page-directory base address */

	/**
	 * Load value 'v' into CR3
	 *
	 * The "memory" clobber acts as a compiler barrier: memory accesses
	 * (e.g., stores that fill translation-table entries) must not be
	 * moved across the register write by the compiler.
	 */
	static void write(access_t const v) {
		asm volatile ("mov %0, %%cr3" :: "r" (v) : "memory"); }

	/**
	 * Return the value currently held by CR3
	 */
	static access_t read()
	{
		access_t v;
		asm volatile ("mov %%cr3, %0" : "=r" (v) :: );
		return v;
	}

	/**
	 * Return initialized value
	 *
	 * The result is 'table' passed through 'Pdb::masked', the 'Pwt' and
	 * 'Pcd' fields are not set by this function.
	 *
	 * \param table  base of targeted translation table
	 */
	static access_t init(addr_t const table) {
		return Pdb::masked(table); }
};
/**
* Extend basic CPU state by members relevant for 'base-hw' only
*/
struct Context : Cpu_state
{
/**
* Address of top-level paging structure.
*/
addr_t cr3;
/**
* Return base of assigned translation table
*/
addr_t translation_table() const { return 0UL; }
addr_t translation_table() const { return cr3; }
/**
* Assign translation-table base 'table'
*/
void translation_table(addr_t const table) { }
void translation_table(addr_t const table) {
cr3 = Cr3::init(table); }
/**
* Assign protection domain
@ -187,8 +223,8 @@ class Genode::Cpu
* \param process_id process ID of the kernel address-space
*/
static void
init_virt_kernel(addr_t const table, unsigned const process_id)
{ }
init_virt_kernel(addr_t const table, unsigned const process_id) {
Cr3::write(Cr3::init(table)); }
/* the body is empty, calling this function has no effect */
inline static void finish_init_phys_kernel()
{ }

View File

@ -15,58 +15,257 @@
#ifndef _TRANSLATION_TABLE_H_
#define _TRANSLATION_TABLE_H_
#include <page_flags.h>
/* Genode includes */
#include <util/misc_math.h>
#include <util/register.h>
#include <base/printf.h>
#include <assert.h>
/* base-hw includes */
#include <page_flags.h>
#include <page_slab.h>
namespace Genode
{
/**
* First level translation table
* IA-32e paging translates 48-bit linear addresses to 52-bit physical
* addresses. Translation structures are hierarchical and four levels
* deep.
*
* For detailed information refer to Intel SDM Vol. 3A, section 4.5.
*/
class Translation_table;
enum {
SIZE_LOG2_4KB = 12,
SIZE_LOG2_2MB = 21,
SIZE_LOG2_1GB = 30,
SIZE_LOG2_512GB = 39,
SIZE_LOG2_256TB = 48,
};
class Level_4_translation_table;
class PML4_table;
/**
* IA-32e page directory template.
*
* Page directories can refer to paging structures of the next higher level
* or directly map page frames by using large page mappings.
*
* \param PAGE_SIZE_LOG2 virtual address range size in log2
* of a single table entry
* \param SIZE_LOG2 virtual address range size in log2 of whole table
*/
template <typename ENTRY, unsigned PAGE_SIZE_LOG2, unsigned SIZE_LOG2>
class Page_directory;
using Level_3_translation_table =
Page_directory<Level_4_translation_table,
SIZE_LOG2_2MB, SIZE_LOG2_1GB>;
using Level_2_translation_table =
Page_directory<Level_3_translation_table,
SIZE_LOG2_1GB, SIZE_LOG2_512GB>;
using Translation_table = PML4_table;
/**
 * IA-32e common descriptor.
 *
 * Describes the fields of a 64-bit table entry that are shared by the
 * paging-structure entries of all four levels.
 */
struct Common_descriptor : Register<64>
{
	struct P   : Bitfield<0, 1> { };   /* present         */
	struct Rw  : Bitfield<1, 1> { };   /* read/write      */
	struct Us  : Bitfield<2, 1> { };   /* user/supervisor */
	struct Pwt : Bitfield<3, 1> { };   /* write-through   */
	struct Pcd : Bitfield<4, 1> { };   /* cache disable   */
	struct A   : Bitfield<5, 1> { };   /* accessed        */
	struct D   : Bitfield<6, 1> { };   /* dirty           */
	struct Xd  : Bitfield<63, 1> { };  /* execute-disable */

	/**
	 * Return whether the present flag of descriptor value 'v' is set
	 */
	static bool present(access_t const v)
	{
		return P::get(v);
	}

	/**
	 * Return a present descriptor value that encodes the access rights
	 * of 'flags'
	 *
	 * Rw follows 'writeable', Us is the inverse of 'privileged' and Xd
	 * is the inverse of 'executable'.
	 */
	static access_t create(Page_flags const &flags)
	{
		access_t desc = P::bits(1);

		desc = desc | Rw::bits(flags.writeable);
		desc = desc | Us::bits(!flags.privileged);
		desc = desc | Xd::bits(!flags.executable);
		return desc;
	}

	/**
	 * Return a copy of 'value' with the accessed and dirty flags
	 * cleared. These flags can be set by the MMU.
	 */
	static access_t clear_mmu_flags(access_t value)
	{
		access_t stripped = value;

		D::clear(stripped);
		A::clear(stripped);
		return stripped;
	}

	/**
	 * Merge access rights of descriptor with given flags.
	 *
	 * Rw and Us are or'ed with the rights requested by 'flags', Xd stays
	 * set only if 'flags' is not executable either.
	 */
	static void merge_access_rights(access_t &desc,
	                                Page_flags const &flags)
	{
		/* the three fields are disjoint, compute all values up front */
		access_t const rw = Rw::get(desc) | flags.writeable;
		access_t const us = Us::get(desc) | !flags.privileged;
		access_t const xd = Xd::get(desc) & !flags.executable;

		Rw::set(desc, rw);
		Us::set(desc, us);
		Xd::set(desc, xd);
	}
};
}
class Genode::Translation_table
class Genode::Level_4_translation_table
{
public:
private:
enum {
ALIGNM_LOG2 = 12,
MIN_PAGE_SIZE_LOG2 = 12,
MAX_COSTS_PER_TRANSLATION = 4*4096
static constexpr size_t PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t SIZE_LOG2 = SIZE_LOG2_2MB;
static constexpr size_t MAX_ENTRIES = 1 << (SIZE_LOG2-PAGE_SIZE_LOG2);
static constexpr size_t PAGE_SIZE = 1 << PAGE_SIZE_LOG2;
static constexpr size_t PAGE_MASK = ~((1 << PAGE_SIZE_LOG2) - 1);
class Misaligned {};
class Invalid_range {};
class Double_insertion {};
/**
 * Page-table entry: common descriptor fields plus the fields that are
 * specific to an entry of this table
 */
struct Descriptor : Common_descriptor
{
	using Common = Common_descriptor;

	struct Pat : Bitfield<7, 1> { };          /* page attribute table */
	struct G   : Bitfield<8, 1> { };          /* global               */
	struct Pa  : Bitfield<12, 36> { };        /* physical address     */
	struct Mt  : Bitset_3<Pwt, Pcd, Pat> { }; /* memory type          */

	/**
	 * Return descriptor value for access rights 'flags' and physical
	 * address 'pa'
	 */
	static access_t create(Page_flags const &flags, addr_t const pa)
	{
		/* XXX: Set memory type depending on active PAT */
		access_t entry = Common::create(flags);

		entry = entry | G::bits(flags.global);
		entry = entry | Pa::masked(pa);
		return entry;
	}
};
void * operator new (size_t, void * p) { return p; }
typename Descriptor::access_t _entries[MAX_ENTRIES];
/**
* Constructor
*/
Translation_table() { }
/**
 * Return whether the lowest 'alignm_log2' bits of 'a' are all zero
 */
inline bool _aligned(addr_t const a, size_t const alignm_log2)
{
	/* drop the low bits by shifting down and up again */
	addr_t const rounded_down = (a >> alignm_log2) << alignm_log2;

	return rounded_down == a;
}
/**
* Maximum virtual offset that can be translated by this table
*/
static addr_t max_virt_offset()
struct Insert_func
{
PDBG("not implemented");
return 0;
Page_flags const & flags;
Page_slab * slab;
Insert_func(Page_flags const & flags,
Page_slab * slab) : flags(flags), slab(slab) { }
/**
 * Write the mapping 'vo' -> 'pa' into table entry 'desc'
 *
 * Throws 'Invalid_range' if 'vo' or 'pa' is not page-aligned or if
 * 'size' is smaller than one page. Throws 'Double_insertion' if the
 * entry is already present and differs from the new value (accessed
 * and dirty flags are disregarded).
 */
void operator () (addr_t const vo,
                  addr_t const pa,
                  size_t const size,
                  Descriptor::access_t &desc)
{
	size_t const offset_mask = ~PAGE_MASK;

	if (vo & offset_mask)
		throw Invalid_range();
	if (pa & offset_mask)
		throw Invalid_range();
	if (size < PAGE_SIZE)
		throw Invalid_range();

	Descriptor::access_t const new_entry = Descriptor::create(flags, pa);

	if (Descriptor::present(desc)) {
		if (Descriptor::clear_mmu_flags(desc) != new_entry)
			throw Double_insertion();
	}
	desc = new_entry;
}
};
/**
 * Functor that invalidates a table entry by writing zero to it
 */
struct Remove_func
{
	Page_slab * slab;

	Remove_func(Page_slab * slab) : slab(slab) { }

	/* offset, physical address and size are not needed for removal */
	void operator () (addr_t const,
	                  addr_t const,
	                  size_t const,
	                  Descriptor::access_t &desc)
	{
		desc = 0;
	}
};
/**
 * Apply 'func' to each table entry touched by a region
 *
 * The region is processed in chunks that end at the next page boundary.
 * For each chunk, 'func' is called with the chunk's virtual offset,
 * physical address, size and the corresponding table entry.
 *
 * \param vo    virtual offset of the region within this table
 * \param pa    physical address that corresponds to 'vo'
 * \param size  size of the region
 * \param func  functor called as 'func(vo, pa, size, entry)'
 *
 * \throw Invalid_range  region exceeds the range covered by this table
 */
template <typename FUNC>
void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
{
	while (size > 0) {

		/* never touch memory beyond the entry array */
		size_t const i = vo >> PAGE_SIZE_LOG2;
		if (i >= MAX_ENTRIES)
			throw Invalid_range();

		addr_t const end = (vo + PAGE_SIZE) & PAGE_MASK;
		size_t const sz  = min(size, end-vo);

		func(vo, pa, sz, _entries[i]);

		/* check whether we wrap */
		if (end < vo) return;

		size = size - sz;
		vo  += sz;
		pa  += sz;
	}
}
public:
static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
static constexpr size_t ALIGNM_LOG2 = SIZE_LOG2_4KB;
/**
* IA-32e page table (Level 4)
*
* A page table consists of 512 entries that each maps a 4KB page
* frame.
* For further details refer to Intel SDM Vol. 3A, table 4-19.
*/
Level_4_translation_table()
{
if (!_aligned((addr_t)this, ALIGNM_LOG2))
throw Misaligned();
memset(&_entries, 0, sizeof(_entries));
}
/**
 * Return whether the table contains no page mappings
 *
 * \return  false if at least one entry is present, true otherwise
 */
bool empty()
{
	for (auto const &entry : _entries) {
		if (Descriptor::present(entry))
			return false;
	}
	return true;
}
/**
* Insert translations into this table
*
* \param vo offset of virt. transl. region in virt. table region
* \param pa base of physical backing store
* \param size size of translated region
* \param f mapping flags
* \param s second level page slab allocator
* \param vo offset of the virtual region represented
* by the translation within the virtual
* region represented by this table
* \param pa base of the physical backing store
* \param size size of the translated region
* \param flags mapping flags
* \param slab second level page slab allocator
*/
void insert_translation(addr_t vo, addr_t pa, size_t size,
Page_flags const & f, Page_slab * const s)
void insert_translation(addr_t vo,
addr_t pa,
size_t size,
Page_flags const & flags,
Page_slab * slab)
{
PDBG("not implemented");
this->_range_op(vo, pa, size, Insert_func(flags, slab));
}
/**
@ -78,8 +277,440 @@ class Genode::Translation_table
*/
void remove_translation(addr_t vo, size_t size, Page_slab * slab)
{
PDBG("not implemented");
this->_range_op(vo, 0, size, Remove_func(slab));
}
};
} __attribute__((aligned(1 << ALIGNM_LOG2)));
/**
 * IA-32e page directory
 *
 * Each entry either references a next-level table of type 'ENTRY' or, if
 * its page-size flag is set, directly maps a large page of
 * 2^PAGE_SIZE_LOG2 bytes.
 *
 * \param ENTRY           type of the next-level table
 * \param PAGE_SIZE_LOG2  virtual address range size in log2
 *                        of a single table entry
 * \param SIZE_LOG2       virtual address range size in log2 of whole table
 */
template <typename ENTRY, unsigned PAGE_SIZE_LOG2, unsigned SIZE_LOG2>
class Genode::Page_directory
{
	private:

		/*
		 * The constants are computed with 'unsigned long' operands. With
		 * the 'int' literal 1, the mask would only be correct through sign
		 * extension and the shifts would overflow for shift counts >= 31.
		 */
		static constexpr size_t MAX_ENTRIES = 1UL << (SIZE_LOG2-PAGE_SIZE_LOG2);
		static constexpr size_t PAGE_SIZE   = 1UL << PAGE_SIZE_LOG2;
		static constexpr size_t PAGE_MASK   = ~((1UL << PAGE_SIZE_LOG2) - 1);

		class Misaligned {};
		class Invalid_range {};
		class Double_insertion {};

		/**
		 * Fields shared by both kinds of directory entries
		 */
		struct Base_descriptor : Common_descriptor
		{
			using Common = Common_descriptor;

			struct Ps : Common::template Bitfield<7, 1> { };  /* page size */

			/**
			 * Return whether 'v' maps a large page (page-size flag set)
			 */
			static bool maps_page(access_t const v) { return Ps::get(v); }
		};

		/**
		 * Directory entry that maps a large page
		 */
		struct Page_descriptor : Base_descriptor
		{
			using Base = Base_descriptor;

			/**
			 * Global attribute
			 */
			struct G : Base::template Bitfield<8, 1> { };

			/**
			 * Page attribute table
			 */
			struct Pat : Base::template Bitfield<12, 1> { };

			/**
			 * Physical address
			 */
			struct Pa : Base::template Bitfield<PAGE_SIZE_LOG2,
			                                     48 - PAGE_SIZE_LOG2> { };

			/**
			 * Memory type
			 */
			struct Mt : Base::template Bitset_3<Base::Pwt,
			                                    Base::Pcd, Pat> { };

			/**
			 * Return large-page descriptor for 'flags' and 'pa'
			 */
			static typename Base::access_t create(Page_flags const &flags,
			                                      addr_t const pa)
			{
				/* XXX: Set memory type depending on active PAT */
				return Base::create(flags)
				     | Base::Ps::bits(1)
				     | G::bits(flags.global)
				     | Pa::masked(pa);
			}
		};

		/**
		 * Directory entry that references a next-level table
		 */
		struct Table_descriptor : Base_descriptor
		{
			using Base = Base_descriptor;

			/**
			 * Physical address
			 */
			struct Pa : Base::template Bitfield<12, 36> { };

			/**
			 * Memory types
			 */
			struct Mt : Base::template Bitset_2<Base::Pwt,
			                                    Base::Pcd> { };

			/**
			 * Return table descriptor for 'flags' and table address 'pa'
			 */
			static typename Base::access_t create(Page_flags const &flags,
			                                      addr_t const pa)
			{
				/* XXX: Set memory type depending on active PAT */
				return Base::create(flags)
				     | Pa::masked(pa);
			}
		};

		typename Base_descriptor::access_t _entries[MAX_ENTRIES];

		/**
		 * Return whether the lowest 'alignm_log2' bits of 'a' are zero
		 */
		inline bool _aligned(addr_t const a, size_t const alignm_log2) {
			return a == ((a >> alignm_log2) << alignm_log2); }

		/**
		 * Functor that inserts a mapping into one directory entry
		 */
		struct Insert_func
		{
			Page_flags const & flags;
			Page_slab        * slab;

			Insert_func(Page_flags const & flags,
			            Page_slab * slab) : flags(flags), slab(slab) { }

			/**
			 * Insert mapping 'vo' -> 'pa' of 'size' bytes via entry 'desc'
			 *
			 * A large-page mapping is used if 'vo' and 'pa' are aligned to
			 * the entry size and 'size' covers at least one entry.
			 * Otherwise the mapping is delegated to a next-level table,
			 * which is allocated from 'slab' if the entry is not present.
			 *
			 * \throw Double_insertion          entry already maps
			 *                                  something different
			 * \throw Allocator::Out_of_memory  a table is needed but no
			 *                                  slab was given
			 */
			void operator () (addr_t const vo,
			                  addr_t const pa,
			                  size_t const size,
			                  typename Base_descriptor::access_t &desc)
			{
				/* can we insert a large page mapping? */
				if (!((vo & ~PAGE_MASK) || (pa & ~PAGE_MASK) ||
				      size < PAGE_SIZE)) {
					typename Base_descriptor::access_t table_entry =
						Page_descriptor::create(flags, pa);

					if (Base_descriptor::present(desc) &&
					    Base_descriptor::clear_mmu_flags(desc) != table_entry)
						throw Double_insertion();

					desc = table_entry;
					return;
				}

				/* we need to use a next level table */
				ENTRY *table;
				if (!Base_descriptor::present(desc)) {
					if (!slab)
						throw Allocator::Out_of_memory();

					/* create and link next level table */
					table = new (slab) ENTRY();
					ENTRY * phys_addr = (ENTRY*) slab->phys_addr(table);
					desc = (typename Base_descriptor::access_t)
						Table_descriptor::create(flags,
						                         (addr_t)(phys_addr ? phys_addr
						                                            : table));
				} else if (Base_descriptor::maps_page(desc)) {
					throw Double_insertion();
				} else {
					/* grant the rights needed by the new mapping */
					Base_descriptor::merge_access_rights(desc, flags);
					ENTRY * phys_addr = (ENTRY*)
						Table_descriptor::Pa::masked(desc);
					table = (ENTRY*) slab->virt_addr(phys_addr);

					/* fall back to the physical address if lookup fails */
					table = table ? table : (ENTRY*)phys_addr;
				}

				/* insert translation */
				table->insert_translation(vo - (vo & PAGE_MASK),
				                          pa, size, flags, slab);
			}
		};

		/**
		 * Functor that removes mappings reachable via one directory entry
		 */
		struct Remove_func
		{
			Page_slab * slab;

			Remove_func(Page_slab * slab) : slab(slab) { }

			/**
			 * Remove the mappings of region 'vo', 'size' from entry 'desc'
			 *
			 * A large-page entry is cleared. For a table entry, the removal
			 * is forwarded to the next-level table, which is destroyed and
			 * unlinked once it is empty.
			 */
			void operator () (addr_t const vo,
			                  addr_t const pa,
			                  size_t const size,
			                  typename Base_descriptor::access_t &desc)
			{
				if (Base_descriptor::present(desc)) {
					if (Base_descriptor::maps_page(desc)) {
						desc = 0;
					} else {
						/* use allocator to retrieve virt address of table */
						ENTRY* phys_addr = (ENTRY*)
							Table_descriptor::Pa::masked(desc);
						ENTRY* table = (ENTRY*) slab->virt_addr(phys_addr);
						table = table ? table : (ENTRY*)phys_addr;
						table->remove_translation(vo - (vo & PAGE_MASK),
						                          size, slab);
						if (table->empty()) {
							destroy(slab, table);
							desc = 0;
						}
					}
				}
			}
		};

		/**
		 * Apply 'func' to each directory entry touched by a region
		 *
		 * The region is processed in chunks that end at the next entry
		 * boundary.
		 *
		 * \throw Invalid_range  region exceeds the range covered by
		 *                       this table
		 */
		template <typename FUNC>
		void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
		{
			for (size_t i = vo >> PAGE_SIZE_LOG2; size > 0;
			     i = vo >> PAGE_SIZE_LOG2) {

				/* never touch memory beyond the entry array */
				if (i >= MAX_ENTRIES)
					throw Invalid_range();

				addr_t end = (vo + PAGE_SIZE) & PAGE_MASK;
				size_t sz  = min(size, end-vo);

				func(vo, pa, sz, _entries[i]);

				/* check whether we wrap */
				if (end < vo) return;

				size = size - sz;
				vo  += sz;
				pa  += sz;
			}
		}

	public:

		static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
		static constexpr size_t ALIGNM_LOG2        = SIZE_LOG2_4KB;

		/**
		 * Constructor
		 *
		 * Throws 'Misaligned' if the object is not aligned to
		 * 2^ALIGNM_LOG2 bytes, zeroes all entries otherwise.
		 */
		Page_directory()
		{
			if (!_aligned((addr_t)this, ALIGNM_LOG2))
				throw Misaligned();

			memset(&_entries, 0, sizeof(_entries));
		}

		/**
		 * Return whether the table contains no present entry
		 *
		 * \return  false if an entry is present, true otherwise
		 */
		bool empty()
		{
			for (unsigned i = 0; i < MAX_ENTRIES; i++)
				if (Base_descriptor::present(_entries[i]))
					return false;
			return true;
		}

		/**
		 * Insert translations into this table
		 *
		 * \param vo     offset of the virtual region represented
		 *               by the translation within the virtual
		 *               region represented by this table
		 * \param pa     base of the physical backing store
		 * \param size   size of the translated region
		 * \param flags  mapping flags
		 * \param slab   page slab allocator used for next-level tables
		 */
		void insert_translation(addr_t vo,
		                        addr_t pa,
		                        size_t size,
		                        Page_flags const & flags,
		                        Page_slab * slab)
		{
			_range_op(vo, pa, size, Insert_func(flags, slab));
		}

		/**
		 * Remove translations that overlap with a given virtual region
		 *
		 * \param vo    region offset within the tables virtual region
		 * \param size  region size
		 * \param slab  page slab allocator used for next-level tables
		 */
		void remove_translation(addr_t vo, size_t size, Page_slab * slab)
		{
			_range_op(vo, 0, size, Remove_func(slab));
		}
} __attribute__((aligned(1 << ALIGNM_LOG2)));
/**
 * IA-32e top-level paging structure (PML4)
 *
 * Each of the 512 entries references a 'Level_2_translation_table' that
 * covers 512 GB, the whole table spans 256 TB of virtual address space.
 */
class Genode::PML4_table
{
	private:

		/*
		 * A single entry covers 512 GB (2^39), the whole table 256 TB
		 * (2^48). The entry index of a virtual offset is therefore
		 * 'vo >> 39'. With the two values swapped, every 48-bit offset
		 * would select entry 0.
		 */
		static constexpr size_t PAGE_SIZE_LOG2 = SIZE_LOG2_512GB;
		static constexpr size_t SIZE_LOG2      = SIZE_LOG2_256TB;
		static constexpr size_t MAX_ENTRIES    = 512;
		static constexpr size_t PAGE_SIZE      = 1UL << PAGE_SIZE_LOG2;
		static constexpr size_t PAGE_MASK      = ~((1UL << PAGE_SIZE_LOG2) - 1);

		static_assert(MAX_ENTRIES == (1UL << (SIZE_LOG2 - PAGE_SIZE_LOG2)),
		              "PML4 entry count does not match covered range");

		class Misaligned {};
		class Invalid_range {};

		struct Descriptor : Common_descriptor
		{
			/*
			 * Physical address of the referenced table. The width equals
			 * the one used by the table descriptors of the other levels
			 * and by 'Cr3::Pdb', it is independent of the virtual range
			 * covered by this table.
			 */
			struct Pa : Bitfield<12, 36> { };      /* physical address */
			struct Mt : Bitset_2<Pwt, Pcd> { };    /* memory type      */

			/**
			 * Return table descriptor for 'flags' and table address 'pa'
			 */
			static access_t create(Page_flags const &flags, addr_t const pa)
			{
				/* XXX: Set memory type depending on active PAT */
				return Common_descriptor::create(flags)
				     | Pa::masked(pa);
			}
		};

		typename Descriptor::access_t _entries[MAX_ENTRIES];

		/**
		 * Return whether the lowest 'alignm_log2' bits of 'a' are zero
		 */
		inline bool _aligned(addr_t const a, size_t const alignm_log2) {
			return a == ((a >> alignm_log2) << alignm_log2); }

		using ENTRY = Level_2_translation_table;

		/**
		 * Functor that inserts a mapping via one PML4 entry
		 */
		struct Insert_func
		{
			Page_flags const & flags;
			Page_slab        * slab;

			Insert_func(Page_flags const & flags,
			            Page_slab * slab) : flags(flags), slab(slab) { }

			/**
			 * Insert mapping 'vo' -> 'pa' of 'size' bytes via entry 'desc'
			 *
			 * The mapping is always delegated to a next-level table, which
			 * is allocated from 'slab' if the entry is not present.
			 *
			 * \throw Allocator::Out_of_memory  a table is needed but no
			 *                                  slab was given
			 */
			void operator () (addr_t const vo,
			                  addr_t const pa,
			                  size_t const size,
			                  Descriptor::access_t &desc)
			{
				/* we need to use a next level table */
				ENTRY *table;
				if (!Descriptor::present(desc)) {
					if (!slab)
						throw Allocator::Out_of_memory();

					/* create and link next level table */
					table = new (slab) ENTRY();
					ENTRY * phys_addr = (ENTRY*) slab->phys_addr(table);
					desc = Descriptor::create(flags,
					                          (addr_t)(phys_addr ? phys_addr
					                                             : table));
				} else {
					/* grant the rights needed by the new mapping */
					Descriptor::merge_access_rights(desc, flags);
					ENTRY * phys_addr = (ENTRY*)
						Descriptor::Pa::masked(desc);
					table = (ENTRY*) slab->virt_addr(phys_addr);

					/* fall back to the physical address if lookup fails */
					table = table ? table : (ENTRY*)phys_addr;
				}

				/* insert translation */
				table->insert_translation(vo - (vo & PAGE_MASK),
				                          pa, size, flags, slab);
			}
		};

		/**
		 * Functor that removes mappings reachable via one PML4 entry
		 */
		struct Remove_func
		{
			Page_slab * slab;

			Remove_func(Page_slab * slab) : slab(slab) { }

			/**
			 * Forward the removal to the referenced table, destroy and
			 * unlink that table once it is empty
			 */
			void operator () (addr_t const vo,
			                  addr_t const pa,
			                  size_t const size,
			                  Descriptor::access_t &desc)
			{
				if (Descriptor::present(desc)) {
					/* use allocator to retrieve virt address of table */
					ENTRY* phys_addr = (ENTRY*)
						Descriptor::Pa::masked(desc);
					ENTRY* table = (ENTRY*) slab->virt_addr(phys_addr);
					table = table ? table : (ENTRY*)phys_addr;
					table->remove_translation(vo - (vo & PAGE_MASK), size,
					                          slab);
					if (table->empty()) {
						destroy(slab, table);
						desc = 0;
					}
				}
			}
		};

		/**
		 * Apply 'func' to each PML4 entry touched by a region
		 *
		 * The region is processed in chunks that end at the next entry
		 * boundary.
		 *
		 * \throw Invalid_range  region exceeds the range covered by
		 *                       this table
		 */
		template <typename FUNC>
		void _range_op(addr_t vo, addr_t pa, size_t size, FUNC &&func)
		{
			for (size_t i = vo >> PAGE_SIZE_LOG2; size > 0;
			     i = vo >> PAGE_SIZE_LOG2) {

				/* never touch memory beyond the entry array */
				if (i >= MAX_ENTRIES)
					throw Invalid_range();

				addr_t end = (vo + PAGE_SIZE) & PAGE_MASK;
				size_t sz  = min(size, end-vo);

				func(vo, pa, sz, _entries[i]);

				/* check whether we wrap */
				if (end < vo) return;

				size = size - sz;
				vo  += sz;
				pa  += sz;
			}
		}

	public:

		static constexpr size_t MIN_PAGE_SIZE_LOG2 = SIZE_LOG2_4KB;
		static constexpr size_t ALIGNM_LOG2        = SIZE_LOG2_4KB;

		/**
		 * Constructor
		 *
		 * Throws 'Misaligned' if the object is not aligned to
		 * 2^ALIGNM_LOG2 bytes, zeroes all entries otherwise.
		 */
		PML4_table()
		{
			if (!_aligned((addr_t)this, ALIGNM_LOG2))
				throw Misaligned();

			memset(&_entries, 0, sizeof(_entries));
		}

		/**
		 * Return whether the table contains no present entry
		 *
		 * \return  false if an entry is present, true otherwise
		 */
		bool empty()
		{
			for (unsigned i = 0; i < MAX_ENTRIES; i++)
				if (Descriptor::present(_entries[i]))
					return false;
			return true;
		}

		/**
		 * Insert translations into this table
		 *
		 * \param vo     offset of the virtual region represented
		 *               by the translation within the virtual
		 *               region represented by this table
		 * \param pa     base of the physical backing store
		 * \param size   size of the translated region
		 * \param flags  mapping flags
		 * \param slab   page slab allocator used for next-level tables
		 */
		void insert_translation(addr_t vo,
		                        addr_t pa,
		                        size_t size,
		                        Page_flags const & flags,
		                        Page_slab * slab)
		{
			_range_op(vo, pa, size, Insert_func(flags, slab));
		}

		/**
		 * Remove translations that overlap with a given virtual region
		 *
		 * \param vo    region offset within the tables virtual region
		 * \param size  region size
		 * \param slab  page slab allocator used for next-level tables
		 */
		void remove_translation(addr_t vo, size_t size, Page_slab * slab)
		{
			_range_op(vo, 0, size, Remove_func(slab));
		}
} __attribute__((aligned(1 << ALIGNM_LOG2)));
#endif /* _TRANSLATION_TABLE_H_ */

View File

@ -35,7 +35,7 @@
_mt_master_context_begin:
/* space must be at least as large as 'Cpu_state' */
.space 20*8
.space 21*8
.global _mt_master_context_end
_mt_master_context_end: