From 9f2a8d027d79777fa843e29dd5ee4e2f4f09821d Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Tue, 24 Oct 2023 17:40:14 +1100 Subject: [PATCH 01/19] elfloader: add support for Jetson Orin This commit also adds support for the tegra194-tcu uart device. Signed-off-by: Andy Bui --- cmake-tool/helpers/application_settings.cmake | 2 +- elfloader-tool/src/drivers/uart/tegra-uart.c | 59 +++++++++++++++++++ 2 files changed, 60 insertions(+), 1 deletion(-) create mode 100644 elfloader-tool/src/drivers/uart/tegra-uart.c diff --git a/cmake-tool/helpers/application_settings.cmake b/cmake-tool/helpers/application_settings.cmake index 01580404..e4365c04 100644 --- a/cmake-tool/helpers/application_settings.cmake +++ b/cmake-tool/helpers/application_settings.cmake @@ -14,7 +14,7 @@ function(ApplyData61ElfLoaderSettings kernel_platform kernel_sel4_arch) binary_list "tx1;hikey;odroidc2;odroidc4;imx8mq-evk;imx8mm-evk;hifive;tqma8xqp1gb;bcm2711;rocketchip" ) - set(efi_list "tk1;rockpro64;quartz64") + set(efi_list "tk1;rockpro64;quartz64;orin") set(uimage_list "tx2;am335x") if( ${kernel_platform} IN_LIST efi_list diff --git a/elfloader-tool/src/drivers/uart/tegra-uart.c b/elfloader-tool/src/drivers/uart/tegra-uart.c new file mode 100644 index 00000000..7bf0f62a --- /dev/null +++ b/elfloader-tool/src/drivers/uart/tegra-uart.c @@ -0,0 +1,59 @@ +/* + * Copyright 2023, NIO + * + * SPDX-License-Identifier: GPL-2.0-only + */ + +#include +#include +#include +#include + +#define NUM_BYTES_FIELD_BIT (24U) +#define FLUSH_BIT (26U) +#define INTR_TRIGGER_BIT (31U) +#define UART_REG(mmio, x) ((volatile uint32_t *)(mmio + (x))) + +static int tegra_uart_putchar(struct elfloader_device *dev, unsigned int c) +{ + uint32_t reg_val; + + reg_val = (uint32_t)(1UL << NUM_BYTES_FIELD_BIT); + reg_val |= BIT(INTR_TRIGGER_BIT); + reg_val |= c; + + if (c == '\r' || c == '\n') { + reg_val |= BIT(FLUSH_BIT); + } + + while (*UART_REG(dev->region_bases[0], 0) & BIT(INTR_TRIGGER_BIT)); + + *UART_REG(dev->region_bases[0], 0) = reg_val; + return 0; +} + +static int tegra_uart_init(struct elfloader_device *dev, UNUSED void *match_data) +{ + uart_set_out(dev); + *UART_REG(dev->region_bases[0], 0) = 0; + + return 0; +} + +static const struct dtb_match_table tegra_uart_matches[] = { + { .compatible = "nvidia,tegra194-tcu" }, + { .compatible = NULL /* sentinel */ }, +}; + +static const struct elfloader_uart_ops tegra_uart_ops = { + .putc = &tegra_uart_putchar, +}; + +static const struct elfloader_driver tegra_uart = { + .match_table = tegra_uart_matches, + .type = DRIVER_UART, + .init = &tegra_uart_init, + .ops = &tegra_uart_ops, +}; + +ELFLOADER_DRIVER(tegra_uart); From 276519bc8c052b1dcfa9b4db502bcb38715cb0e8 Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Fri, 3 Nov 2023 12:12:22 +1100 Subject: [PATCH 02/19] elfloader: arm: flush dcache by va range instead of set/way. Cleaning a cache by set/way only cleans the CPU-local caches. System caches can only be cleaned by virtual address, thus the elfloader is changed to clean the data cache by VA instead of set/way. This is important as the point-of-coherency (PoC) may lie beyond the system cache, in which case we cannot clean to PoC unless we use a clean by va. An ifdef is used for the implementation of continue_boot(), as we expect this to diverge from the legacy (ARMv7 right now) implementation slowly. 
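For reference, a clean by VA amounts to roughly the following minimal C sketch
(illustrative only, assuming <stdint.h>; the helper name is made up, the real
implementation is the assembly routine clean_dcache_by_range added by this patch):

    #include <stdint.h>

    /* Clean a VA range to the point of coherency, cache line by cache line. */
    static void clean_dcache_range_poc(uintptr_t start, uintptr_t end)
    {
        uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr));
        /* CTR_EL0.DminLine (bits [19:16]) is log2 of the smallest D-cache
         * line size in words; convert it to bytes. */
        uintptr_t line = 4u << ((ctr >> 16) & 0xf);

        for (uintptr_t va = start & ~(line - 1); va < end; va += line) {
            __asm__ volatile("dc cvac, %0" : : "r"(va) : "memory");
        }
        __asm__ volatile("dsb sy" : : : "memory");
    }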
Signed-off-by: Andy Bui --- .../include/arch-arm/64/mode/assembler.h | 20 +++-- .../src/arch-arm/armv/armv8-a/64/mmu-hyp.S | 68 ++++++++------- .../src/arch-arm/armv/armv8-a/64/mmu.S | 11 --- elfloader-tool/src/arch-arm/smp_boot.c | 11 ++- elfloader-tool/src/arch-arm/sys_boot.c | 87 +++++++++++++++++++ .../binaries/efi/gnuefi/elf_aarch64_efi.lds | 1 + 6 files changed, 143 insertions(+), 55 deletions(-) diff --git a/elfloader-tool/include/arch-arm/64/mode/assembler.h b/elfloader-tool/include/arch-arm/64/mode/assembler.h index 4f9972c0..d8863133 100644 --- a/elfloader-tool/include/arch-arm/64/mode/assembler.h +++ b/elfloader-tool/include/arch-arm/64/mode/assembler.h @@ -67,7 +67,15 @@ #define MT_NORMAL_WT 5 #define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) +.macro disable_mmu sctlr tmp + __disable_mmu \sctlr, \tmp + ic ialluis + dsb sy + isb +.endm + .macro enable_mmu sctlr tmp + dsb sy mrs \tmp, \sctlr orr \tmp, \tmp, #(1 << 0) orr \tmp, \tmp, #(1 << 2) @@ -76,17 +84,11 @@ isb .endm -.macro disable_mmu sctlr tmp - mrs \tmp, \sctlr - bic \tmp, \tmp, #(1 << 0) - bic \tmp, \tmp, #(1 << 2) - bic \tmp, \tmp, #(1 << 12) - msr \sctlr, \tmp +.macro __disable_mmu sctlr tmp + dsb sy isb -.endm - -.macro disable_id_cache sctlr tmp mrs \tmp, \sctlr + bic \tmp, \tmp, #(1 << 0) bic \tmp, \tmp, #(1 << 2) bic \tmp, \tmp, #(1 << 12) msr \sctlr, \tmp diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S index 31cb8a27..55a15de7 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S @@ -21,31 +21,42 @@ .extern invalidate_icache .extern _boot_pgd_down -BEGIN_FUNC(disable_caches_hyp) - stp x29, x30, [sp, #-16]! - mov x29, sp - bl flush_dcache - disable_id_cache sctlr_el2, x9 - ldp x29, x30, [sp], #16 +BEGIN_FUNC(disable_mmu_caches_hyp) + /* Assume D-cache already cleaned to PoC */ + disable_mmu sctlr_el2, x9 + ret +END_FUNC(disable_mmu_caches_hyp) + +BEGIN_FUNC(clean_dcache_by_range) + /* Ordering needed for strongly-ordered mem, not needed for NORMAL mem. + * See ARM DDI 0487I.a, page D7-5063. + */ + dmb sy + + /* Extract minimum DCache CL size into x3 and CL mask into x4 */ + mrs x2, ctr_el0 + ubfx x4, x2, #16, #4 + mov x3, #4 + lsl x3, x3, x4 + sub x4, x3, #1 + + /* Apply mask to start address before entering the loop */ + bic x4, x0, x4 +clean_dcache_by_range_loop: + dc cvac, x4 + add x4, x4, x3 + cmp x4, x1 + b.lt clean_dcache_by_range_loop + dsb sy + isb ret -END_FUNC(disable_caches_hyp) +END_FUNC(clean_dcache_by_range) BEGIN_FUNC(leave_hyp) /* We call nested functions, follow the ABI. */ stp x29, x30, [sp, #-16]! mov x29, sp - bl flush_dcache - - /* Ensure I-cache, D-cache and mmu are disabled for EL2/Stage1 */ - disable_mmu sctlr_el2, x9 - - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively are discarded. - */ - bl invalidate_icache - /* Ensure I-cache, D-cache and mmu are disabled for EL1/Stage2 */ mov x9, #(1 << 31) msr hcr_el2, x9 @@ -71,15 +82,6 @@ BEGIN_FUNC(leave_hyp) END_FUNC(leave_hyp) BEGIN_FUNC(arm_enable_hyp_mmu) - stp x29, x30, [sp, #-16]! 
- mov x29, sp - - bl flush_dcache - - disable_mmu sctlr_el2, x8 - - bl invalidate_icache - /* * DEVICE_nGnRnE 000 00000000 * DEVICE_nGnRE 001 00000100 @@ -107,13 +109,13 @@ BEGIN_FUNC(arm_enable_hyp_mmu) dsb ish isb - enable_mmu sctlr_el2, x8 + /* Invalidate icache */ ic ialluis - dsb ish + dsb sy isb - tlbi alle2is - dsb ish - isb - ldp x29, x30, [sp], #16 + + enable_mmu sctlr_el2, x8 + /* NOTE: enable_mmu already contains an isb after enabling. */ + ret END_FUNC(arm_enable_hyp_mmu) diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S index ee161192..bbb80fa7 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S @@ -43,17 +43,6 @@ BEGIN_FUNC(arm_enable_mmu) stp x29, x30, [sp, #-16]! mov x29, sp - bl flush_dcache - - /* Ensure I-cache, D-cache and mmu are disabled for EL1/Stage1 */ - disable_mmu sctlr_el1 , x8 - - /* - * Invalidate the local I-cache so that any instructions fetched - * speculatively are discarded. - */ - bl invalidate_icache - /* * DEVICE_nGnRnE 000 00000000 * DEVICE_nGnRE 001 00000100 diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index d429d113..d49426ad 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -45,12 +45,19 @@ void non_boot_main(void) abort(); } -#ifndef CONFIG_ARM_HYPERVISOR_SUPPORT if (is_hyp_mode()) { extern void leave_hyp(void); + extern void disable_mmu_caches_hyp(void); +#ifdef CONFIG_ARCH_AARCH64 + /* Disable the MMU and cacheability unconditionally on ARM64. + * The 32 bit ARM platforms do not expect the MMU to be turned + * off, so we leave them alone. */ + disable_mmu_caches_hyp(); +#endif +#ifndef CONFIG_ARM_HYPERVISOR_SUPPORT leave_hyp(); - } #endif + } /* Enable the MMU, and enter the kernel. */ if (is_hyp_mode()) { arm_enable_hyp_mmu(); diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index bf98aaf2..b15683bd 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -168,6 +168,92 @@ void main(UNUSED void *arg) abort(); } +/* ARMv8 64-bit specific implementation of continue_boot() */ +#if defined(CONFIG_ARCH_AARCH64) +void continue_boot(int was_relocated) +{ + if (was_relocated) { + printf("ELF loader relocated, continuing boot...\n"); + } + + /* + * If we were relocated, we need to re-initialise the + * driver model so all its pointers are set up properly. + */ + if (was_relocated) { + initialise_devices(); + } + + /* If in EL2, disable MMU and I/D cacheability unconditionally */ + if (is_hyp_mode()) { + extern void disable_mmu_caches_hyp(void); + extern void clean_dcache_by_range(paddr_t start, paddr_t end); + + paddr_t start = kernel_info.phys_region_start; + paddr_t end = kernel_info.phys_region_end; + clean_dcache_by_range(start, end); + start = (paddr_t)user_info.phys_region_start; + end = (paddr_t)user_info.phys_region_end; + clean_dcache_by_range(start, end); + start = (paddr_t)_text; + end = (paddr_t)_end; + clean_dcache_by_range(start, end); + if (dtb) { + start = (paddr_t)dtb; + end = start + dtb_size; + clean_dcache_by_range(start, end); + } + +#if defined(CONFIG_ARCH_AARCH64) + /* Disable the MMU and cacheability unconditionally on ARM64. + * The 32 bit ARM platforms do not expect the MMU to be turned + * off, so we leave them alone. 
*/ + disable_mmu_caches_hyp(); +#endif + +#if (defined(CONFIG_ARCH_ARM_V7A) || defined(CONFIG_ARCH_ARM_V8A)) && !defined(CONFIG_ARM_HYPERVISOR_SUPPORT) + extern void leave_hyp(void); + /* Switch to EL1, assume EL2 MMU already disabled for ARMv8. */ + leave_hyp(); +#endif + /* Setup MMU. */ + if (is_hyp_mode()) { + init_hyp_boot_vspace(&kernel_info); + } else { + /* If we are not in HYP mode, we enable the SV MMU and paging + * just in case the kernel does not support hyp mode. */ + init_boot_vspace(&kernel_info); + } + +#if CONFIG_MAX_NUM_NODES > 1 + smp_boot(); +#endif /* CONFIG_MAX_NUM_NODES */ + + if (is_hyp_mode()) { + printf("Enabling hypervisor MMU and paging\n"); + arm_enable_hyp_mmu(); + } else { + printf("Enabling MMU and paging\n"); + arm_enable_mmu(); + } + + /* Enter kernel. The UART may no longer be accessible here. */ + if ((uintptr_t)uart_get_mmio() < kernel_info.virt_region_start) { + printf("Jumping to kernel-image entry point...\n\n"); + } + + ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, + user_info.phys_region_end, + user_info.phys_virt_offset, + user_info.virt_entry, + (word_t)dtb, + dtb_size); + + /* We should never get here. */ + printf("ERROR: Kernel returned back to the ELF Loader\n"); + abort(); +} +#else void continue_boot(int was_relocated) { if (was_relocated) { @@ -232,3 +318,4 @@ void continue_boot(int was_relocated) printf("ERROR: Kernel returned back to the ELF Loader\n"); abort(); } +#endif diff --git a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds index bbbc502f..33513349 100644 --- a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds +++ b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds @@ -61,6 +61,7 @@ SECTIONS .dynstr : { *(.dynstr) } . = ALIGN(4096); .note.gnu.build-id : { *(.note.gnu.build-id) } + _end = .; /DISCARD/ : { *(.rel.reloc) From b9c6f44eb9526415ead747394ed4787d3877eec9 Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Thu, 16 Nov 2023 13:05:29 +1100 Subject: [PATCH 03/19] elfloader: arm: stabilize secondary core booting EFI may boot the elfloader with caches disabled on the secondary cores, we want the value of non_boot_lock to be visible. Some barriers are added to stabilize SMP booting in the elfloader. Co-authored-by: Yanyan Shen Co-authored-by: Matthias Rosenfelder Signed-off-by: Andy Bui --- .../src/arch-arm/armv/armv8-a/64/smp.c | 6 ++++++ .../src/arch-arm/drivers/smp-psci.c | 9 +++++++- elfloader-tool/src/arch-arm/smp_boot.c | 21 +++++++++++++++++-- 3 files changed, 33 insertions(+), 3 deletions(-) diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c b/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c index a86b02b4..edbf07e5 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c @@ -35,6 +35,12 @@ void core_entry(uint64_t sp) int is_core_up(int i) { + /* Secondary core may be booted with caches disabled, + * this value might be written in memory, invalidate our + * copy and get a new one. 
*/ + asm volatile("dc ivac, %0\n\t" + "dmb nsh\n\t" + :: "r"(&core_up[i])); return core_up[i] == i; } diff --git a/elfloader-tool/src/arch-arm/drivers/smp-psci.c b/elfloader-tool/src/arch-arm/drivers/smp-psci.c index ef3ea012..ae5fe951 100644 --- a/elfloader-tool/src/arch-arm/drivers/smp-psci.c +++ b/elfloader-tool/src/arch-arm/drivers/smp-psci.c @@ -3,6 +3,7 @@ * * SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include #include @@ -24,7 +25,13 @@ static int smp_psci_cpu_on(UNUSED struct elfloader_device *dev, } secondary_data.entry = entry; secondary_data.stack = stack; - dmb(); +#if defined(CONFIG_ARCH_AARCH64) + /* If the secondary core caches are off, need to make sure that the info + * is clean to the physical memory so that the sedcondary cores can read it. + */ + asm volatile("dc cvac, %0" :: "r"(&secondary_data)); + dsb(); +#endif int ret = psci_cpu_on(cpu->cpu_id, (unsigned long)&secondary_startup, 0); if (ret != PSCI_SUCCESS) { printf("Failed to bring up core 0x%x with error %d\n", cpu->cpu_id, ret); diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index d49426ad..7ed5fecb 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -34,7 +34,11 @@ void non_boot_main(void) #endif /* Spin until the first CPU has finished initialisation. */ while (!non_boot_lock) { -#ifndef CONFIG_ARCH_AARCH64 +#ifdef CONFIG_ARCH_AARCH64 + /* The compiler may optimize this loop away, add a dsb() + * to force a reload. */ + dsb(); +#else cpu_idle(); #endif } @@ -124,7 +128,13 @@ WEAK void init_cpus(void) abort(); } - while (!is_core_up(num_cpus)); + while (!is_core_up(num_cpus)) { +#if defined(CONFIG_ARCH_AARCH64) + /* The compiler may optimize this loop away, add a dsb() + * to force a reload. */ + dsb(); +#endif + } printf("Core %d is up with logic id %d\n", elfloader_cpus[i].cpu_id, num_cpus); num_cpus++; } @@ -141,6 +151,13 @@ void smp_boot(void) arm_disable_dcaches(); #endif init_cpus(); +#if defined(CONFIG_ARCH_AARCH64) + dsb(); + non_boot_lock = 1; + /* Secondary CPUs may still run with MMU & caches off. Force the update to be visible. */ + asm volatile("dc civac, %0\n\t" :: "r"(&non_boot_lock) : "memory");; +#else non_boot_lock = 1; +#endif } #endif /* CONFIG_MAX_NUM_NODES */ From 0cdf9c27c3aeb335819562a8ead3bd10514e5011 Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Thu, 16 Nov 2023 14:49:37 +1100 Subject: [PATCH 04/19] elfloader: arm: move kernel page table init code Move init_boot_vspace so that we can clean to PoC. This is only doable by va, hence we initialize the kernel page tables before the MMU is reset. Signed-off-by: Andy Bui --- elfloader-tool/src/arch-arm/64/mmu.c | 6 ++++++ elfloader-tool/src/arch-arm/sys_boot.c | 14 +++++++------- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index 75d3b0a5..4cedf5d1 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include /* * Create a "boot" page table, which contains a 1:1 mapping below @@ -55,6 +56,11 @@ void init_boot_vspace(struct image_info *kernel_info) | BIT(0); /* 2M block */ first_paddr += BIT(ARM_2MB_BLOCK_BITS); } + + /* Architecturally required barrier to make all writes to pagetable memories + * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. 
+ */ + dsb(); } void init_hyp_boot_vspace(struct image_info *kernel_info) diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index b15683bd..37bc7d9f 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -184,6 +184,13 @@ void continue_boot(int was_relocated) initialise_devices(); } + /* Setup MMU. */ +#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT) + init_hyp_boot_vspace(&kernel_info); +#else + init_boot_vspace(&kernel_info); +#endif + /* If in EL2, disable MMU and I/D cacheability unconditionally */ if (is_hyp_mode()) { extern void disable_mmu_caches_hyp(void); @@ -216,13 +223,6 @@ void continue_boot(int was_relocated) /* Switch to EL1, assume EL2 MMU already disabled for ARMv8. */ leave_hyp(); #endif - /* Setup MMU. */ - if (is_hyp_mode()) { - init_hyp_boot_vspace(&kernel_info); - } else { - /* If we are not in HYP mode, we enable the SV MMU and paging - * just in case the kernel does not support hyp mode. */ - init_boot_vspace(&kernel_info); } #if CONFIG_MAX_NUM_NODES > 1 From 0108b774b0511bc81e4ccb86aae69aba36468aea Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Thu, 16 Nov 2023 15:19:52 +1100 Subject: [PATCH 05/19] elfloader: arm: do not save FP and LR on stack Since we do not have to branch to another label, there is no need to follow the ABI here. This removes 2 memory access before and after changing the state of the MMU, which should overall reduce the chance of any speculative fetches going wrong. Signed-off-by: Andy Bui --- elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S | 5 ----- elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S | 5 ----- 2 files changed, 10 deletions(-) diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S index 55a15de7..8b9fd216 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S @@ -53,10 +53,6 @@ clean_dcache_by_range_loop: END_FUNC(clean_dcache_by_range) BEGIN_FUNC(leave_hyp) - /* We call nested functions, follow the ABI. */ - stp x29, x30, [sp, #-16]! - mov x29, sp - /* Ensure I-cache, D-cache and mmu are disabled for EL1/Stage2 */ mov x9, #(1 << 31) msr hcr_el2, x9 @@ -74,7 +70,6 @@ BEGIN_FUNC(leave_hyp) msr spsr_el2, x9 /* Let's the caller use our stack, in case it needs to pop something */ - ldp x29, x30, [sp], #16 mov x10, sp msr sp_el1, x10 msr elr_el2, x30 diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S index bbb80fa7..705c5671 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu.S @@ -39,10 +39,6 @@ BEGIN_FUNC(flush_dcache) END_FUNC(flush_dcache) BEGIN_FUNC(arm_enable_mmu) - /* We call nested functions, follow the ABI. */ - stp x29, x30, [sp, #-16]! - mov x29, sp - /* * DEVICE_nGnRnE 000 00000000 * DEVICE_nGnRE 001 00000100 @@ -81,6 +77,5 @@ BEGIN_FUNC(arm_enable_mmu) adrp x8, arm_vector_table msr vbar_el1, x8 - ldp x29, x30, [sp], #16 ret END_FUNC(arm_enable_mmu) From 7b3c2f32491a3491817e2196f4fa40d4f0c93284 Mon Sep 17 00:00:00 2001 From: Tw Date: Tue, 11 Jul 2023 11:14:10 +0800 Subject: [PATCH 06/19] elfloader: arm: fix incorrect sp when restoring bootloader parameters. 
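Assuming the original bootloader arguments were pushed earlier with a
pre-indexed store, the matching restore must use a post-indexed load.
Roughly (illustrative, mirroring the code in crt0.S):

    stp x0, x1, [sp, #-16]!   /* push: pre-index, sp is decremented first */
    ...
    ldp x0, x1, [sp], #16     /* pop: post-index, load, then sp += 16 */

The previous pre-indexed 'ldp x0, x1, [sp, #-16]!' read from below the
saved pair and left sp pointing below it.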
Signed-off-by: Tw --- elfloader-tool/src/arch-arm/64/crt0.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elfloader-tool/src/arch-arm/64/crt0.S b/elfloader-tool/src/arch-arm/64/crt0.S index acd4de92..7d1249be 100644 --- a/elfloader-tool/src/arch-arm/64/crt0.S +++ b/elfloader-tool/src/arch-arm/64/crt0.S @@ -29,7 +29,7 @@ BEGIN_FUNC(_start) bl fixup_image_base mov x2, x0 /* restore original arguments for next step */ - ldp x0, x1, [sp, #-16]! + ldp x0, x1, [sp], #16 /* fixup_image_base returns 1 if no need to move */ cmp x2, #1 beq 1f From 1aa2fb03cae603fdb3a83fb791fbcabe2fb866f5 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Tue, 11 Jul 2023 15:54:12 +0200 Subject: [PATCH 07/19] elfloader: arm: fix function declaration (type mismatch). This chops off the aff3 level on Aarch64. Why does the compiler not warn??? Because the own header was not included. If you just include the header (without the change of the return value in the header), we get: seL4_tools/elfloader-tool/src/arch-arm/64/cpuid.c:14:10: error: conflicting types for 'read_cpuid_mpidr' 14 | uint32_t read_cpuid_mpidr(void) | ^~~~~~~~~~~~~~~~ In file included from /home/mro/nvos_neu2/tools/seL4_tools/ elfloader-tool/src/arch-arm/64/cpuid.c:9: elfloader-tool/include/arch-arm/cpuid.h:15:8: note: previous declaration of 'read_cpuid_mpidr' was here 15 | word_t read_cpuid_mpidr(void); | ^~~~~~~~~~~~~~~~ [190/200] Building C object elfloader-tool/CMakeFiles/elfloader.dir/ src/arch-arm/smp_boot.c.obj ninja: build stopped: subcommand failed. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/include/arch-arm/cpuid.h | 2 +- elfloader-tool/src/arch-arm/64/cpuid.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/elfloader-tool/include/arch-arm/cpuid.h b/elfloader-tool/include/arch-arm/cpuid.h index f84612be..c0e1a6ce 100644 --- a/elfloader-tool/include/arch-arm/cpuid.h +++ b/elfloader-tool/include/arch-arm/cpuid.h @@ -12,7 +12,7 @@ uint32_t read_cpuid_id(void); /* read MP ID register from CPUID */ -uint32_t read_cpuid_mpidr(void); +word_t read_cpuid_mpidr(void); /* check if CPU is in HYP/EL2 mode */ word_t is_hyp_mode(void); diff --git a/elfloader-tool/src/arch-arm/64/cpuid.c b/elfloader-tool/src/arch-arm/64/cpuid.c index 66d9d09b..6c979a0f 100644 --- a/elfloader-tool/src/arch-arm/64/cpuid.c +++ b/elfloader-tool/src/arch-arm/64/cpuid.c @@ -6,6 +6,7 @@ #include #include +#include /* we only care about the affinity bits */ #define MPIDR_MASK (0xff00ffffff) From c329bef78ba38a83aa0b97663fae96b18ce25601 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Tue, 18 Jul 2023 17:32:57 +0200 Subject: [PATCH 08/19] elfloader: arm: fix alignment of AArch32 pagetables. The 64 kiB alignment is a maximum requirement for a stage2 concatenated pagetable. See Table G5-4 in ARM DDI 0487I.a, page G5-9186. Note: Both comments at the top of the file as well as in line 85 say "64 kiB". 2^14 is unfortunately only 16 kiB. Note2: This code is not executed on AArch64, because the finish_relocation() function panics on AArch64. The latter always takes the "shortcut" via continue_boot(). 
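For reference, a minimal sketch of the constants involved (BIT() as used
elsewhere in the elfloader; the relocation logic itself is unchanged):

    #define BIT(n)          (1UL << (n))
    #define MAX_ALIGN_BITS  (16)   /* BIT(16) == 64 KiB */

    _Static_assert(BIT(16) == 64 * 1024, "64 KiB, as the comments say");
    _Static_assert(BIT(14) == 16 * 1024, "the old value was only 16 KiB");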
Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/arch-arm/sys_boot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index 37bc7d9f..895a6dfc 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -23,7 +23,7 @@ #define DTB_MAGIC (0xedfe0dd0) /* Maximum alignment we need to preserve when relocating (64K) */ -#define MAX_ALIGN_BITS (14) +#define MAX_ALIGN_BITS (16) #ifdef CONFIG_IMAGE_EFI ALIGN(BIT(PAGE_BITS)) VISIBLE From e8a15ec23cf48b473133069a33674c15d2fc4d74 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Tue, 22 Aug 2023 20:38:03 +0200 Subject: [PATCH 09/19] elfloader: fix variable prototype and remove var shadowing. The variable "dtb_size" is of type "size_t" and is defined in src/arch-arm/sys_boot.c, line 36. "size_t" is most certainly NOT the same size as "uint32_t", even on 32-bit architectures. Thus, the declaration in smp_boot.c is incorrect, since it does not match the definition in sys_boot.c. Why even create a local declaration - put this in a common header file and you will see those problems right away. Single point of maintenance! This may lead to an incorrectly sized memory access, that only happens to be correct by chance in Little-Endian mode. For ARM in Big-Endian mode this is a bug and will most likely result in an incorrect DTB size of zero. This fixes c5735119 ("elfloader: pass DTB from bootloader to seL4 on ARM"). Moreover, remove the shadowing of global variables by defining local ones with the same name => Rename the local one in src/common.c. This could have been detected with "-Wshadow". Practically speaking our DTBs are always (a lot) smaller than 32-bit. Thus, continue to pass a 32-bit size to the kernel in order to not change the API here. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/arch-arm/smp_boot.c | 6 +++--- elfloader-tool/src/arch-arm/sys_boot.c | 3 ++- elfloader-tool/src/common.c | 12 ++++++------ 3 files changed, 11 insertions(+), 10 deletions(-) diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index 7ed5fecb..704d90d2 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -24,7 +24,7 @@ static volatile int non_boot_lock = 0; void arm_disable_dcaches(void); extern void const *dtb; -extern uint32_t dtb_size; +extern size_t dtb_size; /* Entry point for all CPUs other than the initial. */ void non_boot_main(void) @@ -69,10 +69,10 @@ void non_boot_main(void) arm_enable_mmu(); } - /* Jump to the kernel. */ + /* Jump to the kernel. Note: Our DTB is smaller than 4 GiB. */ ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, user_info.phys_region_end, user_info.phys_virt_offset, - user_info.virt_entry, (paddr_t)dtb, dtb_size); + user_info.virt_entry, (paddr_t)dtb, (uint32_t)dtb_size); printf("AP Kernel returned back to the elf-loader.\n"); abort(); diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index 895a6dfc..f63e9824 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -242,12 +242,13 @@ void continue_boot(int was_relocated) printf("Jumping to kernel-image entry point...\n\n"); } + /* Jump to the kernel. Note: Our DTB is smaller than 4 GiB. 
*/ ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, user_info.phys_region_end, user_info.phys_virt_offset, user_info.virt_entry, (word_t)dtb, - dtb_size); + (uint32_t)dtb_size); /* We should never get here. */ printf("ERROR: Kernel returned back to the ELF Loader\n"); diff --git a/elfloader-tool/src/common.c b/elfloader-tool/src/common.c index 9846422f..d351df06 100644 --- a/elfloader-tool/src/common.c +++ b/elfloader-tool/src/common.c @@ -468,29 +468,29 @@ int load_images( /* keep it page aligned */ next_phys_addr = dtb_phys_start = ROUND_UP(kernel_phys_end, PAGE_BITS); - size_t dtb_size = fdt_size(dtb); - if (0 == dtb_size) { + size_t dtb_sz = fdt_size(dtb); + if (0 == dtb_sz) { printf("ERROR: Invalid device tree blob supplied\n"); return -1; } /* Make sure this is a sane thing to do */ ret = ensure_phys_range_valid(next_phys_addr, - next_phys_addr + dtb_size); + next_phys_addr + dtb_sz); if (0 != ret) { printf("ERROR: Physical address of DTB invalid\n"); return -1; } - memmove((void *)next_phys_addr, dtb, dtb_size); - next_phys_addr += dtb_size; + memmove((void *)next_phys_addr, dtb, dtb_sz); + next_phys_addr += dtb_sz; next_phys_addr = ROUND_UP(next_phys_addr, PAGE_BITS); dtb_phys_end = next_phys_addr; printf("Loaded DTB from %p.\n", dtb); printf(" paddr=[%p..%p]\n", dtb_phys_start, dtb_phys_end - 1); *chosen_dtb = (void *)dtb_phys_start; - *chosen_dtb_size = dtb_size; + *chosen_dtb_size = dtb_sz; } else { next_phys_addr = ROUND_UP(kernel_phys_end, PAGE_BITS); } From dea692439d0e8e3f07edf74ab63a845f11fcfee8 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Mon, 14 Aug 2023 12:04:25 +0200 Subject: [PATCH 10/19] elfloader: fix EFI image size The size calculation was incorrect, unfortunately. That lead to an incorrect memory map provided by UEFI. When switching on EFI_DEBUG one can see that the memory range occupied by the ELF-loader ("paddr=" line) is not fully marked as used by UEFI and the last few pages of the ELF-loader are actually marked as being free. Output before: ELF-loader started on Image ranges: paddr=[7f9bc0000..7fcde1fff] text[7f9bc0000..7f9bd1e8f] data[7f9bd2000..7fcddd9ff] bss[7f9bd2850..7f9c0a4cf] edata[7fcddda00..7fcde1fff] [...] [paddr=0x180023000-0x7f9bbffff] [type = Conventional, attr: Normal <- ok [paddr=0x7f9bc0000-0x7fcdddfff] [type = Loader Code, attr: Normal <- Not ok: end address too low Should be 0x4000 higher [paddr=0x7fcdde000-0x7ffffffff] [type = Conventional, attr: Normal <- Not ok: start address too low Should be 0x4000 higher After: ELF-loader started on Image ranges: paddr=[7f9bbc000..7fcdddfff] text[7f9bbc000..7f9bcde8f] data[7f9bce000..7fcdd99ff] bss[7f9bce850..7f9c064cf] edata[7fcdd9a00..7fcdddfff] [...] [paddr=0x180023000-0x7f9bbbfff] [type = Conventional, attr: Normal <- ok [paddr=0x7f9bbc000-0x7fcdddfff] [type = Loader Code, attr: Normal <- ok (same as above) [paddr=0x7fcdde000-0x7ffffffff] [type = Conventional, attr: Normal <- ok (starts one after paddr end) Note: You don't have that debug output (EFI_DEBUG) in your code, that prints the UEFI memory map. So you have to believe me that this is the actual output. This fixes 030d83bf ("elfloader: improve EFI support"). 
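In other words (a simplified sketch of the quantities involved; the symbols
are the linker-script ones, the helper function is illustrative):

    /* _end marks the true end of the loaded image; everything up to it,
     * including the sections placed after _edata, must be reserved. */
    extern char ImageBase[], _edata[], _end[];

    static inline unsigned long efi_image_size(void)
    {
        /* SizeOfImage must cover everything up to _end, not just _edata;
         * otherwise UEFI marks the last pages of the image as free. */
        return (unsigned long)(_end - ImageBase);
    }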
Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S b/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S index 13792b87..c735616b 100644 --- a/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S +++ b/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S @@ -64,7 +64,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // Win32VersionValue - .long _edata - ImageBase // SizeOfImage + .long _end - ImageBase // SizeOfImage // Everything before the kernel image is considered part of the header .long _gnuefi_start - ImageBase // SizeOfHeaders From 65b49ae06483a221b269b622bb3d9c748b7890d6 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Wed, 23 Aug 2023 22:53:00 +0200 Subject: [PATCH 11/19] elfloader: move the data of ELFloader together The driver list section was part of the "*(COMMON)" section that was placed *after* the image payload (kernel, rootserver etc.). The driver list is data and should be placed adjacent to other data belonging to the ELFloader. This is clearly visible in the mapfile (which you don't generate). The driver list entry is present in the aarch32 linker script for EFI, why was it missing for 64 bit? No functional change. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds | 3 +++ 1 file changed, 3 insertions(+) diff --git a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds index 33513349..5ee11d7e 100644 --- a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds +++ b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds @@ -31,6 +31,9 @@ SECTIONS *(.data) *(.data1) *(.data.*) + __start__driver_list = .; + *(_driver_list) + __stop__driver_list = .; *(.got.plt) *(.got) From fa81fff7f15e341737554d55f3731b9b88f31fe3 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Fri, 14 Jul 2023 12:36:13 +0200 Subject: [PATCH 12/19] elfloader: arm: do not hard-code values Use existing defines to make the code more descriptive. For this move some defines out of the assembler-only file. This is a preparation patch for upcoming patches. No functional change. Signed-off-by: Matthias Rosenfelder --- .../include/arch-arm/64/mode/aarch64.h | 65 +++++++++++++++++++ .../include/arch-arm/64/mode/assembler.h | 58 +---------------- elfloader-tool/src/arch-arm/64/mmu.c | 9 +-- 3 files changed, 71 insertions(+), 61 deletions(-) create mode 100644 elfloader-tool/include/arch-arm/64/mode/aarch64.h diff --git a/elfloader-tool/include/arch-arm/64/mode/aarch64.h b/elfloader-tool/include/arch-arm/64/mode/aarch64.h new file mode 100644 index 00000000..e46611c4 --- /dev/null +++ b/elfloader-tool/include/arch-arm/64/mode/aarch64.h @@ -0,0 +1,65 @@ +/* + * Copyright 2023, NIO GmbH + * + * SPDX-License-Identifier: GPL-2.0-only + */ +#pragma once + +/* This file contains useful defines for assembly and C code. 
*/ + +#define PSR_F_BIT 0x00000040 +#define PSR_I_BIT 0x00000080 +#define PSR_A_BIT 0x00000100 +#define PSR_D_BIT 0x00000200 + +#define PSR_MODE_EL0t 0x00000000 +#define PSR_MODE_EL1t 0x00000004 +#define PSR_MODE_EL1h 0x00000005 +#define PSR_MODE_EL2t 0x00000008 +#define PSR_MODE_EL2h 0x00000009 +#define PSR_MODE_SVC_32 0x00000013 + +#define TCR_T0SZ(x) ((64 - (x))) +#define TCR_T1SZ(x) ((64 - (x)) << 16) +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) + +#define TCR_IRGN0_WBWC (1 << 8) +#define TCR_IRGN_NC ((0 << 8) | (0 << 24)) +#define TCR_IRGN_WBWA ((1 << 8) | (1 << 24)) +#define TCR_IRGN_WT ((2 << 8) | (2 << 24)) +#define TCR_IRGN_WBnWA ((3 << 8) | (3 << 24)) +#define TCR_IRGN_MASK ((3 << 8) | (3 << 24)) + +#define TCR_ORGN0_WBWC (1 << 10) +#define TCR_ORGN_NC ((0 << 10) | (0 << 26)) +#define TCR_ORGN_WBWA ((1 << 10) | (1 << 26)) +#define TCR_ORGN_WT ((2 << 10) | (2 << 26)) +#define TCR_ORGN_WBnWA ((3 << 10) | (3 << 26)) +#define TCR_ORGN_MASK ((3 << 10) | (3 << 26)) + +#define TCR_SH0_ISH (3 << 12) +#define TCR_SHARED ((3 << 12) | (3 << 28)) + +#define TCR_TG0_4K (0 << 14) +#define TCR_TG0_64K (1 << 14) +#define TCR_TG1_4K (2 << 30) +#define TCR_TG1_64K (3 << 30) + +#define TCR_PS_4G (0 << 16) +#define TCR_PS_64G (1 << 16) +#define TCR_PS_1T (2 << 16) +#define TCR_PS_4T (3 << 16) +#define TCR_PS_16T (4 << 16) +#define TCR_PS_256T (5 << 16) + +/* bits are reserved as 1 */ +#define TCR_EL2_RES1 ((1 << 23) | (1 << 31)) +#define TCR_ASID16 (1 << 36) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 +#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 +#define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) diff --git a/elfloader-tool/include/arch-arm/64/mode/assembler.h b/elfloader-tool/include/arch-arm/64/mode/assembler.h index d8863133..1d235a7e 100644 --- a/elfloader-tool/include/arch-arm/64/mode/assembler.h +++ b/elfloader-tool/include/arch-arm/64/mode/assembler.h @@ -9,63 +9,7 @@ /* This file contains useful macros for assembly code. 
*/ #ifdef __ASSEMBLER__ - -#define PSR_F_BIT 0x00000040 -#define PSR_I_BIT 0x00000080 -#define PSR_A_BIT 0x00000100 -#define PSR_D_BIT 0x00000200 - -#define PSR_MODE_EL0t 0x00000000 -#define PSR_MODE_EL1t 0x00000004 -#define PSR_MODE_EL1h 0x00000005 -#define PSR_MODE_EL2t 0x00000008 -#define PSR_MODE_EL2h 0x00000009 -#define PSR_MODE_SVC_32 0x00000013 - -#define TCR_T0SZ(x) ((64 - (x))) -#define TCR_T1SZ(x) ((64 - (x)) << 16) -#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) - -#define TCR_IRGN0_WBWC (1 << 8) -#define TCR_IRGN_NC ((0 << 8) | (0 << 24)) -#define TCR_IRGN_WBWA ((1 << 8) | (1 << 24)) -#define TCR_IRGN_WT ((2 << 8) | (2 << 24)) -#define TCR_IRGN_WBnWA ((3 << 8) | (3 << 24)) -#define TCR_IRGN_MASK ((3 << 8) | (3 << 24)) - -#define TCR_ORGN0_WBWC (1 << 10) -#define TCR_ORGN_NC ((0 << 10) | (0 << 26)) -#define TCR_ORGN_WBWA ((1 << 10) | (1 << 26)) -#define TCR_ORGN_WT ((2 << 10) | (2 << 26)) -#define TCR_ORGN_WBnWA ((3 << 10) | (3 << 26)) -#define TCR_ORGN_MASK ((3 << 10) | (3 << 26)) - -#define TCR_SH0_ISH (3 << 12) -#define TCR_SHARED ((3 << 12) | (3 << 28)) - -#define TCR_TG0_4K (0 << 14) -#define TCR_TG0_64K (1 << 14) -#define TCR_TG1_4K (2 << 30) -#define TCR_TG1_64K (3 << 30) - -#define TCR_PS_4G (0 << 16) -#define TCR_PS_64G (1 << 16) -#define TCR_PS_1T (2 << 16) -#define TCR_PS_4T (3 << 16) -#define TCR_PS_16T (4 << 16) -#define TCR_PS_256T (5 << 16) - -/* bits are reserved as 1 */ -#define TCR_EL2_RES1 ((1 << 23) | (1 << 31)) -#define TCR_ASID16 (1 << 36) - -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 -#define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) +#include .macro disable_mmu sctlr tmp __disable_mmu \sctlr, \tmp diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index 4cedf5d1..e927f3a6 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -11,6 +11,7 @@ #include #include #include +#include #include /* @@ -31,7 +32,7 @@ void init_boot_vspace(struct image_info *kernel_info) for (i = 0; i < BIT(PUD_BITS); i++) { _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) | BIT(10) /* access flag */ - | (0 << 2) /* strongly ordered memory */ + | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ | BIT(0); /* 1G block */ } @@ -52,7 +53,7 @@ void init_boot_vspace(struct image_info *kernel_info) #if CONFIG_MAX_NUM_NODES > 1 | (3 << 8) /* make sure the shareability is the same as the kernel's */ #endif - | (4 << 2) /* MT_NORMAL memory */ + | (MT_NORMAL << 2) /* MT_NORMAL memory */ | BIT(0); /* 2M block */ first_paddr += BIT(ARM_2MB_BLOCK_BITS); } @@ -74,7 +75,7 @@ void init_hyp_boot_vspace(struct image_info *kernel_info) for (i = 0; i < BIT(PUD_BITS); i++) { _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) | BIT(10) /* access flag */ - | (0 << 2) /* strongly ordered memory */ + | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ | BIT(0); /* 1G block */ } @@ -91,7 +92,7 @@ void init_hyp_boot_vspace(struct image_info *kernel_info) #if CONFIG_MAX_NUM_NODES > 1 | (3 << 8) #endif - | (4 << 2) /* MT_NORMAL memory */ + | (MT_NORMAL << 2) /* MT_NORMAL memory */ | BIT(0); /* 2M block */ } } From 33f00c8babeb5dd3014115cd8b6e5c5ac39a557b Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Mon, 21 Aug 2023 21:09:39 +0200 Subject: [PATCH 13/19] elfloader: arm: fix potential UB in right shift MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Regarding right shifts the 
standard says: "The type of the result is that of the promoted left operand. The behavior is undefined if the right operand is negative, or greater than or equal to the length in bits of the promoted left operand." Corresponding GCC warning (if used on a "small" type like uint8_t): main.c:25:39: warning: right shift count >= width of type [-Wshift-count-overflow] \#define GET_PGD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS + PUD_BITS)) & MASK(PGD_BITS)) main.c:46:39: note: in expansion of macro ‘GET_PGD_INDEX’ 46 | printf("GET_PGD_INDEX(x): %lu\n", GET_PGD_INDEX(x)); | ^~~~~~~~~~~~~ Thus, make sure that we never exceed/reach it by explicitly casting to a 64-bit type. It also allows using a pointer as macro parameter. This is a preparation patch for upcoming patches. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/include/arch-arm/64/mode/structures.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/elfloader-tool/include/arch-arm/64/mode/structures.h b/elfloader-tool/include/arch-arm/64/mode/structures.h index f77ef93d..aaa8bced 100644 --- a/elfloader-tool/include/arch-arm/64/mode/structures.h +++ b/elfloader-tool/include/arch-arm/64/mode/structures.h @@ -21,9 +21,9 @@ #define PMD_BITS 9 #define PMD_SIZE_BITS (PMD_BITS + PMDE_SIZE_BITS) -#define GET_PGD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS + PUD_BITS)) & MASK(PGD_BITS)) -#define GET_PUD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS)) & MASK(PUD_BITS)) -#define GET_PMD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) +#define GET_PGD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS + PUD_BITS)) & MASK(PGD_BITS)) +#define GET_PUD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS)) & MASK(PUD_BITS)) +#define GET_PMD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) extern uint64_t _boot_pgd_up[BIT(PGD_BITS)]; extern uint64_t _boot_pud_up[BIT(PUD_BITS)]; From 4996de7d5e086f894fa49d2f2b6e35a7d9c44aee Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Wed, 23 Aug 2023 19:25:51 +0200 Subject: [PATCH 14/19] elfloader: setup pagetables as needed This change sets up pagetables individually for: - The ELFloader image (Normal memory) - The DTB, whether supplied by EFI, cpio or u-boot (Normal mem) - The UART MMIO range (Strongly-Ordered mem) Thus, it removes the bulk 512 GiB 1:1 mapping that was there before. This resulted in problems, since the kernel image was mapped with Normal memory, but the same physical memory was part of the 1:1 Strongly-Ordered mapping. This fulfills the definition of "Mismatched memory attributes" from the ARM Architecture specification (ARM DDI 0487I.a, section B2.8). Even though I am currently unable to see where there would *occur* such a mismatched access, having such a mapping situation is certainly not desirable and should be avoided. Moreover, it is unclear whether there could arise problems from establishing the (Strongly-ordered) mapping if there is nothing behind a physical address (which is certainly true for some parts of the 512 GiB range). This commit solves the sporadics hangs while booting after the "Enabling MMU and ..." message. Tests on several different Orins (Muc and SJ) show promising results, i.e. no "hangs" occurred anymore. Note: The code in arm_switch_to_hyp_tables() still disables and re-enables both MMU & caches, but there are no memory accesses in between. That section has been engineered to be as short as possible and no memory accesses happen in between. 
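In outline, the switch looks roughly like this (simplified sketch;
MAIR_VALUE/TCR_VALUE stand in for the real constants, see
arm_switch_to_hyp_tables in this patch):

    /* Pre-compute everything while the MMU/caches may still be on. */
    ldr   x5, =MAIR_VALUE        /* placeholder for the real MAIR bits */
    ldr   x8, =TCR_VALUE         /* placeholder for the real TCR bits */
    adrp  x7, _boot_pgd_down
    disable_mmu sctlr_el2, x16   /* from here on: no memory accesses ... */
    msr   mair_el2, x5
    msr   tcr_el2, x8
    msr   ttbr0_el2, x7
    tlbi  alle2is
    dsb   sy
    isb
    enable_mmu sctlr_el2, x16    /* ... until the MMU is back on */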
Several barriers and code to invalidate instruction caches have been added, too, in order to be on the safe side. However, tests with just adding *that* code still showed the problem being present. The only change that showed behavior change was the change of translation tables. Thus, this *is* the actual solution to the instability problems. Moreover, we need to support crossing a 1 GiB page for placement of the ELFloader. This is due to the latest firmware on Orin0 in MUC, named "Jetson UEFI firmware (version 4.1-33958178)", which puts our image closely below a 1 GiB boundary. Only for tiny image sizes the boundary will not be crossed. Thus, we do not hard-code the writing of tables, because the logic for doing so while crossing a 1 GiB boundary is too complicated. Instead, we use a fully dynamic approach that walks the pagetables in software for a given VA and inserts missing levels on demand from a preallocated pool of pages. Only the two top-level pagetables are fixed. This allows for re-use of all pagetable code, where we only need to distinguish in one (!) place between hypervisor and non-hyp (or VHE). Signed-off-by: Matthias Rosenfelder --- .../include/arch-arm/64/mode/structures.h | 10 +- elfloader-tool/include/arch-arm/elfloader.h | 15 + elfloader-tool/include/drivers/uart.h | 5 + elfloader-tool/src/arch-arm/64/mmu.c | 441 +++++++++++++++--- elfloader-tool/src/arch-arm/64/structures.c | 7 +- .../src/arch-arm/armv/armv8-a/64/mmu-hyp.S | 27 +- elfloader-tool/src/arch-arm/smp_boot.c | 11 +- elfloader-tool/src/arch-arm/sys_boot.c | 20 +- elfloader-tool/src/drivers/uart/common.c | 3 + 9 files changed, 444 insertions(+), 95 deletions(-) diff --git a/elfloader-tool/include/arch-arm/64/mode/structures.h b/elfloader-tool/include/arch-arm/64/mode/structures.h index aaa8bced..dbc7a49f 100644 --- a/elfloader-tool/include/arch-arm/64/mode/structures.h +++ b/elfloader-tool/include/arch-arm/64/mode/structures.h @@ -6,6 +6,12 @@ #pragma once +/* ARM VMSAv8-64 (with a fully populated last level) has the same number of PTEs + * in all levels (we don't use concatenated pagetables in ELFloader) and each + * table entry is always eight bytes large. + */ +#define BITS_PER_LEVEL (PAGE_BITS - 3) + #define ARM_1GB_BLOCK_BITS 30 #define ARM_2MB_BLOCK_BITS 21 @@ -26,9 +32,5 @@ #define GET_PMD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) extern uint64_t _boot_pgd_up[BIT(PGD_BITS)]; -extern uint64_t _boot_pud_up[BIT(PUD_BITS)]; -extern uint64_t _boot_pmd_up[BIT(PMD_BITS)]; - extern uint64_t _boot_pgd_down[BIT(PGD_BITS)]; -extern uint64_t _boot_pud_down[BIT(PUD_BITS)]; diff --git a/elfloader-tool/include/arch-arm/elfloader.h b/elfloader-tool/include/arch-arm/elfloader.h index 93293a75..ceab4796 100644 --- a/elfloader-tool/include/arch-arm/elfloader.h +++ b/elfloader-tool/include/arch-arm/elfloader.h @@ -22,7 +22,22 @@ typedef void (*init_arm_kernel_t)(word_t ui_p_reg_start, /* Enable the mmu. */ extern void arm_enable_mmu(void); + +/* These functions are very similar however, there are some small differences + * between the ARMv8 and legacy implementation. + * + * New ARMv8 implementation: + * - Does the MMU disabling. This is to keep the time spent with MMU off low. + * - Is only meant if seL4 runs in EL2. + */ +#if defined(CONFIG_ARCH_AARCH64) +/* Switches MMU-related stuff: pagetables, MAIR & TCR etc. Works also if the MMU + * was off initially. EL2 translation regime only. 
+ */ +extern void arm_switch_to_hyp_tables(void); +#else extern void arm_enable_hyp_mmu(void); +#endif /* Setup boot VSpace. */ diff --git a/elfloader-tool/include/drivers/uart.h b/elfloader-tool/include/drivers/uart.h index 1fa9f970..74ce4b16 100644 --- a/elfloader-tool/include/drivers/uart.h +++ b/elfloader-tool/include/drivers/uart.h @@ -6,6 +6,7 @@ #pragma once +#include #include #define dev_get_uart(dev) ((struct elfloader_uart_ops *)(dev->drv->ops)) @@ -16,3 +17,7 @@ struct elfloader_uart_ops { volatile void *uart_get_mmio(void); void uart_set_out(struct elfloader_device *out); +#if defined(CONFIG_ARCH_AARCH64) +/* Implemented in mmu.c */ +void mmu_set_uart_base(volatile void *base); +#endif diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index e927f3a6..c70b5466 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -11,88 +11,415 @@ #include #include #include +#include #include -#include +#include /* dsb() */ +#include -/* -* Create a "boot" page table, which contains a 1:1 mapping below -* the kernel's first vaddr, and a virtual-to-physical mapping above the -* kernel's first vaddr. -*/ -void init_boot_vspace(struct image_info *kernel_info) +/* Note: "typeof()" is a GCC extension that is supported by Clang, too. */ +#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x)) +#define WRITE_ONCE(var, value) \ + *((volatile typeof(var) *)(&(var))) = (value); + + +//#define DEBUG_PAGETABLES + +#ifndef DEBUG_PAGETABLES +#define dbg_printf(...) /* empty */ +static void dgb_print_2M_mapping_details(const char *map_name UNUSED, + paddr_t pa UNUSED, size_t size UNUSED) {} +#else +#define dbg_printf(...) printf(__VA_ARGS__) + +static int dgb_print_2M_mapping_indices(paddr_t pa) { - word_t i; + return printf("%u.%u.%u.X", + GET_PGD_INDEX(pa), + GET_PUD_INDEX(pa), + GET_PMD_INDEX(pa)); +} - vaddr_t first_vaddr = kernel_info->virt_region_start; - vaddr_t last_vaddr = kernel_info->virt_region_end; - paddr_t first_paddr = kernel_info->phys_region_start; +static void dgb_print_2M_mapping_details(const char *map_name, paddr_t pa, size_t size) +{ + int cnt = 0; + paddr_t pa_start = pa; + size_t orig_sz = size; - _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0); /* its a page table */ + pa = ROUND_DOWN(pa, ARM_2MB_BLOCK_BITS); + size += (pa_start - pa); + size = ROUND_UP(size, ARM_2MB_BLOCK_BITS); - for (i = 0; i < BIT(PUD_BITS); i++) { - _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) - | BIT(10) /* access flag */ - | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ - | BIT(0); /* 1G block */ + cnt += dgb_print_2M_mapping_indices(pa); + if (orig_sz) { + while (cnt < 11) { + printf(" "); + cnt++; + } + cnt += printf("--"); + while (cnt < 16) { + printf(" "); + cnt++; + } + cnt += dgb_print_2M_mapping_indices(pa + size - 1); + } + while (cnt < 27) { + printf(" "); + cnt++; } + if (orig_sz) { + printf("PA 0x%lx - 0x%lx (size: %lu MiB): %s\n", pa, pa + size - 1, size / 1024u / 1024, map_name); + } else { + /* No range given, just a single 2 MiB page */ + printf("PA 0x%lx: %s\n", pa, map_name); + } +} +#endif /* DEBUG_PAGETABLES */ - _boot_pgd_up[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ +/* Page allocator. Contains a fixed number of pages. All page-aligned. No returning possible. 
*/ - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ +#define NUM_PAGES 7 +static char pages[BIT(PAGE_BITS) * NUM_PAGES] ALIGN(BIT(PGD_SIZE_BITS)); +static unsigned page_cnt; - /* We only map in 1 GiB, so check that the kernel doesn't cross 1GiB boundary. */ - if ((first_vaddr & ~MASK(ARM_1GB_BLOCK_BITS)) != (last_vaddr & ~MASK(ARM_1GB_BLOCK_BITS))) { - printf("We only map 1GiB, but kernel vaddr range covers multiple GiB.\n"); - abort(); +static void *get_page(void) +{ + void *ret = NULL; + + if (page_cnt == 0) { + dbg_printf("get_page(): pages @ 0x%p\n", pages); } - for (i = GET_PMD_INDEX(first_vaddr); i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = first_paddr - | BIT(10) /* access flag */ + + if (page_cnt < NUM_PAGES) { + ret = &pages[BIT(PAGE_BITS) * page_cnt]; + dbg_printf("get_page(): ret: 0x%p (%u->%u)\n", ret, page_cnt, page_cnt + 1); + page_cnt ++; + } + + return ret; +} + +/* Translate a PA to a VA such that when accessing the VA we end up at that PA. + * Usually done in OS kernels via a physical memory map which has a constant + * virt-to-phys offset. Here this is the same, since either the MMU is off or + * we're running on the identity mapping. + */ +static inline uint64_t pa_to_va(uint64_t pa) +{ + return pa; +} + +static inline uint64_t va_to_pa(uint64_t va) +{ + return va; +} + +typedef uint64_t pte_t; + +/* This can be used to clear unwanted bits from a PA that is supposed to be put + * into a PTE/PDE; or it can be used to extract the PA from a PTE/PDE. + */ +static inline uint64_t mask_pa(uint64_t pa) +{ + /* Mask out the upper 16 bits and lower 12 bits. Only 48-bit OA for now. */ + return (pa & 0x0000FFFFFFFFF000); +} + +static inline uintptr_t pde_to_paddr(uint64_t pde_val) +{ + /* ARM DDI ARM DDI 0487I.a, page D8-5124 */ + return mask_pa(pde_val); +} + +static inline uint64_t make_pde(uintptr_t pa) +{ + /* For now we set all (upper) attributes to zero */ + return (mask_pa(pa) | BIT(1) | BIT(0)); +} + +/* Accept a pointer, otherwise same as make_pde() */ +static inline uint64_t make_pde_from_ptr(pte_t *pagetable_target) +{ + return make_pde(va_to_pa((uintptr_t)pagetable_target)); +} + +/* ARM DDI 0487I.a, section D8.5.2 */ +#define INNER_SHAREABLE 3 +static inline uint64_t make_pte(paddr_t pa, uint8_t mem_attr_index) +{ + /* Note: As per R_PYFVQ from the ARM spec, we can always safely set the + * shareability to inner, even for device-type memory. 
+ */ + return mask_pa(pa) + | BIT(10) /* access flag */ #if CONFIG_MAX_NUM_NODES > 1 - | (3 << 8) /* make sure the shareability is the same as the kernel's */ + | (INNER_SHAREABLE << 8) #endif - | (MT_NORMAL << 2) /* MT_NORMAL memory */ - | BIT(0); /* 2M block */ - first_paddr += BIT(ARM_2MB_BLOCK_BITS); + | (mem_attr_index << 2) + | BIT(0); /* valid page/block mapping */ +} + +static inline _Bool pte_is_valid(pte_t pte) +{ + return (pte & 1); +} + +static inline _Bool pte_is_block(pte_t pte) +{ + return ((pte & 3) == 1); +} + +/* Take care about atomicity */ +static inline void pte_set(pte_t *ptep, pte_t val) +{ + WRITE_ONCE(*ptep, val); +} + +static inline pte_t pte_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} + +static_assert(PGD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +static_assert(PUD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +static_assert(PMD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size"); +/* ARM VMSAv8-64: Each table entry is always eight bytes large */ +static_assert(PAGE_BITS == (BITS_PER_LEVEL + 3), "Mismatch in expected page size"); + +/* A valid PA can be maximum 48 or 52 bit large, so upper bits are always zero */ +#define INVALID_PA ((uint64_t)-1) +static paddr_t walk_pagetables(vaddr_t va, uint64_t *l0_table, + unsigned *level, pte_t **fault_pde) +{ + paddr_t ret = INVALID_PA; + /* All levels have the same size and therefore number of index bits + * (9 for 4kiB Translation Granule) on ARMv8. + */ + uint64_t index_mask_bits = PGD_BITS + PUD_BITS + PMD_BITS + PAGE_BITS; + uint64_t *tbl = l0_table; + + unsigned idx, lvl; + paddr_t pa; + pte_t pte; + + /* Walk up to four levels */ + for (lvl = 0; lvl <= 3; lvl++) { + idx = (va >> index_mask_bits) & MASK(BITS_PER_LEVEL); + pte = pte_get(&tbl[idx]); + + if (!pte_is_valid(pte)) { + goto err_out; + } else if (pte_is_block(pte)) { + /* L0 giant pages (512 GiB) are not allowed by the architecture for + * 4kiB Granule size and 48 bit OA. We don't support 52 bit OA. + */ + if (lvl == 0) { + goto err_out; + } + break; + } + if (lvl == 3) { + /* ARM DDI 0487I.a, page D8-5126 (I_WYRBP), D8-5131 (I_VKPKF): + * If the PTE in the last level is valid, it is interpreted as a page + * table, irrespectively of bit 1. This allows for the "loopback + * trick" - described in every (good) OS lecture at university :-) + * Other architectures like RISC-V have screwed this up with their + * pagetable format. + */ + break; + } + /* We have a table descriptor. Descent to the next lower level */ + pa = pde_to_paddr(pte); + vaddr_t va_next = pa_to_va(pa); + tbl = (uint64_t *)va_next; + + index_mask_bits -= BITS_PER_LEVEL; } - /* Architecturally required barrier to make all writes to pagetable memories - * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. + ret = (pa | (va & (MASK(index_mask_bits)))); + +err_out: + *level = lvl; + *fault_pde = &tbl[idx]; + return ret; +} + +/* Returns NULL if there is already something mappped at the requested VA. Fills + * in page tables if needed until the desired level is reached. + */ +static pte_t *fill_pt_tree(vaddr_t va, uint64_t *l0_table, unsigned target_lvl) +{ + paddr_t pa; + unsigned lvl; + pte_t *fault_pde; + + pa = walk_pagetables(va, l0_table, &lvl, &fault_pde); + + while ((lvl < target_lvl) && (pa == INVALID_PA)) { + /* fault_pde points to the entry to write. 
Add a new pagetable */ + pte_set(fault_pde, make_pde_from_ptr(get_page())); + + pa = walk_pagetables(va, l0_table, &lvl, &fault_pde); + } + + if ((lvl == target_lvl) && fault_pde && !pte_is_valid(pte_get(fault_pde))) { + return fault_pde; + } + return NULL; +} + +extern char _text[]; +extern char _end[]; + +extern size_t dtb_size; + +static inline void clean_inval_cl(void *addr) +{ + asm volatile("dc civac, %0\n\t" :: "r"(addr)); +} + +static void clean_inval_pagetables(void) +{ + dsb(); + /* Whole image for now; EFI case: Maybe our image is loaded on the boot + * CPU with caches enabled (and still being dirty), but the secondary CPUs + * start with caches disabled. Further, assume CL size is >= 64 Bytes. + * Maybe this is too cautious. Can we relax this? */ + for (vaddr_t va = (vaddr_t)_text; va < (vaddr_t)(_end); va += 64) { + clean_inval_cl((void *)va); + } dsb(); } -void init_hyp_boot_vspace(struct image_info *kernel_info) +static void map_uart(paddr_t base) +{ + pte_t *pte; + + base = ROUND_DOWN(base, ARM_2MB_BLOCK_BITS); + pte = fill_pt_tree(base, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(base, MT_DEVICE_nGnRnE)); + } else { + printf("Unable to map the UART at PA 0x%lx\n", base); + abort(); + } + dbg_printf("Done mapping UART at PA: 0x%lx\n", base); +} + + +static paddr_t uart_base_mmio; +void mmu_set_uart_base(volatile void *base) +{ + uart_base_mmio = (paddr_t)base; +} + +/* + * Create a "boot" page table, which contains a 1:1 mapping for the ELFloader and + * the DTB. Moreover create a mapping for the kernel image at the desired VA with the + * physical memory that was used when extracting the kernel from the elfloader + * image previously. + */ +static void init_boot_vspace_impl(const struct image_info *kernel_info, _Bool has_one_va_range) { - word_t i; - word_t pmd_index; + /* We may be running with MMU & caches off. Before we write new values + * make sure to clean & invalidate all previous data in those locations. + */ + clean_inval_pagetables(); + + /* Map UART, using strongly ordered memory; one 2 MiB page; 1:1 VA/PA */ + paddr_t uart_base = ROUND_DOWN(uart_base_mmio, ARM_2MB_BLOCK_BITS); + map_uart(uart_base); + + /* Map Elfloader image, using NORMAL memory; 1:1 VA/PA */ + paddr_t start_paddr = ROUND_DOWN(((paddr_t)_text), ARM_2MB_BLOCK_BITS); + paddr_t end_paddr = ROUND_UP(((paddr_t)_end), ARM_2MB_BLOCK_BITS); + + for (paddr_t pa = start_paddr; pa < end_paddr; pa += BIT(ARM_2MB_BLOCK_BITS)) { + pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map ELFloader at PA: 0x%lx\n", pa); + abort(); + } + dbg_printf("Map Elfloader PA: 0x%lx\n", pa); + } + dbg_printf("Done mapping Elfloader\n"); + + paddr_t dtb_map_start, dtb_map_end; + if (dtb && (dtb_size > 0)) { + /* Device Tree Blob (DTB): + * An UEFI-supplied DTB lies outside of the image memory => Add mapping. + * For other DTBs the ELFloader of course saves the *target* address of + * the copied DTB in "dtb". + * So we also need to add a mapping here in those cases. 
+ */ + paddr_t dtb_end = (paddr_t)dtb + dtb_size; + + dtb_map_start = ROUND_DOWN((paddr_t)dtb, ARM_2MB_BLOCK_BITS); + dtb_map_end = ROUND_UP(dtb_end, ARM_2MB_BLOCK_BITS); + for (paddr_t pa = dtb_map_start; pa < dtb_map_end; pa += BIT(ARM_2MB_BLOCK_BITS)) { + pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map DTB at PA: 0x%lx\n", pa); + } + dbg_printf("Map DTB PA: 0x%lx\n", pa); + } + dbg_printf("Done mapping DTB\n"); + } + + /* Map the kernel */ vaddr_t first_vaddr = kernel_info->virt_region_start; + vaddr_t last_vaddr = kernel_info->virt_region_end; paddr_t first_paddr = kernel_info->phys_region_start; - _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0); - for (i = 0; i < BIT(PUD_BITS); i++) { - _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) - | BIT(10) /* access flag */ - | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ - | BIT(0); /* 1G block */ + uint64_t *l0_table = has_one_va_range ? _boot_pgd_down : _boot_pgd_up; + paddr_t pa = first_paddr; + for (vaddr_t va = first_vaddr; va < last_vaddr; + va += BIT(ARM_2MB_BLOCK_BITS), + pa += BIT(ARM_2MB_BLOCK_BITS)) { + + pte_t *pte = fill_pt_tree(va, l0_table, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map kernel at VA/PA: 0x%lx / 0x%lx\n", va, pa); + } + dbg_printf("Map kernel VA -> PA: 0x%lx -> 0x%lx\n", va, pa); } + dbg_printf("Done mapping kernel\n"); - _boot_pgd_down[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ + dbg_printf("Mapping indices:\n"); + dgb_print_2M_mapping_details("UART", uart_base, /* one 2 MiB page */ 2u * 1024 * 1024); + dgb_print_2M_mapping_details("ELFloader image", (paddr_t)_text, (paddr_t)_end - (paddr_t)_text); + if (dtb && (dtb_size > 0)) { + dgb_print_2M_mapping_details("dtb", dtb_map_start, dtb_map_end - dtb_map_start - 1); + } - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ - pmd_index = GET_PMD_INDEX(first_vaddr); - for (i = pmd_index; i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = (((i - pmd_index) << ARM_2MB_BLOCK_BITS) + first_paddr) - | BIT(10) /* access flag */ -#if CONFIG_MAX_NUM_NODES > 1 - | (3 << 8) -#endif - | (MT_NORMAL << 2) /* MT_NORMAL memory */ - | BIT(0); /* 2M block */ - } + /* Architecturally required barrier to make all writes to pagetable memories + * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. + */ + dsb(); + + /* Maintenance again, just to be sure. This is only necessary for the secondary + * CPUs; they may come up with caches & MMU disabled. What they should usually + * do is enable caches & MMU together! The following code is only necessary + * if they enable ONLY the MMU first and after that they enable the cache. + * That would be totally ... well ... suboptimal, but we play "better safe + * than sorry" here. 
+ */ + clean_inval_pagetables(); +} + +void init_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 0); +} + +void init_hyp_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 1); } diff --git a/elfloader-tool/src/arch-arm/64/structures.c b/elfloader-tool/src/arch-arm/64/structures.c index 654fc7dc..e24680f4 100644 --- a/elfloader-tool/src/arch-arm/64/structures.c +++ b/elfloader-tool/src/arch-arm/64/structures.c @@ -8,11 +8,6 @@ #include #include -/* Paging structures for kernel mapping */ +/* Top-level paging structures for kernel and identity mapping */ uint64_t _boot_pgd_up[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_up[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); -uint64_t _boot_pmd_up[BIT(PMD_BITS)] ALIGN(BIT(PMD_SIZE_BITS)); - -/* Paging structures for identity mapping */ uint64_t _boot_pgd_down[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_down[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S index 8b9fd216..eb091f81 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S @@ -21,12 +21,6 @@ .extern invalidate_icache .extern _boot_pgd_down -BEGIN_FUNC(disable_mmu_caches_hyp) - /* Assume D-cache already cleaned to PoC */ - disable_mmu sctlr_el2, x9 - ret -END_FUNC(disable_mmu_caches_hyp) - BEGIN_FUNC(clean_dcache_by_range) /* Ordering needed for strongly-ordered mem, not needed for NORMAL mem. * See ARM DDI 0487I.a, page D7-5063. @@ -76,7 +70,10 @@ BEGIN_FUNC(leave_hyp) eret END_FUNC(leave_hyp) -BEGIN_FUNC(arm_enable_hyp_mmu) +BEGIN_FUNC(arm_switch_to_hyp_tables) + /* Load MAIR & TCR values; construct TTBR address before disabling and re- + * enabling the MMU & caches. + */ /* * DEVICE_nGnRnE 000 00000000 * DEVICE_nGnRE 001 00000100 @@ -91,16 +88,28 @@ BEGIN_FUNC(arm_enable_hyp_mmu) MAIR(0x44, MT_NORMAL_NC) | \ MAIR(0xff, MT_NORMAL) | \ MAIR(0xaa, MT_NORMAL_WT) - msr mair_el2, x5 + ldr x8, =TCR_T0SZ(48) | TCR_IRGN0_WBWC | TCR_ORGN0_WBWC | TCR_SH0_ISH | TCR_TG0_4K | TCR_PS | TCR_EL2_RES1 + + /* Use x16 as temp register */ + disable_mmu sctlr_el2, x16 + + msr mair_el2, x5 msr tcr_el2, x8 isb + /* For non-VHE the "down" contains both the the kernel mapping and 1:1 mapping. */ adrp x8, _boot_pgd_down msr ttbr0_el2, x8 isb + /* Invalidate TLBs */ + dsb sy tlbi alle2is + tlbi vmalls12e1 + dsb sy + + tlbi vmalle1is dsb ish isb @@ -113,4 +122,4 @@ BEGIN_FUNC(arm_enable_hyp_mmu) /* NOTE: enable_mmu already contains an isb after enabling. */ ret -END_FUNC(arm_enable_hyp_mmu) +END_FUNC(arm_switch_to_hyp_tables) diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index 704d90d2..f795f3a7 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -51,20 +51,17 @@ void non_boot_main(void) if (is_hyp_mode()) { extern void leave_hyp(void); - extern void disable_mmu_caches_hyp(void); -#ifdef CONFIG_ARCH_AARCH64 - /* Disable the MMU and cacheability unconditionally on ARM64. - * The 32 bit ARM platforms do not expect the MMU to be turned - * off, so we leave them alone. */ - disable_mmu_caches_hyp(); -#endif #ifndef CONFIG_ARM_HYPERVISOR_SUPPORT leave_hyp(); #endif } /* Enable the MMU, and enter the kernel. 
*/ if (is_hyp_mode()) { +#if defined(CONFIG_ARCH_AARCH64) + arm_switch_to_hyp_tables(); +#else arm_enable_hyp_mmu(); +#endif } else { arm_enable_mmu(); } diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index f63e9824..5b49d2a5 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -191,34 +191,31 @@ void continue_boot(int was_relocated) init_boot_vspace(&kernel_info); #endif - /* If in EL2, disable MMU and I/D cacheability unconditionally */ if (is_hyp_mode()) { - extern void disable_mmu_caches_hyp(void); extern void clean_dcache_by_range(paddr_t start, paddr_t end); paddr_t start = kernel_info.phys_region_start; paddr_t end = kernel_info.phys_region_end; clean_dcache_by_range(start, end); + start = (paddr_t)user_info.phys_region_start; end = (paddr_t)user_info.phys_region_end; clean_dcache_by_range(start, end); + start = (paddr_t)_text; end = (paddr_t)_end; clean_dcache_by_range(start, end); + if (dtb) { start = (paddr_t)dtb; end = start + dtb_size; clean_dcache_by_range(start, end); } -#if defined(CONFIG_ARCH_AARCH64) - /* Disable the MMU and cacheability unconditionally on ARM64. - * The 32 bit ARM platforms do not expect the MMU to be turned - * off, so we leave them alone. */ - disable_mmu_caches_hyp(); -#endif - -#if (defined(CONFIG_ARCH_ARM_V7A) || defined(CONFIG_ARCH_ARM_V8A)) && !defined(CONFIG_ARM_HYPERVISOR_SUPPORT) +#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT) + printf("Switch to hypervisor mapping\n"); + arm_switch_to_hyp_tables(); +#else extern void leave_hyp(void); /* Switch to EL1, assume EL2 MMU already disabled for ARMv8. */ leave_hyp(); @@ -230,8 +227,7 @@ void continue_boot(int was_relocated) #endif /* CONFIG_MAX_NUM_NODES */ if (is_hyp_mode()) { - printf("Enabling hypervisor MMU and paging\n"); - arm_enable_hyp_mmu(); + /* Nothing to be done here, we already switched above */ } else { printf("Enabling MMU and paging\n"); arm_enable_mmu(); diff --git a/elfloader-tool/src/drivers/uart/common.c b/elfloader-tool/src/drivers/uart/common.c index a5c914ff..b6ec30af 100644 --- a/elfloader-tool/src/drivers/uart/common.c +++ b/elfloader-tool/src/drivers/uart/common.c @@ -18,6 +18,9 @@ void uart_set_out(struct elfloader_device *out) return; } uart_out = out; +#if defined(CONFIG_ARCH_AARCH64) + mmu_set_uart_base(out->region_bases[0]); +#endif } volatile void *uart_get_mmio(void) From 5ebb1fcef3b1a3813063336188a19dbb63d877c6 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Thu, 24 Aug 2023 02:32:24 +0200 Subject: [PATCH 15/19] elfloader: Exit UEFI boot services very early UEFI is an operating system that hides as a bootloader. UEFI is in control of the machine as long as we didn't call exit_boot_services. For instance, UEFI may set up timers to interrupt us while we're fiddling with hardware and UEFI is fiddling with hardware itself and UEFI may be fiddling with the exact same hardware that we are fiddling with, while we're being preempted. That is not good. The previous state of ELFloader is that before exiting UEFI boot services, we already called platform_init() in main(), which may fiddle around with all kinds of hardware. Thus, we should have already exited UEFI boot services when main() is called. Note that exit_boot_services now still executes on the UEFI stack (since we switch the stack in _start()). But so did e.g. the clear_bss() function. I don't see a problem here. 
It's more a question the other way around: Previously, we called into UEFI with exit_boot_services on our own, potentially too small, stack. Do we have enough space for UEFI to execute? How are we supposed to know that? The UEFI implementation can change, so we can never be sure. But it would be unreasonable for UEFI to start us with a stack that is too small to call any UEFI API, including exit_boot_services. So we can safely assume that there is enough space when using the UEFI stack (since our use of stack to this point is minimal). Also, mask all exceptions until we are about to enter the kernel. We do not want to run with whatever state the bootloader set us up before, do we? We only re-enable the asyncs and debugs; interrupts and FIQs are still masked when entering the kernel. What would we gain from that? We don't expect any. Asyncs (SErrors), however, can indicate that we e.g. touched memory that we shouldn't have touched (secure memory). Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/arch-arm/sys_boot.c | 3 +++ elfloader-tool/src/binaries/efi/efi_init.c | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index 5b49d2a5..ff8c5ad5 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -238,6 +238,9 @@ void continue_boot(int was_relocated) printf("Jumping to kernel-image entry point...\n\n"); } + /* Clear D&A in DAIF */ + asm volatile("msr daifclr, #0xC\n\t"); + /* Jump to the kernel. Note: Our DTB is smaller than 4 GiB. */ ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, user_info.phys_region_end, diff --git a/elfloader-tool/src/binaries/efi/efi_init.c b/elfloader-tool/src/binaries/efi/efi_init.c index a177c083..c55fb090 100644 --- a/elfloader-tool/src/binaries/efi/efi_init.c +++ b/elfloader-tool/src/binaries/efi/efi_init.c @@ -4,18 +4,28 @@ * SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include void *__application_handle = NULL; // current efi application handler efi_system_table_t *__efi_system_table = NULL; // current efi system table +static unsigned long efi_exit_bs_result = EFI_SUCCESS; +static unsigned long exit_boot_services(void); + +unsigned long efi_exit_boot_services(void) +{ + return efi_exit_bs_result; +} + extern void _start(void); unsigned int efi_main(uintptr_t application_handle, uintptr_t efi_system_table) { clear_bss(); __application_handle = (void *)application_handle; __efi_system_table = (efi_system_table_t *)efi_system_table; + efi_exit_bs_result = exit_boot_services(); _start(); return 0; } @@ -41,7 +51,7 @@ void *efi_get_fdt(void) * This means boot time services are not available anymore. We should store * system information e.g. current memory map and pass them to kernel. */ -unsigned long efi_exit_boot_services(void) +static unsigned long exit_boot_services(void) { unsigned long status; efi_memory_desc_t *memory_map; @@ -78,5 +88,11 @@ unsigned long efi_exit_boot_services(void) } status = bts->exit_boot_services(__application_handle, key); + +#if defined(CONFIG_ARCH_AARCH64) + /* Now that we're free, mask all exceptions until we enter the kernel */ + asm volatile("msr daifset, #0xF\n\t"); +#endif + return status; } From 80b8902c968523183e62eb6f90a0450a4e8c040a Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Thu, 14 Dec 2023 20:06:31 +0100 Subject: [PATCH 16/19] elfloader: Rewrite loop: Do not use goto There are better ways to loop in C. 
No functional change. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/binaries/efi/efi_init.c | 40 ++++++++++++---------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/elfloader-tool/src/binaries/efi/efi_init.c b/elfloader-tool/src/binaries/efi/efi_init.c index c55fb090..e938d037 100644 --- a/elfloader-tool/src/binaries/efi/efi_init.c +++ b/elfloader-tool/src/binaries/efi/efi_init.c @@ -62,30 +62,32 @@ static unsigned long exit_boot_services(void) efi_boot_services_t *bts = get_efi_boot_services(); /* - * As the number of existing memeory segments are unknown, + * As the number of existing memory segments are unknown, * we need to resort to a trial and error to guess that. * We start from 32 and increase it by one until get a valid value. */ map_size = sizeof(*memory_map) * 32; -again: - status = bts->allocate_pool(EFI_LOADER_DATA, map_size, (void **)&memory_map); - - if (status != EFI_SUCCESS) - return status; - - status = bts->get_memory_map(&map_size, memory_map, &key, &desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL) { - bts->free_pool(memory_map); - - map_size += sizeof(*memory_map); - goto again; - } - - if (status != EFI_SUCCESS){ - bts->free_pool(memory_map); - return status; - } + do { + status = bts->allocate_pool(EFI_LOADER_DATA, map_size, (void **)&memory_map); + /* If the allocation fails, there is something wrong and we cannot continue */ + if (status != EFI_SUCCESS) { + return status; + } + + status = bts->get_memory_map(&map_size, memory_map, &key, &desc_size, &desc_version); + if (status != EFI_SUCCESS) { + bts->free_pool(memory_map); + memory_map = NULL; + + if (status == EFI_BUFFER_TOO_SMALL) { + map_size += sizeof(*memory_map); + } else { + /* some other error; bail out! */ + return status; + } + } + } while (status == EFI_BUFFER_TOO_SMALL); status = bts->exit_boot_services(__application_handle, key); From 0e0a99d2a78880cd4afb7c73ee8ee81af1b2a767 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Thu, 14 Dec 2023 20:12:26 +0100 Subject: [PATCH 17/19] elfloader: fix UEFI integration bug: descriptor size mismatch. The UEFI specification 2.10 says in section 7 for EFI_BOOT_SERVICES.GetMemoryMap(): "The GetMemoryMap() function also returns the size and revision number of the EFI_MEMORY_DESCRIPTOR. The DescriptorSize represents the size in bytes of an EFI_MEMORY_DESCRIPTOR array element returned in MemoryMap. The size is returned to allow for future expansion of the EFI_MEMORY_DESCRIPTOR in response to hardware innovation. The structure of the EFI_MEMORY_DESCRIPTOR may be extended in the future but it will remain backwards compatible with the current definition. Thus OS software must use the DescriptorSize to find the start of each EFI_MEMORY_DESCRIPTOR in the MemoryMap array." This mismatch is the case on (our) Orin UEFI. The compiled size of a memory descriptor is 40 Bytes, but the Orin UEFI implementation uses 48 Bytes per descriptor. Thus, due to the requirement to use a larger size than the returned total size (due to the fact that the buffer allocation itself may lead to one more entry in the memory map), we must increase by the size (in terms of number of descriptors), but use the number of bytes that UEFI uses for one memory map entry, not what we think it might be. Some other people already stumbled over this: https://forum.osdev.org/viewtopic.php?f=1&t=32953 Based on the comment in the existing code, the author seems to not have understood how the size of the memory map can be determined. Just read the spec! 
So we better update that misleading comment. Signed-off-by: Matthias Rosenfelder --- elfloader-tool/src/binaries/efi/efi_init.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/elfloader-tool/src/binaries/efi/efi_init.c b/elfloader-tool/src/binaries/efi/efi_init.c index e938d037..47dc2651 100644 --- a/elfloader-tool/src/binaries/efi/efi_init.c +++ b/elfloader-tool/src/binaries/efi/efi_init.c @@ -63,8 +63,8 @@ static unsigned long exit_boot_services(void) /* * As the number of existing memory segments are unknown, - * we need to resort to a trial and error to guess that. - * We start from 32 and increase it by one until get a valid value. + * we need to start somewhere. The API then tells us how much space we need + * if it is not enough. */ map_size = sizeof(*memory_map) * 32; @@ -81,7 +81,12 @@ static unsigned long exit_boot_services(void) memory_map = NULL; if (status == EFI_BUFFER_TOO_SMALL) { - map_size += sizeof(*memory_map); + /* Note: "map_size" is an IN/OUT-parameter and has been updated to the + * required size. We still add one more entry ("desc_size" is in bytes) + * due to the hint from the spec ("since allocation of the new buffer + * may potentially increase memory map size."). + */ + map_size += desc_size; } else { /* some other error; bail out! */ return status; From 16ea8ef0c827aeceb630677b7ed93c3fb0e4c398 Mon Sep 17 00:00:00 2001 From: Matthias Rosenfelder Date: Wed, 31 Jan 2024 15:17:45 +1100 Subject: [PATCH 18/19] elfloader: Set Exec-Never for Device Memory Type PTE. The ARM spec (ARM DDI 0487J.a) says on page B2-216 ("Aarch64 Application Level Memory Model"): "Hardware does not prevent speculative instruction fetches from a memory location with any of the Device memory attributes unless the memory location is also marked as execute-never for all Exception levels." and "Failure to mark a memory location with any Device memory attribute as execute-never for all Exception levels is a programming error." Similar statements can be found in the chapter about the Aarch32 Application Level Memory Model for aarch32 mode. Signed-off-by: Andy Bui --- elfloader-tool/include/arch-arm/64/mode/aarch64.h | 2 ++ elfloader-tool/include/elfloader_common.h | 2 +- elfloader-tool/src/arch-arm/64/mmu.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/elfloader-tool/include/arch-arm/64/mode/aarch64.h b/elfloader-tool/include/arch-arm/64/mode/aarch64.h index e46611c4..89fdcfd6 100644 --- a/elfloader-tool/include/arch-arm/64/mode/aarch64.h +++ b/elfloader-tool/include/arch-arm/64/mode/aarch64.h @@ -63,3 +63,5 @@ #define MT_NORMAL 4 #define MT_NORMAL_WT 5 #define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) + +#define IS_DEV_MEM_INDEX(_idx) ((_idx) <= MT_DEVICE_GRE) diff --git a/elfloader-tool/include/elfloader_common.h b/elfloader-tool/include/elfloader_common.h index 7a54c316..c80f143c 100644 --- a/elfloader-tool/include/elfloader_common.h +++ b/elfloader-tool/include/elfloader_common.h @@ -14,7 +14,7 @@ typedef uintptr_t vaddr_t; #define PAGE_BITS 12 -#define BIT(x) (1 << (x)) +#define BIT(x) (1ul << (x)) #define MASK(n) (BIT(n) - 1) #define MIN(a, b) (((a) < (b)) ? 
(a) : (b)) #define IS_ALIGNED(n, b) (!((n) & MASK(b))) diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index c70b5466..35df2a3f 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -142,14 +142,19 @@ static inline uint64_t make_pde_from_ptr(pte_t *pagetable_target) return make_pde(va_to_pa((uintptr_t)pagetable_target)); } -/* ARM DDI 0487I.a, section D8.5.2 */ +/* ARM DDI 0487J.a, section D8.5.2 */ #define INNER_SHAREABLE 3 static inline uint64_t make_pte(paddr_t pa, uint8_t mem_attr_index) { - /* Note: As per R_PYFVQ from the ARM spec, we can always safely set the - * shareability to inner, even for device-type memory. + /* As per R_PYFVQ from the ARM spec, we can always safely set the shareability + * to inner, even for Device memory type. + * For exec-never bit(s), see Table D8-46 for EL2 translation regime (TR) + * and Table D8-45 for all others. + * PXN (bit 53) is RES0 for EL2 TR (Figure D8-16), so we simply always set it. */ - return mask_pa(pa) + uint64_t xn = (IS_DEV_MEM_INDEX(mem_attr_index)) ? (BIT(54) | BIT(53)) : 0; + return xn + | mask_pa(pa) | BIT(10) /* access flag */ #if CONFIG_MAX_NUM_NODES > 1 | (INNER_SHAREABLE << 8) From 84a55cd5aeca16ffd4207a262e661678b063b1c3 Mon Sep 17 00:00:00 2001 From: Andy Bui Date: Wed, 7 Feb 2024 13:01:32 +1100 Subject: [PATCH 19/19] elfloader: unconditionally set PTE shareability to Inner Shareable. We noticed a lot of instability with the Jetson Orin port when running in UP + HYP mode. Setting the shareability to Inner Shareable (IS) unconditionally is the fix, however, we're unable to find the exact line in Arm documentation that explains why this is the case. Potential theory: there are other agents on the Jetson Orin that require cache coherency and live in the IS domain. Being more permissive with memory resolves the prefetching and translation faults we were getting. For reference, Arm states "Arm expects operating systems to mark the majority of DRAM memory as Normal Write-back cacheable, Inner shareable" (102376_0200_01_en version 2). Signed-off-by: Andy Bui --- elfloader-tool/src/arch-arm/64/mmu.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index 35df2a3f..149f2f32 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -156,9 +156,7 @@ static inline uint64_t make_pte(paddr_t pa, uint8_t mem_attr_index) return xn | mask_pa(pa) | BIT(10) /* access flag */ -#if CONFIG_MAX_NUM_NODES > 1 | (INNER_SHAREABLE << 8) -#endif | (mem_attr_index << 2) | BIT(0); /* valid page/block mapping */ }