From 6f239c7a4a74313dc74c170edd862663bf806b3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alf-Andr=C3=A9=20Walla?= Date: Mon, 24 Jun 2024 20:05:21 +0200 Subject: [PATCH] Add experimental inaccurate dispatch --- emulator/src/main.cpp | 7 +- lib/libriscv/bytecode_dispatch.cpp | 2 + lib/libriscv/cpu.hpp | 6 + lib/libriscv/cpu_inaccurate_dispatch.cpp | 368 +++++++++++++++++++++++ lib/libriscv/threaded_dispatch.cpp | 10 + 5 files changed, 391 insertions(+), 2 deletions(-) create mode 100644 lib/libriscv/cpu_inaccurate_dispatch.cpp diff --git a/emulator/src/main.cpp b/emulator/src/main.cpp index f7bb9781..c9cad41c 100644 --- a/emulator/src/main.cpp +++ b/emulator/src/main.cpp @@ -303,7 +303,10 @@ static void run_program( debug.simulate(); } else { // Normal RISC-V simulation - machine.simulate(cli_args.fuel); + if (cli_args.accurate) + machine.simulate(cli_args.fuel); + else + machine.cpu.simulate_inaccurate(machine.cpu.pc()); } } catch (riscv::MachineException& me) { printf("%s\n", machine.cpu.current_instruction_to_string().c_str()); @@ -342,7 +345,7 @@ static void run_program( const auto retval = machine.return_value(); printf(">>> Program exited, exit code = %" PRId64 " (0x%" PRIX64 ")\n", int64_t(retval), uint64_t(retval)); - if (cli_args.accurate || !riscv::binary_translation_enabled) + if (cli_args.accurate) printf("Instructions executed: %" PRIu64 " Runtime: %.3fms Insn/s: %.0fmi/s\n", machine.instruction_counter(), runtime.count()*1000.0, machine.instruction_counter() / (runtime.count() * 1e6)); diff --git a/lib/libriscv/bytecode_dispatch.cpp b/lib/libriscv/bytecode_dispatch.cpp index 5372a1da..98a5f766 100644 --- a/lib/libriscv/bytecode_dispatch.cpp +++ b/lib/libriscv/bytecode_dispatch.cpp @@ -8,6 +8,8 @@ #include "cpu_dispatch.cpp" +#include "cpu_inaccurate_dispatch.cpp" + namespace riscv { INSTANTIATE_32_IF_ENABLED(CPU); diff --git a/lib/libriscv/cpu.hpp b/lib/libriscv/cpu.hpp index af4f8471..3c8e76ce 100644 --- a/lib/libriscv/cpu.hpp +++ b/lib/libriscv/cpu.hpp 
@@ -35,6 +35,12 @@ namespace riscv /// @return Returns true if the machine stopped normally, otherwise an execution timeout happened. bool simulate(address_t pc, uint64_t icounter, uint64_t maxcounter); + /// @brief Simulate faster by not counting instructions, and consequently + /// not checking for timeouts. This is useful when there is another + /// layer of timeout checking, like signal handling. + /// @param pc The starting address + void simulate_inaccurate(address_t pc); + // Step precisely one instruction forward from current PC. void step_one(); diff --git a/lib/libriscv/cpu_inaccurate_dispatch.cpp b/lib/libriscv/cpu_inaccurate_dispatch.cpp new file mode 100644 index 00000000..716353bc --- /dev/null +++ b/lib/libriscv/cpu_inaccurate_dispatch.cpp @@ -0,0 +1,368 @@ +#include "machine.hpp" +#include "decoder_cache.hpp" +#include "threaded_bytecodes.hpp" +#include "rv32i_instr.hpp" +#include "rvfd.hpp" +#ifdef RISCV_EXT_COMPRESSED +#include "rvc.hpp" +#endif +#ifdef RISCV_EXT_VECTOR +#include "rvv.hpp" +#endif + +/** + * This file is included by threaded_dispatch.cpp and bytecode_dispatch.cpp. + * It implements the inaccurate (non-instruction-counting) variant of switch-based and threaded dispatch. 
+ * + * All dispatch modes share bytecode_impl.cpp + **/ + +namespace riscv +{ +#define VIEW_INSTR() \ + auto instr = *(rv32i_instruction *)&decoder->instr; +#define VIEW_INSTR_AS(name, x) \ + auto &&name = *(x *)&decoder->instr; +#define NEXT_INSTR() \ + if constexpr (compressed_enabled) \ + decoder += 2; \ + else \ + decoder += 1; \ + EXECUTE_INSTR(); +#define NEXT_C_INSTR() \ + decoder += 1; \ + EXECUTE_INSTR(); + +#define NEXT_BLOCK(len, OF) \ + pc += len; \ + decoder += len >> DecoderCache::SHIFT; \ + if constexpr (FUZZING) /* Give OOB-aid to ASAN */ \ + decoder = &exec_decoder[pc >> DecoderCache::SHIFT]; \ + pc += decoder->block_bytes(); \ + EXECUTE_INSTR(); + +#define NEXT_SEGMENT() \ + decoder = &exec_decoder[pc >> DecoderCache::SHIFT]; \ + pc += decoder->block_bytes(); \ + EXECUTE_INSTR(); + +#define PERFORM_BRANCH() \ + if constexpr (VERBOSE_JUMPS) \ + fprintf(stderr, "Branch 0x%lX >= 0x%lX (decoder=%p)\n", long(pc), long(pc + fi.signed_imm()), decoder); \ + NEXT_BLOCK(fi.signed_imm(), false); + +#define PERFORM_FORWARD_BRANCH() \ + if constexpr (VERBOSE_JUMPS) \ + fprintf(stderr, "Fw.Branch 0x%lX >= 0x%lX\n", long(pc), long(pc + fi.signed_imm())); \ + NEXT_BLOCK(fi.signed_imm(), false); + +#define OVERFLOW_CHECKED_JUMP() \ + goto check_jump + + template + DISPATCH_ATTR void CPU::simulate_inaccurate(address_t pc) + { + static constexpr uint32_t XLEN = W * 8; + using addr_t = address_type; + using saddr_t = signed_address_type; + +#ifdef DISPATCH_MODE_THREADED + static constexpr void *computed_opcode[] = { + [RV32I_BC_INVALID] = &&execute_invalid, + [RV32I_BC_ADDI] = &&rv32i_addi, + [RV32I_BC_LI] = &&rv32i_li, + [RV32I_BC_MV] = &&rv32i_mv, + [RV32I_BC_SLLI] = &&rv32i_slli, + [RV32I_BC_SLTI] = &&rv32i_slti, + [RV32I_BC_SLTIU] = &&rv32i_sltiu, + [RV32I_BC_XORI] = &&rv32i_xori, + [RV32I_BC_SRLI] = &&rv32i_srli, + [RV32I_BC_SRAI] = &&rv32i_srai, + [RV32I_BC_ORI] = &&rv32i_ori, + [RV32I_BC_ANDI] = &&rv32i_andi, + + [RV32I_BC_LUI] = &&rv32i_lui, + 
[RV32I_BC_AUIPC] = &&rv32i_auipc, + + [RV32I_BC_LDB] = &&rv32i_ldb, + [RV32I_BC_LDBU] = &&rv32i_ldbu, + [RV32I_BC_LDH] = &&rv32i_ldh, + [RV32I_BC_LDHU] = &&rv32i_ldhu, + [RV32I_BC_LDW] = &&rv32i_ldw, + + [RV32I_BC_STB] = &&rv32i_stb, + [RV32I_BC_STH] = &&rv32i_sth, + [RV32I_BC_STW] = &&rv32i_stw, +#ifdef RISCV_64I + [RV32I_BC_LDWU] = &&rv32i_ldwu, + [RV32I_BC_LDD] = &&rv32i_ldd, + [RV32I_BC_STD] = &&rv32i_std, +#endif + + [RV32I_BC_BEQ] = &&rv32i_beq, + [RV32I_BC_BNE] = &&rv32i_bne, + [RV32I_BC_BLT] = &&rv32i_blt, + [RV32I_BC_BGE] = &&rv32i_bge, + [RV32I_BC_BLTU] = &&rv32i_bltu, + [RV32I_BC_BGEU] = &&rv32i_bgeu, + [RV32I_BC_BEQ_FW] = &&rv32i_beq_fw, + [RV32I_BC_BNE_FW] = &&rv32i_bne_fw, + + [RV32I_BC_JAL] = &&rv32i_jal, + [RV32I_BC_JALR] = &&rv32i_jalr, + [RV32I_BC_FAST_JAL] = &&rv32i_fast_jal, + [RV32I_BC_FAST_CALL] = &&rv32i_fast_call, + + [RV32I_BC_OP_ADD] = &&rv32i_op_add, + [RV32I_BC_OP_SUB] = &&rv32i_op_sub, + [RV32I_BC_OP_SLL] = &&rv32i_op_sll, + [RV32I_BC_OP_SLT] = &&rv32i_op_slt, + [RV32I_BC_OP_SLTU] = &&rv32i_op_sltu, + [RV32I_BC_OP_XOR] = &&rv32i_op_xor, + [RV32I_BC_OP_SRL] = &&rv32i_op_srl, + [RV32I_BC_OP_OR] = &&rv32i_op_or, + [RV32I_BC_OP_AND] = &&rv32i_op_and, + [RV32I_BC_OP_MUL] = &&rv32i_op_mul, + [RV32I_BC_OP_DIV] = &&rv32i_op_div, + [RV32I_BC_OP_DIVU] = &&rv32i_op_divu, + [RV32I_BC_OP_REM] = &&rv32i_op_rem, + [RV32I_BC_OP_REMU] = &&rv32i_op_remu, + [RV32I_BC_OP_SRA] = &&rv32i_op_sra, + [RV32I_BC_OP_ZEXT_H] = &&rv32i_op_zext_h, + [RV32I_BC_OP_SH1ADD] = &&rv32i_op_sh1add, + [RV32I_BC_OP_SH2ADD] = &&rv32i_op_sh2add, + [RV32I_BC_OP_SH3ADD] = &&rv32i_op_sh3add, + + [RV32I_BC_SEXT_B] = &&rv32i_sext_b, + [RV32I_BC_SEXT_H] = &&rv32i_sext_h, + [RV32I_BC_BSETI] = &&rv32i_bseti, + [RV32I_BC_BEXTI] = &&rv32i_bexti, + +#ifdef RISCV_64I + [RV64I_BC_ADDIW] = &&rv64i_addiw, + [RV64I_BC_SLLIW] = &&rv64i_slliw, + [RV64I_BC_SRLIW] = &&rv64i_srliw, + [RV64I_BC_SRAIW] = &&rv64i_sraiw, + [RV64I_BC_OP_ADDW] = &&rv64i_op_addw, + [RV64I_BC_OP_SUBW] = &&rv64i_op_subw, + 
[RV64I_BC_OP_MULW] = &&rv64i_op_mulw, + [RV64I_BC_OP_ADD_UW] = &&rv64i_op_add_uw, + [RV64I_BC_OP_SH1ADD_UW] = &&rv64i_op_sh1add_uw, + [RV64I_BC_OP_SH2ADD_UW] = &&rv64i_op_sh2add_uw, +#endif // RISCV_64I + +#ifdef RISCV_EXT_COMPRESSED + [RV32C_BC_ADDI] = &&rv32c_addi, + [RV32C_BC_LI] = &&rv32c_addi, + [RV32C_BC_MV] = &&rv32c_mv, + [RV32C_BC_SLLI] = &&rv32c_slli, + [RV32C_BC_BEQZ] = &&rv32c_beqz, + [RV32C_BC_BNEZ] = &&rv32c_bnez, + [RV32C_BC_JMP] = &&rv32c_jmp, + [RV32C_BC_JR] = &&rv32c_jr, + [RV32C_BC_JAL_ADDIW] = &&rv32c_jal_addiw, + [RV32C_BC_JALR] = &&rv32c_jalr, + [RV32C_BC_LDD] = &&rv32c_ldd, + [RV32C_BC_STD] = &&rv32c_std, + [RV32C_BC_FUNCTION] = &&rv32c_func, + [RV32C_BC_JUMPFUNC] = &&rv32c_jumpfunc, +#endif + + [RV32I_BC_SYSCALL] = &&rv32i_syscall, + [RV32I_BC_STOP] = &&rv32i_stop, + [RV32I_BC_NOP] = &&rv32i_nop, + + [RV32F_BC_FLW] = &&rv32i_flw, + [RV32F_BC_FLD] = &&rv32i_fld, + [RV32F_BC_FSW] = &&rv32i_fsw, + [RV32F_BC_FSD] = &&rv32i_fsd, + [RV32F_BC_FADD] = &&rv32f_fadd, + [RV32F_BC_FSUB] = &&rv32f_fsub, + [RV32F_BC_FMUL] = &&rv32f_fmul, + [RV32F_BC_FDIV] = &&rv32f_fdiv, + [RV32F_BC_FMADD] = &&rv32f_fmadd, +#ifdef RISCV_EXT_VECTOR + [RV32V_BC_VLE32] = &&rv32v_vle32, + [RV32V_BC_VSE32] = &&rv32v_vse32, + [RV32V_BC_VFADD_VV] = &&rv32v_vfadd_vv, + [RV32V_BC_VFMUL_VF] = &&rv32v_vfmul_vf, +#endif + [RV32I_BC_FUNCTION] = &&execute_decoded_function, + [RV32I_BC_FUNCBLOCK] = &&execute_function_block, +#ifdef RISCV_BINARY_TRANSLATION + [RV32I_BC_TRANSLATOR] = &&translated_function, +#endif + [RV32I_BC_SYSTEM] = &&rv32i_system, + }; +#endif + + machine().set_max_instructions(1); + + DecodedExecuteSegment *exec = this->m_exec; + address_t current_begin = exec->exec_begin(); + address_t current_end = exec->exec_end(); + + DecoderData *exec_decoder = exec->decoder_cache(); + DecoderData *decoder; + + // We need an execute segment matching current PC + if (UNLIKELY(!(pc >= current_begin && pc < current_end))) + goto new_execute_segment; + +#ifdef 
RISCV_BINARY_TRANSLATION + // There's a very high chance that the (first) instruction is a translated function + decoder = &exec_decoder[pc >> DecoderCache::SHIFT]; + if (LIKELY(decoder->get_bytecode() == RV32I_BC_TRANSLATOR)) + goto retry_translated_function; +#endif + + continue_segment: + decoder = &exec_decoder[pc >> DecoderCache::SHIFT]; + + pc += decoder->block_bytes(); + +#ifdef DISPATCH_MODE_SWITCH_BASED + + while (true) + { + switch (decoder->get_bytecode()) + { +#define INSTRUCTION(bc, lbl) case bc: + +#else + goto *computed_opcode[decoder->get_bytecode()]; +#define INSTRUCTION(bc, lbl) \ + lbl: + +#endif + +#define DECODER() (*decoder) +#define CPU() (*this) +#define REG(x) registers().get()[x] +#define REGISTERS() registers() +#define VECTORS() registers().rvv() +#define MACHINE() machine() + + /** Instruction handlers **/ + +#include "bytecode_impl.cpp" + + INSTRUCTION(RV32I_BC_SYSTEM, rv32i_system) + { + VIEW_INSTR(); + // Make the current PC visible + REGISTERS().pc = pc; + // Invoke SYSTEM + MACHINE().system(instr); + // Overflow-check, next block + NEXT_BLOCK(4, true); + } + +#ifdef RISCV_BINARY_TRANSLATION + INSTRUCTION(RV32I_BC_TRANSLATOR, translated_function) + { + retry_translated_function: + // Invoke translated code + auto bintr_results = + exec->unchecked_mapping_at(decoder->instr)(*this, 0, 1, pc); + pc = REGISTERS().pc; + if (LIKELY(bintr_results.max_counter != 0 && (pc - current_begin < current_end - current_begin))) + { + decoder = &exec_decoder[pc >> DecoderCache::SHIFT]; + if (decoder->get_bytecode() == RV32I_BC_TRANSLATOR) + { + pc += decoder->block_bytes(); + goto retry_translated_function; + } + goto continue_segment; + } else if (bintr_results.max_counter == 0) + #ifdef RISCV_LIBTCC + goto exit_check; + #else + return; + #endif + else + goto check_jump; + } +#endif // RISCV_BINARY_TRANSLATION + + INSTRUCTION(RV32I_BC_SYSCALL, rv32i_syscall) + { + // Make the current PC visible + REGISTERS().pc = pc; + // Invoke system call + 
MACHINE().system_call(REG(REG_ECALL)); + if (MACHINE().stopped()) + return; + else if (UNLIKELY(pc != REGISTERS().pc)) + { + // System calls are always full-length instructions + if constexpr (VERBOSE_JUMPS) + { + if (pc != REGISTERS().pc) + fprintf(stderr, "SYSCALL jump from 0x%lX to 0x%lX\n", + long(pc), long(REGISTERS().pc + 4)); + } + pc = REGISTERS().pc + 4; + goto check_jump; + } + NEXT_BLOCK(4, false); + } + + INSTRUCTION(RV32I_BC_STOP, rv32i_stop) + { + REGISTERS().pc = pc + 4; + return; + } + +#ifdef DISPATCH_MODE_SWITCH_BASED + default: + goto execute_invalid; + } // switch case + } // while loop + +#endif + + check_jump: + if (LIKELY(pc - current_begin < current_end - current_begin)) + goto continue_segment; + else + goto new_execute_segment; + + // Change to a new execute segment + new_execute_segment: + { + auto new_values = this->next_execute_segment(pc); + exec = new_values.exec; + pc = new_values.pc; + current_begin = exec->exec_begin(); + current_end = exec->exec_end(); + exec_decoder = exec->decoder_cache(); + } + goto continue_segment; + + exit_check: +#ifdef RISCV_LIBTCC + // We need to check if we have a current exception + if (UNLIKELY(CPU().has_current_exception())) + goto handle_rethrow_exception; +#endif + return; + + execute_invalid: + // Calculate the current PC from the decoder pointer + pc = (decoder - exec_decoder) << DecoderCache::SHIFT; + registers().pc = pc; + trigger_exception(ILLEGAL_OPCODE, decoder->instr); + +#ifdef RISCV_LIBTCC + handle_rethrow_exception: + // We have an exception, so we need to rethrow it + const auto except = CPU().current_exception(); + CPU().clear_current_exception(); + std::rethrow_exception(except); +#endif + + } // CPU::simulate_inaccurate() + +} // riscv diff --git a/lib/libriscv/threaded_dispatch.cpp b/lib/libriscv/threaded_dispatch.cpp index 41a61064..085bf86e 100644 --- a/lib/libriscv/threaded_dispatch.cpp +++ b/lib/libriscv/threaded_dispatch.cpp @@ -12,6 +12,16 @@ #include "cpu_dispatch.cpp" +#undef 
VIEW_INSTR +#undef VIEW_INSTR_AS +#undef NEXT_INSTR +#undef NEXT_C_INSTR +#undef NEXT_BLOCK +#undef NEXT_SEGMENT +#undef PERFORM_BRANCH +#undef PERFORM_FORWARD_BRANCH +#include "cpu_inaccurate_dispatch.cpp" + namespace riscv { INSTANTIATE_32_IF_ENABLED(CPU);