Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Full SME(1) instruction support and STREAMING Groups #415

Draft
wants to merge 12 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
86 changes: 79 additions & 7 deletions configs/a64fx_SME.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,12 @@ Ports:
Instruction-Group-Support:
- INT_SIMPLE
- INT_MUL
- STORE_DATA
- STORE_DATA_INT
- STORE_DATA_SCALAR
- STORE_DATA_VECTOR
- STORE_DATA_SVE
- STORE_DATA_STREAMING_SVE
- STORE_DATA_SME
3:
Portname: FLB
Instruction-Group-Support:
Expand All @@ -77,28 +82,62 @@ Ports:
- INT_DIV_OR_SQRT
5:
Portname: EAGA
Instruction-Support:
- LOAD
- STORE_ADDRESS
Instruction-Group-Support:
- LOAD_INT
- LOAD_SCALAR
- LOAD_VECTOR
- LOAD_SVE
- LOAD_STREAMING_SVE
- LOAD_SME
- STORE_ADDRESS_INT
- STORE_ADDRESS_SCALAR
- STORE_ADDRESS_VECTOR
- STORE_ADDRESS_SVE
- STORE_ADDRESS_STREAMING_SVE
- STORE_ADDRESS_SME
- INT_SIMPLE_ARTH_NOSHIFT
- INT_SIMPLE_LOGICAL_NOSHIFT
- INT_SIMPLE_CMP
6:
Portname: EAGB
Instruction-Support:
- LOAD
- STORE_ADDRESS
Instruction-Group-Support:
- LOAD_INT
- LOAD_SCALAR
- LOAD_VECTOR
- LOAD_SVE
- LOAD_STREAMING_SVE
- LOAD_SME
- STORE_ADDRESS_INT
- STORE_ADDRESS_SCALAR
- STORE_ADDRESS_VECTOR
- STORE_ADDRESS_SVE
- STORE_ADDRESS_STREAMING_SVE
- STORE_ADDRESS_SME
- INT_SIMPLE_ARTH_NOSHIFT
- INT_SIMPLE_LOGICAL_NOSHIFT
- INT_SIMPLE_CMP
7:
Portname: BR
Instruction-Group-Support:
- BRANCH
# Define example SME / SVE Streaming Mode units
8:
Portname: SME
Instruction-Group-Support:
- SME
9:
Portname: PR_S
Instruction-Group-Support:
- STREAMING_PREDICATE
10:
Portname: FLA_S
Instruction-Group-Support:
- STREAMING_SVE
11:
Portname: FLB_S
Instruction-Group-Support:
- STREAMING_SVE_SIMPLE
- STREAMING_SVE_MUL
Reservation-Stations:
0:
Size: 20
Expand Down Expand Up @@ -133,6 +172,13 @@ Reservation-Stations:
Dispatch-Rate: 1
Ports:
- SME
6:
Size: 40
Dispatch-Rate: 3
Ports:
- FLA_S
- FLB_S
- PR_S
Execution-Units:
0:
Pipelined: True
Expand Down Expand Up @@ -188,6 +234,24 @@ Execution-Units:
- INT_DIV_OR_SQRT
- FP_DIV_OR_SQRT
- SVE_DIV_OR_SQRT
9:
Pipelined: True
Blocking-Groups:
- INT_DIV_OR_SQRT
- FP_DIV_OR_SQRT
- SVE_DIV_OR_SQRT
10:
Pipelined: True
Blocking-Groups:
- INT_DIV_OR_SQRT
- FP_DIV_OR_SQRT
- SVE_DIV_OR_SQRT
11:
Pipelined: True
Blocking-Groups:
- INT_DIV_OR_SQRT
- FP_DIV_OR_SQRT
- SVE_DIV_OR_SQRT
Latencies:
0:
Instruction-Groups:
Expand Down Expand Up @@ -216,9 +280,11 @@ Latencies:
- SCALAR_SIMPLE
- VECTOR_SIMPLE_LOGICAL
- SVE_SIMPLE_LOGICAL
- STREAMING_SVE_SIMPLE_LOGICAL
- SME_SIMPLE_LOGICAL
- VECTOR_SIMPLE_CMP
- SVE_SIMPLE_CMP
- STREAMING_SVE_SIMPLE_CMP
- SME_SIMPLE_CMP
Execution-Latency: 4
Execution-Throughput: 1
Expand All @@ -232,21 +298,25 @@ Latencies:
- SCALAR_SIMPLE_CVT
- VECTOR_SIMPLE
- SVE_SIMPLE
- STREAMING_SVE_SIMPLE
- SME_SIMPLE
- FP_MUL
- SVE_MUL
- STREAMING_SVE_MUL
- SME_MUL
Execution-Latency: 9
Execution-Throughput: 1
7:
Instruction-Groups:
- SVE_DIV_OR_SQRT
- STREAMING_SVE_DIV_OR_SQRT
- SME_DIV_OR_SQRT
Execution-Latency: 98
Execution-Throughput: 98
8:
Instruction-Groups:
- PREDICATE
- STREAMING_PREDICATE
Execution-Latency: 3
Execution-Throughput: 1
9:
Expand All @@ -260,8 +330,10 @@ Latencies:
10:
Instruction-Groups:
- LOAD_SVE
- LOAD_STREAMING_SVE
- LOAD_SME
- STORE_ADDRESS_SVE
- STORE_ADDRESS_STREAMING_SVE
- STORE_ADDRESS_SME
Execution-Latency: 6
Execution-Throughput: 1
Expand Down
Binary file removed docs/sphinx/assets/instruction_groups.png
Binary file not shown.
Binary file modified docs/sphinx/assets/instruction_groups_AArch64.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 6 additions & 0 deletions src/include/simeng/arch/aarch64/Architecture.hh
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,12 @@ class Architecture : public arch::Architecture {
/** Returns the current value of SVCRval_. Bit 0 of this value indicates
 * whether SVE Streaming Mode is enabled, and bit 1 indicates whether the SME
 * ZA register is enabled. */
uint64_t getSVCRval() const;

/** Returns true if SVE Streaming Mode is enabled, i.e. if bit 0 of SVCRval_
 * is set. */
bool isStreamingModeEnabled() const;

/** Returns true if the SME ZA register is enabled, i.e. if bit 1 of SVCRval_
 * is set. */
bool isZA_RegisterEnabled() const;

/** Update the value of SVCRval_ to `newVal`.
 * NOTE(review): this method is const yet assigns SVCRval_, so SVCRval_ is
 * presumably declared mutable (or static) - confirm. */
void setSVCRval(const uint64_t newVal) const;

Expand Down
9 changes: 9 additions & 0 deletions src/include/simeng/arch/aarch64/Instruction.hh
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,12 @@ class Instruction : public simeng::Instruction {
* processing this instruction. */
InstructionException getException() const;

/** Checks whether the current SVE Streaming Mode status differs from the
 * status when this instruction was first decoded, and updates the
 * instruction group accordingly if required.
 * Returns true if the group was updated, false otherwise. */
bool checkStreamingGroup();

private:
/** Process the instruction's metadata to determine source/destination
* registers. */
Expand Down Expand Up @@ -451,6 +457,9 @@ class Instruction : public simeng::Instruction {
* the `InsnType` namespace allowing each bit to represent a unique
* identifier such as `isLoad` or `isMultiply` etc. */
uint32_t instructionIdentifier_ = 0;

/** The instruction group this instruction belongs to. */
uint16_t instructionGroup_ = InstructionGroups::NONE;
};

} // namespace aarch64
Expand Down
86 changes: 60 additions & 26 deletions src/include/simeng/arch/aarch64/InstructionGroups.hh
Original file line number Diff line number Diff line change
Expand Up @@ -72,37 +72,53 @@ const uint16_t LOAD_SVE = 62;
const uint16_t STORE_ADDRESS_SVE = 63;
const uint16_t STORE_DATA_SVE = 64;
const uint16_t STORE_SVE = 65;
const uint16_t PREDICATE = 66;
const uint16_t LOAD = 67;
const uint16_t STORE_ADDRESS = 68;
const uint16_t STORE_DATA = 69;
const uint16_t STORE = 70;
const uint16_t BRANCH = 71;
const uint16_t SME = 72;
const uint16_t SME_SIMPLE = 73;
const uint16_t SME_SIMPLE_ARTH = 74;
const uint16_t SME_SIMPLE_ARTH_NOSHIFT = 75;
const uint16_t SME_SIMPLE_LOGICAL = 76;
const uint16_t SME_SIMPLE_LOGICAL_NOSHIFT = 77;
const uint16_t SME_SIMPLE_CMP = 78;
const uint16_t SME_SIMPLE_CVT = 79;
const uint16_t SME_MUL = 80;
const uint16_t SME_DIV_OR_SQRT = 81;
const uint16_t LOAD_SME = 82;
const uint16_t STORE_ADDRESS_SME = 83;
const uint16_t STORE_DATA_SME = 84;
const uint16_t STORE_SME = 85;
const uint16_t ALL = 86;
const uint16_t NONE = 87;
const uint16_t STREAMING_SVE = 66;
const uint16_t STREAMING_SVE_SIMPLE = 67;
const uint16_t STREAMING_SVE_SIMPLE_ARTH = 68;
const uint16_t STREAMING_SVE_SIMPLE_ARTH_NOSHIFT = 69;
const uint16_t STREAMING_SVE_SIMPLE_LOGICAL = 70;
const uint16_t STREAMING_SVE_SIMPLE_LOGICAL_NOSHIFT = 71;
const uint16_t STREAMING_SVE_SIMPLE_CMP = 72;
const uint16_t STREAMING_SVE_SIMPLE_CVT = 73;
const uint16_t STREAMING_SVE_MUL = 74;
const uint16_t STREAMING_SVE_DIV_OR_SQRT = 75;
const uint16_t LOAD_STREAMING_SVE = 76;
const uint16_t STORE_ADDRESS_STREAMING_SVE = 77;
const uint16_t STORE_DATA_STREAMING_SVE = 78;
const uint16_t STORE_STREAMING_SVE = 79;
const uint16_t SME = 80;
const uint16_t SME_SIMPLE = 81;
const uint16_t SME_SIMPLE_ARTH = 82;
const uint16_t SME_SIMPLE_ARTH_NOSHIFT = 83;
const uint16_t SME_SIMPLE_LOGICAL = 84;
const uint16_t SME_SIMPLE_LOGICAL_NOSHIFT = 85;
const uint16_t SME_SIMPLE_CMP = 86;
const uint16_t SME_SIMPLE_CVT = 87;
const uint16_t SME_MUL = 88;
const uint16_t SME_DIV_OR_SQRT = 89;
const uint16_t LOAD_SME = 90;
const uint16_t STORE_ADDRESS_SME = 91;
const uint16_t STORE_DATA_SME = 92;
const uint16_t STORE_SME = 93;
const uint16_t PREDICATE = 94;
const uint16_t STREAMING_PREDICATE = 95;
const uint16_t LOAD = 96;
const uint16_t STORE_ADDRESS = 97;
const uint16_t STORE_DATA = 98;
const uint16_t STORE = 99;
const uint16_t BRANCH = 100;
const uint16_t ALL = 101;
const uint16_t NONE = 102;
} // namespace InstructionGroups

/** The number of aarch64 instruction groups. */
static constexpr uint8_t NUM_GROUPS = 88;
static constexpr uint8_t NUM_GROUPS = 103;

const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = {
{InstructionGroups::ALL,
{InstructionGroups::INT, InstructionGroups::FP, InstructionGroups::SVE,
InstructionGroups::PREDICATE, InstructionGroups::SME,
InstructionGroups::STREAMING_SVE, InstructionGroups::SME,
InstructionGroups::PREDICATE, InstructionGroups::STREAMING_PREDICATE,
InstructionGroups::LOAD, InstructionGroups::STORE,
InstructionGroups::BRANCH}},
{InstructionGroups::INT,
Expand Down Expand Up @@ -176,6 +192,19 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = {
{InstructionGroups::SVE_SIMPLE_ARTH_NOSHIFT}},
{InstructionGroups::SVE_SIMPLE_LOGICAL,
{InstructionGroups::SVE_SIMPLE_LOGICAL_NOSHIFT}},
{InstructionGroups::STREAMING_SVE,
{InstructionGroups::STREAMING_SVE_SIMPLE,
InstructionGroups::STREAMING_SVE_DIV_OR_SQRT,
InstructionGroups::STREAMING_SVE_MUL}},
{InstructionGroups::STREAMING_SVE_SIMPLE,
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH,
InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL,
InstructionGroups::STREAMING_SVE_SIMPLE_CMP,
InstructionGroups::STREAMING_SVE_SIMPLE_CVT}},
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH,
{InstructionGroups::STREAMING_SVE_SIMPLE_ARTH_NOSHIFT}},
{InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL,
{InstructionGroups::STREAMING_SVE_SIMPLE_LOGICAL_NOSHIFT}},
{InstructionGroups::SME,
{InstructionGroups::SME_SIMPLE, InstructionGroups::SME_DIV_OR_SQRT,
InstructionGroups::SME_MUL}},
Expand All @@ -189,11 +218,11 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = {
{InstructionGroups::LOAD,
{InstructionGroups::LOAD_INT, InstructionGroups::LOAD_SCALAR,
InstructionGroups::LOAD_VECTOR, InstructionGroups::LOAD_SVE,
InstructionGroups::LOAD_SME}},
InstructionGroups::LOAD_STREAMING_SVE, InstructionGroups::LOAD_SME}},
{InstructionGroups::STORE,
{InstructionGroups::STORE_INT, InstructionGroups::STORE_SCALAR,
InstructionGroups::STORE_VECTOR, InstructionGroups::STORE_SVE,
InstructionGroups::STORE_SME}},
InstructionGroups::STORE_STREAMING_SVE, InstructionGroups::STORE_SME}},
{InstructionGroups::STORE_INT,
{InstructionGroups::STORE_ADDRESS_INT, InstructionGroups::STORE_DATA_INT}},
{InstructionGroups::STORE_SCALAR,
Expand All @@ -204,17 +233,22 @@ const std::unordered_map<uint16_t, std::vector<uint16_t>> groupInheritance_ = {
InstructionGroups::STORE_DATA_VECTOR}},
{InstructionGroups::STORE_SVE,
{InstructionGroups::STORE_ADDRESS_SVE, InstructionGroups::STORE_DATA_SVE}},
{InstructionGroups::STORE_STREAMING_SVE,
{InstructionGroups::STORE_ADDRESS_STREAMING_SVE,
InstructionGroups::STORE_DATA_STREAMING_SVE}},
{InstructionGroups::STORE_SME,
{InstructionGroups::STORE_ADDRESS_SME, InstructionGroups::STORE_DATA_SME}},
{InstructionGroups::STORE_ADDRESS,
{InstructionGroups::STORE_ADDRESS_INT,
InstructionGroups::STORE_ADDRESS_SCALAR,
InstructionGroups::STORE_ADDRESS_VECTOR,
InstructionGroups::STORE_ADDRESS_SVE,
InstructionGroups::STORE_ADDRESS_STREAMING_SVE,
InstructionGroups::STORE_ADDRESS_SME}},
{InstructionGroups::STORE_DATA,
{InstructionGroups::STORE_DATA_INT, InstructionGroups::STORE_DATA_SCALAR,
InstructionGroups::STORE_DATA_VECTOR, InstructionGroups::STORE_DATA_SVE,
InstructionGroups::STORE_DATA_STREAMING_SVE,
InstructionGroups::STORE_DATA_SME}}};

} // namespace aarch64
Expand Down
3 changes: 3 additions & 0 deletions src/include/simeng/arch/riscv/Instruction.hh
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,9 @@ class Instruction : public simeng::Instruction {
* the `InsnType` namespace allowing each bit to represent a unique
* identifier such as `isLoad` or `isMultiply` etc. */
uint16_t instructionIdentifier_ = 0;

/** The instruction group this instruction belongs to. */
uint16_t instructionGroup_ = InstructionGroups::NONE;
};

} // namespace riscv
Expand Down
20 changes: 20 additions & 0 deletions src/lib/arch/aarch64/Architecture.cc
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,20 @@ uint8_t Architecture::predecode(const uint8_t* ptr, uint16_t bytesAvailable,
newInsn.setExecutionInfo(getExecutionInfo(newInsn));
// Cache the instruction
iter = decodeCache_.insert({insn, newInsn}).first;
} else {
Instruction& cachedInsn = decodeCache_.at(insn);
// Check whether SVE or Predicate instructions need their group updating
// because the SVE Streaming Mode state differs from when the instruction
// was first decoded.
if (cachedInsn.checkStreamingGroup()) {
// If the instruction's group has changed then update its execution info.
// The newly set group is the one most likely to be correct, as an
// incorrect group allocation only occurs when an exception/flush is
// triggered by changing the SVE Streaming Mode state.
cachedInsn.setExecutionInfo(getExecutionInfo(cachedInsn));
}
// Need to re-set iterator after updating the decodeCache_ structure
iter = decodeCache_.find(insn);
}

// Split instruction into 1 or more defined micro-ops
Expand Down Expand Up @@ -281,6 +295,12 @@ void Architecture::setSVCRval(const uint64_t newVal) const {
SVCRval_ = newVal;
}

// SVE Streaming Mode is enabled when bit 0 of the SVCR register value is set.
bool Architecture::isStreamingModeEnabled() const {
  constexpr uint64_t streamingModeMask = 0x1;
  return (SVCRval_ & streamingModeMask) != 0;
}

// The SME ZA register is enabled when bit 1 of the SVCR register value is set.
bool Architecture::isZA_RegisterEnabled() const {
  constexpr uint64_t zaEnabledMask = 0x2;
  return (SVCRval_ & zaEnabledMask) != 0;
}

} // namespace aarch64
} // namespace arch
} // namespace simeng
Loading