mirror of
https://git.eden-emu.dev/eden-emu/eden.git
synced 2025-07-20 08:15:46 +00:00
Move dead submodules in-tree
Signed-off-by: swurl <swurl@swurl.xyz>
This commit is contained in:
parent
c0cceff365
commit
6c655321e6
4081 changed files with 1185566 additions and 45 deletions
1
externals/oaknut
vendored
1
externals/oaknut
vendored
|
@ -1 +0,0 @@
|
|||
Subproject commit 9d091109deb445bc6e9289c6195a282b7c993d49
|
215
externals/oaknut/.clang-format
vendored
Normal file
215
externals/oaknut/.clang-format
vendored
Normal file
|
@ -0,0 +1,215 @@
|
|||
---
|
||||
Language: Cpp
|
||||
AccessModifierOffset: -4
|
||||
AlignAfterOpenBracket: Align
|
||||
AlignConsecutiveAssignments: None
|
||||
AlignConsecutiveBitFields: None
|
||||
AlignConsecutiveDeclarations: None
|
||||
AlignConsecutiveMacros: None
|
||||
AlignEscapedNewlines: Right
|
||||
AlignOperands: AlignAfterOperator
|
||||
AlignTrailingComments: true
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllConstructorInitializersOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
AllowShortEnumsOnASingleLine: true
|
||||
AllowShortBlocksOnASingleLine: Empty
|
||||
AllowShortCaseLabelsOnASingleLine: false
|
||||
AllowShortFunctionsOnASingleLine: Inline
|
||||
AllowShortLambdasOnASingleLine: All
|
||||
AllowShortIfStatementsOnASingleLine: Never
|
||||
AllowShortLoopsOnASingleLine: false
|
||||
AlwaysBreakAfterDefinitionReturnType: None
|
||||
AlwaysBreakAfterReturnType: None
|
||||
AlwaysBreakBeforeMultilineStrings: true
|
||||
AlwaysBreakTemplateDeclarations: Yes
|
||||
AttributeMacros:
|
||||
- __capability
|
||||
BinPackArguments: true
|
||||
BinPackParameters: false
|
||||
BitFieldColonSpacing: Both
|
||||
BraceWrapping:
|
||||
AfterCaseLabel: false
|
||||
AfterClass: false
|
||||
AfterControlStatement: Never
|
||||
AfterEnum: false
|
||||
AfterFunction: true
|
||||
AfterNamespace: false
|
||||
AfterObjCDeclaration: false
|
||||
AfterStruct: false
|
||||
AfterUnion: false
|
||||
AfterExternBlock: false
|
||||
BeforeCatch: false
|
||||
BeforeElse: false
|
||||
BeforeLambdaBody: false
|
||||
BeforeWhile: false
|
||||
IndentBraces: false
|
||||
SplitEmptyFunction: false
|
||||
SplitEmptyRecord: false
|
||||
SplitEmptyNamespace: false
|
||||
BreakBeforeBinaryOperators: All
|
||||
BreakBeforeBraces: Custom
|
||||
BreakBeforeConceptDeclarations: true
|
||||
BreakBeforeTernaryOperators: true
|
||||
BreakBeforeInheritanceComma: false
|
||||
BreakConstructorInitializersBeforeComma: true
|
||||
BreakConstructorInitializers: BeforeComma
|
||||
BreakInheritanceList: BeforeComma
|
||||
BreakAfterJavaFieldAnnotations: false
|
||||
BreakStringLiterals: true
|
||||
ColumnLimit: 0
|
||||
CommentPragmas: '^ IWYU pragma:'
|
||||
CompactNamespaces: false
|
||||
ConstructorInitializerAllOnOneLineOrOnePerLine: true
|
||||
ConstructorInitializerIndentWidth: 4
|
||||
ContinuationIndentWidth: 4
|
||||
Cpp11BracedListStyle: true
|
||||
DeriveLineEnding: true
|
||||
DerivePointerAlignment: false
|
||||
DisableFormat: false
|
||||
# EmptyLineAfterAccessModifier: Leave
|
||||
EmptyLineBeforeAccessModifier: Always
|
||||
ExperimentalAutoDetectBinPacking: false
|
||||
FixNamespaceComments: true
|
||||
ForEachMacros:
|
||||
- foreach
|
||||
- Q_FOREACH
|
||||
- BOOST_FOREACH
|
||||
IncludeBlocks: Regroup
|
||||
IncludeCategories:
|
||||
- Regex: '^<mach/'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<windows.h>'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '(^<signal.h>)|(^<sys/ucontext.h>)|(^<ucontext.h>)'
|
||||
Priority: 1
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<([^\.])*>$'
|
||||
Priority: 2
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '^<.*\.'
|
||||
Priority: 3
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
- Regex: '.*'
|
||||
Priority: 4
|
||||
SortPriority: 0
|
||||
CaseSensitive: false
|
||||
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
||||
IncludeIsMainSourceRegex: ''
|
||||
# IndentAccessModifiers: false
|
||||
IndentCaseBlocks: false
|
||||
IndentCaseLabels: false
|
||||
IndentExternBlock: NoIndent
|
||||
IndentGotoLabels: false
|
||||
IndentPPDirectives: AfterHash
|
||||
IndentRequires: false
|
||||
IndentWidth: 4
|
||||
IndentWrappedFunctionNames: false
|
||||
# InsertTrailingCommas: None
|
||||
JavaScriptQuotes: Leave
|
||||
JavaScriptWrapImports: true
|
||||
KeepEmptyLinesAtTheStartOfBlocks: false
|
||||
MacroBlockBegin: ''
|
||||
MacroBlockEnd: ''
|
||||
MaxEmptyLinesToKeep: 1
|
||||
NamespaceIndentation: None
|
||||
NamespaceMacros:
|
||||
ObjCBinPackProtocolList: Never
|
||||
ObjCBlockIndentWidth: 2
|
||||
ObjCBreakBeforeNestedBlockParam: true
|
||||
ObjCSpaceAfterProperty: false
|
||||
ObjCSpaceBeforeProtocolList: true
|
||||
PenaltyBreakAssignment: 2
|
||||
PenaltyBreakBeforeFirstCallParameter: 1
|
||||
PenaltyBreakComment: 300
|
||||
PenaltyBreakFirstLessLess: 120
|
||||
PenaltyBreakString: 1000
|
||||
PenaltyBreakTemplateDeclaration: 10
|
||||
PenaltyExcessCharacter: 1000000
|
||||
PenaltyReturnTypeOnItsOwnLine: 200
|
||||
PenaltyIndentedWhitespace: 0
|
||||
PointerAlignment: Left
|
||||
RawStringFormats:
|
||||
- Language: Cpp
|
||||
Delimiters:
|
||||
- cc
|
||||
- CC
|
||||
- cpp
|
||||
- Cpp
|
||||
- CPP
|
||||
- 'c++'
|
||||
- 'C++'
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
- Language: TextProto
|
||||
Delimiters:
|
||||
- pb
|
||||
- PB
|
||||
- proto
|
||||
- PROTO
|
||||
EnclosingFunctions:
|
||||
- EqualsProto
|
||||
- EquivToProto
|
||||
- PARSE_PARTIAL_TEXT_PROTO
|
||||
- PARSE_TEST_PROTO
|
||||
- PARSE_TEXT_PROTO
|
||||
- ParseTextOrDie
|
||||
- ParseTextProtoOrDie
|
||||
- ParseTestProto
|
||||
- ParsePartialTestProto
|
||||
CanonicalDelimiter: ''
|
||||
BasedOnStyle: google
|
||||
ReflowComments: true
|
||||
# ShortNamespaceLines: 5
|
||||
SortIncludes: true
|
||||
SortJavaStaticImport: Before
|
||||
SortUsingDeclarations: true
|
||||
SpaceAfterCStyleCast: false
|
||||
SpaceAfterLogicalNot: false
|
||||
SpaceAfterTemplateKeyword: false
|
||||
SpaceBeforeAssignmentOperators: true
|
||||
SpaceBeforeCaseColon: false
|
||||
SpaceBeforeCpp11BracedList: false
|
||||
SpaceBeforeCtorInitializerColon: true
|
||||
SpaceBeforeInheritanceColon: true
|
||||
SpaceBeforeParens: ControlStatements
|
||||
SpaceAroundPointerQualifiers: Default
|
||||
SpaceBeforeRangeBasedForLoopColon: true
|
||||
SpaceBeforeSquareBrackets: false
|
||||
SpaceInEmptyBlock: false
|
||||
SpaceInEmptyParentheses: false
|
||||
SpacesBeforeTrailingComments: 2
|
||||
SpacesInAngles: false
|
||||
SpacesInCStyleCastParentheses: false
|
||||
SpacesInConditionalStatement: false
|
||||
SpacesInContainerLiterals: false
|
||||
# SpacesInLineCommentPrefix: -1
|
||||
SpacesInParentheses: false
|
||||
SpacesInSquareBrackets: false
|
||||
Standard: Latest
|
||||
StatementAttributeLikeMacros:
|
||||
- Q_EMIT
|
||||
StatementMacros:
|
||||
- Q_UNUSED
|
||||
- QT_REQUIRE_VERSION
|
||||
TabWidth: 4
|
||||
TypenameMacros:
|
||||
UseCRLF: false
|
||||
UseTab: Never
|
||||
WhitespaceSensitiveMacros:
|
||||
- STRINGIZE
|
||||
- PP_STRINGIZE
|
||||
- BOOST_PP_STRINGIZE
|
||||
- NS_SWIFT_NAME
|
||||
- CF_SWIFT_NAME
|
||||
- FCODE
|
||||
- ICODE
|
||||
...
|
||||
|
246
externals/oaknut/.github/workflows/build-and-test.yml
vendored
Normal file
246
externals/oaknut/.github/workflows/build-and-test.yml
vendored
Normal file
|
@ -0,0 +1,246 @@
|
|||
on: [push, pull_request]
|
||||
|
||||
jobs:
|
||||
test_on_ubuntu:
|
||||
runs-on: ubuntu-latest
|
||||
name: g++-10 (aarch64)
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Update package repositories
|
||||
run: sudo apt-get update
|
||||
|
||||
- name: Install dependencies
|
||||
run: >
|
||||
sudo apt-get install -q -y
|
||||
gcc-10-aarch64-linux-gnu
|
||||
g++-10-aarch64-linux-gnu
|
||||
ninja-build
|
||||
qemu-user
|
||||
|
||||
- name: Checkout qemu
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: qemu/qemu
|
||||
ref: v8.1.2
|
||||
path: externals/qemu
|
||||
|
||||
- name: Build qemu
|
||||
working-directory: externals/qemu
|
||||
run: |
|
||||
sudo apt-get install git libglib2.0-dev libfdt-dev libpixman-1-dev zlib1g-dev ninja-build
|
||||
mkdir build
|
||||
cd build
|
||||
../configure --target-list=aarch64-linux-user
|
||||
make -j4 qemu-aarch64
|
||||
|
||||
- name: Checkout Catch2 v3 repo
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: catchorg/Catch2
|
||||
ref: v3.2.0
|
||||
path: externals/catch
|
||||
|
||||
- name: Configure CMake
|
||||
env:
|
||||
CC: aarch64-linux-gnu-gcc-10
|
||||
CXX: aarch64-linux-gnu-g++-10
|
||||
run: >
|
||||
cmake
|
||||
-B ${{github.workspace}}/build
|
||||
-H.
|
||||
-GNinja
|
||||
-DOAKNUT_USE_BUNDLED_CATCH=ON
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: ninja
|
||||
|
||||
- name: Test
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: ../externals/qemu/build/qemu-aarch64 -L /usr/aarch64-linux-gnu ./oaknut-tests -d yes
|
||||
|
||||
test_on_windows:
|
||||
runs-on: windows-latest
|
||||
name: msvc-arm64
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Checkout Catch2 v3 repo
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: catchorg/Catch2
|
||||
ref: v3.2.0
|
||||
path: externals/catch
|
||||
|
||||
- name: Setup msvc-arm64 environment
|
||||
uses: ilammy/msvc-dev-cmd@v1
|
||||
with:
|
||||
arch: amd64_arm64
|
||||
|
||||
- name: Configure CMake
|
||||
run: >
|
||||
cmake
|
||||
-B ${{github.workspace}}/build
|
||||
-GNinja
|
||||
-DOAKNUT_USE_BUNDLED_CATCH=ON
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: cmake --build . --config Release
|
||||
|
||||
test_on_macos:
|
||||
runs-on: macos-latest
|
||||
name: macos-arm64
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Checkout Catch2 v3 repo
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: catchorg/Catch2
|
||||
ref: v3.2.0
|
||||
path: externals/catch
|
||||
|
||||
- name: Install dependencies
|
||||
run: |
|
||||
brew install ninja
|
||||
|
||||
- name: Configure CMake
|
||||
run: >
|
||||
cmake
|
||||
-B ${{github.workspace}}/build
|
||||
-GNinja
|
||||
-DCMAKE_OSX_ARCHITECTURES=arm64
|
||||
-DOAKNUT_USE_BUNDLED_CATCH=ON
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: cmake --build . --config Release
|
||||
|
||||
test_on_freebsd:
|
||||
runs-on: ubuntu-latest
|
||||
name: freebsd-arm64
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Build and Test
|
||||
uses: cross-platform-actions/action@v0.19.1
|
||||
with:
|
||||
operating_system: freebsd
|
||||
architecture: arm64
|
||||
version: '13.2'
|
||||
shell: bash
|
||||
run: |
|
||||
pwd
|
||||
sudo pkg update
|
||||
sudo pkg install -y catch2 cmake ninja
|
||||
cmake -B ${{github.workspace}}/build -GNinja
|
||||
cd build
|
||||
cmake --build . --config Release
|
||||
./oaknut-tests -d yes
|
||||
|
||||
test_on_openbsd:
|
||||
runs-on: ubuntu-latest
|
||||
name: openbsd-arm64
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Build and Test
|
||||
uses: cross-platform-actions/action@v0.19.1
|
||||
with:
|
||||
operating_system: openbsd
|
||||
architecture: arm64
|
||||
version: '7.3'
|
||||
shell: bash
|
||||
run: |
|
||||
pwd
|
||||
sudo pkg_add catch2 cmake ninja
|
||||
cmake -B ${{github.workspace}}/build -GNinja
|
||||
cd build
|
||||
cmake --build . --config Release
|
||||
./oaknut-tests -d yes "~[slow]"
|
||||
|
||||
test_on_android:
|
||||
runs-on: ubuntu-latest
|
||||
name: android
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Update package repositories
|
||||
run: sudo apt-get update
|
||||
|
||||
- name: Install dependencies
|
||||
run: sudo apt-get install -q -y ninja-build
|
||||
|
||||
- name: Checkout Catch2 v3 repo
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: catchorg/Catch2
|
||||
ref: v3.2.0
|
||||
path: externals/catch
|
||||
|
||||
- name: Configure CMake
|
||||
run: >
|
||||
cmake
|
||||
-B ${{github.workspace}}/build
|
||||
-H.
|
||||
-GNinja
|
||||
-DANDROID_ABI=arm64-v8a
|
||||
-DANDROID_PLATFORM=30
|
||||
-DCMAKE_BUILD_TYPE=Release
|
||||
-DCMAKE_TOOLCHAIN_FILE=$ANDROID_NDK/build/cmake/android.toolchain.cmake
|
||||
-DOAKNUT_USE_BUNDLED_CATCH=ON
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: cmake --build . --config Release
|
||||
|
||||
x86_64:
|
||||
runs-on: ubuntu-latest
|
||||
name: x86_64
|
||||
|
||||
steps:
|
||||
- name: Checkout oaknut repo
|
||||
uses: actions/checkout@v3
|
||||
|
||||
- name: Update package repositories
|
||||
run: sudo apt-get update
|
||||
|
||||
- name: Install dependencies
|
||||
run: sudo apt-get install -q -y ninja-build
|
||||
|
||||
- name: Checkout Catch2 v3 repo
|
||||
uses: actions/checkout@v3
|
||||
with:
|
||||
repository: catchorg/Catch2
|
||||
ref: v3.2.0
|
||||
path: externals/catch
|
||||
|
||||
- name: Configure CMake
|
||||
run: >
|
||||
cmake
|
||||
-B ${{github.workspace}}/build
|
||||
-H.
|
||||
-GNinja
|
||||
-DOAKNUT_USE_BUNDLED_CATCH=ON
|
||||
|
||||
- name: Build
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: ninja
|
||||
|
||||
- name: Test
|
||||
working-directory: ${{github.workspace}}/build
|
||||
run: ./oaknut-tests -d yes
|
4
externals/oaknut/.gitignore
vendored
Normal file
4
externals/oaknut/.gitignore
vendored
Normal file
|
@ -0,0 +1,4 @@
|
|||
.DS_Store
|
||||
a.out
|
||||
work/
|
||||
*build*/
|
138
externals/oaknut/CMakeLists.txt
vendored
Normal file
138
externals/oaknut/CMakeLists.txt
vendored
Normal file
|
@ -0,0 +1,138 @@
|
|||
cmake_minimum_required(VERSION 3.8)
|
||||
project(oaknut LANGUAGES CXX VERSION 2.0.2)
|
||||
|
||||
# Determine if we're built as a subproject (using add_subdirectory)
|
||||
# or if this is the master project.
|
||||
set(MASTER_PROJECT OFF)
|
||||
if (CMAKE_CURRENT_SOURCE_DIR STREQUAL CMAKE_SOURCE_DIR)
|
||||
set(MASTER_PROJECT ON)
|
||||
endif()
|
||||
|
||||
# Disable in-source builds
|
||||
set(CMAKE_DISABLE_SOURCE_CHANGES ON)
|
||||
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
|
||||
if ("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
|
||||
message(SEND_ERROR "In-source builds are not allowed.")
|
||||
endif()
|
||||
|
||||
# Source project files
|
||||
set(header_files
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/code_block.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/dual_code_block.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/cpu_feature.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/feature_detection.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/feature_detection/id_registers.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/arm64_encode_helpers.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/cpu_feature.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/enum.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/imm.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/list.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/mnemonics_generic_v8.3.inc.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/multi_typed_name.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/offset.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/overloaded.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/reg.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/impl/string_literal.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut.hpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut/oaknut_exception.hpp
|
||||
)
|
||||
|
||||
include(GNUInstallDirs)
|
||||
|
||||
# Library definition
|
||||
add_library(oaknut INTERFACE)
|
||||
add_library(merry::oaknut ALIAS oaknut)
|
||||
target_sources(oaknut INTERFACE "$<BUILD_INTERFACE:${header_files}>")
|
||||
target_include_directories(oaknut INTERFACE
|
||||
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>
|
||||
)
|
||||
target_compile_features(oaknut INTERFACE cxx_std_20)
|
||||
|
||||
# Tests
|
||||
if (MASTER_PROJECT)
|
||||
include(CTest)
|
||||
endif()
|
||||
|
||||
if (BUILD_TESTING)
|
||||
option(OAKNUT_USE_BUNDLED_CATCH "Use the embedded Catch2 submodule" OFF)
|
||||
if (OAKNUT_USE_BUNDLED_CATCH)
|
||||
add_subdirectory(externals/catch)
|
||||
else()
|
||||
find_package(Catch2 3 REQUIRED)
|
||||
endif()
|
||||
|
||||
add_executable(oaknut-tests
|
||||
tests/_feature_detect.cpp
|
||||
tests/basic.cpp
|
||||
tests/fpsimd.cpp
|
||||
tests/general.cpp
|
||||
tests/rand_int.hpp
|
||||
tests/vector_code_gen.cpp
|
||||
)
|
||||
target_include_directories(oaknut-tests PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/tests)
|
||||
target_link_libraries(oaknut-tests PRIVATE Catch2::Catch2WithMain merry::oaknut)
|
||||
if (MSVC)
|
||||
target_compile_options(oaknut-tests PRIVATE
|
||||
/experimental:external
|
||||
/external:W0
|
||||
/external:anglebrackets
|
||||
/W4
|
||||
/WX
|
||||
/w44263 # Non-virtual member function hides base class virtual function
|
||||
/w44265 # Class has virtual functions, but destructor is not virtual
|
||||
/w44456 # Declaration of 'var' hides previous local declaration
|
||||
/w44457 # Declaration of 'var' hides function parameter
|
||||
/w44458 # Declaration of 'var' hides class member
|
||||
/w44459 # Declaration of 'var' hides global definition
|
||||
/w44946 # Reinterpret-cast between related types
|
||||
/wd4592 # Symbol will be dynamically initialized (implementation limitation)
|
||||
/permissive- # Stricter C++ standards conformance
|
||||
/MP
|
||||
/Zi
|
||||
/Zo
|
||||
/EHsc
|
||||
/Zc:externConstexpr # Allows external linkage for variables declared "extern constexpr", as the standard permits.
|
||||
/Zc:inline # Omits inline functions from object-file output.
|
||||
/Zc:throwingNew # Assumes new (without std::nothrow) never returns null.
|
||||
/volatile:iso # Use strict standard-abiding volatile semantics
|
||||
)
|
||||
else()
|
||||
target_compile_options(oaknut-tests PRIVATE -Wall -Wextra -Wcast-qual -pedantic -pedantic-errors -Wfatal-errors -Wno-missing-braces)
|
||||
endif()
|
||||
|
||||
add_test(oaknut-tests oaknut-tests --durations yes)
|
||||
endif()
|
||||
|
||||
# Install
|
||||
include(CMakePackageConfigHelpers)
|
||||
|
||||
install(TARGETS oaknut EXPORT oaknutTargets)
|
||||
install(EXPORT oaknutTargets
|
||||
NAMESPACE merry::
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/oaknut"
|
||||
)
|
||||
|
||||
configure_package_config_file("${CMAKE_CURRENT_SOURCE_DIR}/oaknutConfig.cmake.in"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/oaknutConfig.cmake"
|
||||
INSTALL_DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/oaknut"
|
||||
)
|
||||
write_basic_package_version_file("${CMAKE_CURRENT_BINARY_DIR}/oaknutConfigVersion.cmake"
|
||||
COMPATIBILITY SameMajorVersion
|
||||
)
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/oaknutConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/oaknutConfigVersion.cmake"
|
||||
DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/oaknut"
|
||||
)
|
||||
install(DIRECTORY
|
||||
"${CMAKE_CURRENT_SOURCE_DIR}/include/oaknut"
|
||||
DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}"
|
||||
)
|
21
externals/oaknut/LICENSE
vendored
Normal file
21
externals/oaknut/LICENSE
vendored
Normal file
|
@ -0,0 +1,21 @@
|
|||
MIT License
|
||||
|
||||
Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
265
externals/oaknut/README.md
vendored
Normal file
265
externals/oaknut/README.md
vendored
Normal file
|
@ -0,0 +1,265 @@
|
|||
# Oaknut
|
||||
|
||||
*A C++20 assembler for AArch64 (ARMv8.0 to ARMv8.3)*
|
||||
|
||||
Oaknut is a header-only library that allows one to dynamically assemble code in-memory at runtime.
|
||||
|
||||
## Usage
|
||||
|
||||
Give `oaknut::CodeGenerator` a pointer to a block of memory. Call functions on it to emit code.
|
||||
|
||||
Simple example:
|
||||
|
||||
```cpp
|
||||
#include <cstdio>
|
||||
#include <oaknut/code_block.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
using EmittedFunction = int (*)();
|
||||
|
||||
EmittedFunction EmitExample(oaknut::CodeGenerator& code, int value)
|
||||
{
|
||||
using namespace oaknut::util;
|
||||
|
||||
EmittedFunction result = code.xptr<EmittedFunction>();
|
||||
|
||||
code.MOV(W0, value);
|
||||
code.RET();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
oaknut::CodeBlock mem{4096};
|
||||
oaknut::CodeGenerator code{mem.ptr()};
|
||||
|
||||
mem.unprotect();
|
||||
|
||||
EmittedFunction fn = EmitExample(code, 42);
|
||||
|
||||
mem.protect();
|
||||
mem.invalidate_all();
|
||||
|
||||
std::printf("%i\n", fn()); // Output: 42
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
CodeGenerator also has a constructor taking two pointers. The first pointer is the memory address to write to, and the second pointer is the memory address that the code will be executing from. This allows you to write to a buffer before copying to the final destination for execution, or to use dual-mapped memory blocks to avoid memory protection overhead.
|
||||
|
||||
Below is an example of using the oaknut-provided utility header for dual-mapped memory blocks:
|
||||
|
||||
```cpp
|
||||
#include <cstdio>
|
||||
#include <oaknut/dual_code_block.hpp>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
|
||||
using EmittedFunction = int (*)();
|
||||
|
||||
int main()
|
||||
{
|
||||
using namespace oaknut::util;
|
||||
|
||||
oaknut::DualCodeBlock mem{4096};
|
||||
oaknut::CodeGenerator code{mem.wptr(), mem.xptr()};
|
||||
|
||||
const auto fn = code.xptr<int (*)()>();
|
||||
|
||||
code.MOV(W0, 42);
|
||||
code.RET();
|
||||
|
||||
mem.invalidate_all();
|
||||
|
||||
std::printf("%i\n", fn()); // Output: 42
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
### Emit to `std::vector`
|
||||
|
||||
If you wish to merely emit code into memory without executing it, or if you are developing a cross-compiler that is not running on an ARM64 device, you can use `oaknut::VectorCodeGenerator` instead.
|
||||
|
||||
Provide `oaknut::VectorCodeGenerator` with a reference to a `std::vector<std::uint32_t>` and it will append to that vector.
|
||||
|
||||
The second argument is a pointer representing the destination address the code will eventually be executed from.
|
||||
|
||||
Simple example:
|
||||
|
||||
```cpp
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <oaknut/oaknut.hpp>
|
||||
#include <vector>
|
||||
|
||||
int main()
|
||||
{
|
||||
std::vector<std::uint32_t> vec;
|
||||
oaknut::VectorCodeGenerator code{vec, (std::uint32_t*)0x1000};
|
||||
|
||||
code.MOV(oaknut::util::W0, 42);
|
||||
code.RET();
|
||||
|
||||
std::printf("%08x %08x\n", vec[0], vec[1]); // Output: 52800540 d65f03c0
|
||||
|
||||
return 0;
|
||||
}
|
||||
```
|
||||
|
||||
## Headers
|
||||
|
||||
| Header | Compiles on non-ARM64 | Contents |
|
||||
| ------ | --------------------- | -------- |
|
||||
| `<oaknut/oaknut.hpp>` | Yes | Provides `CodeGenerator` and `VectorCodeGenerator` for code emission, as well as the `oaknut::util` namespace. |
|
||||
| `<oaknut/code_block.hpp>` | No | Utility header that provides `CodeBlock`, which allocates, alters permissions of, and invalidates executable memory. |
|
||||
| `<oaknut/dual_code_block.hpp>` | No | Utility header that provides `DualCodeBlock`, which allocates two mirrored memory blocks (with RW and RX permissions respectively). |
|
||||
| `<oaknut/oaknut_exception.hpp>` | Yes | Provides `OaknutException` which is thrown on an error. |
|
||||
| `<oaknut/feature_detection/cpu_feature.hpp>` | Yes | Utility header that provides `CpuFeatures` which can be used to describe AArch64 features. |
|
||||
| `<oaknut/feature_detection/feature_detection.hpp>` | No | Utility header that provides `detect_features` and `read_id_registers` for determining available AArch64 features. |
|
||||
|
||||
### Instructions
|
||||
|
||||
Each AArch64 instruction corresponds to one emitter function. For a list of emitter functions see:
|
||||
|
||||
* ARMv8.0: [general instructions](include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp)
|
||||
* ARMv8.1: [general instructions](include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp)
|
||||
* ARMv8.2: [general instructions](include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp)
|
||||
* ARMv8.3: [general instructions](include/oaknut/impl/mnemonics_generic_v8.3.inc.hpp), [FP & SIMD instructions](include/oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp)
|
||||
|
||||
### Operands
|
||||
|
||||
The `oaknut::util` namespace provides convenient names for operands for instructions. For example:
|
||||
|
||||
|Name|Class| |
|
||||
|----|----|----|
|
||||
|W0, W1, ..., W30|`WReg`|32-bit general purpose registers|
|
||||
|X0, X1, ..., X30|`XReg`|64-bit general purpose registers|
|
||||
|WZR|`WzrReg` (convertible to `WReg`)|32-bit zero register|
|
||||
|XZR|`ZrReg` (convertible to `XReg`)|64-bit zero register|
|
||||
|WSP|`WspReg` (convertible to `WRegSp`)|32-bit stack pointer|
|
||||
|SP|`SpReg` (convertible to `XRegSp`)|64-bit stack pointer|
|
||||
|B0, B1, ..., B31|`BReg`|8-bit scalar SIMD register|
|
||||
|H0, H1, ..., H31|`HReg`|16-bit scalar SIMD register|
|
||||
|S0, S1, ..., S31|`SReg`|32-bit scalar SIMD register|
|
||||
|D0, D1, ..., D31|`DReg`|64-bit scalar SIMD register|
|
||||
|Q0, Q1, ..., Q31|`QReg`|128-bit scalar SIMD register|
|
||||
|
||||
For vector operations, you can specify registers like so:
|
||||
|
||||
|Name|Class| |
|
||||
|----|----|----|
|
||||
|V0.B8(), ...|`VReg_8B`|8 elements each 8 bits in size|
|
||||
|V0.B16(), ...|`VReg_16B`|16 elements each 8 bits in size|
|
||||
|V0.H4(), ...|`VReg_4H`|4 elements each 16 bits in size|
|
||||
|V0.H8(), ...|`VReg_8H`|8 elements each 16 bits in size|
|
||||
|V0.S2(), ...|`VReg_2S`|2 elements each 32 bits in size|
|
||||
|V0.S4(), ...|`VReg_4S`|4 elements each 32 bits in size|
|
||||
|V0.D1(), ...|`VReg_1D`|1 element of 64 bits in size|
|
||||
|V0.D2(), ...|`VReg_2D`|2 elements each 64 bits in size|
|
||||
|
||||
And you can specify elements like so:
|
||||
|
||||
|Name|Class| |
|
||||
|----|----|----|
|
||||
|V0.B()[0]|`BElem`|0th 8-bit element of V0 register|
|
||||
|V0.H()[0]|`HElem`|0th 16-bit element of V0 register|
|
||||
|V0.S()[0]|`SElem`|0th 32-bit element of V0 register|
|
||||
|V0.D()[0]|`DElem`|0th 64-bit element of V0 register|
|
||||
|
||||
Register lists are specified using `List`:
|
||||
|
||||
```
|
||||
List{V0.B16(), V1.B16(), V2.B16()} // This expression has type List<VReg_16B, 3>
|
||||
```
|
||||
|
||||
And lists of elements similarly (both forms are equivalent):
|
||||
|
||||
```
|
||||
List{V0.B()[1], V1.B()[1], V2.B()[1]} // This expression has type List<BElem, 3>
|
||||
List{V0.B(), V1.B(), V2.B()}[1] // This expression has type List<BElem, 3>
|
||||
```
|
||||
|
||||
You can find examples of instruction use in [tests/general.cpp](tests/general.cpp) and [tests/fpsimd.cpp](tests/fpsimd.cpp).
|
||||
|
||||
## Feature Detection
|
||||
|
||||
### CPU features
|
||||
|
||||
This library also includes utility headers for CPU feature detection.
|
||||
|
||||
One just needs to include `<oaknut/feature_detection/feature_detection.hpp>`, then call `detect_features` to get a bitset of features in a cross-platform manner.
|
||||
|
||||
CPU feature detection is operating system specific, and some operating systems even have multiple methods. Here is a list of supported operating systems and implemented methods:
|
||||
|
||||
| Operating system | Default Method |
|
||||
| ---- | ---- |
|
||||
| Linux / Android | [ELF hwcaps](https://www.kernel.org/doc/html/latest/arch/arm64/elf_hwcaps.html) |
|
||||
| Apple | [sysctlbyname](https://developer.apple.com/documentation/kernel/1387446-sysctlbyname) |
|
||||
| Windows | [IsProcessorFeaturePresent](https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent) |
|
||||
| FreeBSD | ELF hwcaps |
|
||||
| NetBSD | machdep.cpu%d.cpu_id sysctl |
|
||||
| OpenBSD | CTL_MACHDEP.CPU_ID_* sysctl |
|
||||
|
||||
Advanced users who wish to choose a particular detection method can call one of the alternative functions directly. (See `detect_features_via_*`.)
|
||||
|
||||
Simple example:
|
||||
|
||||
```cpp
|
||||
#include <cstdio>
|
||||
#include <oaknut/feature_detection/feature_detection.hpp>
|
||||
|
||||
int main() {
|
||||
oaknut::CpuFeatures feats = oaknut::detect_features();
|
||||
|
||||
std::printf("CPU supports JSCVT: %i\n", feats.has(oaknut::CpuFeature::JSCVT));
|
||||
}
|
||||
```
|
||||
|
||||
### ID registers
|
||||
|
||||
We also provide a cross-platform way for ID registers to be read:
|
||||
|
||||
| **`OAKNUT_SUPPORTS_READING_ID_REGISTERS`** | Available functionality |
|
||||
| ---- | ---- |
|
||||
| 0 | Reading ID registers is not supported on this operating system. |
|
||||
| 1 | This operating system provides a system-wide set of ID registers, use `read_id_registers()`. |
|
||||
| 2 | Per-core ID registers, use `get_core_count()` and `read_id_registers(int index)`. |
|
||||
|
||||
All of the above operating systems, with the exception of Apple, also support reading ID registers; if one prefers, one can do feature detection via `detect_features_via_id_registers(*read_id_registers())`.
|
||||
|
||||
Simple example:
|
||||
|
||||
```cpp
|
||||
#include <cstddef>
|
||||
#include <cstdio>
|
||||
#include <oaknut/feature_detection/feature_detection.hpp>
|
||||
|
||||
int main() {
#if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1

    // read_id_registers() returns a std::optional, so check it before use.
    const auto id = oaknut::read_id_registers();
    if (id) {
        std::printf("ISAR0 register: %08x\n", id->isar0.value);
    } else {
        std::printf("ID registers could not be read\n");
    }

#elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2

    const std::size_t core_count = oaknut::get_core_count();
    for (std::size_t core_index = 0; core_index < core_count; core_index++) {
        // Read the registers of the core being reported on each iteration,
        // rather than printing core 0's registers for every core.
        oaknut::id::IdRegisters id = oaknut::read_id_registers(core_index);
        std::printf("ISAR0 register (for core %zu): %08x\n", core_index, id.isar0.value);
    }

#else

    std::printf("Reading ID registers not supported\n");

#endif
}
|
||||
```
|
||||
|
||||
## License
|
||||
|
||||
This project is [MIT licensed](LICENSE).
|
143
externals/oaknut/include/oaknut/code_block.hpp
vendored
Normal file
143
externals/oaknut/include/oaknut/code_block.hpp
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# define NOMINMAX
|
||||
# include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <TargetConditionals.h>
|
||||
# include <libkern/OSCacheControl.h>
|
||||
# include <pthread.h>
|
||||
# include <sys/mman.h>
|
||||
# include <unistd.h>
|
||||
#else
|
||||
# include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Owns one anonymous memory mapping that is used to hold generated code.
///
/// The mapping is created in the constructor and released in the destructor.
/// The type is neither copyable nor movable.
class CodeBlock {
public:
    /// Maps `size` bytes of memory.
    ///
    /// The initial protection differs per platform (see the branches below): some
    /// platforms map the block readable, writable and executable at once, others
    /// start without write access and rely on unprotect() / protect().
    ///
    /// @param size  Size of the mapping in bytes.
    /// @throws std::bad_alloc if the operating system cannot provide the mapping.
    explicit CodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_memory = static_cast<std::uint32_t*>(VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE));
        // VirtualAlloc reports failure with a null pointer.
        if (m_memory == nullptr)
            throw std::bad_alloc{};
#else
#    if defined(__APPLE__) && TARGET_OS_IPHONE
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#    elif defined(__APPLE__)
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#    elif defined(__NetBSD__)
        void* const mapping = mmap(nullptr, size, PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0);
#    elif defined(__OpenBSD__)
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#    else
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#    endif
        // mmap reports failure with MAP_FAILED rather than a null pointer, so a
        // comparison against nullptr would let a failed mapping go unnoticed.
        if (mapping == MAP_FAILED)
            throw std::bad_alloc{};
        m_memory = static_cast<std::uint32_t*>(mapping);
#endif
    }

    /// Releases the mapping created by the constructor.
    ~CodeBlock()
    {
        if (m_memory == nullptr)
            return;

#if defined(_WIN32)
        VirtualFree(static_cast<void*>(m_memory), 0, MEM_RELEASE);
#else
        munmap(m_memory, m_size);
#endif
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    /// Returns a pointer to the start of the mapping.
    std::uint32_t* ptr() const
    {
        return m_memory;
    }

    /// Switches the block to its "execute" state on platforms that need it.
    ///
    /// On macOS this toggles the calling thread's JIT write protection; on iOS,
    /// NetBSD and OpenBSD it re-protects the mapping as read + execute. It is a
    /// no-op on every other platform.
    void protect()
    {
#if defined(__APPLE__) && !TARGET_OS_IPHONE
        pthread_jit_write_protect_np(1);
#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
        mprotect(m_memory, m_size, PROT_READ | PROT_EXEC);
#endif
    }

    /// Switches the block to its "write" state on platforms that need it.
    ///
    /// Counterpart of protect(): disables JIT write protection on macOS, or
    /// re-protects the mapping as read + write on iOS, NetBSD and OpenBSD.
    /// It is a no-op on every other platform.
    void unprotect()
    {
#if defined(__APPLE__) && !TARGET_OS_IPHONE
        pthread_jit_write_protect_np(0);
#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
        mprotect(m_memory, m_size, PROT_READ | PROT_WRITE);
#endif
    }

    /// Makes code written to [mem, mem + size) visible to instruction fetch.
    ///
    /// Uses the operating system facility on Apple platforms and Windows, and
    /// explicit cache maintenance instructions everywhere else.
    ///
    /// @param mem   Start of the range to invalidate.
    /// @param size  Length of the range in bytes.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
        FlushInstructionCache(GetCurrentProcess(), mem, size);
#else
        // Smallest line sizes observed so far; each call can only lower them.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // CTR_EL0 bits [3:0] and [19:16] hold log2 of the line size in 4-byte words.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Clean every data cache line that overlaps the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Invalidate every instruction cache line that overlaps the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Invalidates the whole block; equivalent to invalidate(ptr(), size).
    void invalidate_all()
    {
        invalidate(m_memory, m_size);
    }

protected:
    std::uint32_t* m_memory = nullptr;  ///< Start of the mapping.
    std::size_t m_size = 0;             ///< Size of the mapping in bytes.
};
|
||||
|
||||
} // namespace oaknut
|
165
externals/oaknut/include/oaknut/dual_code_block.hpp
vendored
Normal file
165
externals/oaknut/include/oaknut/dual_code_block.hpp
vendored
Normal file
|
@ -0,0 +1,165 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <new>
|
||||
|
||||
#if defined(_WIN32)
|
||||
# define NOMINMAX
|
||||
# include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
# include <mach/mach.h>
|
||||
# include <mach/vm_map.h>
|
||||
|
||||
# include <TargetConditionals.h>
|
||||
# include <libkern/OSCacheControl.h>
|
||||
# include <pthread.h>
|
||||
# include <sys/mman.h>
|
||||
# include <unistd.h>
|
||||
#else
|
||||
# if !defined(_GNU_SOURCE)
|
||||
# define _GNU_SOURCE
|
||||
# endif
|
||||
# include <sys/mman.h>
|
||||
# include <sys/types.h>
|
||||
# include <unistd.h>
|
||||
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Owns a block of code memory that is reachable through two pointers: a
/// writeable view (wptr()) and an executable view (xptr()).
///
/// On Windows both views are the same read/write/execute allocation. On the
/// other platforms they are two separate mappings of the same memory.
/// The type is neither copyable nor movable.
class DualCodeBlock {
public:
    /// Creates both views of a `size`-byte block.
    ///
    /// Anything acquired before a failing step is released again before the
    /// exception is thrown, because the destructor does not run for an object
    /// whose constructor throws.
    ///
    /// @param size  Size of the block in bytes.
    /// @throws std::bad_alloc if any step of setting up the mappings fails.
    explicit DualCodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_wmem = m_xmem = static_cast<std::uint32_t*>(VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE));
        if (m_wmem == nullptr)
            throw std::bad_alloc{};
#elif defined(__APPLE__)
        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (wmem == MAP_FAILED)
            throw std::bad_alloc{};
        m_wmem = static_cast<std::uint32_t*>(wmem);

        // Create a second mapping of the same pages for the executable view.
        vm_prot_t cur_prot, max_prot;
        kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE);
        if (ret != KERN_SUCCESS) {
            m_xmem = nullptr;  // nothing was mapped for the executable view
            release();
            throw std::bad_alloc{};
        }

        mprotect(m_xmem, size, PROT_READ | PROT_EXEC);
#else
#    if defined(__OpenBSD__)
        char tmpl[] = "oaknut_dual_code_block.XXXXXXXXXX";
        fd = shm_mkstemp(tmpl);
        if (fd < 0)
            throw std::bad_alloc{};
        shm_unlink(tmpl);
#    else
        fd = memfd_create("oaknut_dual_code_block", 0);
        if (fd < 0)
            throw std::bad_alloc{};
#    endif

        int ret = ftruncate(fd, size);
        if (ret != 0) {
            release();
            throw std::bad_alloc{};
        }

        // Map the same file twice: once writeable, once executable.
        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        void* const xmem = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);

        // Only record mappings that exist, so release() never sees MAP_FAILED.
        if (wmem != MAP_FAILED)
            m_wmem = static_cast<std::uint32_t*>(wmem);
        if (xmem != MAP_FAILED)
            m_xmem = static_cast<std::uint32_t*>(xmem);

        if (wmem == MAP_FAILED || xmem == MAP_FAILED) {
            release();
            throw std::bad_alloc{};
        }
#endif
    }

    /// Releases both views (and the backing file descriptor where one is used).
    ~DualCodeBlock()
    {
        release();
    }

    DualCodeBlock(const DualCodeBlock&) = delete;
    DualCodeBlock& operator=(const DualCodeBlock&) = delete;
    DualCodeBlock(DualCodeBlock&&) = delete;
    DualCodeBlock& operator=(DualCodeBlock&&) = delete;

    /// Pointer to executable mirror of memory (permissions: R-X)
    std::uint32_t* xptr() const
    {
        return m_xmem;
    }

    /// Pointer to writeable mirror of memory (permissions: RW-)
    std::uint32_t* wptr() const
    {
        return m_wmem;
    }

    /// Invalidate should be used with executable memory pointers.
    ///
    /// Makes code in [mem, mem + size) visible to instruction fetch, using the
    /// operating system facility on Apple platforms and Windows and explicit
    /// cache maintenance instructions everywhere else.
    ///
    /// @param mem   Start of the range to invalidate.
    /// @param size  Length of the range in bytes.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
        FlushInstructionCache(GetCurrentProcess(), mem, size);
#else
        // Smallest line sizes observed so far; each call can only lower them.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // CTR_EL0 bits [3:0] and [19:16] hold log2 of the line size in 4-byte words.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Clean every data cache line that overlaps the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Invalidate every instruction cache line that overlaps the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Invalidates the whole block through its executable view.
    void invalidate_all()
    {
        invalidate(m_xmem, m_size);
    }

protected:
#if !defined(_WIN32) && !defined(__APPLE__)
    int fd = -1;  ///< Descriptor of the shared memory object backing both views.
#endif
    std::uint32_t* m_xmem = nullptr;  ///< Executable view.
    std::uint32_t* m_wmem = nullptr;  ///< Writeable view.
    std::size_t m_size = 0;           ///< Size of the block in bytes.

private:
    /// Frees whatever the object currently holds and resets the members.
    /// Safe to call on a partially constructed object.
    void release() noexcept
    {
#if defined(_WIN32)
        // Both views alias one allocation, so it is freed once.
        if (m_xmem != nullptr)
            VirtualFree(static_cast<void*>(m_xmem), 0, MEM_RELEASE);
#elif defined(__APPLE__)
        if (m_xmem != nullptr)
            vm_deallocate(mach_task_self(), (vm_address_t)m_xmem, m_size);
        if (m_wmem != nullptr)
            munmap(m_wmem, m_size);
#else
        if (m_wmem != nullptr)
            munmap(m_wmem, m_size);
        if (m_xmem != nullptr)
            munmap(m_xmem, m_size);
        if (fd >= 0)
            close(fd);
        fd = -1;
#endif
        m_wmem = nullptr;
        m_xmem = nullptr;
    }
};
|
||||
|
||||
} // namespace oaknut
|
107
externals/oaknut/include/oaknut/feature_detection/cpu_feature.hpp
vendored
Normal file
107
externals/oaknut/include/oaknut/feature_detection/cpu_feature.hpp
vendored
Normal file
|
@ -0,0 +1,107 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bitset>
|
||||
#include <cstddef>
|
||||
#include <initializer_list>
|
||||
|
||||
#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L
|
||||
# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr
|
||||
#else
|
||||
# define OAKNUT_CPU_FEATURES_CONSTEXPR
|
||||
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// NOTE: This file contains code that can be compiled on non-arm64 systems.
|
||||
// For run-time CPU feature detection, include feature_detection.hpp
|
||||
|
||||
// Enumeration of CPU features.
// The enumerators are not spelled out here: they come from
// oaknut/impl/cpu_feature.inc.hpp, which is included while
// OAKNUT_CPU_FEATURE(name) is defined to expand to `name,`.
enum class CpuFeature {
|
||||
#define OAKNUT_CPU_FEATURE(name) name,
|
||||
#include "oaknut/impl/cpu_feature.inc.hpp"
|
||||
#undef OAKNUT_CPU_FEATURE
|
||||
};
|
||||
|
||||
// Starts at 0 and adds 1 for every OAKNUT_CPU_FEATURE(...) expansion in
// oaknut/impl/cpu_feature.inc.hpp (the same file that generates CpuFeature),
// so this is presumably the number of CpuFeature enumerators.
constexpr std::size_t cpu_feature_count = 0
|
||||
#define OAKNUT_CPU_FEATURE(name) +1
|
||||
#include "oaknut/impl/cpu_feature.inc.hpp"
|
||||
#undef OAKNUT_CPU_FEATURE
|
||||
;
|
||||
|
||||
class CpuFeatures final {
|
||||
public:
|
||||
constexpr CpuFeatures() = default;
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features)
|
||||
{
|
||||
for (CpuFeature f : features) {
|
||||
m_bitset.set(static_cast<std::size_t>(f));
|
||||
}
|
||||
}
|
||||
|
||||
constexpr bool has(CpuFeature feature) const
|
||||
{
|
||||
if (static_cast<std::size_t>(feature) >= cpu_feature_count)
|
||||
return false;
|
||||
return m_bitset[static_cast<std::size_t>(feature)];
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset &= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset |= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept
|
||||
{
|
||||
m_bitset ^= other.m_bitset;
|
||||
return *this;
|
||||
}
|
||||
|
||||
OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept
|
||||
{
|
||||
CpuFeatures result;
|
||||
result.m_bitset = ~m_bitset;
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
using bitset = std::bitset<cpu_feature_count>;
|
||||
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept;
|
||||
|
||||
bitset m_bitset;
|
||||
};
|
||||
|
||||
/// Returns the features present in both `a` and `b`.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this header-defined function would otherwise have external linkage
/// and be defined once per including translation unit.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset & b.m_bitset;
    return result;
}
|
||||
|
||||
/// Returns the features present in `a`, in `b`, or in both.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this header-defined function would otherwise have external linkage
/// and be defined once per including translation unit.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset | b.m_bitset;
    return result;
}
|
||||
|
||||
/// Returns the features present in exactly one of `a` and `b`.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this header-defined function would otherwise have external linkage
/// and be defined once per including translation unit.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset ^ b.m_bitset;
    return result;
}
|
||||
|
||||
} // namespace oaknut
|
35
externals/oaknut/include/oaknut/feature_detection/feature_detection.hpp
vendored
Normal file
35
externals/oaknut/include/oaknut/feature_detection/feature_detection.hpp
vendored
Normal file
|
@ -0,0 +1,35 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#if defined(__APPLE__)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
|
||||
# include "oaknut/feature_detection/feature_detection_apple.hpp"
|
||||
#elif defined(__FreeBSD__)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
|
||||
# include "oaknut/feature_detection/feature_detection_freebsd.hpp"
|
||||
#elif defined(__linux__)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
|
||||
# include "oaknut/feature_detection/feature_detection_linux.hpp"
|
||||
#elif defined(__NetBSD__)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
|
||||
# include "oaknut/feature_detection/feature_detection_netbsd.hpp"
|
||||
#elif defined(__OpenBSD__)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
|
||||
# include "oaknut/feature_detection/feature_detection_openbsd.hpp"
|
||||
#elif defined(_WIN32)
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 1
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
|
||||
# include "oaknut/feature_detection/feature_detection_w32.hpp"
|
||||
#else
|
||||
# define OAKNUT_CPU_FEATURE_DETECTION 0
|
||||
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
|
||||
# warning "Unsupported operating system for CPU feature detection"
|
||||
# include "oaknut/feature_detection/feature_detection_generic.hpp"
|
||||
#endif
|
112
externals/oaknut/include/oaknut/feature_detection/feature_detection_apple.hpp
vendored
Normal file
112
externals/oaknut/include/oaknut/feature_detection/feature_detection_apple.hpp
vendored
Normal file
|
@ -0,0 +1,112 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <optional>
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
|
||||
|
||||
namespace detail {
|
||||
|
||||
inline bool detect_feature(const char* const sysctl_name)
|
||||
{
|
||||
int result = 0;
|
||||
std::size_t result_size = sizeof(result);
|
||||
if (::sysctlbyname(sysctl_name, &result, &result_size, nullptr, 0) == 0) {
|
||||
return result != 0;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// Builds the feature set by probing `hw.optional.*` sysctl values.
///
/// Each feature is probed by its primary sysctl name; where a second name is
/// listed it is only consulted when the primary probe does not report the
/// feature.
inline CpuFeatures detect_features_via_sysctlbyname()
{
    struct Probe {
        CpuFeature feature;
        const char* name;
        const char* fallback_name;  // nullptr when there is no second name
    };

    static constexpr Probe probes[] = {
        {CpuFeature::ASIMD, "hw.optional.AdvSIMD", "hw.optional.neon"},
        {CpuFeature::FP, "hw.optional.floatingpoint", nullptr},
        {CpuFeature::FP16Conv, "hw.optional.AdvSIMD_HPFPCvt", "hw.optional.neon_hpfp"},
        {CpuFeature::BF16, "hw.optional.arm.FEAT_BF16", nullptr},
        {CpuFeature::DotProd, "hw.optional.arm.FEAT_DotProd", nullptr},
        {CpuFeature::FCMA, "hw.optional.arm.FEAT_FCMA", "hw.optional.armv8_3_compnum"},
        {CpuFeature::FHM, "hw.optional.arm.FEAT_FHM", "hw.optional.armv8_2_fhm"},
        {CpuFeature::FP16, "hw.optional.arm.FEAT_FP16", "hw.optional.neon_fp16"},
        {CpuFeature::FRINTTS, "hw.optional.arm.FEAT_FRINTTS", nullptr},
        {CpuFeature::I8MM, "hw.optional.arm.FEAT_I8MM", nullptr},
        {CpuFeature::JSCVT, "hw.optional.arm.FEAT_JSCVT", nullptr},
        {CpuFeature::RDM, "hw.optional.arm.FEAT_RDM", nullptr},
        {CpuFeature::FlagM, "hw.optional.arm.FEAT_FlagM", nullptr},
        {CpuFeature::FlagM2, "hw.optional.arm.FEAT_FlagM2", nullptr},
        {CpuFeature::CRC32, "hw.optional.armv8_crc32", nullptr},
        {CpuFeature::LRCPC, "hw.optional.arm.FEAT_LRCPC", nullptr},
        {CpuFeature::LRCPC2, "hw.optional.arm.FEAT_LRCPC2", nullptr},
        {CpuFeature::LSE, "hw.optional.arm.FEAT_LSE", "hw.optional.armv8_1_atomics"},
        {CpuFeature::LSE2, "hw.optional.arm.FEAT_LSE2", nullptr},
        {CpuFeature::AES, "hw.optional.arm.FEAT_AES", nullptr},
        {CpuFeature::PMULL, "hw.optional.arm.FEAT_PMULL", nullptr},
        {CpuFeature::SHA1, "hw.optional.arm.FEAT_SHA1", nullptr},
        {CpuFeature::SHA256, "hw.optional.arm.FEAT_SHA256", nullptr},
        {CpuFeature::SHA512, "hw.optional.arm.FEAT_SHA512", "hw.optional.armv8_2_sha512"},
        {CpuFeature::SHA3, "hw.optional.arm.FEAT_SHA3", "hw.optional.armv8_2_sha3"},
        {CpuFeature::BTI, "hw.optional.arm.FEAT_BTI", nullptr},
        {CpuFeature::DPB, "hw.optional.arm.FEAT_DPB", nullptr},
        {CpuFeature::DPB2, "hw.optional.arm.FEAT_DPB2", nullptr},
        {CpuFeature::ECV, "hw.optional.arm.FEAT_ECV", nullptr},
        {CpuFeature::SB, "hw.optional.arm.FEAT_SB", nullptr},
        {CpuFeature::SSBS, "hw.optional.arm.FEAT_SSBS", nullptr},
    };

    CpuFeatures result;

    for (const Probe& probe : probes) {
        const bool present = detail::detect_feature(probe.name)
                          || (probe.fallback_name != nullptr && detail::detect_feature(probe.fallback_name));
        if (present)
            result |= CpuFeatures{probe.feature};
    }

    return result;
}
|
||||
|
||||
/// Default feature detection on this platform: forwards to the sysctlbyname-based probe.
inline CpuFeatures detect_features()
{
    CpuFeatures detected = detect_features_via_sysctlbyname();
    return detected;
}
|
||||
|
||||
/// Always returns an empty optional: this header provides no ID register values.
inline std::optional<id::IdRegisters> read_id_registers()
{
    return std::optional<id::IdRegisters>{};
}
|
||||
|
||||
} // namespace oaknut
|
62
externals/oaknut/include/oaknut/feature_detection/feature_detection_freebsd.hpp
vendored
Normal file
62
externals/oaknut/include/oaknut/feature_detection/feature_detection_freebsd.hpp
vendored
Normal file
|
@ -0,0 +1,62 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include <sys/auxv.h>
|
||||
#include <sys/param.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
#include "oaknut/feature_detection/read_id_registers_directly.hpp"
|
||||
|
||||
#ifndef AT_HWCAP
|
||||
# define AT_HWCAP 16
|
||||
#endif
|
||||
#ifndef AT_HWCAP2
|
||||
# define AT_HWCAP2 26
|
||||
#endif
|
||||
|
||||
#if __FreeBSD_version < 1300114
|
||||
# error "Incompatible ABI change (incorrect HWCAP definitions on earlier FreeBSD versions)"
|
||||
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
inline unsigned long getauxval(int aux)
|
||||
{
|
||||
unsigned long result = 0;
|
||||
if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) {
|
||||
return result;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// Reads AT_HWCAP and AT_HWCAP2 from the auxiliary vector and decodes them
/// with the two-argument detect_features_via_hwcap overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const unsigned long caps = detail::getauxval(AT_HWCAP);
    const unsigned long caps2 = detail::getauxval(AT_HWCAP2);
    CpuFeatures decoded = detect_features_via_hwcap(caps, caps2);
    return decoded;
}
|
||||
|
||||
inline std::optional<id::IdRegisters> read_id_registers()
|
||||
{
|
||||
// HWCAP_CPUID is falsely not set on many FreeBSD kernel versions,
|
||||
// so we don't bother checking it.
|
||||
return id::read_id_registers_directly();
|
||||
}
|
||||
|
||||
/// Default feature detection on this platform: forwards to the hwcap-based probe.
inline CpuFeatures detect_features()
{
    CpuFeatures detected = detect_features_via_hwcap();
    return detected;
}
|
||||
|
||||
} // namespace oaknut
|
23
externals/oaknut/include/oaknut/feature_detection/feature_detection_generic.hpp
vendored
Normal file
23
externals/oaknut/include/oaknut/feature_detection/feature_detection_generic.hpp
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Fallback used when no detection method is available:
/// reports a fixed set containing only FP and ASIMD.
inline CpuFeatures detect_features()
{
    CpuFeatures baseline{CpuFeature::FP, CpuFeature::ASIMD};
    return baseline;
}
|
||||
|
||||
/// Always returns an empty optional: this header provides no ID register values.
inline std::optional<id::IdRegisters> read_id_registers()
{
    return std::optional<id::IdRegisters>{};
}
|
||||
|
||||
} // namespace oaknut
|
120
externals/oaknut/include/oaknut/feature_detection/feature_detection_hwcaps.hpp
vendored
Normal file
120
externals/oaknut/include/oaknut/feature_detection/feature_detection_hwcaps.hpp
vendored
Normal file
|
@ -0,0 +1,120 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Returns true when every bit position listed in `bits` is set in `value`.
/// With an empty list the result is true.
template<std::size_t... bits>
constexpr bool bit_test(unsigned long value)
{
    // Combine all requested positions into one mask and require all of them.
    const unsigned long required = (0ul | ... | (1ul << bits));
    return (value & required) == required;
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// Decodes the two ELF hwcap words into a CpuFeatures set.
///
/// @param hwcap   First capability word (tested for the HWCAP_* bits below).
/// @param hwcap2  Second capability word (tested for the HWCAP2_* bits below).
/// @return The set of features whose bit(s) are set in the corresponding word.
inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2)
{
    using detail::bit_test;

    CpuFeatures result;

    // Adds `feature` to the result when its capability bit(s) were found set.
    const auto add_if = [&result](bool present, CpuFeature feature) {
        if (present)
            result |= CpuFeatures{feature};
    };

    add_if(bit_test<0>(hwcap), CpuFeature::FP);             // HWCAP_FP
    add_if(bit_test<1>(hwcap), CpuFeature::ASIMD);          // HWCAP_ASIMD
    // HWCAP_EVTSTRM (2)
    add_if(bit_test<3>(hwcap), CpuFeature::AES);            // HWCAP_AES
    add_if(bit_test<4>(hwcap), CpuFeature::PMULL);          // HWCAP_PMULL
    add_if(bit_test<5>(hwcap), CpuFeature::SHA1);           // HWCAP_SHA1
    add_if(bit_test<6>(hwcap), CpuFeature::SHA256);         // HWCAP_SHA2
    add_if(bit_test<7>(hwcap), CpuFeature::CRC32);          // HWCAP_CRC32
    add_if(bit_test<8>(hwcap), CpuFeature::LSE);            // HWCAP_ATOMICS
    add_if(bit_test<9, 10>(hwcap), CpuFeature::FP16Conv);   // HWCAP_FPHP && HWCAP_ASIMDHP
    add_if(bit_test<9, 10>(hwcap), CpuFeature::FP16);       // HWCAP_FPHP && HWCAP_ASIMDHP
    // HWCAP_CPUID (11)
    add_if(bit_test<12>(hwcap), CpuFeature::RDM);           // HWCAP_ASIMDRDM
    add_if(bit_test<13>(hwcap), CpuFeature::JSCVT);         // HWCAP_JSCVT
    add_if(bit_test<14>(hwcap), CpuFeature::FCMA);          // HWCAP_FCMA
    add_if(bit_test<15>(hwcap), CpuFeature::LRCPC);         // HWCAP_LRCPC
    add_if(bit_test<16>(hwcap), CpuFeature::DPB);           // HWCAP_DCPOP
    add_if(bit_test<17>(hwcap), CpuFeature::SHA3);          // HWCAP_SHA3
    add_if(bit_test<18>(hwcap), CpuFeature::SM3);           // HWCAP_SM3
    add_if(bit_test<19>(hwcap), CpuFeature::SM4);           // HWCAP_SM4
    add_if(bit_test<20>(hwcap), CpuFeature::DotProd);       // HWCAP_ASIMDDP
    add_if(bit_test<21>(hwcap), CpuFeature::SHA512);        // HWCAP_SHA512
    add_if(bit_test<22>(hwcap), CpuFeature::SVE);           // HWCAP_SVE
    add_if(bit_test<23>(hwcap), CpuFeature::FHM);           // HWCAP_ASIMDFHM
    add_if(bit_test<24>(hwcap), CpuFeature::DIT);           // HWCAP_DIT
    add_if(bit_test<25>(hwcap), CpuFeature::LSE2);          // HWCAP_USCAT
    add_if(bit_test<26>(hwcap), CpuFeature::LRCPC2);        // HWCAP_ILRCPC
    add_if(bit_test<27>(hwcap), CpuFeature::FlagM);         // HWCAP_FLAGM
    add_if(bit_test<28>(hwcap), CpuFeature::SSBS);          // HWCAP_SSBS
    add_if(bit_test<29>(hwcap), CpuFeature::SB);            // HWCAP_SB
    add_if(bit_test<30>(hwcap), CpuFeature::PACA);          // HWCAP_PACA
    add_if(bit_test<31>(hwcap), CpuFeature::PACG);          // HWCAP_PACG

    add_if(bit_test<0>(hwcap2), CpuFeature::DPB2);          // HWCAP2_DCPODP
    add_if(bit_test<1>(hwcap2), CpuFeature::SVE2);          // HWCAP2_SVE2
    add_if(bit_test<2>(hwcap2), CpuFeature::SVE_AES);       // HWCAP2_SVEAES
    add_if(bit_test<3>(hwcap2), CpuFeature::SVE_PMULL128);  // HWCAP2_SVEPMULL
    add_if(bit_test<4>(hwcap2), CpuFeature::SVE_BITPERM);   // HWCAP2_SVEBITPERM
    add_if(bit_test<5>(hwcap2), CpuFeature::SVE_SHA3);      // HWCAP2_SVESHA3
    add_if(bit_test<6>(hwcap2), CpuFeature::SVE_SM4);       // HWCAP2_SVESM4
    add_if(bit_test<7>(hwcap2), CpuFeature::FlagM2);        // HWCAP2_FLAGM2
    add_if(bit_test<8>(hwcap2), CpuFeature::FRINTTS);       // HWCAP2_FRINT
    add_if(bit_test<9>(hwcap2), CpuFeature::SVE_I8MM);      // HWCAP2_SVEI8MM
    add_if(bit_test<10>(hwcap2), CpuFeature::SVE_F32MM);    // HWCAP2_SVEF32MM
    add_if(bit_test<11>(hwcap2), CpuFeature::SVE_F64MM);    // HWCAP2_SVEF64MM
    add_if(bit_test<12>(hwcap2), CpuFeature::SVE_BF16);     // HWCAP2_SVEBF16
    add_if(bit_test<13>(hwcap2), CpuFeature::I8MM);         // HWCAP2_I8MM
    add_if(bit_test<14>(hwcap2), CpuFeature::BF16);         // HWCAP2_BF16
    add_if(bit_test<15>(hwcap2), CpuFeature::DGH);          // HWCAP2_DGH
    add_if(bit_test<16>(hwcap2), CpuFeature::RNG);          // HWCAP2_RNG
    add_if(bit_test<17>(hwcap2), CpuFeature::BTI);          // HWCAP2_BTI
    add_if(bit_test<18>(hwcap2), CpuFeature::MTE);          // HWCAP2_MTE
    add_if(bit_test<19>(hwcap2), CpuFeature::ECV);          // HWCAP2_ECV
    add_if(bit_test<20>(hwcap2), CpuFeature::AFP);          // HWCAP2_AFP
    add_if(bit_test<21>(hwcap2), CpuFeature::RPRES);        // HWCAP2_RPRES
    add_if(bit_test<22>(hwcap2), CpuFeature::MTE3);         // HWCAP2_MTE3
    add_if(bit_test<23>(hwcap2), CpuFeature::SME);          // HWCAP2_SME
    add_if(bit_test<24>(hwcap2), CpuFeature::SME_I16I64);   // HWCAP2_SME_I16I64
    add_if(bit_test<25>(hwcap2), CpuFeature::SME_F64F64);   // HWCAP2_SME_F64F64
    add_if(bit_test<26>(hwcap2), CpuFeature::SME_I8I32);    // HWCAP2_SME_I8I32
    add_if(bit_test<27>(hwcap2), CpuFeature::SME_F16F32);   // HWCAP2_SME_F16F32
    add_if(bit_test<28>(hwcap2), CpuFeature::SME_B16F32);   // HWCAP2_SME_B16F32
    add_if(bit_test<29>(hwcap2), CpuFeature::SME_F32F32);   // HWCAP2_SME_F32F32
    add_if(bit_test<30>(hwcap2), CpuFeature::SME_FA64);     // HWCAP2_SME_FA64
    add_if(bit_test<31>(hwcap2), CpuFeature::WFxT);         // HWCAP2_WFXT
    add_if(bit_test<32>(hwcap2), CpuFeature::EBF16);        // HWCAP2_EBF16
    add_if(bit_test<33>(hwcap2), CpuFeature::SVE_EBF16);    // HWCAP2_SVE_EBF16
    add_if(bit_test<34>(hwcap2), CpuFeature::CSSC);         // HWCAP2_CSSC
    add_if(bit_test<35>(hwcap2), CpuFeature::RPRFM);        // HWCAP2_RPRFM
    add_if(bit_test<36>(hwcap2), CpuFeature::SVE2p1);       // HWCAP2_SVE2P1
    add_if(bit_test<37>(hwcap2), CpuFeature::SME2);         // HWCAP2_SME2
    add_if(bit_test<38>(hwcap2), CpuFeature::SME2p1);       // HWCAP2_SME2P1
    add_if(bit_test<39>(hwcap2), CpuFeature::SME_I16I32);   // HWCAP2_SME_I16I32
    add_if(bit_test<40>(hwcap2), CpuFeature::SME_BI32I32);  // HWCAP2_SME_BI32I32
    add_if(bit_test<41>(hwcap2), CpuFeature::SME_B16B16);   // HWCAP2_SME_B16B16
    add_if(bit_test<42>(hwcap2), CpuFeature::SME_F16F16);   // HWCAP2_SME_F16F16
    add_if(bit_test<43>(hwcap2), CpuFeature::MOPS);         // HWCAP2_MOPS
    add_if(bit_test<44>(hwcap2), CpuFeature::HBC);          // HWCAP2_HBC

    return result;
}
|
||||
|
||||
} // namespace oaknut
|
167
externals/oaknut/include/oaknut/feature_detection/feature_detection_idregs.hpp
vendored
Normal file
167
externals/oaknut/include/oaknut/feature_detection/feature_detection_idregs.hpp
vendored
Normal file
|
@ -0,0 +1,167 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Derives the set of supported CPU features from a snapshot of the AArch64
/// ID registers.
///
/// Every check compares one ID-register field against the minimum value (or,
/// for the SMFR0 fields, the exact encoding) at which the corresponding
/// feature is reported. A feature is added to the result only when its check
/// passes; nothing is ever removed.
///
/// This function is defined in a header, so it is declared `inline` to allow
/// the header to be included from more than one translation unit without
/// producing multiple-definition link errors.
///
/// @param regs  ID register values to decode.
/// @return      The union of all features whose checks passed.
inline CpuFeatures detect_features_via_id_registers(id::IdRegisters regs)
{
    CpuFeatures result;

    // FP and AdvSIMD are decoded as signed 4-bit fields, so the encoding
    // 0b1111 (-1) fails the `>= 0` checks below.
    if (regs.pfr0.FP() >= 0)
        result |= CpuFeatures{CpuFeature::FP};
    if (regs.pfr0.AdvSIMD() >= 0)
        result |= CpuFeatures{CpuFeature::ASIMD};
    if (regs.isar0.AES() >= 1)
        result |= CpuFeatures{CpuFeature::AES};
    if (regs.isar0.AES() >= 2)
        result |= CpuFeatures{CpuFeature::PMULL};
    if (regs.isar0.SHA1() >= 1)
        result |= CpuFeatures{CpuFeature::SHA1};
    if (regs.isar0.SHA2() >= 1)
        result |= CpuFeatures{CpuFeature::SHA256};
    if (regs.isar0.CRC32() >= 1)
        result |= CpuFeatures{CpuFeature::CRC32};
    if (regs.isar0.Atomic() >= 2)
        result |= CpuFeatures{CpuFeature::LSE};
    // Half-precision support requires level 1 in both the FP and AdvSIMD fields.
    if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1)
        result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16};
    if (regs.isar0.RDM() >= 1)
        result |= CpuFeatures{CpuFeature::RDM};
    if (regs.isar1.JSCVT() >= 1)
        result |= CpuFeatures{CpuFeature::JSCVT};
    if (regs.isar1.FCMA() >= 1)
        result |= CpuFeatures{CpuFeature::FCMA};
    if (regs.isar1.LRCPC() >= 1)
        result |= CpuFeatures{CpuFeature::LRCPC};
    if (regs.isar1.DPB() >= 1)
        result |= CpuFeatures{CpuFeature::DPB};
    if (regs.isar0.SHA3() >= 1)
        result |= CpuFeatures{CpuFeature::SHA3};
    if (regs.isar0.SM3() >= 1)
        result |= CpuFeatures{CpuFeature::SM3};
    if (regs.isar0.SM4() >= 1)
        result |= CpuFeatures{CpuFeature::SM4};
    if (regs.isar0.DP() >= 1)
        result |= CpuFeatures{CpuFeature::DotProd};
    if (regs.isar0.SHA2() >= 2)
        result |= CpuFeatures{CpuFeature::SHA512};
    if (regs.pfr0.SVE() >= 1)
        result |= CpuFeatures{CpuFeature::SVE};
    if (regs.isar0.FHM() >= 1)
        result |= CpuFeatures{CpuFeature::FHM};
    if (regs.pfr0.DIT() >= 1)
        result |= CpuFeatures{CpuFeature::DIT};
    if (regs.mmfr2.AT() >= 1)
        result |= CpuFeatures{CpuFeature::LSE2};
    if (regs.isar1.LRCPC() >= 2)
        result |= CpuFeatures{CpuFeature::LRCPC2};
    if (regs.isar0.TS() >= 1)
        result |= CpuFeatures{CpuFeature::FlagM};
    if (regs.pfr1.SSBS() >= 2)
        result |= CpuFeatures{CpuFeature::SSBS};
    if (regs.isar1.SB() >= 1)
        result |= CpuFeatures{CpuFeature::SB};
    // Pointer authentication is reported when either of the two ISAR1
    // algorithm fields (APA/API, GPA/GPI) is non-zero.
    if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1)
        result |= CpuFeatures{CpuFeature::PACA};
    if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1)
        result |= CpuFeatures{CpuFeature::PACG};
    if (regs.isar1.DPB() >= 2)
        result |= CpuFeatures{CpuFeature::DPB2};
    if (regs.zfr0.SVEver() >= 1)
        result |= CpuFeatures{CpuFeature::SVE2};
    if (regs.zfr0.AES() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_AES};
    if (regs.zfr0.AES() >= 2)
        result |= CpuFeatures{CpuFeature::SVE_PMULL128};
    if (regs.zfr0.BitPerm() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_BITPERM};
    if (regs.zfr0.SHA3() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_SHA3};
    if (regs.zfr0.SM4() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_SM4};
    if (regs.isar0.TS() >= 2)
        result |= CpuFeatures{CpuFeature::FlagM2};
    if (regs.isar1.FRINTTS() >= 1)
        result |= CpuFeatures{CpuFeature::FRINTTS};
    if (regs.zfr0.I8MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_I8MM};
    if (regs.zfr0.F32MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_F32MM};
    if (regs.zfr0.F64MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_F64MM};
    if (regs.zfr0.BF16() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_BF16};
    if (regs.isar1.I8MM() >= 1)
        result |= CpuFeatures{CpuFeature::I8MM};
    if (regs.isar1.BF16() >= 1)
        result |= CpuFeatures{CpuFeature::BF16};
    if (regs.isar1.DGH() >= 1)
        result |= CpuFeatures{CpuFeature::DGH};
    if (regs.isar0.RNDR() >= 1)
        result |= CpuFeatures{CpuFeature::RNG};
    if (regs.pfr1.BT() >= 1)
        result |= CpuFeatures{CpuFeature::BTI};
    if (regs.pfr1.MTE() >= 2)
        result |= CpuFeatures{CpuFeature::MTE};
    if (regs.mmfr0.ECV() >= 1)
        result |= CpuFeatures{CpuFeature::ECV};
    if (regs.mmfr1.AFP() >= 1)
        result |= CpuFeatures{CpuFeature::AFP};
    if (regs.isar2.RPRES() >= 1)
        result |= CpuFeatures{CpuFeature::RPRES};
    if (regs.pfr1.MTE() >= 3)
        result |= CpuFeatures{CpuFeature::MTE3};
    if (regs.pfr1.SME() >= 1)
        result |= CpuFeatures{CpuFeature::SME};
    // The SMFR0 fields below are matched against exact encodings rather than
    // "at least" thresholds.
    if (regs.smfr0.I16I64() == 0b1111)
        result |= CpuFeatures{CpuFeature::SME_I16I64};
    if (regs.smfr0.F64F64() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F64F64};
    if (regs.smfr0.I8I32() == 0b1111)
        result |= CpuFeatures{CpuFeature::SME_I8I32};
    if (regs.smfr0.F16F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F16F32};
    if (regs.smfr0.B16F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_B16F32};
    if (regs.smfr0.F32F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F32F32};
    if (regs.smfr0.FA64() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_FA64};
    if (regs.isar2.WFxT() >= 2)
        result |= CpuFeatures{CpuFeature::WFxT};
    if (regs.isar1.BF16() >= 2)
        result |= CpuFeatures{CpuFeature::EBF16};
    if (regs.zfr0.BF16() >= 2)
        result |= CpuFeatures{CpuFeature::SVE_EBF16};
    if (regs.isar2.CSSC() >= 1)
        result |= CpuFeatures{CpuFeature::CSSC};
    if (regs.isar2.RPRFM() >= 1)
        result |= CpuFeatures{CpuFeature::RPRFM};
    if (regs.zfr0.SVEver() >= 2)
        result |= CpuFeatures{CpuFeature::SVE2p1};
    if (regs.smfr0.SMEver() >= 1)
        result |= CpuFeatures{CpuFeature::SME2};
    if (regs.smfr0.SMEver() >= 2)
        result |= CpuFeatures{CpuFeature::SME2p1};
    if (regs.smfr0.I16I32() == 0b0101)
        result |= CpuFeatures{CpuFeature::SME_I16I32};
    if (regs.smfr0.BI32I32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_BI32I32};
    if (regs.smfr0.B16B16() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_B16B16};
    if (regs.smfr0.F16F16() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F16F16};
    if (regs.isar2.MOPS() >= 1)
        result |= CpuFeatures{CpuFeature::MOPS};
    if (regs.isar2.BC() >= 1)
        result |= CpuFeatures{CpuFeature::HBC};

    return result;
}
|
||||
|
||||
} // namespace oaknut
|
45
externals/oaknut/include/oaknut/feature_detection/feature_detection_linux.hpp
vendored
Normal file
45
externals/oaknut/include/oaknut/feature_detection/feature_detection_linux.hpp
vendored
Normal file
|
@ -0,0 +1,45 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include <sys/auxv.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
#include "oaknut/feature_detection/read_id_registers_directly.hpp"
|
||||
|
||||
#ifndef AT_HWCAP
|
||||
# define AT_HWCAP 16
|
||||
#endif
|
||||
#ifndef AT_HWCAP2
|
||||
# define AT_HWCAP2 26
|
||||
#endif
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Reads AT_HWCAP and AT_HWCAP2 from the auxiliary vector of the current
/// process and decodes them with the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    return detect_features_via_hwcap(::getauxval(AT_HWCAP), ::getauxval(AT_HWCAP2));
}
|
||||
|
||||
/// Linux entry point for feature detection; the result comes entirely from
/// the hwcap-based detection.
inline CpuFeatures detect_features()
{
    const CpuFeatures features = detect_features_via_hwcap();
    return features;
}
|
||||
|
||||
inline std::optional<id::IdRegisters> read_id_registers()
|
||||
{
|
||||
constexpr unsigned long hwcap_cpuid = (1 << 11);
|
||||
if (::getauxval(AT_HWCAP) & hwcap_cpuid) {
|
||||
return id::read_id_registers_directly();
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
} // namespace oaknut
|
81
externals/oaknut/include/oaknut/feature_detection/feature_detection_netbsd.hpp
vendored
Normal file
81
externals/oaknut/include/oaknut/feature_detection/feature_detection_netbsd.hpp
vendored
Normal file
|
@ -0,0 +1,81 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <string>
|
||||
|
||||
#include <aarch64/armreg.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_idregs.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
|
||||
{
|
||||
const std::string path = "machdep.cpu" + std::to_string(core_index) + ".cpu_id";
|
||||
|
||||
aarch64_sysctl_cpu_id id;
|
||||
std::size_t id_len = sizeof id;
|
||||
|
||||
if (sysctlbyname(path.c_str(), &id, &id_len, nullptr, 0) < 0)
|
||||
return std::nullopt;
|
||||
|
||||
return id::IdRegisters{
|
||||
id.ac_midr,
|
||||
id::Pfr0Register{id.ac_aa64pfr0},
|
||||
id::Pfr1Register{id.ac_aa64pfr1},
|
||||
id::Pfr2Register{0},
|
||||
id::Zfr0Register{id.ac_aa64zfr0},
|
||||
id::Smfr0Register{0},
|
||||
id::Isar0Register{id.ac_aa64isar0},
|
||||
id::Isar1Register{id.ac_aa64isar1},
|
||||
id::Isar2Register{0},
|
||||
id::Isar3Register{0},
|
||||
id::Mmfr0Register{id.ac_aa64mmfr0},
|
||||
id::Mmfr1Register{id.ac_aa64mmfr1},
|
||||
id::Mmfr2Register{id.ac_aa64mmfr2},
|
||||
id::Mmfr3Register{0},
|
||||
id::Mmfr4Register{0},
|
||||
};
|
||||
}
|
||||
|
||||
inline std::size_t get_core_count()
|
||||
{
|
||||
int result = 0;
|
||||
size_t result_size = sizeof(result);
|
||||
const std::array<int, 2> mib{CTL_HW, HW_NCPU};
|
||||
if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0)
|
||||
return 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
/// Computes the features common to every core whose ID registers can be read.
///
/// Cores whose registers cannot be read are skipped. If no core can be read
/// at all, an empty feature set is returned.
inline CpuFeatures detect_features()
{
    std::optional<CpuFeatures> common;

    const std::size_t total_cores = get_core_count();
    for (std::size_t core = 0; core < total_cores; core++) {
        const std::optional<id::IdRegisters> regs = read_id_registers(core);
        if (!regs)
            continue;

        const CpuFeatures core_features = detect_features_via_id_registers(*regs);
        if (!common) {
            // First readable core seeds the running intersection.
            common = core_features;
        } else {
            common = *common & core_features;
        }
    }

    if (common)
        return *common;
    return CpuFeatures{};
}
|
||||
|
||||
} // namespace oaknut
|
63
externals/oaknut/include/oaknut/feature_detection/feature_detection_openbsd.hpp
vendored
Normal file
63
externals/oaknut/include/oaknut/feature_detection/feature_detection_openbsd.hpp
vendored
Normal file
|
@ -0,0 +1,63 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
|
||||
#include "oaknut/feature_detection/feature_detection_idregs.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
inline std::uint64_t read_id_register(int index)
|
||||
{
|
||||
uint64_t result = 0;
|
||||
size_t result_size = sizeof(result);
|
||||
std::array<int, 2> mib{CTL_MACHDEP, index};
|
||||
if (sysctl(mib.data(), mib.size(), &result, &result_size, nullptr, 0) < 0)
|
||||
return 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
inline std::optional<id::IdRegisters> read_id_registers()
|
||||
{
|
||||
// See OpenBSD source: sys/arch/arm64/include/cpu.h
|
||||
|
||||
return id::IdRegisters{
|
||||
std::nullopt, // No easy way of getting MIDR_EL1 other than reading /proc/cpu
|
||||
id::Pfr0Register{detail::read_id_register(8)}, // CPU_ID_AA64PFR0
|
||||
id::Pfr1Register{detail::read_id_register(9)}, // CPU_ID_AA64PFR1
|
||||
id::Pfr2Register{0},
|
||||
id::Zfr0Register{detail::read_id_register(11)}, // CPU_ID_AA64ZFR0
|
||||
id::Smfr0Register{detail::read_id_register(10)}, // CPU_ID_AA64SMFR0
|
||||
id::Isar0Register{detail::read_id_register(2)}, // CPU_ID_AA64ISAR0
|
||||
id::Isar1Register{detail::read_id_register(3)}, // CPU_ID_AA64ISAR1
|
||||
id::Isar2Register{detail::read_id_register(4)}, // CPU_ID_AA64ISAR2
|
||||
id::Isar3Register{0},
|
||||
id::Mmfr0Register{detail::read_id_register(5)}, // CPU_ID_AA64MMFR0
|
||||
id::Mmfr1Register{detail::read_id_register(6)}, // CPU_ID_AA64MMFR1
|
||||
id::Mmfr2Register{detail::read_id_register(7)}, // CPU_ID_AA64MMFR2
|
||||
id::Mmfr3Register{0},
|
||||
id::Mmfr4Register{0},
|
||||
};
|
||||
}
|
||||
|
||||
/// OpenBSD entry point for feature detection: decodes the ID registers
/// obtained from read_id_registers().
inline CpuFeatures detect_features()
{
    // read_id_registers() in this header always returns an engaged optional.
    const std::optional<id::IdRegisters> regs = read_id_registers();
    return detect_features_via_id_registers(*regs);
}
|
||||
|
||||
} // namespace oaknut
|
99
externals/oaknut/include/oaknut/feature_detection/feature_detection_w32.hpp
vendored
Normal file
99
externals/oaknut/include/oaknut/feature_detection/feature_detection_w32.hpp
vendored
Normal file
|
@ -0,0 +1,99 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
# define WIN32_LEAN_AND_MEAN
|
||||
#endif
|
||||
|
||||
#include <windows.h>
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include <processthreadsapi.h>
|
||||
|
||||
#include "oaknut/feature_detection/cpu_feature.hpp"
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
namespace detail {
|
||||
|
||||
inline std::optional<std::uint64_t> read_registry_hklm(const std::string& subkey, const std::string& name)
|
||||
{
|
||||
std::uint64_t value;
|
||||
DWORD value_len = sizeof(value);
|
||||
if (::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &value, &value_len) == ERROR_SUCCESS) {
|
||||
return value;
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name)
|
||||
{
|
||||
return read_registry_hklm("HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index), "CP " + name).value_or(0);
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
|
||||
|
||||
/// Builds a feature set from the Win32 IsProcessorFeaturePresent() queries
/// listed below.
///
/// The feature identifiers are spelled as local constants (lower-case, so
/// they cannot collide with SDK macros of the same meaning).
inline CpuFeatures detect_features_via_IsProcessorFeaturePresent()
{
    constexpr DWORD pf_arm_v8_crypto = 30;   // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v8_crc32 = 31;    // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v81_atomic = 34;  // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v82_dp = 43;      // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v83_jscvt = 44;   // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_arm_v83_lrcpc = 45;   // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE

    const auto present = [](DWORD feature) -> bool {
        return ::IsProcessorFeaturePresent(feature) != 0;
    };

    CpuFeatures detected;

    // A single crypto flag covers all four crypto-related features.
    if (present(pf_arm_v8_crypto))
        detected |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256};
    if (present(pf_arm_v8_crc32))
        detected |= CpuFeatures{CpuFeature::CRC32};
    if (present(pf_arm_v81_atomic))
        detected |= CpuFeatures{CpuFeature::LSE};
    if (present(pf_arm_v82_dp))
        detected |= CpuFeatures{CpuFeature::DotProd};
    if (present(pf_arm_v83_jscvt))
        detected |= CpuFeatures{CpuFeature::JSCVT};
    if (present(pf_arm_v83_lrcpc))
        detected |= CpuFeatures{CpuFeature::LRCPC};

    return detected;
}
|
||||
|
||||
/// Windows entry point for feature detection.
///
/// FP and ASIMD are always reported; everything else comes from
/// detect_features_via_IsProcessorFeaturePresent().
inline CpuFeatures detect_features()
{
    CpuFeatures features = detect_features_via_IsProcessorFeaturePresent();
    features |= CpuFeatures{CpuFeature::FP, CpuFeature::ASIMD};
    return features;
}
|
||||
|
||||
inline std::size_t get_core_count()
|
||||
{
|
||||
::SYSTEM_INFO sys_info;
|
||||
::GetSystemInfo(&sys_info);
|
||||
return sys_info.dwNumberOfProcessors;
|
||||
}
|
||||
|
||||
inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
|
||||
{
|
||||
return id::IdRegisters{
|
||||
detail::read_id_register(core_index, "4000"),
|
||||
id::Pfr0Register{detail::read_id_register(core_index, "4020")},
|
||||
id::Pfr1Register{detail::read_id_register(core_index, "4021")},
|
||||
id::Pfr2Register{detail::read_id_register(core_index, "4022")},
|
||||
id::Zfr0Register{detail::read_id_register(core_index, "4024")},
|
||||
id::Smfr0Register{detail::read_id_register(core_index, "4025")},
|
||||
id::Isar0Register{detail::read_id_register(core_index, "4030")},
|
||||
id::Isar1Register{detail::read_id_register(core_index, "4031")},
|
||||
id::Isar2Register{detail::read_id_register(core_index, "4032")},
|
||||
id::Isar3Register{detail::read_id_register(core_index, "4033")},
|
||||
id::Mmfr0Register{detail::read_id_register(core_index, "4038")},
|
||||
id::Mmfr1Register{detail::read_id_register(core_index, "4039")},
|
||||
id::Mmfr2Register{detail::read_id_register(core_index, "403A")},
|
||||
id::Mmfr3Register{detail::read_id_register(core_index, "403B")},
|
||||
id::Mmfr4Register{detail::read_id_register(core_index, "403C")},
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace oaknut
|
318
externals/oaknut/include/oaknut/feature_detection/id_registers.hpp
vendored
Normal file
318
externals/oaknut/include/oaknut/feature_detection/id_registers.hpp
vendored
Normal file
|
@ -0,0 +1,318 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
namespace oaknut::id {
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Returns the single bit of `value` at position `lsb` (0 or 1).
template<std::size_t lsb>
constexpr unsigned extract_bit(std::uint64_t value)
{
    const std::uint64_t shifted = value >> lsb;
    return static_cast<unsigned>(shifted & std::uint64_t{1});
}
|
||||
|
||||
/// Returns the unsigned 4-bit field of `value` whose lowest bit is `lsb`.
template<std::size_t lsb>
constexpr unsigned extract_field(std::uint64_t value)
{
    constexpr std::uint64_t nibble_mask = 0xf;
    const std::uint64_t shifted = value >> lsb;
    return static_cast<unsigned>(shifted & nibble_mask);
}
|
||||
|
||||
/// Returns the 4-bit field of `value` whose lowest bit is `lsb`, interpreted
/// as a two's-complement signed number in the range [-8, 7].
///
/// The sign extension is done arithmetically instead of by right-shifting a
/// negative signed value, so the result does not depend on how the compiler
/// implements that shift. `lsb` is checked at compile time because a 4-bit
/// field cannot start above bit 60.
template<std::size_t lsb>
constexpr signed extract_signed_field(std::uint64_t value)
{
    static_assert(lsb <= 60, "a 4-bit field must start at bit 60 or below");

    const unsigned raw = static_cast<unsigned>((value >> lsb) & 0xf);
    // Flipping the sign bit and subtracting its weight maps 0..7 to 0..7 and
    // 8..15 to -8..-1.
    return static_cast<signed>(raw ^ 0x8u) - 8;
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
struct Pfr0Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr signed FP() const { return detail::extract_signed_field<16>(value); }
|
||||
constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); }
|
||||
constexpr unsigned GIC() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned RAS() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned SVE() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned SEL2() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned MPAM() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned AMU() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned DIT() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned RME() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned CSV2() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned CSV3() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Pfr1Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned BT() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned SSBS() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned MTE() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); }
|
||||
// [20:23] - reserved
|
||||
constexpr unsigned SME() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned NMI() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned GCS() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned THE() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned MTEX() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned DF2() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned PFAR() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Pfr2Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); }
|
||||
// [12:31] reserved
|
||||
constexpr unsigned FPMR() const { return detail::extract_field<32>(value); }
|
||||
// [36:63] reserved
|
||||
};
|
||||
|
||||
struct Zfr0Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned SVEver() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned AES() const { return detail::extract_field<4>(value); }
|
||||
// [8:15] reserved
|
||||
constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned BF16() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned B16B16() const { return detail::extract_field<24>(value); }
|
||||
// [28:31] reserved
|
||||
constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
|
||||
// [36:39] reserved
|
||||
constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned I8MM() const { return detail::extract_field<44>(value); }
|
||||
// [48:51] reserved
|
||||
constexpr unsigned F32MM() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned F64MM() const { return detail::extract_field<56>(value); }
|
||||
// [60:63] reserved
|
||||
};
|
||||
|
||||
struct Smfr0Register {
|
||||
std::uint64_t value;
|
||||
|
||||
// [0:27] reserved
|
||||
constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); }
|
||||
constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); }
|
||||
constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); }
|
||||
// [31] reserved
|
||||
constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); }
|
||||
constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); }
|
||||
constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); }
|
||||
constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); }
|
||||
constexpr unsigned I8I32() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); }
|
||||
constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); }
|
||||
constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); }
|
||||
constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); }
|
||||
constexpr unsigned I16I32() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned F64F64() const { return detail::extract_bit<48>(value); }
|
||||
// [49:51] reserved
|
||||
constexpr unsigned I16I64() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned SMEver() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); }
|
||||
// [61:62] reserved
|
||||
constexpr unsigned FA64() const { return detail::extract_bit<63>(value); }
|
||||
};
|
||||
|
||||
struct Isar0Register {
|
||||
std::uint64_t value;
|
||||
|
||||
// [0:3] reserved
|
||||
constexpr unsigned AES() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned SHA1() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned SHA2() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned CRC32() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned Atomic() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned TME() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned RDM() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned SM3() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned DP() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned FHM() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned TS() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned TLB() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned RNDR() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Isar1Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned DPB() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned APA() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned API() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned JSCVT() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned FCMA() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned GPA() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned GPI() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned SB() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned BF16() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned DGH() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned I8MM() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned XS() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned LS64() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Isar2Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned WFxT() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned RPRES() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned GPA3() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned APA3() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned MOPS() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned BC() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned PRFMSLC() const { return detail::extract_field<40>(value); }
|
||||
// [44:47] reserved
|
||||
constexpr unsigned RPRFM() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned CSSC() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned LUT() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Isar3Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned CPA() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); }
|
||||
// [12:63] reserved
|
||||
};
|
||||
|
||||
struct Mmfr0Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned PARange() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned TGran16() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned TGran64() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned TGran4() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned ExS() const { return detail::extract_field<44>(value); }
|
||||
// [48:55] reserved
|
||||
constexpr unsigned FGT() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned ECV() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Mmfr1Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned HAFDBS() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned VMIDBits() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned VH() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned HPDS() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned LO() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned PAN() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned XNX() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned TWED() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned ETS() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned HCX() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned AFP() const { return detail::extract_field<44>(value); }
|
||||
constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned CMOW() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Mmfr2Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned CnP() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned UAO() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned LSM() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned IESB() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned VARange() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned NV() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned ST() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned AT() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned IDS() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned FWB() const { return detail::extract_field<40>(value); }
|
||||
// [44:47] reserved
|
||||
constexpr unsigned TTL() const { return detail::extract_field<48>(value); }
|
||||
constexpr unsigned BBM() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned EVT() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned E0PD() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Mmfr3Register {
|
||||
std::uint64_t value;
|
||||
|
||||
constexpr unsigned TCRX() const { return detail::extract_field<0>(value); }
|
||||
constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned S1POE() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned S2POE() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned AIE() const { return detail::extract_field<24>(value); }
|
||||
constexpr unsigned MEC() const { return detail::extract_field<28>(value); }
|
||||
constexpr unsigned D128() const { return detail::extract_field<32>(value); }
|
||||
constexpr unsigned D128_2() const { return detail::extract_field<36>(value); }
|
||||
constexpr unsigned SNERR() const { return detail::extract_field<40>(value); }
|
||||
constexpr unsigned ANERR() const { return detail::extract_field<44>(value); }
|
||||
// [48:51] reserved
|
||||
constexpr unsigned SDERR() const { return detail::extract_field<52>(value); }
|
||||
constexpr unsigned ADERR() const { return detail::extract_field<56>(value); }
|
||||
constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); }
|
||||
};
|
||||
|
||||
struct Mmfr4Register {
|
||||
std::uint64_t value;
|
||||
|
||||
// [0:3] reserved
|
||||
constexpr unsigned EIESB() const { return detail::extract_field<4>(value); }
|
||||
constexpr unsigned ASID2() const { return detail::extract_field<8>(value); }
|
||||
constexpr unsigned HACDBS() const { return detail::extract_field<12>(value); }
|
||||
constexpr unsigned FGWTE3() const { return detail::extract_field<16>(value); }
|
||||
constexpr unsigned NV_frac() const { return detail::extract_field<20>(value); }
|
||||
constexpr unsigned E2H0() const { return detail::extract_field<24>(value); }
|
||||
// [28:35] reserved
|
||||
constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); }
|
||||
// [40:63] reserved
|
||||
};
|
||||
|
||||
struct IdRegisters {
|
||||
std::optional<std::uint64_t> midr;
|
||||
Pfr0Register pfr0;
|
||||
Pfr1Register pfr1;
|
||||
Pfr2Register pfr2;
|
||||
Zfr0Register zfr0;
|
||||
Smfr0Register smfr0;
|
||||
Isar0Register isar0;
|
||||
Isar1Register isar1;
|
||||
Isar2Register isar2;
|
||||
Isar3Register isar3;
|
||||
Mmfr0Register mmfr0;
|
||||
Mmfr1Register mmfr1;
|
||||
Mmfr2Register mmfr2;
|
||||
Mmfr3Register mmfr3;
|
||||
Mmfr4Register mmfr4;
|
||||
};
|
||||
|
||||
} // namespace oaknut::id
|
52
externals/oaknut/include/oaknut/feature_detection/read_id_registers_directly.hpp
vendored
Normal file
52
externals/oaknut/include/oaknut/feature_detection/read_id_registers_directly.hpp
vendored
Normal file
|
@ -0,0 +1,52 @@
|
|||
#include <cstdint>
|
||||
|
||||
#include "oaknut/feature_detection/id_registers.hpp"
|
||||
|
||||
namespace oaknut::id {
|
||||
|
||||
/// Reads every ID register with an `mrs` instruction and returns the raw
/// values wrapped in an IdRegisters aggregate.
///
/// Each register is addressed by its generic `s<op0>_<op1>_c<n>_c<m>_<op2>`
/// name, so no assembler support for the symbolic register names is required.
///
/// @return IdRegisters whose members hold the values read, in declaration order.
inline IdRegisters read_id_registers_directly()
{
    std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0;

// Function-local helper: emits one `mrs` that writes register `reg` into `var`.
#define OAKNUT_READ_REGISTER(reg, var) \
    __asm__("mrs %0, " #reg            \
            : "=r"(var))

    OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4);

// Undefine the macro that was actually defined above, so it does not leak
// into every translation unit that includes this header.
#undef OAKNUT_READ_REGISTER

    // Positional initialisation: order must match the IdRegisters member order.
    return IdRegisters{
        midr,
        Pfr0Register{pfr0},
        Pfr1Register{pfr1},
        Pfr2Register{pfr2},
        Zfr0Register{zfr0},
        Smfr0Register{smfr0},
        Isar0Register{isar0},
        Isar1Register{isar1},
        Isar2Register{isar2},
        Isar3Register{isar3},
        Mmfr0Register{mmfr0},
        Mmfr1Register{mmfr1},
        Mmfr2Register{mmfr2},
        Mmfr3Register{mmfr3},
        Mmfr4Register{mmfr4},
    };
}
|
||||
|
||||
} // namespace oaknut::id
|
211
externals/oaknut/include/oaknut/impl/arm64_encode_helpers.inc.hpp
vendored
Normal file
211
externals/oaknut/include/oaknut/impl/arm64_encode_helpers.inc.hpp
vendored
Normal file
|
@ -0,0 +1,211 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Software parallel-bit-deposit: bit i of `val` (counting from the least
// significant bit) is written to the position of the i-th lowest set bit of
// `mask_`. Bits of `val` beyond the number of set bits in `mask_` are ignored.
template<std::uint32_t mask_>
static constexpr std::uint32_t pdep(std::uint32_t val)
{
    std::uint32_t deposited = 0;
    std::uint32_t source_bit = 1;

    // Each iteration consumes the lowest remaining set bit of the mask.
    for (std::uint32_t remaining = mask_; remaining != 0; remaining &= remaining - 1) {
        const std::uint32_t lowest_set = remaining & (~remaining + 1);
        if ((val & source_bit) != 0)
            deposited |= lowest_set;
        source_bit <<= 1;
    }

    return deposited;
}
|
||||
|
||||
#define OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE) \
|
||||
template<std::uint32_t splat> \
|
||||
std::uint32_t encode(TYPE v) \
|
||||
{ \
|
||||
static_assert(std::popcount(splat) == SIZE); \
|
||||
return pdep<splat>(static_cast<std::uint32_t>(ACCESS)); \
|
||||
}
|
||||
|
||||
OAKNUT_STD_ENCODE(RReg, v.index() & 31, 5)
|
||||
OAKNUT_STD_ENCODE(VReg, v.index() & 31, 5)
|
||||
OAKNUT_STD_ENCODE(VRegArranged, v.index() & 31, 5)
|
||||
|
||||
OAKNUT_STD_ENCODE(AddSubImm, v.m_encoded, 13)
|
||||
OAKNUT_STD_ENCODE(BitImm32, v.m_encoded, 12)
|
||||
OAKNUT_STD_ENCODE(BitImm64, v.m_encoded, 13)
|
||||
OAKNUT_STD_ENCODE(LslShift<32>, v.m_encoded, 12)
|
||||
OAKNUT_STD_ENCODE(LslShift<64>, v.m_encoded, 12)
|
||||
OAKNUT_STD_ENCODE(FImm8, v.m_encoded, 8)
|
||||
OAKNUT_STD_ENCODE(RepImm, v.m_encoded, 8)
|
||||
|
||||
OAKNUT_STD_ENCODE(Cond, v, 4)
|
||||
OAKNUT_STD_ENCODE(Rot, v, 2)
|
||||
OAKNUT_STD_ENCODE(AddSubExt, v, 3)
|
||||
OAKNUT_STD_ENCODE(IndexExt, v, 3)
|
||||
OAKNUT_STD_ENCODE(AddSubShift, v, 2)
|
||||
OAKNUT_STD_ENCODE(LogShift, v, 2)
|
||||
OAKNUT_STD_ENCODE(PstateField, v, 6)
|
||||
OAKNUT_STD_ENCODE(SystemReg, v, 15)
|
||||
OAKNUT_STD_ENCODE(AtOp, v, 7)
|
||||
OAKNUT_STD_ENCODE(BarrierOp, v, 4)
|
||||
OAKNUT_STD_ENCODE(DcOp, v, 10)
|
||||
OAKNUT_STD_ENCODE(IcOp, v, 10)
|
||||
OAKNUT_STD_ENCODE(PrfOp, v, 5)
|
||||
OAKNUT_STD_ENCODE(TlbiOp, v, 10)
|
||||
|
||||
template<std::uint32_t splat>
|
||||
std::uint32_t encode(MovImm16 v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == 17 || std::popcount(splat) == 18);
|
||||
if constexpr (std::popcount(splat) == 17) {
|
||||
constexpr std::uint32_t mask = (1 << std::popcount(splat)) - 1;
|
||||
if ((v.m_encoded & mask) != v.m_encoded)
|
||||
throw OaknutException{ExceptionType::InvalidMovImm16};
|
||||
}
|
||||
return pdep<splat>(v.m_encoded);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, std::size_t imm_size>
|
||||
std::uint32_t encode(Imm<imm_size> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) >= imm_size);
|
||||
return pdep<splat>(v.value());
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, int A, int B>
|
||||
std::uint32_t encode(ImmChoice<A, B> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == 1);
|
||||
return pdep<splat>(v.m_encoded);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, int A, int B, int C, int D>
|
||||
std::uint32_t encode(ImmChoice<A, B, C, D> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == 2);
|
||||
return pdep<splat>(v.m_encoded);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||
std::uint32_t encode(SOffset<size, align> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == size - align);
|
||||
return pdep<splat>(v.m_encoded);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||
std::uint32_t encode(POffset<size, align> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == size - align);
|
||||
return pdep<splat>(v.m_encoded);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat>
|
||||
std::uint32_t encode(std::uint32_t v)
|
||||
{
|
||||
return pdep<splat>(v);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, typename T, size_t N>
|
||||
std::uint32_t encode(List<T, N> v)
|
||||
{
|
||||
return encode<splat>(v.m_base);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, std::size_t size, std::size_t align>
|
||||
std::uint32_t encode(AddrOffset<size, align> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == size - align);
|
||||
|
||||
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
|
||||
const std::ptrdiff_t diff = target_offset - current_offset;
|
||||
return pdep<splat>(AddrOffset<size, align>::encode(diff));
|
||||
};
|
||||
|
||||
return std::visit(detail::overloaded{
|
||||
[&](std::uint32_t encoding) -> std::uint32_t {
|
||||
return pdep<splat>(encoding);
|
||||
},
|
||||
[&](Label* label) -> std::uint32_t {
|
||||
if (label->m_offset) {
|
||||
return encode_fn(Policy::offset(), *label->m_offset);
|
||||
}
|
||||
|
||||
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
|
||||
return 0u;
|
||||
},
|
||||
[&](const void* p) -> std::uint32_t {
|
||||
const std::ptrdiff_t diff = reinterpret_cast<std::uintptr_t>(p) - Policy::template xptr<std::uintptr_t>();
|
||||
return pdep<splat>(AddrOffset<size, align>::encode(diff));
|
||||
},
|
||||
},
|
||||
v.m_payload);
|
||||
}
|
||||
|
||||
template<std::uint32_t splat, std::size_t size, std::size_t shift_amount>
|
||||
std::uint32_t encode(PageOffset<size, shift_amount> v)
|
||||
{
|
||||
static_assert(std::popcount(splat) == size);
|
||||
|
||||
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
|
||||
return pdep<splat>(PageOffset<size, shift_amount>::encode(static_cast<std::uintptr_t>(current_offset), static_cast<std::uintptr_t>(target_offset)));
|
||||
};
|
||||
|
||||
return std::visit(detail::overloaded{
|
||||
[&](Label* label) -> std::uint32_t {
|
||||
if (label->m_offset) {
|
||||
return encode_fn(Policy::offset(), *label->m_offset);
|
||||
}
|
||||
|
||||
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
|
||||
return 0u;
|
||||
},
|
||||
[&](const void* p) -> std::uint32_t {
|
||||
return pdep<splat>(PageOffset<size, shift_amount>::encode(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::ptrdiff_t>(p)));
|
||||
},
|
||||
},
|
||||
v.m_payload);
|
||||
}
|
||||
|
||||
#undef OAKNUT_STD_ENCODE
|
||||
|
||||
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
|
||||
{
|
||||
if (ext == AddSubExt::LSL)
|
||||
ext = AddSubExt::UXTX;
|
||||
}
|
||||
void addsubext_lsl_correction(AddSubExt& ext, WRegWsp)
|
||||
{
|
||||
if (ext == AddSubExt::LSL)
|
||||
ext = AddSubExt::UXTW;
|
||||
}
|
||||
void addsubext_lsl_correction(AddSubExt& ext, XReg)
|
||||
{
|
||||
if (ext == AddSubExt::LSL)
|
||||
ext = AddSubExt::UXTX;
|
||||
}
|
||||
void addsubext_lsl_correction(AddSubExt& ext, WReg)
|
||||
{
|
||||
if (ext == AddSubExt::LSL)
|
||||
ext = AddSubExt::UXTW;
|
||||
}
|
||||
|
||||
// Checks that the extend kind agrees with the width of `rm`.
// Extends whose low two bits are 0b11 (UXTX / SXTX) need a 64-bit register;
// every other extend needs a 32-bit one. Throws InvalidAddSubExt on mismatch.
void addsubext_verify_reg_size(AddSubExt ext, RReg rm)
{
    const bool is_x_extend = (static_cast<int>(ext) & 0b011) == 0b011;

    const bool accepted = (rm.bitsize() == 32 && !is_x_extend)
                       || (rm.bitsize() == 64 && is_x_extend);
    if (!accepted)
        throw OaknutException{ExceptionType::InvalidAddSubExt};
}
|
||||
|
||||
// Checks that the index extend agrees with the width of `rm`.
// Bit 0 of the extend selects the width: clear needs a 32-bit register, set
// needs a 64-bit one. Throws InvalidIndexExt on mismatch.
void indexext_verify_reg_size(IndexExt ext, RReg rm)
{
    const bool wants_64bit = (static_cast<int>(ext) & 1) == 1;

    const bool accepted = (rm.bitsize() == 32 && !wants_64bit)
                       || (rm.bitsize() == 64 && wants_64bit);
    if (!accepted)
        throw OaknutException{ExceptionType::InvalidIndexExt};
}
|
||||
|
||||
// A 32-bit register has no bit 32..63, so testing such a bit is rejected
// with BitPositionOutOfRange. Other register widths are not restricted here.
void tbz_verify_reg_size(RReg rt, Imm<6> imm)
{
    if (rt.bitsize() != 32)
        return;
    if (imm.value() < 32)
        return;
    throw OaknutException{ExceptionType::BitPositionOutOfRange};
}
|
78
externals/oaknut/include/oaknut/impl/cpu_feature.inc.hpp
vendored
Normal file
78
externals/oaknut/include/oaknut/impl/cpu_feature.inc.hpp
vendored
Normal file
|
@ -0,0 +1,78 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// X-macro list of CPU features. The including file defines OAKNUT_CPU_FEATURE
// before including this list.
// NOTE(review): consumers may depend on the order of the entries (e.g. for
// enumerator values) — append new features at the end unless confirmed otherwise.
OAKNUT_CPU_FEATURE(FP)
OAKNUT_CPU_FEATURE(ASIMD)
OAKNUT_CPU_FEATURE(AES)
OAKNUT_CPU_FEATURE(PMULL)
OAKNUT_CPU_FEATURE(SHA1)
OAKNUT_CPU_FEATURE(SHA256)
OAKNUT_CPU_FEATURE(CRC32)
OAKNUT_CPU_FEATURE(LSE)
OAKNUT_CPU_FEATURE(FP16Conv)
OAKNUT_CPU_FEATURE(FP16)
OAKNUT_CPU_FEATURE(RDM)
OAKNUT_CPU_FEATURE(JSCVT)
OAKNUT_CPU_FEATURE(FCMA)
OAKNUT_CPU_FEATURE(LRCPC)
OAKNUT_CPU_FEATURE(DPB)
OAKNUT_CPU_FEATURE(SHA3)
OAKNUT_CPU_FEATURE(SM3)
OAKNUT_CPU_FEATURE(SM4)
OAKNUT_CPU_FEATURE(DotProd)
OAKNUT_CPU_FEATURE(SHA512)
OAKNUT_CPU_FEATURE(SVE)
OAKNUT_CPU_FEATURE(FHM)
OAKNUT_CPU_FEATURE(DIT)
OAKNUT_CPU_FEATURE(LSE2)
OAKNUT_CPU_FEATURE(LRCPC2)
OAKNUT_CPU_FEATURE(FlagM)
OAKNUT_CPU_FEATURE(SSBS)
OAKNUT_CPU_FEATURE(SB)
OAKNUT_CPU_FEATURE(PACA)
OAKNUT_CPU_FEATURE(PACG)
OAKNUT_CPU_FEATURE(DPB2)
OAKNUT_CPU_FEATURE(SVE2)
OAKNUT_CPU_FEATURE(SVE_AES)
OAKNUT_CPU_FEATURE(SVE_PMULL128)
OAKNUT_CPU_FEATURE(SVE_BITPERM)
OAKNUT_CPU_FEATURE(SVE_SHA3)
OAKNUT_CPU_FEATURE(SVE_SM4)
OAKNUT_CPU_FEATURE(FlagM2)
OAKNUT_CPU_FEATURE(FRINTTS)
OAKNUT_CPU_FEATURE(SVE_I8MM)
OAKNUT_CPU_FEATURE(SVE_F32MM)
OAKNUT_CPU_FEATURE(SVE_F64MM)
OAKNUT_CPU_FEATURE(SVE_BF16)
OAKNUT_CPU_FEATURE(I8MM)
OAKNUT_CPU_FEATURE(BF16)
OAKNUT_CPU_FEATURE(DGH)
OAKNUT_CPU_FEATURE(RNG)
OAKNUT_CPU_FEATURE(BTI)
OAKNUT_CPU_FEATURE(MTE)
OAKNUT_CPU_FEATURE(ECV)
OAKNUT_CPU_FEATURE(AFP)
OAKNUT_CPU_FEATURE(RPRES)
OAKNUT_CPU_FEATURE(MTE3)
OAKNUT_CPU_FEATURE(SME)
OAKNUT_CPU_FEATURE(SME_I16I64)
OAKNUT_CPU_FEATURE(SME_F64F64)
OAKNUT_CPU_FEATURE(SME_I8I32)
OAKNUT_CPU_FEATURE(SME_F16F32)
OAKNUT_CPU_FEATURE(SME_B16F32)
OAKNUT_CPU_FEATURE(SME_F32F32)
OAKNUT_CPU_FEATURE(SME_FA64)
OAKNUT_CPU_FEATURE(WFxT)
OAKNUT_CPU_FEATURE(EBF16)
OAKNUT_CPU_FEATURE(SVE_EBF16)
OAKNUT_CPU_FEATURE(CSSC)
OAKNUT_CPU_FEATURE(RPRFM)
OAKNUT_CPU_FEATURE(SVE2p1)
OAKNUT_CPU_FEATURE(SME2)
OAKNUT_CPU_FEATURE(SME2p1)
OAKNUT_CPU_FEATURE(SME_I16I32)
OAKNUT_CPU_FEATURE(SME_BI32I32)
OAKNUT_CPU_FEATURE(SME_B16B16)
OAKNUT_CPU_FEATURE(SME_F16F16)
OAKNUT_CPU_FEATURE(MOPS)
OAKNUT_CPU_FEATURE(HBC)
|
310
externals/oaknut/include/oaknut/impl/enum.hpp
vendored
Normal file
310
externals/oaknut/include/oaknut/impl/enum.hpp
vendored
Normal file
|
@ -0,0 +1,310 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Empty tag types, distinguished only by their type.
struct PostIndexed {
};

struct PreIndexed {
};

// Single-enumerator enums used purely as typed symbols.
enum class LslSymbol {
    LSL = 0,
};

enum class MslSymbol {
    MSL = 0,
};
|
||||
|
||||
// Condition codes; the enumerator value is the 4-bit field value.
// HS and LO are aliases of CS and CC respectively.
enum class Cond {
    EQ = 0b0000,
    NE = 0b0001,
    CS = 0b0010,
    CC = 0b0011,
    MI = 0b0100,
    PL = 0b0101,
    VS = 0b0110,
    VC = 0b0111,
    HI = 0b1000,
    LS = 0b1001,
    GE = 0b1010,
    LT = 0b1011,
    GT = 0b1100,
    LE = 0b1101,
    AL = 0b1110,
    NV = 0b1111,
    HS = CS,
    LO = CC,
};

// Toggles the lowest bit of the condition, mapping each even enumerator to the
// odd one that follows it and vice versa (EQ <-> NE, CS <-> CC, ..., AL <-> NV).
constexpr Cond invert(Cond c)
{
    const unsigned flipped = static_cast<unsigned>(c) ^ 1u;
    return static_cast<Cond>(flipped);
}
|
||||
|
||||
// Rotation selector; the enumerator value is the 2-bit field value.
enum class Rot {
    DEG_0 = 0,
    DEG_90 = 1,
    DEG_180 = 2,
    DEG_270 = 3,
};
|
||||
|
||||
// Extend kinds for add/sub. Values 0..7 fit a 3-bit field; LSL (8) does not
// and is a placeholder.
enum class AddSubExt {
    UXTB = 0b000,
    UXTH = 0b001,
    UXTW = 0b010,
    UXTX = 0b011,
    SXTB = 0b100,
    SXTH = 0b101,
    SXTW = 0b110,
    SXTX = 0b111,
    LSL = 0b1000,  // UXTW (32-bit) or UXTX (64-bit)
};
|
||||
|
||||
// Index-register extend kinds; the enumerator value is the 3-bit field value.
// Bit 0 is clear for the *W extends and set for LSL / SXTX.
enum class IndexExt {
    UXTW = 2,  // 0b010
    LSL = 3,   // 0b011
    SXTW = 6,  // 0b110
    SXTX = 7,  // 0b111
};
|
||||
|
||||
// Shift kinds accepted by add/sub (no ROR); the value is the 2-bit field value.
enum class AddSubShift {
    LSL = 0b00,
    LSR = 0b01,
    ASR = 0b10,
};
|
||||
|
||||
// Shift kinds accepted by logical operations; the value is the 2-bit field value.
enum class LogShift {
    LSL = 0b00,
    LSR = 0b01,
    ASR = 0b10,
    ROR = 0b11,
};
|
||||
|
||||
// PSTATE field selectors as 6-bit values; the digit separator splits each
// literal into two 3-bit groups.
enum class PstateField {
    UAO = 0b000'011,  // ARMv8.2-UAO
    PAN = 0b000'100,  // ARMv8.1-PAN
    SPSel = 0b000'101,
    DIT = 0b011'010,  // ARMv8.4-DIT
    DAIFSet = 0b011'110,
    DAIFClr = 0b011'111,
};
|
||||
|
||||
// System register selectors. Each literal is grouped 2'3'4'4'3 bits.
// NOTE(review): the groups presumably correspond to op0'op1'CRn'CRm'op2 — confirm.
// Entries marked "n = ..." are base values for an indexed register family.
enum class SystemReg {
    AMCFGR_EL0 = 0b11'011'1101'0010'001,
    AMCGCR_EL0 = 0b11'011'1101'0010'010,
    AMCNTENCLR0_EL0 = 0b11'011'1101'0010'100,
    AMCNTENCLR1_EL0 = 0b11'011'1101'0011'000,
    AMCNTENSET0_EL0 = 0b11'011'1101'0010'101,
    AMCNTENSET1_EL0 = 0b11'011'1101'0011'001,
    AMCR_EL0 = 0b11'011'1101'0010'000,
    AMEVCNTR0_n_EL0 = 0b11'011'1101'0100'000,   // n = 0-3
    AMEVCNTR1_n_EL0 = 0b11'011'1101'1100'000,   // n = 0-15
    AMEVTYPER0_n_EL0 = 0b11'011'1101'0110'000,  // n = 0-3
    AMEVTYPER1_n_EL0 = 0b11'011'1101'1110'000,  // n = 0-15
    AMUSERENR_EL0 = 0b11'011'1101'0010'011,
    CNTFRQ_EL0 = 0b11'011'1110'0000'000,
    CNTP_CTL_EL0 = 0b11'011'1110'0010'001,
    CNTP_CVAL_EL0 = 0b11'011'1110'0010'010,
    CNTP_TVAL_EL0 = 0b11'011'1110'0010'000,
    CNTPCT_EL0 = 0b11'011'1110'0000'001,
    CNTV_CTL_EL0 = 0b11'011'1110'0011'001,
    CNTV_CVAL_EL0 = 0b11'011'1110'0011'010,
    CNTV_TVAL_EL0 = 0b11'011'1110'0011'000,
    CNTVCT_EL0 = 0b11'011'1110'0000'010,
    CTR_EL0 = 0b11'011'0000'0000'001,
    CurrentEL = 0b11'000'0100'0010'010,
    DAIF = 0b11'011'0100'0010'001,
    DBGDTR_EL0 = 0b10'011'0000'0100'000,
    DBGDTRRX_EL0 = 0b10'011'0000'0101'000,
    DBGDTRTX_EL0 = 0b10'011'0000'0101'000,  // same value as DBGDTRRX_EL0
    DCZID_EL0 = 0b11'011'0000'0000'111,
    DIT = 0b11'011'0100'0010'101,
    DLR_EL0 = 0b11'011'0100'0101'001,
    DSPSR_EL0 = 0b11'011'0100'0101'000,
    FPCR = 0b11'011'0100'0100'000,
    FPSR = 0b11'011'0100'0100'001,
    MDCCSR_EL0 = 0b10'011'0000'0001'000,
    NZCV = 0b11'011'0100'0010'000,
    PAN = 0b11'000'0100'0010'011,
    PMCCFILTR_EL0 = 0b11'011'1110'1111'111,
    PMCCNTR_EL0 = 0b11'011'1001'1101'000,
    PMCEID0_EL0 = 0b11'011'1001'1100'110,
    PMCEID1_EL0 = 0b11'011'1001'1100'111,
    PMCNTENCLR_EL0 = 0b11'011'1001'1100'010,
    PMCNTENSET_EL0 = 0b11'011'1001'1100'001,
    PMCR_EL0 = 0b11'011'1001'1100'000,
    PMEVCNTR_n_EL0 = 0b11'011'1110'1000'000,   // n = 0-30
    PMEVTYPER_n_EL0 = 0b11'011'1110'1100'000,  // n = 0-30
    PMOVSCLR_EL0 = 0b11'011'1001'1100'011,
    PMOVSSET_EL0 = 0b11'011'1001'1110'011,
    PMSELR_EL0 = 0b11'011'1001'1100'101,
    PMSWINC_EL0 = 0b11'011'1001'1100'100,
    PMUSERENR_EL0 = 0b11'011'1001'1110'000,
    PMXEVCNTR_EL0 = 0b11'011'1001'1101'010,
    PMXEVTYPER_EL0 = 0b11'011'1001'1101'001,
    SP_EL0 = 0b11'000'0100'0001'000,
    SPSel = 0b11'000'0100'0010'000,
    SPSR_abt = 0b11'100'0100'0011'001,
    SPSR_fiq = 0b11'100'0100'0011'011,
    SPSR_irq = 0b11'100'0100'0011'000,
    SPSR_und = 0b11'100'0100'0011'010,
    TPIDR_EL0 = 0b11'011'1101'0000'010,
    TPIDRRO_EL0 = 0b11'011'1101'0000'011,
    UAO = 0b11'000'0100'0010'100,
};
|
||||
|
||||
// Address-translation operation selectors as 7-bit values, grouped 3'1'3 bits.
enum class AtOp {
    S1E1R = 0b000'0'000,
    S1E1W = 0b000'0'001,
    S1E0R = 0b000'0'010,
    S1E0W = 0b000'0'011,
    S1E1RP = 0b000'1'000,  // ARMv8.2-ATS1E1
    S1E1WP = 0b000'1'001,  // ARMv8.2-ATS1E1
    S1E2R = 0b100'0'000,
    S1E2W = 0b100'0'001,
    S12E1R = 0b100'0'100,
    S12E1W = 0b100'0'101,
    S12E0R = 0b100'0'110,
    S12E0W = 0b100'0'111,
    S1E3R = 0b110'0'000,
    S1E3W = 0b110'0'001,
};
|
||||
|
||||
// Barrier option selectors; the enumerator value is the 4-bit field value.
// In every group the low two bits are 0b11 for the plain form, 0b10 for the
// ST form and 0b01 for the LD form.
enum class BarrierOp {
    SY = 0b1111,
    ST = 0b1110,
    LD = 0b1101,

    ISH = 0b1011,
    ISHST = 0b1010,
    ISHLD = 0b1001,

    NSH = 0b0111,
    NSHST = 0b0110,
    NSHLD = 0b0101,

    OSH = 0b0011,
    OSHST = 0b0010,
    OSHLD = 0b0001,
};
|
||||
|
||||
// Data-cache operation selectors as 10-bit values, grouped 3'4'3 bits.
enum class DcOp {
    IVAC = 0b000'0110'001,
    ISW = 0b000'0110'010,
    CSW = 0b000'1010'010,
    CISW = 0b000'1110'010,
    ZVA = 0b011'0100'001,
    CVAC = 0b011'1010'001,
    CVAU = 0b011'1011'001,
    CVAP = 0b011'1100'001,  // ARMv8.2-DCPoP
    CIVAC = 0b011'1110'001,
};
|
||||
|
||||
// Instruction-cache operation selectors as 10-bit values, grouped 3'4'3 bits.
enum class IcOp {
    IALLUIS = 0b000'0001'000,
    IALLU = 0b000'0101'000,
    IVAU = 0b011'0101'001,
};
|
||||
|
||||
// Prefetch operation selectors as 5-bit values, grouped 2'2'1 bits:
// the top group changes with the PLD/PLI/PST prefix, the middle group with
// the L1/L2/L3 part of the name, and the low bit is 0 for KEEP, 1 for STRM.
enum class PrfOp {
    PLDL1KEEP = 0b00'00'0,
    PLDL1STRM = 0b00'00'1,
    PLDL2KEEP = 0b00'01'0,
    PLDL2STRM = 0b00'01'1,
    PLDL3KEEP = 0b00'10'0,
    PLDL3STRM = 0b00'10'1,

    PLIL1KEEP = 0b01'00'0,
    PLIL1STRM = 0b01'00'1,
    PLIL2KEEP = 0b01'01'0,
    PLIL2STRM = 0b01'01'1,
    PLIL3KEEP = 0b01'10'0,
    PLIL3STRM = 0b01'10'1,

    PSTL1KEEP = 0b10'00'0,
    PSTL1STRM = 0b10'00'1,
    PSTL2KEEP = 0b10'01'0,
    PSTL2STRM = 0b10'01'1,
    PSTL3KEEP = 0b10'10'0,
    PSTL3STRM = 0b10'10'1,
};
|
||||
|
||||
// TLB-invalidate operation selectors as 10-bit values, grouped 3'4'3 bits.
// Entries are kept in ascending numeric order.
enum class TlbiOp {
    VMALLE1OS = 0b000'0001'000,  // ARMv8.4-TLBI
    VAE1OS = 0b000'0001'001,     // ARMv8.4-TLBI
    ASIDE1OS = 0b000'0001'010,   // ARMv8.4-TLBI
    VAAE1OS = 0b000'0001'011,    // ARMv8.4-TLBI
    VALE1OS = 0b000'0001'101,    // ARMv8.4-TLBI
    VAALE1OS = 0b000'0001'111,   // ARMv8.4-TLBI
    RVAE1IS = 0b000'0010'001,    // ARMv8.4-TLBI
    RVAAE1IS = 0b000'0010'011,   // ARMv8.4-TLBI
    RVALE1IS = 0b000'0010'101,   // ARMv8.4-TLBI
    RVAALE1IS = 0b000'0010'111,  // ARMv8.4-TLBI
    VMALLE1IS = 0b000'0011'000,
    VAE1IS = 0b000'0011'001,
    ASIDE1IS = 0b000'0011'010,
    VAAE1IS = 0b000'0011'011,
    VALE1IS = 0b000'0011'101,
    VAALE1IS = 0b000'0011'111,
    RVAE1OS = 0b000'0101'001,    // ARMv8.4-TLBI
    RVAAE1OS = 0b000'0101'011,   // ARMv8.4-TLBI
    RVALE1OS = 0b000'0101'101,   // ARMv8.4-TLBI
    RVAALE1OS = 0b000'0101'111,  // ARMv8.4-TLBI
    RVAE1 = 0b000'0110'001,      // ARMv8.4-TLBI
    RVAAE1 = 0b000'0110'011,     // ARMv8.4-TLBI
    RVALE1 = 0b000'0110'101,     // ARMv8.4-TLBI
    RVAALE1 = 0b000'0110'111,    // ARMv8.4-TLBI
    VMALLE1 = 0b000'0111'000,
    VAE1 = 0b000'0111'001,
    ASIDE1 = 0b000'0111'010,
    VAAE1 = 0b000'0111'011,
    VALE1 = 0b000'0111'101,
    VAALE1 = 0b000'0111'111,
    IPAS2E1IS = 0b100'0000'001,
    RIPAS2E1IS = 0b100'0000'010,  // ARMv8.4-TLBI
    IPAS2LE1IS = 0b100'0000'101,
    RIPAS2LE1IS = 0b100'0000'110,   // ARMv8.4-TLBI
    ALLE2OS = 0b100'0001'000,       // ARMv8.4-TLBI
    VAE2OS = 0b100'0001'001,        // ARMv8.4-TLBI
    ALLE1OS = 0b100'0001'100,       // ARMv8.4-TLBI
    VALE2OS = 0b100'0001'101,       // ARMv8.4-TLBI
    VMALLS12E1OS = 0b100'0001'110,  // ARMv8.4-TLBI
    RVAE2IS = 0b100'0010'001,       // ARMv8.4-TLBI
    RVALE2IS = 0b100'0010'101,      // ARMv8.4-TLBI
    ALLE2IS = 0b100'0011'000,
    VAE2IS = 0b100'0011'001,
    ALLE1IS = 0b100'0011'100,
    VALE2IS = 0b100'0011'101,
    VMALLS12E1IS = 0b100'0011'110,
    IPAS2E1OS = 0b100'0100'000,  // ARMv8.4-TLBI
    IPAS2E1 = 0b100'0100'001,
    RIPAS2E1 = 0b100'0100'010,    // ARMv8.4-TLBI
    RIPAS2E1OS = 0b100'0100'011,  // ARMv8.4-TLBI
    IPAS2LE1OS = 0b100'0100'100,  // ARMv8.4-TLBI
    IPAS2LE1 = 0b100'0100'101,
    RIPAS2LE1 = 0b100'0100'110,    // ARMv8.4-TLBI
    RIPAS2LE1OS = 0b100'0100'111,  // ARMv8.4-TLBI
    RVAE2OS = 0b100'0101'001,      // ARMv8.4-TLBI
    RVALE2OS = 0b100'0101'101,     // ARMv8.4-TLBI
    RVAE2 = 0b100'0110'001,        // ARMv8.4-TLBI
    RVALE2 = 0b100'0110'101,       // ARMv8.4-TLBI
    ALLE2 = 0b100'0111'000,
    VAE2 = 0b100'0111'001,
    ALLE1 = 0b100'0111'100,
    VALE2 = 0b100'0111'101,
    VMALLS12E1 = 0b100'0111'110,
    ALLE3OS = 0b110'0001'000,   // ARMv8.4-TLBI
    VAE3OS = 0b110'0001'001,    // ARMv8.4-TLBI
    VALE3OS = 0b110'0001'101,   // ARMv8.4-TLBI
    RVAE3IS = 0b110'0010'001,   // ARMv8.4-TLBI
    RVALE3IS = 0b110'0010'101,  // ARMv8.4-TLBI
    ALLE3IS = 0b110'0011'000,
    VAE3IS = 0b110'0011'001,
    VALE3IS = 0b110'0011'101,
    RVAE3OS = 0b110'0101'001,   // ARMv8.4-TLBI
    RVALE3OS = 0b110'0101'101,  // ARMv8.4-TLBI
    RVAE3 = 0b110'0110'001,     // ARMv8.4-TLBI
    RVALE3 = 0b110'0110'101,    // ARMv8.4-TLBI
    ALLE3 = 0b110'0111'000,
    VAE3 = 0b110'0111'001,
    VALE3 = 0b110'0111'101,
};
|
||||
|
||||
} // namespace oaknut
|
319
externals/oaknut/include/oaknut/impl/imm.hpp
vendored
Normal file
319
externals/oaknut/include/oaknut/impl/imm.hpp
vendored
Normal file
|
@ -0,0 +1,319 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <compare>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
|
||||
#include "oaknut/oaknut_exception.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
template<std::size_t bit_size_>
|
||||
struct Imm {
|
||||
public:
|
||||
static_assert(bit_size_ != 0 && bit_size_ <= 32, "Invalid bit_size");
|
||||
static constexpr std::size_t bit_size = bit_size_;
|
||||
static constexpr std::uint32_t mask = (1 << bit_size) - 1;
|
||||
|
||||
constexpr /* implicit */ Imm(std::uint32_t value_)
|
||||
: m_value(value_)
|
||||
{
|
||||
if (!is_valid(value_))
|
||||
throw OaknutException{ExceptionType::ImmOutOfRange};
|
||||
}
|
||||
|
||||
constexpr auto operator<=>(const Imm& other) const { return m_value <=> other.m_value; }
|
||||
constexpr auto operator<=>(std::uint32_t other) const { return operator<=>(Imm{other}); }
|
||||
|
||||
constexpr std::uint32_t value() const { return m_value; }
|
||||
|
||||
static bool is_valid(std::uint32_t value_)
|
||||
{
|
||||
return ((value_ & mask) == value_);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_value;
|
||||
};
|
||||
|
||||
// Selects whether an add/sub immediate is used as-is or shifted left by 12.
enum class AddSubImmShift {
    SHL_0 = 0,
    SHL_12 = 1,
};
|
||||
|
||||
struct AddSubImm {
|
||||
public:
|
||||
constexpr AddSubImm(std::uint32_t value_, AddSubImmShift shift_)
|
||||
: m_encoded(value_ | ((shift_ == AddSubImmShift::SHL_12) ? 1 << 12 : 0))
|
||||
{
|
||||
if ((value_ & 0xFFF) != value_)
|
||||
throw OaknutException{ExceptionType::InvalidAddSubImm};
|
||||
}
|
||||
|
||||
constexpr /* implicit */ AddSubImm(std::uint64_t value_)
|
||||
{
|
||||
if ((value_ & 0xFFF) == value_) {
|
||||
m_encoded = static_cast<std::uint32_t>(value_);
|
||||
} else if ((value_ & 0xFFF000) == value_) {
|
||||
m_encoded = static_cast<std::uint32_t>((value_ >> 12) | (1 << 12));
|
||||
} else {
|
||||
throw OaknutException{ExceptionType::InvalidAddSubImm};
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr bool is_valid(std::uint64_t value_)
|
||||
{
|
||||
return ((value_ & 0xFFF) == value_) || ((value_ & 0xFFF000) == value_);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
// Halfword position of a 16-bit move immediate; the enumerator value is the
// halfword index (shift amount divided by 16).
enum class MovImm16Shift {
    SHL_0 = 0,
    SHL_16 = 1,
    SHL_32 = 2,
    SHL_48 = 3,
};
|
||||
|
||||
struct MovImm16 {
|
||||
public:
|
||||
MovImm16(std::uint16_t value_, MovImm16Shift shift_)
|
||||
: m_encoded(static_cast<std::uint32_t>(value_) | (static_cast<std::uint32_t>(shift_) << 16))
|
||||
{}
|
||||
|
||||
constexpr /* implict */ MovImm16(std::uint64_t value_)
|
||||
{
|
||||
std::uint32_t shift = 0;
|
||||
while (value_ != 0) {
|
||||
const std::uint32_t lsw = static_cast<std::uint16_t>(value_ & 0xFFFF);
|
||||
if (value_ == lsw) {
|
||||
m_encoded = lsw | (shift << 16);
|
||||
return;
|
||||
} else if (lsw != 0) {
|
||||
throw OaknutException{ExceptionType::InvalidMovImm16};
|
||||
}
|
||||
value_ >>= 16;
|
||||
shift++;
|
||||
}
|
||||
}
|
||||
|
||||
static constexpr bool is_valid(std::uint64_t value_)
|
||||
{
|
||||
return ((value_ & 0xFFFF) == value_) || ((value_ & 0xFFFF0000) == value_) || ((value_ & 0xFFFF00000000) == value_) || ((value_ & 0xFFFF000000000000) == value_);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded = 0;
|
||||
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
|
||||
{
|
||||
if (value == 0 || (~value) == 0)
|
||||
return std::nullopt;
|
||||
|
||||
const int rotation = std::countr_zero(value & (value + 1));
|
||||
const std::uint64_t rot_value = std::rotr(value, rotation);
|
||||
|
||||
const int esize = std::countr_zero(rot_value & (rot_value + 1));
|
||||
const int ones = std::countr_one(rot_value);
|
||||
|
||||
if (std::rotr(value, esize) != value)
|
||||
return std::nullopt;
|
||||
|
||||
const int S = ((-esize) << 1) | (ones - 1);
|
||||
const int R = (esize - rotation) & (esize - 1);
|
||||
const int N = (~S >> 6) & 1;
|
||||
|
||||
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
|
||||
}
|
||||
|
||||
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint32_t value)
|
||||
{
|
||||
const std::uint64_t value_u64 = (static_cast<std::uint64_t>(value) << 32) | static_cast<std::uint64_t>(value);
|
||||
const auto result = encode_bit_imm(value_u64);
|
||||
if (result && (*result & 0b0'111111'111111) != *result)
|
||||
return std::nullopt;
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
struct BitImm32 {
|
||||
public:
|
||||
constexpr BitImm32(Imm<6> imms, Imm<6> immr)
|
||||
: m_encoded((imms.value() << 6) | immr.value())
|
||||
{}
|
||||
|
||||
constexpr /* implicit */ BitImm32(std::uint32_t value)
|
||||
{
|
||||
const auto encoded = detail::encode_bit_imm(value);
|
||||
if (!encoded || (*encoded & 0x1000) != 0)
|
||||
throw OaknutException{ExceptionType::InvalidBitImm32};
|
||||
m_encoded = *encoded;
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
struct BitImm64 {
|
||||
public:
|
||||
constexpr BitImm64(bool N, Imm<6> imms, Imm<6> immr)
|
||||
: m_encoded((N ? 1 << 12 : 0) | (imms.value() << 6) | immr.value())
|
||||
{}
|
||||
|
||||
constexpr /* implicit */ BitImm64(std::uint64_t value)
|
||||
{
|
||||
const auto encoded = detail::encode_bit_imm(value);
|
||||
if (!encoded)
|
||||
throw OaknutException{ExceptionType::InvalidBitImm64};
|
||||
m_encoded = *encoded;
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
struct FImm8 {
|
||||
public:
|
||||
constexpr explicit FImm8(std::uint8_t encoded)
|
||||
: m_encoded(encoded)
|
||||
{}
|
||||
|
||||
constexpr FImm8(bool sign, Imm<3> exp, Imm<4> mantissa)
|
||||
: m_encoded((sign ? 1 << 7 : 0) | (exp.value() << 4) | (mantissa.value()))
|
||||
{}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
// Immediate stored verbatim from a caller-supplied, already-encoded byte.
struct RepImm {
public:
    constexpr explicit RepImm(std::uint8_t encoded)
        : m_encoded{encoded}
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;
    std::uint32_t m_encoded;
};
|
||||
|
||||
template<int A>
|
||||
struct ImmConst {
|
||||
constexpr /* implicit */ ImmConst(int value)
|
||||
{
|
||||
if (value != A) {
|
||||
throw OaknutException{ExceptionType::InvalidImmConst};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
struct ImmConstFZero {
|
||||
constexpr /* implicit */ ImmConstFZero(double value)
|
||||
{
|
||||
if (value != 0) {
|
||||
throw OaknutException{ExceptionType::InvalidImmConstFZero};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
template<int...>
|
||||
struct ImmChoice;
|
||||
|
||||
template<int A, int B>
|
||||
struct ImmChoice<A, B> {
|
||||
constexpr /* implicit */ ImmChoice(int value)
|
||||
{
|
||||
if (value == A) {
|
||||
m_encoded = 0;
|
||||
} else if (value == B) {
|
||||
m_encoded = 1;
|
||||
} else {
|
||||
throw OaknutException{ExceptionType::InvalidImmChoice};
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
template<int A, int B, int C, int D>
|
||||
struct ImmChoice<A, B, C, D> {
|
||||
constexpr /* implicit */ ImmChoice(int value)
|
||||
{
|
||||
if (value == A) {
|
||||
m_encoded = 0;
|
||||
} else if (value == B) {
|
||||
m_encoded = 1;
|
||||
} else if (value == C) {
|
||||
m_encoded = 2;
|
||||
} else if (value == D) {
|
||||
m_encoded = 3;
|
||||
} else {
|
||||
throw OaknutException{ExceptionType::InvalidImmChoice};
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
template<unsigned Start, unsigned End>
|
||||
struct ImmRange {
|
||||
constexpr /* implicit */ ImmRange(unsigned value_)
|
||||
: m_value(value_)
|
||||
{
|
||||
if (value_ < Start || value_ > End) {
|
||||
throw OaknutException{ExceptionType::InvalidImmRange};
|
||||
}
|
||||
}
|
||||
|
||||
constexpr unsigned value() const { return m_value; }
|
||||
|
||||
private:
|
||||
unsigned m_value;
|
||||
};
|
||||
|
||||
template<std::size_t max_value>
|
||||
struct LslShift {
|
||||
constexpr /* implicit */ LslShift(std::size_t amount)
|
||||
: m_encoded((((-amount) & (max_value - 1)) << 6) | (max_value - amount - 1))
|
||||
{
|
||||
if (amount >= max_value)
|
||||
throw OaknutException{ExceptionType::LslShiftOutOfRange};
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
} // namespace oaknut
|
82
externals/oaknut/include/oaknut/impl/list.hpp
vendored
Normal file
82
externals/oaknut/include/oaknut/impl/list.hpp
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
|
||||
#include "oaknut/oaknut_exception.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Forward declarations of the operand kinds a List may hold.
struct Elem;
template<typename>
struct ElemSelector;
struct VRegArranged;

namespace detail {

// Trait: true only when the argument is some specialization ElemSelector<E>.
template<typename>
struct is_instance_of_ElemSelector : std::bool_constant<false> {};

template<typename E>
struct is_instance_of_ElemSelector<ElemSelector<E>> : std::bool_constant<true> {};

// Convenience variable template for the trait above.
template<class T>
constexpr bool is_instance_of_ElemSelector_v = is_instance_of_ElemSelector<T>::value;

// Tag selecting List's private constructor that takes only the base entry.
struct BaseOnlyTag {};

}  // namespace detail
|
||||
|
||||
template<typename T, std::size_t N>
|
||||
struct List {
|
||||
template<typename... U>
|
||||
constexpr explicit List(U... args)
|
||||
: m_base(std::get<0>(std::tie(args...)))
|
||||
{
|
||||
static_assert((std::is_same_v<T, U> && ...));
|
||||
static_assert(sizeof...(args) == N);
|
||||
static_assert(std::is_base_of_v<VRegArranged, T> || std::is_base_of_v<Elem, T> || detail::is_instance_of_ElemSelector_v<T>);
|
||||
|
||||
if (!verify(std::index_sequence_for<U...>{}, args...))
|
||||
throw OaknutException{ExceptionType::InvalidList};
|
||||
}
|
||||
|
||||
constexpr auto operator[](unsigned elem_index) const
|
||||
{
|
||||
using S = decltype(m_base[elem_index]);
|
||||
return List<S, N>(detail::BaseOnlyTag{}, m_base[elem_index]);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename>
|
||||
friend class BasicCodeGenerator;
|
||||
template<typename, std::size_t>
|
||||
friend struct List;
|
||||
|
||||
constexpr explicit List(detail::BaseOnlyTag, T base_)
|
||||
: m_base(base_)
|
||||
{}
|
||||
|
||||
template<typename... U, std::size_t... indexes>
|
||||
constexpr bool verify(std::index_sequence<indexes...>, U... args)
|
||||
{
|
||||
if constexpr (std::is_base_of_v<VRegArranged, T>) {
|
||||
return (((m_base.index() + indexes) % 32 == static_cast<std::size_t>(args.index())) && ...);
|
||||
} else if constexpr (std::is_base_of_v<Elem, T>) {
|
||||
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index()) && m_base.elem_index() == args.elem_index()) && ...);
|
||||
} else {
|
||||
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index())) && ...);
|
||||
}
|
||||
}
|
||||
|
||||
T m_base;
|
||||
};
|
||||
|
||||
template<typename... U>
|
||||
List(U...) -> List<std::common_type_t<U...>, sizeof...(U)>;
|
||||
|
||||
} // namespace oaknut
|
9163
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp
vendored
Normal file
9163
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
111
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp
vendored
Normal file
111
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp
vendored
Normal file
|
@ -0,0 +1,111 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
void SQRDMLAH(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0111111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(SReg rd, SReg rn, SElem em)
|
||||
{
|
||||
emit<"0111111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(VReg_4H rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(VReg_8H rd, VReg_8H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(VReg_2S rd, VReg_2S rn, SElem em)
|
||||
{
|
||||
emit<"0010111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"0110111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLAH(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLAH(SReg rd, SReg rn, SReg rm)
|
||||
{
|
||||
emit<"01111110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLAH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLAH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLAH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
|
||||
{
|
||||
emit<"00101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLAH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
|
||||
{
|
||||
emit<"01101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0111111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(SReg rd, SReg rn, SElem em)
|
||||
{
|
||||
emit<"0111111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(VReg_4H rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(VReg_8H rd, VReg_8H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(VReg_2S rd, VReg_2S rn, SElem em)
|
||||
{
|
||||
emit<"0010111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"0110111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SQRDMLSH(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(SReg rd, SReg rn, SReg rm)
|
||||
{
|
||||
emit<"01111110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
|
||||
{
|
||||
emit<"00101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SQRDMLSH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
|
||||
{
|
||||
emit<"01101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
855
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp
vendored
Normal file
855
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp
vendored
Normal file
|
@ -0,0 +1,855 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// BCAX: four-register form; rd, rn, rm and ra fill the "d", "n", "m" and "a" fields.
void BCAX(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
    emit<"11001110001mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
|
||||
// EOR3: four-register form; rd, rn, rm and ra fill the "d", "n", "m" and "a" fields.
void EOR3(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
    emit<"11001110000mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
|
||||
void FABD(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FABD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FABD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FABS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FABS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FACGE(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FACGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FACGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FACGT(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FACGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FACGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FADD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FADD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FADDP(HReg rd, VReg_2H rn)
|
||||
{
|
||||
emit<"0101111000110000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FADDP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FADDP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMEQ(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01011110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMEQ(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMEQ(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMEQ(HReg rd, HReg rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0101111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMEQ(VReg_4H rd, VReg_4H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0000111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMEQ(VReg_8H rd, VReg_8H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0100111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGE(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGE(HReg rd, HReg rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0111111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0010111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0110111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGT(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01111110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FCMGT(HReg rd, HReg rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0101111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0000111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMGT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0100111011111000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLE(HReg rd, HReg rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0111111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0010111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0110111011111000110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLT(HReg rd, HReg rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0101111011111000111010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0000111011111000111010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCMLT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
|
||||
{
|
||||
emit<"0100111011111000111010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAS(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAU(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAU(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTAU(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMS(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMU(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMU(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTMU(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNS(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNU(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNU(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTNU(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPS(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPU(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPU(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTPU(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111001101010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZS(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZS(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZS(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZU(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZU(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FCVTZU(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111001101110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FDIV(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FDIV(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXNMP(HReg rd, VReg_2H rn)
|
||||
{
|
||||
emit<"0101111000110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMAXNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXNMV(HReg rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111000110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMAXNMV(HReg rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111000110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMAXP(HReg rd, VReg_2H rn)
|
||||
{
|
||||
emit<"0101111000110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMAXP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMAXV(HReg rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111000110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMAXV(HReg rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111000110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMIN(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMIN(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINNMP(HReg rd, VReg_2H rn)
|
||||
{
|
||||
emit<"0101111010110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMINNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINNMV(HReg rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111010110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMINNMV(HReg rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111010110000110010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMINP(HReg rd, VReg_2H rn)
|
||||
{
|
||||
emit<"0101111010110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMINP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMINV(HReg rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111010110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMINV(HReg rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111010110000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FMLA(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0101111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLA(VReg_8B rd, VReg_8B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0000111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLA(VReg_16B rd, VReg_16B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0100111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLA(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLA(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLAL(VReg_2S rd, VReg_2H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0000111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLAL(VReg_4S rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0100111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLAL2(VReg_2S rd, VReg_2H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLAL2(VReg_4S rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLAL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
|
||||
{
|
||||
emit<"00001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLAL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"01001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLAL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
|
||||
{
|
||||
emit<"00101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLAL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"01101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLS(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0101111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLS(VReg_8B rd, VReg_8B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0000111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLS(VReg_16B rd, VReg_16B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0100111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLSL(VReg_2S rd, VReg_2H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0000111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLSL(VReg_4S rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0100111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLSL2(VReg_2S rd, VReg_2H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLSL2(VReg_4S rd, VReg_4H rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMLSL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
|
||||
{
|
||||
emit<"00001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLSL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"01001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLSL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
|
||||
{
|
||||
emit<"00101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMLSL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"01101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMOV(VReg_4H rd, FImm8 imm)
|
||||
{
|
||||
emit<"0000111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
|
||||
}
|
||||
void FMOV(VReg_8H rd, FImm8 imm)
|
||||
{
|
||||
emit<"0100111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
|
||||
}
|
||||
void FMUL(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0101111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMUL(VReg_8B rd, VReg_8B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0000111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMUL(VReg_16B rd, VReg_16B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0100111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMUL(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMUL(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMULX(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01011110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMULX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMULX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FMULX(HReg rd, HReg rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0111111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMULX(VReg_8B rd, VReg_8B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FMULX(VReg_16B rd, VReg_16B rn, HElem em)
|
||||
{
|
||||
if (em.reg_index() >= 16)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FNEG(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FNEG(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111000111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRECPE(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRECPE(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRECPE(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRECPS(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01011110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FRECPS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FRECPS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
// FRECPX, scalar H registers: rd -> 'd' field, rn -> 'n' field.
void FRECPX(HReg rd, HReg rn)
{
    emit<"0101111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
|
||||
void FRINTA(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTA(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTI(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTI(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTM(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTM(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTN(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTN(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTP(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTP(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111001100010nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTX(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTX(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTZ(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111011111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRINTZ(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111011111001100110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRSQRTE(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRSQRTE(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRSQRTE(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FRSQRTS(HReg rd, HReg rn, HReg rm)
|
||||
{
|
||||
emit<"01011110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FRSQRTS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FRSQRTS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FSQRT(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111011111001111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FSQRT(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111011111001111110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void FSUB(VReg_4H rd, VReg_4H rn, VReg_4H rm)
|
||||
{
|
||||
emit<"00001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void FSUB(VReg_8H rd, VReg_8H rn, VReg_8H rm)
|
||||
{
|
||||
emit<"01001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
// RAX1, 2D arrangement: rd, rn and rm fill the 'd', 'n' and 'm' fields.
void RAX1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
{
    emit<"11001110011mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
|
||||
void SCVTF(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0101111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void SCVTF(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0000111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void SCVTF(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0100111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void SDOT(VReg_2S rd, VReg_8B rn, SElem em)
|
||||
{
|
||||
emit<"0000111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SDOT(VReg_4S rd, VReg_16B rn, SElem em)
|
||||
{
|
||||
emit<"0100111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void SDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
|
||||
{
|
||||
emit<"00001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
|
||||
{
|
||||
emit<"01001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SHA512H(QReg rd, QReg rn, VReg_2D rm)
|
||||
{
|
||||
emit<"11001110011mmmmm100000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SHA512H2(QReg rd, QReg rn, VReg_2D rm)
|
||||
{
|
||||
emit<"11001110011mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SHA512SU0(VReg_2D rd, VReg_2D rn)
|
||||
{
|
||||
emit<"1100111011000000100000nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void SHA512SU1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
|
||||
{
|
||||
emit<"11001110011mmmmm100010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SM3PARTW1(VReg_4S rd, VReg_4S rn, VReg_4S rm)
|
||||
{
|
||||
emit<"11001110011mmmmm110000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SM3PARTW2(VReg_4S rd, VReg_4S rn, VReg_4S rm)
|
||||
{
|
||||
emit<"11001110011mmmmm110001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void SM3SS1(VReg_4S rd, VReg_4S rn, VReg_4S rm, VReg_4S ra)
|
||||
{
|
||||
emit<"11001110010mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
|
||||
}
|
||||
void SM3TT1A(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"11001110010mmmmm10ii00nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
|
||||
}
|
||||
void SM3TT1B(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"11001110010mmmmm10ii01nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
|
||||
}
|
||||
void SM3TT2A(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"11001110010mmmmm10ii10nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
|
||||
}
|
||||
void SM3TT2B(VReg_4S rd, VReg_4S rn, SElem em)
|
||||
{
|
||||
emit<"11001110010mmmmm10ii11nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
|
||||
}
|
||||
void SM4E(VReg_4S rd, VReg_4S rn)
|
||||
{
|
||||
emit<"1100111011000000100001nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void SM4EKEY(VReg_4S rd, VReg_4S rn, VReg_4S rm)
|
||||
{
|
||||
emit<"11001110011mmmmm110010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void UCVTF(HReg rd, HReg rn)
|
||||
{
|
||||
emit<"0111111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void UCVTF(VReg_4H rd, VReg_4H rn)
|
||||
{
|
||||
emit<"0010111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void UCVTF(VReg_8H rd, VReg_8H rn)
|
||||
{
|
||||
emit<"0110111001111001110110nnnnnddddd", "d", "n">(rd, rn);
|
||||
}
|
||||
void UDOT(VReg_2S rd, VReg_8B rn, SElem em)
|
||||
{
|
||||
emit<"0010111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void UDOT(VReg_4S rd, VReg_16B rn, SElem em)
|
||||
{
|
||||
emit<"0110111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
|
||||
}
|
||||
void UDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
|
||||
{
|
||||
emit<"00101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
void UDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
|
||||
{
|
||||
emit<"01101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
|
||||
}
|
||||
// XAR, 2D arrangement: rotate_amount fills the 6-bit 'i' field.
void XAR(VReg_2D rd, VReg_2D rn, VReg_2D rm, Imm<6> rotate_amount)
{
    emit<"11001110100mmmmmiiiiiinnnnnddddd", "d", "n", "m", "i">(rd, rn, rm, rotate_amount);
}
|
75
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp
vendored
Normal file
75
externals/oaknut/include/oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
void FCADD(VReg_4H rd, VReg_4H rn, VReg_4H rm, Rot rot)
|
||||
{
|
||||
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
|
||||
throw OaknutException{ExceptionType::InvalidRotation};
|
||||
emit<"00101110010mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
|
||||
}
|
||||
void FCADD(VReg_8H rd, VReg_8H rn, VReg_8H rm, Rot rot)
|
||||
{
|
||||
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
|
||||
throw OaknutException{ExceptionType::InvalidRotation};
|
||||
emit<"01101110010mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
|
||||
}
|
||||
void FCADD(VReg_2S rd, VReg_2S rn, VReg_2S rm, Rot rot)
|
||||
{
|
||||
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
|
||||
throw OaknutException{ExceptionType::InvalidRotation};
|
||||
emit<"00101110100mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
|
||||
}
|
||||
void FCADD(VReg_4S rd, VReg_4S rn, VReg_4S rm, Rot rot)
|
||||
{
|
||||
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
|
||||
throw OaknutException{ExceptionType::InvalidRotation};
|
||||
emit<"01101110100mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
|
||||
}
|
||||
void FCADD(VReg_2D rd, VReg_2D rn, VReg_2D rm, Rot rot)
|
||||
{
|
||||
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
|
||||
throw OaknutException{ExceptionType::InvalidRotation};
|
||||
emit<"01101110110mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
|
||||
}
|
||||
void FCMLA(VReg_4H rd, VReg_4H rn, HElem em, Rot rot)
|
||||
{
|
||||
if (em.reg_index() >= 16 || em.elem_index() >= 2)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0010111101LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FCMLA(VReg_8H rd, VReg_8H rn, HElem em, Rot rot)
|
||||
{
|
||||
if (em.reg_index() >= 16 || em.elem_index() >= 4)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111101LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), (em.elem_index() >> 1) & 1, em.elem_index() & 1);
|
||||
}
|
||||
void FCMLA(VReg_4S rd, VReg_4S rn, SElem em, Rot rot)
|
||||
{
|
||||
if (em.reg_index() >= 16 || em.elem_index() >= 2)
|
||||
throw OaknutException{ExceptionType::InvalidCombination};
|
||||
emit<"0110111110LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), em.elem_index() & 1, 0);
|
||||
}
|
||||
void FCMLA(VReg_4H rd, VReg_4H rn, VReg_4H rm, Rot rot)
|
||||
{
|
||||
emit<"00101110010mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
|
||||
}
|
||||
void FCMLA(VReg_8H rd, VReg_8H rn, VReg_8H rm, Rot rot)
|
||||
{
|
||||
emit<"01101110010mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
|
||||
}
|
||||
void FCMLA(VReg_2S rd, VReg_2S rn, VReg_2S rm, Rot rot)
|
||||
{
|
||||
emit<"00101110100mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
|
||||
}
|
||||
void FCMLA(VReg_4S rd, VReg_4S rn, VReg_4S rm, Rot rot)
|
||||
{
|
||||
emit<"01101110100mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
|
||||
}
|
||||
void FCMLA(VReg_2D rd, VReg_2D rn, VReg_2D rm, Rot rot)
|
||||
{
|
||||
emit<"01101110110mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
|
||||
}
|
||||
// FJCVTZS: W-register destination in 'd', D-register source in 'n'.
void FJCVTZS(WReg wd, DReg rn)
{
    emit<"0001111001111110000000nnnnnddddd", "d", "n">(wd, rn);
}
|
1709
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
vendored
Normal file
1709
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.0.inc.hpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
1027
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp
vendored
Normal file
1027
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.1.inc.hpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
23
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
vendored
Normal file
23
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.2.inc.hpp
vendored
Normal file
|
@ -0,0 +1,23 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
void BFC(WReg wd, Imm<5> lsb, Imm<5> width)
|
||||
{
|
||||
if (width.value() == 0 || width.value() > (32 - lsb.value()))
|
||||
throw OaknutException{ExceptionType::InvalidBitWidth};
|
||||
emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (~lsb.value() + 1) & 31, width.value() - 1);
|
||||
}
|
||||
void BFC(XReg xd, Imm<6> lsb, Imm<6> width)
|
||||
{
|
||||
if (width.value() == 0 || width.value() > (64 - lsb.value()))
|
||||
throw OaknutException{ExceptionType::InvalidBitWidth};
|
||||
emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (~lsb.value() + 1) & 63, width.value() - 1);
|
||||
}
|
||||
void ESB()
|
||||
{
|
||||
emit<"11010101000000110010001000011111">();
|
||||
}
|
||||
void PSB()
|
||||
{
|
||||
emit<"11010101000000110010001000111111">();
|
||||
}
|
159
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.3.inc.hpp
vendored
Normal file
159
externals/oaknut/include/oaknut/impl/mnemonics_generic_v8.3.inc.hpp
vendored
Normal file
|
@ -0,0 +1,159 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
void AUTDA(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z110nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void AUTDB(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z111nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void AUTDZA(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z11011111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void AUTDZB(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z11111111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void AUTIA(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z100nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void AUTIB(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z101nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void AUTIZA(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z10011111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void AUTIZB(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z10111111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void BLRAA(XReg xn, XRegSp xm)
|
||||
{
|
||||
emit<"1101011Z0011111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 0, xn, xm);
|
||||
}
|
||||
void BLRAAZ(XReg xn)
|
||||
{
|
||||
emit<"1101011Z0011111100001Mnnnnn11111", "Z", "M", "n">(0, 0, xn);
|
||||
}
|
||||
void BLRAB(XReg xn, XRegSp xm)
|
||||
{
|
||||
emit<"1101011Z0011111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 1, xn, xm);
|
||||
}
|
||||
void BLRABZ(XReg xn)
|
||||
{
|
||||
emit<"1101011Z0011111100001Mnnnnn11111", "Z", "M", "n">(0, 1, xn);
|
||||
}
|
||||
void BRAA(XReg xn, XRegSp xm)
|
||||
{
|
||||
emit<"1101011Z0001111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 0, xn, xm);
|
||||
}
|
||||
void BRAAZ(XReg xn)
|
||||
{
|
||||
emit<"1101011Z0001111100001Mnnnnn11111", "Z", "M", "n">(0, 0, xn);
|
||||
}
|
||||
void BRAB(XReg xn, XRegSp xm)
|
||||
{
|
||||
emit<"1101011Z0001111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 1, xn, xm);
|
||||
}
|
||||
void BRABZ(XReg xn)
|
||||
{
|
||||
emit<"1101011Z0001111100001Mnnnnn11111", "Z", "M", "n">(0, 1, xn);
|
||||
}
|
||||
void ERETAA()
|
||||
{
|
||||
emit<"110101101001111100001M1111111111", "M">(0);
|
||||
}
|
||||
void ERETAB()
|
||||
{
|
||||
emit<"110101101001111100001M1111111111", "M">(1);
|
||||
}
|
||||
void LDAPR(WReg wt, XRegSp xn)
|
||||
{
|
||||
emit<"1011100010111111110000nnnnnttttt", "t", "n">(wt, xn);
|
||||
}
|
||||
void LDAPR(XReg xt, XRegSp xn)
|
||||
{
|
||||
emit<"1111100010111111110000nnnnnttttt", "t", "n">(xt, xn);
|
||||
}
|
||||
void LDAPRB(WReg wt, XRegSp xn)
|
||||
{
|
||||
emit<"0011100010111111110000nnnnnttttt", "t", "n">(wt, xn);
|
||||
}
|
||||
void LDAPRH(WReg wt, XRegSp xn)
|
||||
{
|
||||
emit<"0111100010111111110000nnnnnttttt", "t", "n">(wt, xn);
|
||||
}
|
||||
void LDRAA(XReg xt, XRegSp xn, SOffset<13, 3> simm = 0)
|
||||
{
|
||||
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(0, 0, xt, xn, simm);
|
||||
}
|
||||
void LDRAB(XReg xt, XRegSp xn, SOffset<13, 3> simm = 0)
|
||||
{
|
||||
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(1, 0, xt, xn, simm);
|
||||
}
|
||||
void LDRAA(XReg xt, XRegSp xn, PreIndexed, SOffset<13, 3> simm)
|
||||
{
|
||||
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(0, 1, xt, xn, simm);
|
||||
}
|
||||
void LDRAB(XReg xt, XRegSp xn, PreIndexed, SOffset<13, 3> simm)
|
||||
{
|
||||
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(1, 1, xt, xn, simm);
|
||||
}
|
||||
void PACDA(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z010nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void PACDB(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z011nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void PACDZA(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z01011111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void PACDZB(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z01111111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
// PACGA: xd, xn and xm fill the 'd', 'n' and 'm' fields.
void PACGA(XReg xd, XReg xn, XRegSp xm)
{
    emit<"10011010110mmmmm001100nnnnnddddd", "d", "n", "m">(xd, xn, xm);
}
|
||||
void PACIA(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z000nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void PACIB(XReg xd, XRegSp xn)
|
||||
{
|
||||
emit<"110110101100000100Z001nnnnnddddd", "Z", "d", "n">(0, xd, xn);
|
||||
}
|
||||
void PACIZA(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z00011111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void PACIZB(XReg xd)
|
||||
{
|
||||
emit<"110110101100000100Z00111111ddddd", "Z", "d">(1, xd);
|
||||
}
|
||||
void RETAA()
|
||||
{
|
||||
emit<"110101100101111100001M1111111111", "M">(0);
|
||||
}
|
||||
void RETAB()
|
||||
{
|
||||
emit<"110101100101111100001M1111111111", "M">(1);
|
||||
}
|
||||
void XPACD(XReg xd)
|
||||
{
|
||||
emit<"110110101100000101000D11111nnnnn", "D", "n">(1, xd);
|
||||
}
|
||||
void XPACI(XReg xd)
|
||||
{
|
||||
emit<"110110101100000101000D11111nnnnn", "D", "n">(0, xd);
|
||||
}
|
19
externals/oaknut/include/oaknut/impl/multi_typed_name.hpp
vendored
Normal file
19
externals/oaknut/include/oaknut/impl/multi_typed_name.hpp
vendored
Normal file
|
@ -0,0 +1,19 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace oaknut {

/// Holds a pack of compile-time values. An instance converts implicitly to
/// the type of each value in the pack, yielding that value.
template<auto... Values>
struct MultiTypedName;

/// Recursion terminator: an empty pack provides no conversions.
template<>
struct MultiTypedName<> {};

/// Provides the conversion for the first value of the pack and inherits the
/// conversions for the remaining values from the base specialisation.
template<auto Head, auto... Tail>
struct MultiTypedName<Head, Tail...> : MultiTypedName<Tail...> {
    constexpr operator decltype(Head)() const { return Head; }
};

}  // namespace oaknut
|
44
externals/oaknut/include/oaknut/impl/oaknut_exception.inc.hpp
vendored
Normal file
44
externals/oaknut/include/oaknut/impl/oaknut_exception.inc.hpp
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// reg.hpp
|
||||
OAKNUT_EXCEPTION(InvalidWSPConversion, "toW: cannot convert WSP to WReg")
|
||||
OAKNUT_EXCEPTION(InvalidXSPConversion, "toX: cannot convert XSP to XReg")
|
||||
OAKNUT_EXCEPTION(InvalidWZRConversion, "unexpected WZR passed into an WRegWsp")
|
||||
OAKNUT_EXCEPTION(InvalidXZRConversion, "unexpected XZR passed into an XRegSp")
|
||||
OAKNUT_EXCEPTION(InvalidDElem_1, "invalid DElem_1")
|
||||
OAKNUT_EXCEPTION(InvalidElementIndex, "elem_index is out of range")
|
||||
|
||||
// imm.hpp / offset.hpp / list.hpp
|
||||
OAKNUT_EXCEPTION(InvalidAddSubImm, "invalid AddSubImm")
|
||||
OAKNUT_EXCEPTION(InvalidBitImm32, "invalid BitImm32")
|
||||
OAKNUT_EXCEPTION(InvalidBitImm64, "invalid BitImm64")
|
||||
OAKNUT_EXCEPTION(InvalidImmChoice, "invalid ImmChoice")
|
||||
OAKNUT_EXCEPTION(InvalidImmConst, "invalid ImmConst")
|
||||
OAKNUT_EXCEPTION(InvalidImmConstFZero, "invalid ImmConstFZero")
|
||||
OAKNUT_EXCEPTION(InvalidImmRange, "invalid ImmRange")
|
||||
OAKNUT_EXCEPTION(InvalidList, "invalid List")
|
||||
OAKNUT_EXCEPTION(InvalidMovImm16, "invalid MovImm16")
|
||||
OAKNUT_EXCEPTION(InvalidBitWidth, "invalid width")
|
||||
OAKNUT_EXCEPTION(LslShiftOutOfRange, "LslShift out of range")
|
||||
OAKNUT_EXCEPTION(OffsetMisaligned, "misalignment")
|
||||
OAKNUT_EXCEPTION(OffsetOutOfRange, "out of range")
|
||||
OAKNUT_EXCEPTION(ImmOutOfRange, "outsized Imm value")
|
||||
|
||||
// arm64_encode_helpers.inc.hpp
|
||||
OAKNUT_EXCEPTION(InvalidAddSubExt, "invalid AddSubExt choice for rm size")
|
||||
OAKNUT_EXCEPTION(InvalidIndexExt, "invalid IndexExt choice for rm size")
|
||||
OAKNUT_EXCEPTION(BitPositionOutOfRange, "bit position exceeds size of rt")
|
||||
OAKNUT_EXCEPTION(RequiresAbsoluteAddressesContext, "absolute addresses required")
|
||||
|
||||
// mnemonics_*.inc.hpp
|
||||
OAKNUT_EXCEPTION(InvalidCombination, "InvalidCombination")
|
||||
OAKNUT_EXCEPTION(InvalidCond, "Cond cannot be AL or NV here")
|
||||
OAKNUT_EXCEPTION(InvalidPairFirst, "Requires even register")
|
||||
OAKNUT_EXCEPTION(InvalidPairSecond, "Invalid second register in pair")
|
||||
OAKNUT_EXCEPTION(InvalidOperandXZR, "xzr invalid here")
|
||||
OAKNUT_EXCEPTION(InvalidRotation, "Invalid rotation operand")
|
||||
|
||||
// oaknut.hpp
|
||||
OAKNUT_EXCEPTION(InvalidAlignment, "invalid alignment")
|
||||
OAKNUT_EXCEPTION(LabelRedefinition, "label already resolved")
|
138
externals/oaknut/include/oaknut/impl/offset.hpp
vendored
Normal file
138
externals/oaknut/include/oaknut/impl/offset.hpp
vendored
Normal file
|
@ -0,0 +1,138 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <variant>
|
||||
|
||||
#include "oaknut/oaknut_exception.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
struct Label;
|
||||
|
||||
namespace detail {
|
||||
|
||||
/// Returns a 64-bit mask whose low `size` bits are clear and whose remaining
/// higher bits are set.
///
/// A `size` of 64 or more leaves no bits above the cleared range, so the
/// result is 0. This is handled explicitly because shifting a 64-bit value by
/// 64 or more is undefined behaviour.
constexpr std::uint64_t inverse_mask_from_size(std::size_t size)
{
    return size >= 64 ? std::uint64_t{0} : (~std::uint64_t{0}) << size;
}
|
||||
|
||||
/// Returns a 64-bit mask with the low `size` bits set and all higher bits clear.
///
/// The boundary cases are handled explicitly: a `size` of 0 yields 0 (the
/// plain shift would be by 64, which is undefined behaviour) and a `size` of
/// 64 or more yields an all-ones mask (the shift count would otherwise
/// underflow).
constexpr std::uint64_t mask_from_size(std::size_t size)
{
    if (size == 0)
        return 0;
    if (size >= 64)
        return ~std::uint64_t{0};
    return (~std::uint64_t{0}) >> (64 - size);
}
|
||||
|
||||
/// Sign-extends the low `bit_count` bits of `value` to 64 bits.
///
/// Bit `bit_count - 1` is treated as the sign bit: the value is shifted up so
/// that bit lands in bit 63, then shifted back down as a signed quantity.
/// `bit_count` must be in the range 1..64; both limits are checked at compile
/// time so the shift amount can never wrap.
template<std::size_t bit_count>
constexpr std::uint64_t sign_extend(std::uint64_t value)
{
    static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
    static_assert(bit_count <= 64, "cannot sign-extend a value wider than 64 bits");
    constexpr std::size_t shift_amount = 64 - bit_count;
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(value << shift_amount) >> shift_amount);
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template<std::size_t bitsize, std::size_t alignment>
|
||||
struct AddrOffset {
|
||||
AddrOffset(std::ptrdiff_t diff)
|
||||
: m_payload(encode(diff))
|
||||
{}
|
||||
|
||||
AddrOffset(Label& label)
|
||||
: m_payload(&label)
|
||||
{}
|
||||
|
||||
AddrOffset(const void* ptr)
|
||||
: m_payload(ptr)
|
||||
{}
|
||||
|
||||
static std::uint32_t encode(std::ptrdiff_t diff)
|
||||
{
|
||||
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(diff);
|
||||
if (detail::sign_extend<bitsize>(diff_u64) != diff_u64)
|
||||
throw OaknutException{ExceptionType::OffsetOutOfRange};
|
||||
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||
throw OaknutException{ExceptionType::OffsetMisaligned};
|
||||
|
||||
return static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::variant<std::uint32_t, Label*, const void*> m_payload;
|
||||
};
|
||||
|
||||
template<std::size_t bitsize, std::size_t shift_amount>
|
||||
struct PageOffset {
|
||||
PageOffset(const void* ptr)
|
||||
: m_payload(ptr)
|
||||
{}
|
||||
|
||||
PageOffset(Label& label)
|
||||
: m_payload(&label)
|
||||
{}
|
||||
|
||||
static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
|
||||
{
|
||||
std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount));
|
||||
if (detail::sign_extend<bitsize>(diff) != diff)
|
||||
throw OaknutException{ExceptionType::OffsetOutOfRange};
|
||||
diff &= detail::mask_from_size(bitsize);
|
||||
return static_cast<std::uint32_t>(((diff & 3) << (bitsize - 2)) | (diff >> 2));
|
||||
}
|
||||
|
||||
static bool valid(std::uintptr_t current_addr, std::uintptr_t target)
|
||||
{
|
||||
std::uint64_t diff = static_cast<std::uint64_t>((static_cast<std::int64_t>(target) >> shift_amount) - (static_cast<std::int64_t>(current_addr) >> shift_amount));
|
||||
return detail::sign_extend<bitsize>(diff) == diff;
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::variant<Label*, const void*> m_payload;
|
||||
};
|
||||
|
||||
template<std::size_t bitsize, std::size_t alignment>
|
||||
struct SOffset {
|
||||
SOffset(std::int64_t offset)
|
||||
{
|
||||
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(offset);
|
||||
if (detail::sign_extend<bitsize>(diff_u64) != diff_u64)
|
||||
throw OaknutException{ExceptionType::OffsetOutOfRange};
|
||||
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||
throw OaknutException{ExceptionType::OffsetMisaligned};
|
||||
|
||||
m_encoded = static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
template<std::size_t bitsize, std::size_t alignment>
|
||||
struct POffset {
|
||||
POffset(std::int64_t offset)
|
||||
{
|
||||
const std::uint64_t diff_u64 = static_cast<std::uint64_t>(offset);
|
||||
if (diff_u64 > detail::mask_from_size(bitsize))
|
||||
throw OaknutException{ExceptionType::OffsetOutOfRange};
|
||||
if (diff_u64 != (diff_u64 & detail::inverse_mask_from_size(alignment)))
|
||||
throw OaknutException{ExceptionType::OffsetMisaligned};
|
||||
|
||||
m_encoded = static_cast<std::uint32_t>((diff_u64 & detail::mask_from_size(bitsize)) >> alignment);
|
||||
}
|
||||
|
||||
private:
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
std::uint32_t m_encoded;
|
||||
};
|
||||
|
||||
} // namespace oaknut
|
16
externals/oaknut/include/oaknut/impl/overloaded.hpp
vendored
Normal file
16
externals/oaknut/include/oaknut/impl/overloaded.hpp
vendored
Normal file
|
@ -0,0 +1,16 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace oaknut::detail {

/// Combines several callables into one object by inheriting from all of them
/// and exposing every base's call operator as a single overload set.
template<class... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};

/// Deduction guide: lets `overloaded{f, g}` deduce the callable types.
template<class... Callables>
overloaded(Callables...) -> overloaded<Callables...>;

}  // namespace oaknut::detail
|
475
externals/oaknut/include/oaknut/impl/reg.hpp
vendored
Normal file
475
externals/oaknut/include/oaknut/impl/reg.hpp
vendored
Normal file
|
@ -0,0 +1,475 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "oaknut/oaknut_exception.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
// Forward declarations for the register and element types of this header, so
// that members declared early (e.g. RReg::toX, VReg::toB) can name types that
// are only defined further down.
struct Reg;

// General-purpose registers.
struct RReg;
struct ZrReg;
struct WzrReg;
struct XReg;
struct WReg;
struct SpReg;
struct WspReg;
struct XRegSp;
struct WRegWsp;  // the type defined below is WRegWsp; XRegWsp does not exist

// Vector registers, scalar views and arranged views.
struct VReg;
struct VRegArranged;
struct BReg;
struct HReg;
struct SReg;
struct DReg;
struct QReg;
struct VReg_2H;
struct VReg_8B;
struct VReg_4H;
struct VReg_2S;
struct VReg_1D;
struct VReg_16B;
struct VReg_8H;
struct VReg_4S;
struct VReg_2D;
struct VReg_1Q;

struct VRegSelector;

// Vector lanes.
template<typename E>
struct ElemSelector;
struct BElem;
struct HElem;
struct SElem;
struct DElem;
|
||||
|
||||
/// Common base of every register type: an index, a width in bits and a flag
/// telling general-purpose and vector registers apart.
///
/// The fields are stored narrowed (8-bit index, 8-bit width); the constructor
/// asserts that the index lies in [-1, 31] and that the width is a non-zero
/// power of two.
struct Reg {
    constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
        : m_index{static_cast<std::int8_t>(index_)}
        , m_bitsize{static_cast<std::uint8_t>(bitsize_)}
        , m_is_vector{is_vector_}
    {
        assert(index_ >= -1 && index_ <= 31);
        assert(bitsize_ != 0 && (bitsize_ & (bitsize_ - 1)) == 0 && "Bitsize must be a power of two");
    }

    /// True for vector registers, false for general-purpose ones.
    constexpr bool is_vector() const { return m_is_vector; }

    /// Register width in bits.
    constexpr unsigned bitsize() const { return m_bitsize; }

    /// Register number as passed to the constructor (may be -1).
    constexpr int index() const { return m_index; }

private:
    std::int8_t m_index;
    std::uint8_t m_bitsize;
    bool m_is_vector;
};
|
||||
|
||||
struct RReg : public Reg {
|
||||
constexpr explicit RReg(unsigned bitsize_, int index_)
|
||||
: Reg(false, bitsize_, index_)
|
||||
{
|
||||
assert(bitsize_ == 32 || bitsize_ == 64);
|
||||
}
|
||||
|
||||
XReg toX() const;
|
||||
WReg toW() const;
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct ZrReg : public RReg {
|
||||
constexpr explicit ZrReg()
|
||||
: RReg(64, 31) {}
|
||||
};
|
||||
|
||||
struct WzrReg : public RReg {
|
||||
constexpr explicit WzrReg()
|
||||
: RReg(32, 31) {}
|
||||
};
|
||||
|
||||
struct XReg : public RReg {
|
||||
constexpr explicit XReg(int index_)
|
||||
: RReg(64, index_) {}
|
||||
|
||||
constexpr /* implicit */ XReg(ZrReg)
|
||||
: RReg(64, 31) {}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct WReg : public RReg {
|
||||
constexpr explicit WReg(int index_)
|
||||
: RReg(32, index_) {}
|
||||
|
||||
constexpr /* implicit */ WReg(WzrReg)
|
||||
: RReg(32, 31) {}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
inline XReg RReg::toX() const
|
||||
{
|
||||
if (index() == -1)
|
||||
throw OaknutException{ExceptionType::InvalidXSPConversion};
|
||||
return XReg{index()};
|
||||
}
|
||||
|
||||
inline WReg RReg::toW() const
|
||||
{
|
||||
if (index() == -1)
|
||||
throw OaknutException{ExceptionType::InvalidWSPConversion};
|
||||
return WReg{index()};
|
||||
}
|
||||
|
||||
struct SpReg : public RReg {
|
||||
constexpr explicit SpReg()
|
||||
: RReg(64, -1) {}
|
||||
};
|
||||
|
||||
struct WspReg : public RReg {
|
||||
constexpr explicit WspReg()
|
||||
: RReg(64, -1) {}
|
||||
};
|
||||
|
||||
struct XRegSp : public RReg {
|
||||
constexpr /* implict */ XRegSp(SpReg)
|
||||
: RReg(64, -1) {}
|
||||
|
||||
constexpr /* implict */ XRegSp(XReg xr)
|
||||
: RReg(64, xr.index())
|
||||
{
|
||||
if (xr.index() == 31)
|
||||
throw OaknutException{ExceptionType::InvalidXZRConversion};
|
||||
}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct WRegWsp : public RReg {
|
||||
constexpr /* implict */ WRegWsp(WspReg)
|
||||
: RReg(32, -1) {}
|
||||
|
||||
constexpr /* implict */ WRegWsp(WReg wr)
|
||||
: RReg(32, wr.index())
|
||||
{
|
||||
if (wr.index() == 31)
|
||||
throw OaknutException{ExceptionType::InvalidWZRConversion};
|
||||
}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg : public Reg {
|
||||
constexpr explicit VReg(unsigned bitsize_, int index_)
|
||||
: Reg(true, bitsize_, index_)
|
||||
{
|
||||
assert(bitsize_ == 8 || bitsize_ == 16 || bitsize_ == 32 || bitsize_ == 64 || bitsize_ == 128);
|
||||
}
|
||||
|
||||
constexpr BReg toB() const;
|
||||
constexpr HReg toH() const;
|
||||
constexpr SReg toS() const;
|
||||
constexpr DReg toD() const;
|
||||
constexpr QReg toQ() const;
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VRegArranged : public Reg {
|
||||
protected:
|
||||
constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
|
||||
: Reg(true, bitsize_, index_), m_esize(static_cast<std::uint8_t>(esize_))
|
||||
{
|
||||
assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
|
||||
assert(esize_ <= bitsize_);
|
||||
}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
|
||||
private:
|
||||
std::uint8_t m_esize;
|
||||
};
|
||||
|
||||
struct VReg_2H : public VRegArranged {
|
||||
constexpr explicit VReg_2H(int reg_index_)
|
||||
: VRegArranged(32, reg_index_, 32 / 2)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_8B : public VRegArranged {
|
||||
constexpr explicit VReg_8B(int reg_index_)
|
||||
: VRegArranged(64, reg_index_, 64 / 8)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_4H : public VRegArranged {
|
||||
constexpr explicit VReg_4H(int reg_index_)
|
||||
: VRegArranged(64, reg_index_, 64 / 4)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_2S : public VRegArranged {
|
||||
constexpr explicit VReg_2S(int reg_index_)
|
||||
: VRegArranged(64, reg_index_, 64 / 2)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_1D : public VRegArranged {
|
||||
constexpr explicit VReg_1D(int reg_index_)
|
||||
: VRegArranged(64, reg_index_, 64 / 1)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_16B : public VRegArranged {
|
||||
constexpr explicit VReg_16B(int reg_index_)
|
||||
: VRegArranged(128, reg_index_, 128 / 16)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_8H : public VRegArranged {
|
||||
constexpr explicit VReg_8H(int reg_index_)
|
||||
: VRegArranged(128, reg_index_, 128 / 8)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_4S : public VRegArranged {
|
||||
constexpr explicit VReg_4S(int reg_index_)
|
||||
: VRegArranged(128, reg_index_, 128 / 4)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_2D : public VRegArranged {
|
||||
constexpr explicit VReg_2D(int reg_index_)
|
||||
: VRegArranged(128, reg_index_, 128 / 2)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct VReg_1Q : public VRegArranged {
|
||||
constexpr explicit VReg_1Q(int reg_index_)
|
||||
: VRegArranged(128, reg_index_, 128 / 1)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct Elem {
|
||||
constexpr explicit Elem(unsigned esize_, int reg_, unsigned elem_index_)
|
||||
: m_esize(esize_), m_reg(reg_), m_elem_index(elem_index_)
|
||||
{
|
||||
if (elem_index_ >= 128 / esize_)
|
||||
throw OaknutException{ExceptionType::InvalidElementIndex};
|
||||
}
|
||||
|
||||
constexpr unsigned esize() const { return m_esize; }
|
||||
constexpr int reg_index() const { return m_reg; }
|
||||
constexpr unsigned elem_index() const { return m_elem_index; }
|
||||
|
||||
private:
|
||||
unsigned m_esize;
|
||||
int m_reg;
|
||||
unsigned m_elem_index;
|
||||
};
|
||||
|
||||
struct BElem : public Elem {
|
||||
constexpr explicit BElem(int reg_, unsigned elem_index_)
|
||||
: Elem(2, reg_, elem_index_)
|
||||
{}
|
||||
};
|
||||
|
||||
struct HElem : public Elem {
|
||||
constexpr explicit HElem(int reg_, unsigned elem_index_)
|
||||
: Elem(2, reg_, elem_index_)
|
||||
{}
|
||||
};
|
||||
|
||||
struct SElem : public Elem {
|
||||
constexpr explicit SElem(int reg_, unsigned elem_index_)
|
||||
: Elem(4, reg_, elem_index_)
|
||||
{}
|
||||
};
|
||||
|
||||
struct DElem : public Elem {
|
||||
constexpr explicit DElem(int reg_, unsigned elem_index_)
|
||||
: Elem(8, reg_, elem_index_)
|
||||
{}
|
||||
};
|
||||
|
||||
struct DElem_1 : public DElem {
|
||||
constexpr /* implict */ DElem_1(DElem inner)
|
||||
: DElem(inner)
|
||||
{
|
||||
if (inner.elem_index() != 1)
|
||||
throw OaknutException{ExceptionType::InvalidDElem_1};
|
||||
}
|
||||
};
|
||||
|
||||
/// Remembers a register index and produces lane objects of type `E` via
/// subscripting: `selector[i]` yields `E{register index, i}`.
template<typename E>
struct ElemSelector {
    constexpr explicit ElemSelector(int reg_index_)
        : m_reg_index{reg_index_} {}

    constexpr int reg_index() const { return m_reg_index; }

    constexpr E operator[](unsigned elem_index) const
    {
        return E{m_reg_index, elem_index};
    }

private:
    int m_reg_index;
};
|
||||
|
||||
struct BReg : public VReg {
|
||||
constexpr explicit BReg(int index_)
|
||||
: VReg(8, index_)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct HReg : public VReg {
|
||||
constexpr explicit HReg(int index_)
|
||||
: VReg(16, index_)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct SReg : public VReg {
|
||||
constexpr explicit SReg(int index_)
|
||||
: VReg(32, index_)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
};
|
||||
|
||||
struct DReg : public VReg {
|
||||
constexpr explicit DReg(int index_)
|
||||
: VReg(64, index_)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
|
||||
constexpr ElemSelector<BElem> Belem() const { return ElemSelector<BElem>(index()); }
|
||||
constexpr ElemSelector<HElem> Helem() const { return ElemSelector<HElem>(index()); }
|
||||
constexpr ElemSelector<SElem> Selem() const { return ElemSelector<SElem>(index()); }
|
||||
constexpr ElemSelector<DElem> Delem() const { return ElemSelector<DElem>(index()); }
|
||||
|
||||
constexpr VReg_8B B8() const { return VReg_8B{index()}; }
|
||||
constexpr VReg_4H H4() const { return VReg_4H{index()}; }
|
||||
constexpr VReg_2S S2() const { return VReg_2S{index()}; }
|
||||
constexpr VReg_1D D1() const { return VReg_1D{index()}; }
|
||||
};
|
||||
|
||||
struct QReg : public VReg {
|
||||
constexpr explicit QReg(int index_)
|
||||
: VReg(128, index_)
|
||||
{}
|
||||
|
||||
template<typename Policy>
|
||||
friend class BasicCodeGenerator;
|
||||
|
||||
constexpr ElemSelector<BElem> Belem() const { return ElemSelector<BElem>(index()); }
|
||||
constexpr ElemSelector<HElem> Helem() const { return ElemSelector<HElem>(index()); }
|
||||
constexpr ElemSelector<SElem> Selem() const { return ElemSelector<SElem>(index()); }
|
||||
constexpr ElemSelector<DElem> Delem() const { return ElemSelector<DElem>(index()); }
|
||||
|
||||
constexpr VReg_16B B16() const { return VReg_16B{index()}; }
|
||||
constexpr VReg_8H H8() const { return VReg_8H{index()}; }
|
||||
constexpr VReg_4S S4() const { return VReg_4S{index()}; }
|
||||
constexpr VReg_2D D2() const { return VReg_2D{index()}; }
|
||||
constexpr VReg_1Q Q1() const { return VReg_1Q{index()}; }
|
||||
};
|
||||
|
||||
constexpr BReg VReg::toB() const
|
||||
{
|
||||
return BReg{index()};
|
||||
}
|
||||
constexpr HReg VReg::toH() const
|
||||
{
|
||||
return HReg{index()};
|
||||
}
|
||||
constexpr SReg VReg::toS() const
|
||||
{
|
||||
return SReg{index()};
|
||||
}
|
||||
constexpr DReg VReg::toD() const
|
||||
{
|
||||
return DReg{index()};
|
||||
}
|
||||
constexpr QReg VReg::toQ() const
|
||||
{
|
||||
return QReg{index()};
|
||||
}
|
||||
|
||||
struct VRegSelector {
|
||||
constexpr explicit VRegSelector(int reg_index)
|
||||
: m_reg_index(reg_index)
|
||||
{}
|
||||
|
||||
constexpr int index() const { return m_reg_index; }
|
||||
|
||||
constexpr ElemSelector<BElem> B() const { return ElemSelector<BElem>(index()); }
|
||||
constexpr ElemSelector<HElem> H() const { return ElemSelector<HElem>(index()); }
|
||||
constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>(index()); }
|
||||
constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>(index()); }
|
||||
|
||||
constexpr VReg_2H H2() const { return VReg_2H{index()}; }
|
||||
constexpr VReg_8B B8() const { return VReg_8B{index()}; }
|
||||
constexpr VReg_4H H4() const { return VReg_4H{index()}; }
|
||||
constexpr VReg_2S S2() const { return VReg_2S{index()}; }
|
||||
constexpr VReg_1D D1() const { return VReg_1D{index()}; }
|
||||
constexpr VReg_16B B16() const { return VReg_16B{index()}; }
|
||||
constexpr VReg_8H H8() const { return VReg_8H{index()}; }
|
||||
constexpr VReg_4S S4() const { return VReg_4S{index()}; }
|
||||
constexpr VReg_2D D2() const { return VReg_2D{index()}; }
|
||||
constexpr VReg_1Q Q1() const { return VReg_1Q{index()}; }
|
||||
|
||||
private:
|
||||
int m_reg_index;
|
||||
};
|
||||
|
||||
} // namespace oaknut
|
42
externals/oaknut/include/oaknut/impl/string_literal.hpp
vendored
Normal file
42
externals/oaknut/include/oaknut/impl/string_literal.hpp
vendored
Normal file
|
@ -0,0 +1,42 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
#include <cstddef>
#include <cstdint>
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// Fixed-size copy of a string literal, including its terminating NUL.
/// `N` is the array length of the literal.
template<std::size_t N>
struct StringLiteral {
    constexpr StringLiteral(const char (&str)[N])
    {
        std::copy(str, str + N, value);
    }

    /// Array length, terminator included.
    static constexpr std::size_t size = N;
    /// Number of characters before the terminator.
    static constexpr std::size_t strlen = N - 1;

    char value[N];
};
|
||||
|
||||
namespace detail {
|
||||
|
||||
template<StringLiteral<33> haystack, StringLiteral needles>
|
||||
consteval std::uint32_t find()
|
||||
{
|
||||
std::uint32_t result = 0;
|
||||
for (std::size_t i = 0; i < 32; i++) {
|
||||
for (std::size_t a = 0; a < needles.strlen; a++) {
|
||||
if (haystack.value[i] == needles.value[a]) {
|
||||
result |= 1 << (31 - i);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
} // namespace oaknut
|
358
externals/oaknut/include/oaknut/oaknut.hpp
vendored
Normal file
358
externals/oaknut/include/oaknut/oaknut.hpp
vendored
Normal file
|
@ -0,0 +1,358 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <bit>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <optional>
|
||||
#include <tuple>
|
||||
#include <type_traits>
|
||||
#include <variant>
|
||||
#include <vector>
|
||||
|
||||
#include "oaknut/impl/enum.hpp"
|
||||
#include "oaknut/impl/imm.hpp"
|
||||
#include "oaknut/impl/list.hpp"
|
||||
#include "oaknut/impl/multi_typed_name.hpp"
|
||||
#include "oaknut/impl/offset.hpp"
|
||||
#include "oaknut/impl/overloaded.hpp"
|
||||
#include "oaknut/impl/reg.hpp"
|
||||
#include "oaknut/impl/string_literal.hpp"
|
||||
#include "oaknut/oaknut_exception.hpp"
|
||||
|
||||
namespace oaknut {
|
||||
|
||||
/// A position in the generated code that may not be known yet.
///
/// A default-constructed label is unbound. Only BasicCodeGenerator (a friend)
/// can create a bound label directly or access the pending write-back list.
struct Label {
public:
    Label() = default;

    /// True once an offset has been assigned.
    bool is_bound() const
    {
        return m_offset != std::nullopt;
    }

    /// The assigned offset; throws std::bad_optional_access while unbound.
    std::ptrdiff_t offset() const
    {
        return m_offset.value();
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    explicit Label(std::ptrdiff_t offset)
        : m_offset{offset}
    {}

    /// Computes the bits to merge into an already emitted word, given that
    /// word's offset and the label's final offset.
    using EmitFunctionType = std::uint32_t (*)(std::ptrdiff_t wb_offset, std::ptrdiff_t resolved_offset);

    /// One emitted word waiting for this label to be bound.
    struct Writeback {
        std::ptrdiff_t m_wb_offset;
        std::uint32_t m_mask;
        EmitFunctionType m_fn;
    };

    std::optional<std::ptrdiff_t> m_offset;
    std::vector<Writeback> m_wbs;
};
|
||||
|
||||
/// Instruction emitter parameterised on a storage `Policy`.
///
/// The policy supplies `offset()`, `append()`, `set_at_offset()` and
/// `xptr<T>()`. The instruction mnemonics are textually included from the
/// `mnemonics_*.inc.hpp` files; this class adds label binding, a handful of
/// pseudo-instructions and raw data emission on top of them.
template<typename Policy>
class BasicCodeGenerator : public Policy {
public:
    BasicCodeGenerator(typename Policy::constructor_argument_type arg, std::uint32_t* xmem)
        : Policy(arg, xmem)
    {}

    /// Returns a label that is already bound to the current offset.
    Label l() const
    {
        return Label{Policy::offset()};
    }

    /// Binds `label` to the current offset, then patches every word that was
    /// recorded against it and clears the pending list.
    /// Throws OaknutException{LabelRedefinition} if the label is already bound.
    void l(Label& label) const
    {
        if (label.is_bound())
            throw OaknutException{ExceptionType::LabelRedefinition};

        const auto here = Policy::offset();
        label.m_offset = here;

        for (auto& pending : label.m_wbs) {
            const std::uint32_t patch = pending.m_fn(pending.m_wb_offset, here);
            Policy::set_at_offset(pending.m_wb_offset, patch, pending.m_mask);
        }
        label.m_wbs.clear();
    }

#include "oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.0.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.2.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.3.inc.hpp"

    /// RET with no operand: forwards to RET(X30).
    void RET()
    {
        RET(XReg{30});
    }

    /// Loads `addr` into `xd` with an ADRP followed by an ADD of the low 12
    /// bits of the address.
    void ADRL(XReg xd, const void* addr)
    {
        ADRP(xd, addr);
        ADD(xd, xd, reinterpret_cast<uint64_t>(addr) & 0xFFF);
    }

    /// Loads a 32-bit constant into `wd`.
    ///
    /// Tried in order: a single MOVZ, a single MOVN of the inverted value, a
    /// single ORR with a bitmask immediate, and finally MOVZ (low half) plus
    /// MOVK (high half). Nothing is emitted when the destination index is 31.
    void MOV(WReg wd, uint32_t imm)
    {
        if (wd.index() == 31)
            return;

        if (MovImm16::is_valid(imm)) {
            MOVZ(wd, imm);
        } else if (MovImm16::is_valid(static_cast<std::uint32_t>(~imm))) {
            MOVN(wd, static_cast<std::uint32_t>(~imm));
        } else if (detail::encode_bit_imm(imm)) {
            ORR(wd, WzrReg{}, imm);
        } else {
            MOVZ(wd, {static_cast<std::uint16_t>(imm), MovImm16Shift::SHL_0});
            MOVK(wd, {static_cast<std::uint16_t>(imm >> 16), MovImm16Shift::SHL_16});
        }
    }

    /// Loads a 64-bit constant into `xd`.
    ///
    /// Values whose upper 32 bits are zero are delegated to the 32-bit MOV.
    /// Otherwise a single MOVZ, MOVN or bitmask ORR is used when possible. The
    /// fallback builds the value 16 bits at a time, skipping zero halfwords;
    /// if the low 32 bits form a bitmask immediate, one 32-bit ORR covers them
    /// and the remaining halfwords start at position 2.
    /// Nothing is emitted when the destination index is 31.
    void MOV(XReg xd, uint64_t imm)
    {
        if (xd.index() == 31)
            return;

        if (imm >> 32 == 0) {
            MOV(xd.toW(), static_cast<std::uint32_t>(imm));
            return;
        }
        if (MovImm16::is_valid(imm)) {
            MOVZ(xd, imm);
            return;
        }
        if (MovImm16::is_valid(~imm)) {
            MOVN(xd, ~imm);
            return;
        }
        if (detail::encode_bit_imm(imm)) {
            ORR(xd, ZrReg{}, imm);
            return;
        }

        bool seeded = false;  // true once the register holds a defined base value
        int halfword = 0;     // position of the halfword currently at the bottom of imm

        if (detail::encode_bit_imm(static_cast<std::uint32_t>(imm))) {
            ORR(xd.toW(), WzrReg{}, static_cast<std::uint32_t>(imm));
            imm >>= 32;
            seeded = true;
            halfword = 2;
        }

        for (; imm != 0; imm >>= 16, ++halfword) {
            const uint16_t hw = static_cast<uint16_t>(imm);
            if (hw == 0)
                continue;

            if (seeded) {
                MOVK(xd, {hw, static_cast<MovImm16Shift>(halfword)});
            } else {
                MOVZ(xd, {hw, static_cast<MovImm16Shift>(halfword)});
                seeded = true;
            }
        }
    }

    /// Convenience function for moving pointers to registers.
    ///
    /// Uses ADR when the target is within 0xFFFFF bytes of the current
    /// execution address, ADRL when PageOffset<21, 12> accepts the pair, and a
    /// full constant load otherwise.
    void MOVP2R(XReg xd, const void* addr)
    {
        const int64_t distance = reinterpret_cast<std::uint64_t>(addr) - Policy::template xptr<std::uintptr_t>();

        if (-0xF'FFFF <= distance && distance <= 0xF'FFFF) {
            ADR(xd, addr);
            return;
        }
        if (PageOffset<21, 12>::valid(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::uintptr_t>(addr))) {
            ADRL(xd, addr);
            return;
        }
        MOV(xd, reinterpret_cast<uint64_t>(addr));
    }

    /// Emits NOPs until the current offset is a multiple of `alignment`.
    /// Throws OaknutException{InvalidAlignment} unless `alignment` is a power
    /// of two that is at least 4.
    void align(std::size_t alignment)
    {
        const std::size_t low_bits = alignment - 1;
        const bool power_of_two = (alignment & low_bits) == 0;
        if (alignment < 4 || !power_of_two)
            throw OaknutException{ExceptionType::InvalidAlignment};

        while ((Policy::offset() & low_bits) != 0) {
            NOP();
        }
    }

    /// Emits one raw 32-bit word.
    void dw(std::uint32_t value)
    {
        Policy::append(value);
    }

    /// Emits a raw 64-bit value as two words, low half first.
    void dx(std::uint64_t value)
    {
        const auto low = static_cast<std::uint32_t>(value);
        const auto high = static_cast<std::uint32_t>(value >> 32);
        Policy::append(low);
        Policy::append(high);
    }

private:
#include "oaknut/impl/arm64_encode_helpers.inc.hpp"

    /// Assembles one instruction word and appends it.
    ///
    /// `bs` is the 32-character bit pattern; its '1' characters form the fixed
    /// part. Each `bargs` string names the pattern characters belonging to the
    /// corresponding argument, whose encoded bits are OR-ed in.
    template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
    void emit(Ts... args)
    {
        constexpr std::uint32_t base = detail::find<bs, "1">();
        const std::uint32_t word = (base | ... | encode<detail::find<bs, bargs>()>(std::forward<Ts>(args)));
        Policy::append(word);
    }
};
|
||||
|
||||
struct PointerCodeGeneratorPolicy {
|
||||
public:
|
||||
std::ptrdiff_t offset() const
|
||||
{
|
||||
return (m_ptr - m_wmem) * sizeof(std::uint32_t);
|
||||
}
|
||||
|
||||
void set_offset(std::ptrdiff_t offset)
|
||||
{
|
||||
if ((offset % sizeof(std::uint32_t)) != 0)
|
||||
throw OaknutException{ExceptionType::InvalidAlignment};
|
||||
m_ptr = m_wmem + offset / sizeof(std::uint32_t);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T wptr() const
|
||||
{
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
|
||||
return reinterpret_cast<T>(m_ptr);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T xptr() const
|
||||
{
|
||||
static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
|
||||
return reinterpret_cast<T>(m_xmem + (m_ptr - m_wmem));
|
||||
}
|
||||
|
||||
void set_wptr(std::uint32_t* p)
|
||||
{
|
||||
m_ptr = p;
|
||||
}
|
||||
|
||||
void set_xptr(std::uint32_t* p)
|
||||
{
|
||||
m_ptr = m_wmem + (p - m_xmem);
|
||||
}
|
||||
|
||||
protected:
|
||||
using constructor_argument_type = std::uint32_t*;
|
||||
|
||||
PointerCodeGeneratorPolicy(std::uint32_t* wmem, std::uint32_t* xmem)
|
||||
: m_ptr(wmem), m_wmem(wmem), m_xmem(xmem)
|
||||
{}
|
||||
|
||||
void append(std::uint32_t instruction)
|
||||
{
|
||||
*m_ptr++ = instruction;
|
||||
}
|
||||
|
||||
void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
|
||||
{
|
||||
std::uint32_t* p = m_wmem + offset / sizeof(std::uint32_t);
|
||||
*p = (*p & mask) | value;
|
||||
}
|
||||
|
||||
private:
|
||||
std::uint32_t* m_ptr;
|
||||
std::uint32_t* const m_wmem;
|
||||
std::uint32_t* const m_xmem;
|
||||
};
|
||||
|
||||
/// Storage policy that appends instruction words to a caller-owned
/// std::vector, held by reference.
struct VectorCodeGeneratorPolicy {
public:
    /// Number of bytes emitted so far (vector size times 4).
    std::ptrdiff_t offset() const
    {
        const std::size_t words = m_vec.size();
        return words * sizeof(std::uint32_t);
    }

    /// `m_xmem` advanced by the number of words emitted so far, as a pointer
    /// type, uintptr_t or intptr_t.
    template<typename T>
    T xptr() const
    {
        static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
        const std::size_t words = m_vec.size();
        return reinterpret_cast<T>(m_xmem + words);
    }

protected:
    using constructor_argument_type = std::vector<std::uint32_t>&;

    VectorCodeGeneratorPolicy(std::vector<std::uint32_t>& vec, std::uint32_t* xmem)
        : m_vec{vec}, m_xmem{xmem}
    {}

    /// Appends one word to the vector.
    void append(std::uint32_t instruction)
    {
        m_vec.push_back(instruction);
    }

    /// Patches the word at byte `offset`: the bits selected by `mask` are kept
    /// and `value` is OR-ed in.
    void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
    {
        std::uint32_t& word = m_vec[offset / sizeof(std::uint32_t)];
        word = (word & mask) | value;
    }

private:
    std::vector<std::uint32_t>& m_vec;
    std::uint32_t* const m_xmem;
};
|
||||
|
||||
/// Code generator that writes through raw pointers.
struct CodeGenerator : BasicCodeGenerator<PointerCodeGeneratorPolicy> {
public:
    /// Uses `mem` as both base pointers of the policy.
    CodeGenerator(std::uint32_t* mem)
        : BasicCodeGenerator(mem, mem) {}

    /// Uses separate base pointers: `wmem` for writing and `xmem` as the second base.
    CodeGenerator(std::uint32_t* wmem, std::uint32_t* xmem)
        : BasicCodeGenerator(wmem, xmem) {}
};
|
||||
|
||||
/// Code generator that appends to a caller-owned std::vector.
struct VectorCodeGenerator : BasicCodeGenerator<VectorCodeGeneratorPolicy> {
public:
    /// No second base pointer is supplied: nullptr is passed as `xmem`.
    VectorCodeGenerator(std::vector<std::uint32_t>& mem)
        : BasicCodeGenerator(mem, nullptr) {}

    /// Appends to `wmem` and passes `xmem` to the policy as its second base pointer.
    VectorCodeGenerator(std::vector<std::uint32_t>& wmem, std::uint32_t* xmem)
        : BasicCodeGenerator(wmem, xmem) {}
};
|
||||
|
||||
namespace util {
|
||||
|
||||
inline constexpr WReg W0{0}, W1{1}, W2{2}, W3{3}, W4{4}, W5{5}, W6{6}, W7{7}, W8{8}, W9{9}, W10{10}, W11{11}, W12{12}, W13{13}, W14{14}, W15{15}, W16{16}, W17{17}, W18{18}, W19{19}, W20{20}, W21{21}, W22{22}, W23{23}, W24{24}, W25{25}, W26{26}, W27{27}, W28{28}, W29{29}, W30{30};
|
||||
inline constexpr XReg X0{0}, X1{1}, X2{2}, X3{3}, X4{4}, X5{5}, X6{6}, X7{7}, X8{8}, X9{9}, X10{10}, X11{11}, X12{12}, X13{13}, X14{14}, X15{15}, X16{16}, X17{17}, X18{18}, X19{19}, X20{20}, X21{21}, X22{22}, X23{23}, X24{24}, X25{25}, X26{26}, X27{27}, X28{28}, X29{29}, X30{30};
|
||||
inline constexpr ZrReg ZR{}, XZR{};
|
||||
inline constexpr WzrReg WZR{};
|
||||
inline constexpr SpReg SP{}, XSP{};
|
||||
inline constexpr WspReg WSP{};
|
||||
|
||||
inline constexpr VRegSelector V0{0}, V1{1}, V2{2}, V3{3}, V4{4}, V5{5}, V6{6}, V7{7}, V8{8}, V9{9}, V10{10}, V11{11}, V12{12}, V13{13}, V14{14}, V15{15}, V16{16}, V17{17}, V18{18}, V19{19}, V20{20}, V21{21}, V22{22}, V23{23}, V24{24}, V25{25}, V26{26}, V27{27}, V28{28}, V29{29}, V30{30}, V31{31};
|
||||
inline constexpr QReg Q0{0}, Q1{1}, Q2{2}, Q3{3}, Q4{4}, Q5{5}, Q6{6}, Q7{7}, Q8{8}, Q9{9}, Q10{10}, Q11{11}, Q12{12}, Q13{13}, Q14{14}, Q15{15}, Q16{16}, Q17{17}, Q18{18}, Q19{19}, Q20{20}, Q21{21}, Q22{22}, Q23{23}, Q24{24}, Q25{25}, Q26{26}, Q27{27}, Q28{28}, Q29{29}, Q30{30}, Q31{31};
|
||||
inline constexpr DReg D0{0}, D1{1}, D2{2}, D3{3}, D4{4}, D5{5}, D6{6}, D7{7}, D8{8}, D9{9}, D10{10}, D11{11}, D12{12}, D13{13}, D14{14}, D15{15}, D16{16}, D17{17}, D18{18}, D19{19}, D20{20}, D21{21}, D22{22}, D23{23}, D24{24}, D25{25}, D26{26}, D27{27}, D28{28}, D29{29}, D30{30}, D31{31};
|
||||
inline constexpr SReg S0{0}, S1{1}, S2{2}, S3{3}, S4{4}, S5{5}, S6{6}, S7{7}, S8{8}, S9{9}, S10{10}, S11{11}, S12{12}, S13{13}, S14{14}, S15{15}, S16{16}, S17{17}, S18{18}, S19{19}, S20{20}, S21{21}, S22{22}, S23{23}, S24{24}, S25{25}, S26{26}, S27{27}, S28{28}, S29{29}, S30{30}, S31{31};
|
||||
inline constexpr HReg H0{0}, H1{1}, H2{2}, H3{3}, H4{4}, H5{5}, H6{6}, H7{7}, H8{8}, H9{9}, H10{10}, H11{11}, H12{12}, H13{13}, H14{14}, H15{15}, H16{16}, H17{17}, H18{18}, H19{19}, H20{20}, H21{21}, H22{22}, H23{23}, H24{24}, H25{25}, H26{26}, H27{27}, H28{28}, H29{29}, H30{30}, H31{31};
|
||||
inline constexpr BReg B0{0}, B1{1}, B2{2}, B3{3}, B4{4}, B5{5}, B6{6}, B7{7}, B8{8}, B9{9}, B10{10}, B11{11}, B12{12}, B13{13}, B14{14}, B15{15}, B16{16}, B17{17}, B18{18}, B19{19}, B20{20}, B21{21}, B22{22}, B23{23}, B24{24}, B25{25}, B26{26}, B27{27}, B28{28}, B29{29}, B30{30}, B31{31};
|
||||
|
||||
inline constexpr Cond EQ{Cond::EQ}, NE{Cond::NE}, CS{Cond::CS}, CC{Cond::CC}, MI{Cond::MI}, PL{Cond::PL}, VS{Cond::VS}, VC{Cond::VC}, HI{Cond::HI}, LS{Cond::LS}, GE{Cond::GE}, LT{Cond::LT}, GT{Cond::GT}, LE{Cond::LE}, AL{Cond::AL}, NV{Cond::NV}, HS{Cond::HS}, LO{Cond::LO};
|
||||
|
||||
inline constexpr auto UXTB{MultiTypedName<AddSubExt::UXTB>{}};
|
||||
inline constexpr auto UXTH{MultiTypedName<AddSubExt::UXTH>{}};
|
||||
inline constexpr auto UXTW{MultiTypedName<AddSubExt::UXTW, IndexExt::UXTW>{}};
|
||||
inline constexpr auto UXTX{MultiTypedName<AddSubExt::UXTX>{}};
|
||||
inline constexpr auto SXTB{MultiTypedName<AddSubExt::SXTB>{}};
|
||||
inline constexpr auto SXTH{MultiTypedName<AddSubExt::SXTH>{}};
|
||||
inline constexpr auto SXTW{MultiTypedName<AddSubExt::SXTW, IndexExt::SXTW>{}};
|
||||
inline constexpr auto SXTX{MultiTypedName<AddSubExt::SXTX, IndexExt::SXTX>{}};
|
||||
inline constexpr auto LSL{MultiTypedName<AddSubExt::LSL, IndexExt::LSL, AddSubShift::LSL, LogShift::LSL, LslSymbol::LSL>{}};
|
||||
inline constexpr auto LSR{MultiTypedName<AddSubShift::LSR, LogShift::LSR>{}};
|
||||
inline constexpr auto ASR{MultiTypedName<AddSubShift::ASR, LogShift::ASR>{}};
|
||||
inline constexpr auto ROR{MultiTypedName<LogShift::ROR>{}};
|
||||
|
||||
inline constexpr PostIndexed POST_INDEXED{};
|
||||
inline constexpr PreIndexed PRE_INDEXED{};
|
||||
inline constexpr MslSymbol MSL{MslSymbol::MSL};
|
||||
|
||||
} // namespace util
|
||||
|
||||
} // namespace oaknut
|
44
externals/oaknut/include/oaknut/oaknut_exception.hpp
vendored
Normal file
44
externals/oaknut/include/oaknut/oaknut_exception.hpp
vendored
Normal file
|
@ -0,0 +1,44 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <exception>
|
||||
|
||||
namespace oaknut {

/// Error categories. The enumerators are generated from the
/// OAKNUT_EXCEPTION(tag, str) entries of `oaknut_exception.inc.hpp`.
enum class ExceptionType {
#define OAKNUT_EXCEPTION(tag, str) tag,
#include "oaknut/impl/oaknut_exception.inc.hpp"
#undef OAKNUT_EXCEPTION
};

/// Returns the message string paired with `et` in `oaknut_exception.inc.hpp`,
/// or a fixed fallback for a value that matches no entry.
inline const char* to_string(ExceptionType et)
{
    switch (et) {
#define OAKNUT_EXCEPTION(tag, str) case ExceptionType::tag: return str;
#include "oaknut/impl/oaknut_exception.inc.hpp"
#undef OAKNUT_EXCEPTION
    default:
        return "unknown ExceptionType";
    }
}

/// Exception carrying an ExceptionType; `what()` yields its message string.
class OaknutException : public std::exception {
public:
    explicit OaknutException(ExceptionType et)
        : m_type(et)
    {}

    const char* what() const noexcept override
    {
        return to_string(m_type);
    }

private:
    ExceptionType m_type;
};

}  // namespace oaknut
|
5
externals/oaknut/oaknutConfig.cmake.in
vendored
Normal file
5
externals/oaknut/oaknutConfig.cmake.in
vendored
Normal file
|
@ -0,0 +1,5 @@
|
|||
# Package configuration template. The @...@ tokens are placeholders that are filled in
# when this .in file is configured.
# NOTE(review): presumably processed by configure_package_config_file(), which is what
# expands @PACKAGE_INIT@ and provides check_required_components() - confirm in CMakeLists.txt.
@PACKAGE_INIT@
|
||||
|
||||
# Load the targets file that sits next to this config file.
include("${CMAKE_CURRENT_LIST_DIR}/@PROJECT_NAME@Targets.cmake")
|
||||
|
||||
# Check the components requested for this package.
check_required_components(@PROJECT_NAME@)
|
77
externals/oaknut/tests/_feature_detect.cpp
vendored
Normal file
77
externals/oaknut/tests/_feature_detect.cpp
vendored
Normal file
|
@ -0,0 +1,77 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <cstdio>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "architecture.hpp"
|
||||
|
||||
#ifdef ON_ARM64
|
||||
|
||||
# include "oaknut/feature_detection/feature_detection.hpp"
|
||||
# include "oaknut/feature_detection/feature_detection_idregs.hpp"
|
||||
|
||||
using namespace oaknut;
|
||||
|
||||
// Prints to stdout, on a single line, the name of every CpuFeature for which the
// result of detect_features() answers has() with true. Makes no assertions.
TEST_CASE("Print CPU features (Default)")
{
    CpuFeatures detected = detect_features();

    std::fputs("CPU Features: ", stdout);

    // X-macro: every OAKNUT_CPU_FEATURE(...) entry of the included list becomes one
    // "print the name if present" statement.
#    define OAKNUT_CPU_FEATURE(feat)          \
        if (detected.has(CpuFeature::feat)) { \
            std::fputs(#feat " ", stdout);    \
        }
#    include "oaknut/impl/cpu_feature.inc.hpp"
#    undef OAKNUT_CPU_FEATURE

    std::fputs("\n", stdout);
}
|
||||
|
||||
# if OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1
|
||||
|
||||
// Variant compiled when OAKNUT_SUPPORTS_READING_ID_REGISTERS == 1: reads the ID
// registers once, requires that the read succeeded, and prints the features derived
// from them on a single line of stdout.
TEST_CASE("Print CPU features (Using CPUID)")
{
    std::optional<id::IdRegisters> regs = read_id_registers();
    REQUIRE(regs.has_value());

    CpuFeatures detected = detect_features_via_id_registers(*regs);

    std::fputs("CPU Features (CPUID method): ", stdout);

    // X-macro: every OAKNUT_CPU_FEATURE(...) entry of the included list becomes one
    // "print the name if present" statement.
#    define OAKNUT_CPU_FEATURE(feat)          \
        if (detected.has(CpuFeature::feat)) { \
            std::fputs(#feat " ", stdout);    \
        }
#    include "oaknut/impl/cpu_feature.inc.hpp"
#    undef OAKNUT_CPU_FEATURE

    std::fputs("\n", stdout);
}
|
||||
|
||||
# elif OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2
|
||||
|
||||
// Variant compiled when OAKNUT_SUPPORTS_READING_ID_REGISTERS == 2: for every index
// below get_core_count(), reads that core's ID registers, requires that the read
// succeeded, and prints the derived features on one line of stdout per core.
TEST_CASE("Print CPU features (Using CPUID)")
{
    const std::size_t cores = get_core_count();

    for (std::size_t core = 0; core < cores; ++core) {
        std::optional<id::IdRegisters> regs = read_id_registers(core);
        REQUIRE(regs.has_value());

        CpuFeatures detected = detect_features_via_id_registers(*regs);

        std::printf("CPU Features (CPUID method - Core %zu): ", core);

        // X-macro: every OAKNUT_CPU_FEATURE(...) entry of the included list becomes
        // one "print the name if present" statement.
#    define OAKNUT_CPU_FEATURE(feat)          \
        if (detected.has(CpuFeature::feat)) { \
            std::fputs(#feat " ", stdout);    \
        }
#    include "oaknut/impl/cpu_feature.inc.hpp"
#    undef OAKNUT_CPU_FEATURE

        std::fputs("\n", stdout);
    }
}
|
||||
|
||||
# endif
|
||||
|
||||
#endif
|
6
externals/oaknut/tests/architecture.hpp
vendored
Normal file
6
externals/oaknut/tests/architecture.hpp
vendored
Normal file
|
@ -0,0 +1,6 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
// Defines ON_ARM64 when any of the three predefined macros tested below is defined.
// The test sources use `#ifdef ON_ARM64` to guard the tests that execute generated code.
#if defined(__ARM64__) || defined(__aarch64__) || defined(_M_ARM64)
|
||||
# define ON_ARM64
|
||||
#endif
|
333
externals/oaknut/tests/basic.cpp
vendored
Normal file
333
externals/oaknut/tests/basic.cpp
vendored
Normal file
|
@ -0,0 +1,333 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <limits>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "architecture.hpp"
|
||||
#include "oaknut/oaknut.hpp"
|
||||
#include "rand_int.hpp"
|
||||
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
#ifdef ON_ARM64
|
||||
|
||||
# include "oaknut/code_block.hpp"
|
||||
# include "oaknut/dual_code_block.hpp"
|
||||
|
||||
// Emits `MOV W0, 42; RET` into a CodeBlock, calls it as int(*)() and checks that the
// call returns 42.
TEST_CASE("Basic Test")
{
    CodeBlock block{4096};
    CodeGenerator gen{block.ptr()};

    // Code is emitted between unprotect() and protect().
    block.unprotect();
    gen.MOV(W0, 42);
    gen.RET();
    block.protect();
    block.invalidate_all();

    const auto entry = reinterpret_cast<int (*)()>(block.ptr());
    const int returned = entry();
    REQUIRE(returned == 42);
}
|
||||
|
||||
// Same program as "Basic Test", but emitted through a DualCodeBlock: the generator is
// given both wptr() and xptr(), and the code is invoked through xptr().
TEST_CASE("Basic Test (Dual)")
{
    DualCodeBlock block{4096};
    CodeGenerator gen{block.wptr(), block.xptr()};

    gen.MOV(W0, 42);
    gen.RET();
    block.invalidate_all();

    const auto entry = reinterpret_cast<int (*)()>(block.xptr());
    const int returned = entry();
    REQUIRE(returned == 42);
}
|
||||
|
||||
// Assembles a recursive Fibonacci routine (int fib(int)) into a CodeBlock and checks
// fib(0), fib(1), fib(5) and fib(9).
TEST_CASE("Fibonacci")
{
    CodeBlock block{4096};
    CodeGenerator gen{block.ptr()};

    block.unprotect();

    // Address at which the routine below starts, typed as a callable.
    const auto fib = gen.xptr<int (*)(int)>();
    Label entry, epilogue, base_zero, recurse_case;

    // Prologue: store X29/X30 and X20/X19 into a 32-byte stack frame.
    gen.l(entry);
    gen.STP(X29, X30, SP, PRE_INDEXED, -32);
    gen.STP(X20, X19, SP, 16);
    gen.MOV(X29, SP);
    gen.MOV(W19, W0);         // keep n in W19 across the recursive calls
    gen.SUBS(W0, W0, 1);      // W0 = n - 1, flags set from the result
    gen.B(LT, base_zero);     // n - 1 < 0
    gen.B(NE, recurse_case);  // n - 1 != 0
    gen.MOV(W0, 1);           // n == 1 -> 1
    gen.B(epilogue);

    // n < 1 -> 0
    gen.l(base_zero);
    gen.MOV(W0, WZR);
    gen.B(epilogue);

    // fib(n - 1) + fib(n - 2); W0 already holds n - 1 on entry to this path.
    gen.l(recurse_case);
    gen.BL(entry);
    gen.MOV(W20, W0);
    gen.SUB(W0, W19, 2);
    gen.BL(entry);
    gen.ADD(W0, W0, W20);

    // Epilogue: reload the stored registers and return.
    gen.l(epilogue);
    gen.LDP(X20, X19, SP, 16);
    gen.LDP(X29, X30, SP, POST_INDEXED, 32);
    gen.RET();

    block.protect();
    block.invalidate_all();

    REQUIRE(fib(0) == 0);
    REQUIRE(fib(1) == 1);
    REQUIRE(fib(5) == 5);
    REQUIRE(fib(9) == 34);
}
|
||||
|
||||
// Same recursive Fibonacci routine as "Fibonacci", emitted through a DualCodeBlock
// (generator constructed from wptr() and xptr(); no unprotect()/protect() calls).
TEST_CASE("Fibonacci (Dual)")
{
    DualCodeBlock block{4096};
    CodeGenerator gen{block.wptr(), block.xptr()};

    // Address at which the routine below starts, typed as a callable.
    const auto fib = gen.xptr<int (*)(int)>();
    Label entry, epilogue, base_zero, recurse_case;

    // Prologue: store X29/X30 and X20/X19 into a 32-byte stack frame.
    gen.l(entry);
    gen.STP(X29, X30, SP, PRE_INDEXED, -32);
    gen.STP(X20, X19, SP, 16);
    gen.MOV(X29, SP);
    gen.MOV(W19, W0);         // keep n in W19 across the recursive calls
    gen.SUBS(W0, W0, 1);      // W0 = n - 1, flags set from the result
    gen.B(LT, base_zero);     // n - 1 < 0
    gen.B(NE, recurse_case);  // n - 1 != 0
    gen.MOV(W0, 1);           // n == 1 -> 1
    gen.B(epilogue);

    // n < 1 -> 0
    gen.l(base_zero);
    gen.MOV(W0, WZR);
    gen.B(epilogue);

    // fib(n - 1) + fib(n - 2); W0 already holds n - 1 on entry to this path.
    gen.l(recurse_case);
    gen.BL(entry);
    gen.MOV(W20, W0);
    gen.SUB(W0, W19, 2);
    gen.BL(entry);
    gen.ADD(W0, W0, W20);

    // Epilogue: reload the stored registers and return.
    gen.l(epilogue);
    gen.LDP(X20, X19, SP, 16);
    gen.LDP(X29, X30, SP, POST_INDEXED, 32);
    gen.RET();

    block.invalidate_all();

    REQUIRE(fib(0) == 0);
    REQUIRE(fib(1) == 1);
    REQUIRE(fib(5) == 5);
    REQUIRE(fib(9) == 34);
}
|
||||
|
||||
// For 0x100000 random 32-bit values: emits `MOV W0, <value>; RET`, runs it and checks
// that the 64-bit return value equals the immediate.
TEST_CASE("Immediate generation (32-bit)", "[slow]")
{
    CodeBlock block{4096};

    for (int round = 0; round < 0x100000; ++round) {
        const std::uint32_t imm = RandInt<std::uint32_t>(0, 0xffffffff);

        // A fresh generator each round rewrites the block from its start.
        CodeGenerator gen{block.ptr()};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.MOV(W0, imm);
        gen.RET();
        block.protect();
        block.invalidate_all();

        REQUIRE(run() == imm);
    }
}
|
||||
|
||||
// For 0x100000 random 64-bit values: emits `MOV X0, <value>; RET`, runs it and checks
// that the return value equals the immediate.
TEST_CASE("Immediate generation (64-bit)", "[slow]")
{
    CodeBlock block{4096};

    for (int round = 0; round < 0x100000; ++round) {
        const std::uint64_t imm = RandInt<std::uint64_t>(0, 0xffffffff'ffffffff);

        // A fresh generator each round rewrites the block from its start.
        CodeGenerator gen{block.ptr()};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.MOV(X0, imm);
        gen.RET();
        block.protect();
        block.invalidate_all();

        REQUIRE(run() == imm);
    }
}
|
||||
|
||||
// For every byte offset in [-1048576, 1048576) relative to the start of the block:
// emits `ADR X0, <block start + offset>; RET` and checks the returned address.
TEST_CASE("ADR", "[slow]")
{
    CodeBlock block{4096};

    for (std::int64_t offset = -1048576; offset < 1048576; ++offset) {
        const std::intptr_t target = reinterpret_cast<std::intptr_t>(block.ptr()) + offset;

        CodeGenerator gen{block.ptr()};
        const auto run = gen.xptr<std::intptr_t (*)()>();

        block.unprotect();
        gen.ADR(X0, reinterpret_cast<void*>(target));
        gen.RET();
        block.protect();
        block.invalidate_all();

        INFO(offset);  // attach the failing offset to the report
        REQUIRE(run() == target);
    }
}
|
||||
|
||||
// For 0x200000 random offsets in [-4294967296, 4294967295] from the start of the block:
// emits `ADRP X0, <target>; RET` and checks that the result is the target address with
// its low 12 bits cleared.
TEST_CASE("ADRP", "[slow]")
{
    CodeBlock block{4096};

    for (int round = 0; round < 0x200000; ++round) {
        const std::int64_t delta = RandInt<std::int64_t>(-4294967296, 4294967295);
        const std::intptr_t target = reinterpret_cast<std::intptr_t>(block.ptr()) + delta;
        const std::uint64_t low_bits_mask = static_cast<std::uint64_t>(0xfff);
        const std::uint64_t expected_page = static_cast<std::uint64_t>(target) & ~low_bits_mask;

        CodeGenerator gen{block.ptr()};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.ADRP(X0, reinterpret_cast<void*>(target));
        gen.RET();
        block.protect();
        block.invalidate_all();

        INFO(round);  // attach the iteration number to the report
        REQUIRE(run() == expected_page);
    }
}
|
||||
|
||||
// For every byte offset in [-0x4000, 0x4000) relative to the emission point: emits
// `ADRL X0, <target>; RET` and checks that the exact target address is returned.
TEST_CASE("ADRL (near)")
{
    CodeBlock block{4096};
    std::uint32_t* const emit_at = block.ptr() + 42;  // create small offset for testing

    for (int offset = -0x4000; offset < 0x4000; ++offset) {
        const std::int64_t delta = offset;
        const std::intptr_t target = reinterpret_cast<std::intptr_t>(emit_at) + delta;

        CodeGenerator gen{emit_at};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.ADRL(X0, reinterpret_cast<void*>(target));
        gen.RET();
        block.protect();
        block.invalidate_all();

        INFO(offset);  // attach the failing offset to the report
        REQUIRE(run() == static_cast<std::uint64_t>(target));
    }
}
|
||||
|
||||
// For 0x200000 random offsets in [-4294967296 + 100, 4294967295 - 100] from the
// emission point: emits `ADRL X0, <target>; RET` and checks the returned address.
TEST_CASE("ADRL (far)", "[slow]")
{
    CodeBlock block{4096};
    std::uint32_t* const emit_at = block.ptr() + 42;  // create small offset for testing

    for (int round = 0; round < 0x200000; ++round) {
        const std::int64_t delta = RandInt<std::int64_t>(-4294967296 + 100, 4294967295 - 100);
        const std::intptr_t target = reinterpret_cast<std::intptr_t>(emit_at) + delta;

        CodeGenerator gen{emit_at};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.ADRL(X0, reinterpret_cast<void*>(target));
        gen.RET();
        block.protect();
        block.invalidate_all();

        INFO(round);  // attach the iteration number to the report
        REQUIRE(run() == static_cast<std::uint64_t>(target));
    }
}
|
||||
|
||||
// For 0x200000 random 64-bit displacements from the emission point: emits
// `MOVP2R X0, <target>; RET` and checks that the exact target address is returned.
TEST_CASE("MOVP2R (far)", "[slow]")
{
    CodeBlock mem{4096};
    std::uint32_t* const mem_ptr = mem.ptr() + 42;  // create small offset for testing

    for (int i = 0; i < 0x200000; i++) {
        const std::int64_t diff = RandInt<std::int64_t>(std::numeric_limits<std::int64_t>::min(),
                                                        std::numeric_limits<std::int64_t>::max());

        // diff spans the whole int64_t range, so base + diff routinely leaves the range
        // of a signed integer. Signed overflow is undefined behaviour; the sum is done
        // in unsigned arithmetic, where wrapping modulo 2^N is well defined.
        const std::uintptr_t value = reinterpret_cast<std::uintptr_t>(mem_ptr) + static_cast<std::uintptr_t>(diff);

        CodeGenerator code{mem_ptr};

        auto f = code.xptr<std::uint64_t (*)()>();
        mem.unprotect();
        code.MOVP2R(X0, reinterpret_cast<void*>(value));
        code.RET();
        mem.protect();
        mem.invalidate_all();

        REQUIRE(f() == static_cast<std::uint64_t>(value));
    }
}
|
||||
|
||||
// For every magnitude in [0xFFFF'F000, 0x1'0000'1000), i.e. just below to just above
// 4 GiB, checks MOVP2R with the target at +magnitude and at -magnitude bytes from the
// emission point.
TEST_CASE("MOVP2R (4GiB boundary)")
{
    CodeBlock block{4096};
    std::uint32_t* const emit_at = block.ptr() + 42;  // create small offset for testing

    // Emits `MOVP2R X0, <emit_at + delta>; RET`, runs it and checks the returned address.
    const auto check_delta = [&block, emit_at](std::int64_t delta) {
        const std::intptr_t target = reinterpret_cast<std::intptr_t>(emit_at) + delta;

        CodeGenerator gen{emit_at};
        const auto run = gen.xptr<std::uint64_t (*)()>();

        block.unprotect();
        gen.MOVP2R(X0, reinterpret_cast<void*>(target));
        gen.RET();
        block.protect();
        block.invalidate_all();

        REQUIRE(run() == static_cast<std::uint64_t>(target));
    };

    for (std::int64_t magnitude = 0xFFFF'F000; magnitude < 0x1'0000'1000; ++magnitude) {
        check_delta(magnitude);
        check_delta(-magnitude);
    }
}
|
||||
|
||||
#endif
|
||||
|
||||
// Pins PageOffset<21, 12>::encode for one address pair whose second address has the
// upper 32 bits set while the first does not.
TEST_CASE("PageOffset (rollover)")
{
    using Offset = PageOffset<21, 12>;

    const auto encoded = Offset::encode(0x0000000088e74000, 0xffffffffd167dece);
    REQUIRE(encoded == 0xd2202);
}
|
||||
|
||||
// Pins PageOffset<21, 12>::encode for address pairs that sit inside one 4 KiB page
// (expected encoding 0) and for pairs that straddle the 0xfff / 0x1000 boundary.
TEST_CASE("PageOffset (page boundary)")
{
    using Offset = PageOffset<21, 12>;

    // Both addresses in the same page.
    REQUIRE(Offset::encode(0x0001000000000002, 0x0001000000000001) == 0);
    REQUIRE(Offset::encode(0x0001000000000001, 0x0001000000000002) == 0);

    // Addresses one byte apart, on opposite sides of the page boundary.
    REQUIRE(Offset::encode(0x0001000000001000, 0x0001000000000fff) == 0x1fffff);
    REQUIRE(Offset::encode(0x0001000000000fff, 0x0001000000001000) == 0x080000);
}
|
815
externals/oaknut/tests/fpsimd.cpp
vendored
Normal file
815
externals/oaknut/tests/fpsimd.cpp
vendored
Normal file
|
@ -0,0 +1,815 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "oaknut/oaknut.hpp"
|
||||
|
||||
// T(HEX, CMD): declares one Catch2 test case named after the text of CMD. The test
// points a CodeGenerator at a single local std::uint32_t, invokes `code.CMD` on it and
// requires that the word then equals HEX.
// `result` is deliberately left uninitialised: the REQUIRE only reads it after CMD ran.
#define T(HEX, CMD) \
|
||||
TEST_CASE(#CMD) \
|
||||
{ \
|
||||
using namespace oaknut; \
|
||||
using namespace oaknut::util; \
|
||||
\
|
||||
std::uint32_t result; \
|
||||
CodeGenerator code{&result}; \
|
||||
\
|
||||
code.CMD; \
|
||||
\
|
||||
REQUIRE(result == HEX); \
|
||||
}
|
||||
|
||||
T(0x5ee0bb61, ABS(D1, D27))
|
||||
T(0x4e20ba03, ABS(V3.B16(), V16.B16()))
|
||||
T(0x5ef98449, ADD(D9, D2, D25))
|
||||
T(0x4eef8697, ADD(V23.D2(), V20.D2(), V15.D2()))
|
||||
T(0x0eb743d1, ADDHN(V17.S2(), V30.D2(), V23.D2()))
|
||||
T(0x5ef1b933, ADDP(D19, V9.D2()))
|
||||
T(0x0e7ebf6e, ADDP(V14.H4(), V27.H4(), V30.H4()))
|
||||
T(0x4e31ba47, ADDV(B7, V18.B16()))
|
||||
// AESD
|
||||
// AESE
|
||||
// AESIMC
|
||||
// AESMC
|
||||
T(0x4e2b1d4c, AND(V12.B16(), V10.B16(), V11.B16()))
|
||||
T(0x6f01b7f4, BIC(V20.H8(), 63, LSL, 8))
|
||||
T(0x2f017752, BIC(V18.S2(), 58, LSL, 24))
|
||||
T(0x0e751c85, BIC(V5.B8(), V4.B8(), V21.B8()))
|
||||
T(0x2ef11d4d, BIF(V13.B8(), V10.B8(), V17.B8()))
|
||||
T(0x2eb31f3b, BIT(V27.B8(), V25.B8(), V19.B8()))
|
||||
T(0x2e711ed8, BSL(V24.B8(), V22.B8(), V17.B8()))
|
||||
T(0x0e604aaf, CLS(V15.H4(), V21.H4()))
|
||||
T(0x6e604808, CLZ(V8.H8(), V0.H8()))
|
||||
T(0x7eff8ec2, CMEQ(D2, D22, D31))
|
||||
T(0x2e2b8d57, CMEQ(V23.B8(), V10.B8(), V11.B8()))
|
||||
T(0x5ee09bbf, CMEQ(D31, D29, 0))
|
||||
T(0x4ea09876, CMEQ(V22.S4(), V3.S4(), 0))
|
||||
T(0x5ef23c04, CMGE(D4, D0, D18))
|
||||
T(0x4e203c6f, CMGE(V15.B16(), V3.B16(), V0.B16()))
|
||||
T(0x7ee08822, CMGE(D2, D1, 0))
|
||||
T(0x2ea08bb9, CMGE(V25.S2(), V29.S2(), 0))
|
||||
T(0x5ef036a5, CMGT(D5, D21, D16))
|
||||
T(0x0eb7358b, CMGT(V11.S2(), V12.S2(), V23.S2()))
|
||||
T(0x5ee08957, CMGT(D23, D10, 0))
|
||||
T(0x4ea088eb, CMGT(V11.S4(), V7.S4(), 0))
|
||||
T(0x7ee235f2, CMHI(D18, D15, D2))
|
||||
T(0x6e243596, CMHI(V22.B16(), V12.B16(), V4.B16()))
|
||||
T(0x7ef23faf, CMHS(D15, D29, D18))
|
||||
T(0x2e2d3d8a, CMHS(V10.B8(), V12.B8(), V13.B8()))
|
||||
T(0x7ee098a4, CMLE(D4, D5, 0))
|
||||
T(0x2e2098d3, CMLE(V19.B8(), V6.B8(), 0))
|
||||
T(0x5ee0a980, CMLT(D0, D12, 0))
|
||||
T(0x4e60a892, CMLT(V18.H8(), V4.H8(), 0))
|
||||
T(0x5ee18e03, CMTST(D3, D16, D1))
|
||||
T(0x4e708f65, CMTST(V5.H8(), V27.H8(), V16.H8()))
|
||||
T(0x4e20598d, CNT(V13.B16(), V12.B16()))
|
||||
// DUP
|
||||
T(0x0e0d06cd, DUP(V13.B8(), V22.B()[6]))
|
||||
T(0x0e010fe7, DUP(V7.B8(), WZR))
|
||||
T(0x2e2b1e6e, EOR(V14.B8(), V19.B8(), V11.B8()))
|
||||
T(0x6e1c0a35, EXT(V21.B16(), V17.B16(), V28.B16(), 1))
|
||||
T(0x7ea0d7a4, FABD(S4, S29, S0))
|
||||
T(0x6eecd418, FABD(V24.D2(), V0.D2(), V12.D2()))
|
||||
T(0x1e20c299, FABS(S25, S20))
|
||||
T(0x1e60c114, FABS(D20, D8))
|
||||
T(0x4ee0f999, FABS(V25.D2(), V12.D2()))
|
||||
T(0x7e71ef5b, FACGE(D27, D26, D17))
|
||||
T(0x6e6eed17, FACGE(V23.D2(), V8.D2(), V14.D2()))
|
||||
T(0x7ef8efc0, FACGT(D0, D30, D24))
|
||||
T(0x6eb0ec54, FACGT(V20.S4(), V2.S4(), V16.S4()))
|
||||
T(0x1e242b23, FADD(S3, S25, S4))
|
||||
T(0x1e672a8b, FADD(D11, D20, D7))
|
||||
T(0x4e34d46b, FADD(V11.S4(), V3.S4(), V20.S4()))
|
||||
T(0x7e30db16, FADDP(S22, V24.S2()))
|
||||
T(0x6e21d626, FADDP(V6.S4(), V17.S4(), V1.S4()))
|
||||
T(0x6ec3e441, FCADD(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_90))
|
||||
T(0x6ec3f441, FCADD(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_270))
|
||||
T(0x2e83e441, FCADD(V1.S2(), V2.S2(), V3.S2(), Rot::DEG_90))
|
||||
T(0x6e83e441, FCADD(V1.S4(), V2.S4(), V3.S4(), Rot::DEG_90))
|
||||
T(0x2e43e441, FCADD(V1.H4(), V2.H4(), V3.H4(), Rot::DEG_90))
|
||||
T(0x6e43e441, FCADD(V1.H8(), V2.H8(), V3.H8(), Rot::DEG_90))
|
||||
T(0x1e23c4ce, FCCMP(S6, S3, 14, GT))
|
||||
T(0x1e7104aa, FCCMP(D5, D17, 10, EQ))
|
||||
T(0x1e28c678, FCCMPE(S19, S8, 8, GT))
|
||||
T(0x1e6195dd, FCCMPE(D14, D1, 13, LS))
|
||||
T(0x5e31e659, FCMEQ(S25, S18, S17))
|
||||
T(0x0e27e7ba, FCMEQ(V26.S2(), V29.S2(), V7.S2()))
|
||||
T(0x5ea0da1d, FCMEQ(S29, S16, 0.0))
|
||||
T(0x4ee0db8a, FCMEQ(V10.D2(), V28.D2(), 0.0))
|
||||
T(0x7e2de473, FCMGE(S19, S3, S13))
|
||||
T(0x2e33e726, FCMGE(V6.S2(), V25.S2(), V19.S2()))
|
||||
T(0x7ea0c8d3, FCMGE(S19, S6, 0.0))
|
||||
T(0x6ea0ca7b, FCMGE(V27.S4(), V19.S4(), 0.0))
|
||||
T(0x7eb7e65d, FCMGT(S29, S18, S23))
|
||||
T(0x6ef0e6ac, FCMGT(V12.D2(), V21.D2(), V16.D2()))
|
||||
T(0x5ee0cb5a, FCMGT(D26, D26, 0.0))
|
||||
T(0x4ea0c917, FCMGT(V23.S4(), V8.S4(), 0.0))
|
||||
T(0x6ec3c441, FCMLA(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_0))
|
||||
T(0x6ec3cc41, FCMLA(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_90))
|
||||
T(0x6ec3d441, FCMLA(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_180))
|
||||
T(0x6ec3dc41, FCMLA(V1.D2(), V2.D2(), V3.D2(), Rot::DEG_270))
|
||||
T(0x2e83cc41, FCMLA(V1.S2(), V2.S2(), V3.S2(), Rot::DEG_90))
|
||||
T(0x6e83cc41, FCMLA(V1.S4(), V2.S4(), V3.S4(), Rot::DEG_90))
|
||||
T(0x2e43cc41, FCMLA(V1.H4(), V2.H4(), V3.H4(), Rot::DEG_90))
|
||||
T(0x6e43cc41, FCMLA(V1.H8(), V2.H8(), V3.H8(), Rot::DEG_90))
|
||||
T(0x6f831041, FCMLA(V1.S4(), V2.S4(), V3.S()[0], Rot::DEG_0))
|
||||
T(0x6f833041, FCMLA(V1.S4(), V2.S4(), V3.S()[0], Rot::DEG_90))
|
||||
T(0x6f835041, FCMLA(V1.S4(), V2.S4(), V3.S()[0], Rot::DEG_180))
|
||||
T(0x6f837041, FCMLA(V1.S4(), V2.S4(), V3.S()[0], Rot::DEG_270))
|
||||
T(0x6f833841, FCMLA(V1.S4(), V2.S4(), V3.S()[1], Rot::DEG_90))
|
||||
T(0x2f433041, FCMLA(V1.H4(), V2.H4(), V3.H()[0], Rot::DEG_90))
|
||||
T(0x2f633041, FCMLA(V1.H4(), V2.H4(), V3.H()[1], Rot::DEG_90))
|
||||
T(0x6f433041, FCMLA(V1.H8(), V2.H8(), V3.H()[0], Rot::DEG_90))
|
||||
T(0x6f633041, FCMLA(V1.H8(), V2.H8(), V3.H()[1], Rot::DEG_90))
|
||||
T(0x6f433841, FCMLA(V1.H8(), V2.H8(), V3.H()[2], Rot::DEG_90))
|
||||
T(0x6f633841, FCMLA(V1.H8(), V2.H8(), V3.H()[3], Rot::DEG_90))
|
||||
T(0x7ea0dbe1, FCMLE(S1, S31, 0.0))
|
||||
T(0x6ea0da69, FCMLE(V9.S4(), V19.S4(), 0.0))
|
||||
T(0x5ea0ea5f, FCMLT(S31, S18, 0.0))
|
||||
T(0x4ee0e8de, FCMLT(V30.D2(), V6.D2(), 0.0))
|
||||
T(0x1e322040, FCMP(S2, S18))
|
||||
T(0x1e202248, FCMP(S18, 0.0))
|
||||
T(0x1e6520a0, FCMP(D5, D5))
|
||||
T(0x1e602108, FCMP(D8, 0.0))
|
||||
T(0x1e332370, FCMPE(S27, S19))
|
||||
T(0x1e202018, FCMPE(S0, 0.0))
|
||||
T(0x1e7120b0, FCMPE(D5, D17))
|
||||
T(0x1e602298, FCMPE(D20, 0.0))
|
||||
T(0x1e32ed68, FCSEL(S8, S11, S18, AL))
|
||||
T(0x1e7b1e21, FCSEL(D1, D17, D27, NE))
|
||||
T(0x1ee24022, FCVT(S2, H1))
|
||||
T(0x1ee2c33c, FCVT(D28, H25))
|
||||
T(0x1e23c379, FCVT(H25, S27))
|
||||
T(0x1e22c1a5, FCVT(D5, S13))
|
||||
T(0x1e63c2b1, FCVT(H17, D21))
|
||||
T(0x1e624309, FCVT(S9, D24))
|
||||
T(0x1e2400a0, FCVTAS(W0, S5))
|
||||
T(0x9e24000e, FCVTAS(X14, S0))
|
||||
T(0x1e640191, FCVTAS(W17, D12))
|
||||
T(0x9e6403d6, FCVTAS(X22, D30))
|
||||
T(0x5e21c8a7, FCVTAS(S7, S5))
|
||||
T(0x0e21c8df, FCVTAS(V31.S2(), V6.S2()))
|
||||
T(0x1e25036b, FCVTAU(W11, S27))
|
||||
T(0x9e25030c, FCVTAU(X12, S24))
|
||||
T(0x1e65002e, FCVTAU(W14, D1))
|
||||
T(0x9e65003e, FCVTAU(X30, D1))
|
||||
T(0x7e61cabd, FCVTAU(D29, D21))
|
||||
T(0x2e21c880, FCVTAU(V0.S2(), V4.S2()))
|
||||
T(0x4e217b66, FCVTL2(V6.S4(), V27.H8()))
|
||||
T(0x1e30016d, FCVTMS(W13, S11))
|
||||
T(0x9e3002b5, FCVTMS(X21, S21))
|
||||
T(0x1e7003dd, FCVTMS(W29, D30))
|
||||
T(0x9e700080, FCVTMS(X0, D4))
|
||||
T(0x5e21b9b6, FCVTMS(S22, S13))
|
||||
T(0x4e61ba4e, FCVTMS(V14.D2(), V18.D2()))
|
||||
T(0x1e31002d, FCVTMU(W13, S1))
|
||||
T(0x9e310281, FCVTMU(X1, S20))
|
||||
T(0x1e71000e, FCVTMU(W14, D0))
|
||||
T(0x9e710010, FCVTMU(X16, D0))
|
||||
T(0x7e61bb3b, FCVTMU(D27, D25))
|
||||
T(0x2e21b918, FCVTMU(V24.S2(), V8.S2()))
|
||||
T(0x0e616a68, FCVTN(V8.S2(), V19.D2()))
|
||||
T(0x1e200100, FCVTNS(W0, S8))
|
||||
T(0x9e20037f, FCVTNS(XZR, S27))
|
||||
T(0x1e60015e, FCVTNS(W30, D10))
|
||||
T(0x9e600018, FCVTNS(X24, D0))
|
||||
T(0x5e61a846, FCVTNS(D6, D2))
|
||||
T(0x4e21aa81, FCVTNS(V1.S4(), V20.S4()))
|
||||
T(0x1e210248, FCVTNU(W8, S18))
|
||||
T(0x9e2103da, FCVTNU(X26, S30))
|
||||
T(0x1e610120, FCVTNU(W0, D9))
|
||||
T(0x9e61013a, FCVTNU(X26, D9))
|
||||
T(0x7e61aaba, FCVTNU(D26, D21))
|
||||
T(0x6e21aa16, FCVTNU(V22.S4(), V16.S4()))
|
||||
T(0x1e28010d, FCVTPS(W13, S8))
|
||||
T(0x9e2803df, FCVTPS(XZR, S30))
|
||||
T(0x1e6802e9, FCVTPS(W9, D23))
|
||||
T(0x9e6801f7, FCVTPS(X23, D15))
|
||||
T(0x5ee1a986, FCVTPS(D6, D12))
|
||||
T(0x4ea1aa32, FCVTPS(V18.S4(), V17.S4()))
|
||||
T(0x1e29022b, FCVTPU(W11, S17))
|
||||
T(0x9e290381, FCVTPU(X1, S28))
|
||||
T(0x1e690095, FCVTPU(W21, D4))
|
||||
T(0x9e6902b3, FCVTPU(X19, D21))
|
||||
T(0x7ea1abbb, FCVTPU(S27, S29))
|
||||
T(0x6ee1ab06, FCVTPU(V6.D2(), V24.D2()))
|
||||
T(0x7e61687a, FCVTXN(S26, D3))
|
||||
T(0x2e61694c, FCVTXN(V12.S2(), V10.D2()))
|
||||
T(0x1e18c4d6, FCVTZS(W22, S6, 15))
|
||||
T(0x9e18d131, FCVTZS(X17, S9, 12))
|
||||
T(0x1e58fd9b, FCVTZS(W27, D12, 1))
|
||||
T(0x9e5899ee, FCVTZS(X14, D15, 26))
|
||||
T(0x1e380091, FCVTZS(W17, S4))
|
||||
T(0x9e380289, FCVTZS(X9, S20))
|
||||
T(0x1e780117, FCVTZS(W23, D8))
|
||||
T(0x9e7800f5, FCVTZS(X21, D7))
|
||||
T(0x5f2fffdb, FCVTZS(S27, S30, 17))
|
||||
T(0x4f65ff65, FCVTZS(V5.D2(), V27.D2(), 27))
|
||||
T(0x5ee1b932, FCVTZS(D18, D9))
|
||||
T(0x4ee1ba41, FCVTZS(V1.D2(), V18.D2()))
|
||||
T(0x1e19b5d8, FCVTZU(W24, S14, 19))
|
||||
T(0x9e199462, FCVTZU(X2, S3, 27))
|
||||
T(0x1e59fca1, FCVTZU(W1, D5, 1))
|
||||
T(0x9e599bbd, FCVTZU(X29, D29, 26))
|
||||
T(0x1e3900f6, FCVTZU(W22, S7))
|
||||
T(0x9e3900b7, FCVTZU(X23, S5))
|
||||
T(0x1e79031a, FCVTZU(W26, D24))
|
||||
T(0x9e790248, FCVTZU(X8, D18))
|
||||
T(0x7f5afd37, FCVTZU(D23, D9, 38))
|
||||
T(0x2f34fd38, FCVTZU(V24.S2(), V9.S2(), 12))
|
||||
T(0x7ea1baa3, FCVTZU(S3, S21))
|
||||
T(0x6ee1b8c1, FCVTZU(V1.D2(), V6.D2()))
|
||||
T(0x1e3d1999, FDIV(S25, S12, S29))
|
||||
T(0x1e7e1a4e, FDIV(D14, D18, D30))
|
||||
T(0x2e2cfe45, FDIV(V5.S2(), V18.S2(), V12.S2()))
|
||||
T(0x1e7e0131, FJCVTZS(W17, D9))
|
||||
T(0x1f114362, FMADD(S2, S27, S17, S16))
|
||||
T(0x1f482240, FMADD(D0, D18, D8, D8))
|
||||
T(0x1e234b5f, FMAX(S31, S26, S3))
|
||||
T(0x1e694894, FMAX(D20, D4, D9))
|
||||
T(0x4e29f568, FMAX(V8.S4(), V11.S4(), V9.S4()))
|
||||
T(0x1e2f6a40, FMAXNM(S0, S18, S15))
|
||||
T(0x1e6d6a99, FMAXNM(D25, D20, D13))
|
||||
T(0x4e2dc6da, FMAXNM(V26.S4(), V22.S4(), V13.S4()))
|
||||
T(0x7e30c9b9, FMAXNMP(S25, V13.S2()))
|
||||
T(0x6e36c794, FMAXNMP(V20.S4(), V28.S4(), V22.S4()))
|
||||
T(0x6e30c8f6, FMAXNMV(S22, V7.S4()))
|
||||
T(0x7e30f8dd, FMAXP(S29, V6.S2()))
|
||||
T(0x6e61f4ab, FMAXP(V11.D2(), V5.D2(), V1.D2()))
|
||||
T(0x6e30fb85, FMAXV(S5, V28.S4()))
|
||||
T(0x1e3c5aae, FMIN(S14, S21, S28))
|
||||
T(0x1e7f58f8, FMIN(D24, D7, D31))
|
||||
T(0x0eb0f63b, FMIN(V27.S2(), V17.S2(), V16.S2()))
|
||||
T(0x1e317886, FMINNM(S6, S4, S17))
|
||||
T(0x1e6e7a5d, FMINNM(D29, D18, D14))
|
||||
T(0x4ea4c44c, FMINNM(V12.S4(), V2.S4(), V4.S4()))
|
||||
T(0x7ef0c895, FMINNMP(D21, V4.D2()))
|
||||
T(0x6efbc4e3, FMINNMP(V3.D2(), V7.D2(), V27.D2()))
|
||||
T(0x6eb0c93d, FMINNMV(S29, V9.S4()))
|
||||
T(0x7ef0fa13, FMINP(D19, V16.D2()))
|
||||
T(0x2eb4f4ac, FMINP(V12.S2(), V5.S2(), V20.S2()))
|
||||
T(0x6eb0f801, FMINV(S1, V0.S4()))
|
||||
T(0x5f8219a6, FMLA(S6, S13, V2.S()[2]))
|
||||
T(0x4fc512a1, FMLA(V1.D2(), V21.D2(), V5.D()[0]))
|
||||
T(0x4e6bcecf, FMLA(V15.D2(), V22.D2(), V11.D2()))
|
||||
T(0x5f8a5094, FMLS(S20, S4, V10.S()[0]))
|
||||
T(0x4fd85b79, FMLS(V25.D2(), V27.D2(), V24.D()[1]))
|
||||
T(0x0ebacca4, FMLS(V4.S2(), V5.S2(), V26.S2()))
|
||||
T(0x1e270027, FMOV(S7, W1))
|
||||
T(0x1e260164, FMOV(W4, S11))
|
||||
T(0x9e670008, FMOV(D8, X0))
|
||||
T(0x9eaf03e0, FMOV(V0.D()[1], XZR))
|
||||
T(0x9e660090, FMOV(X16, D4))
|
||||
T(0x9eae025f, FMOV(XZR, V18.D()[1]))
|
||||
T(0x1e204079, FMOV(S25, S3))
|
||||
T(0x1e6042f8, FMOV(D24, D23))
|
||||
T(0x1e32f01c, FMOV(S28, FImm8{true, 0b001, 0b0111})) // -5.75
|
||||
T(0x1e74901e, FMOV(D30, FImm8{true, 0b010, 0b0100})) // -10.0
|
||||
T(0x0f03f51a, FMOV(V26.S2(), FImm8{false, 0b110, 0b1000})) // 0.75
|
||||
T(0x6f02f58e, FMOV(V14.D2(), FImm8{false, 0b100, 0b1100})) // 0.21875
|
||||
T(0x1f0adaf5, FMSUB(S21, S23, S10, S22))
|
||||
T(0x1f5da840, FMSUB(D0, D2, D29, D10))
|
||||
T(0x5fa39bba, FMUL(S26, S29, V3.S()[3]))
|
||||
T(0x4fb89ad2, FMUL(V18.S4(), V22.S4(), V24.S()[3]))
|
||||
T(0x1e2b0a3c, FMUL(S28, S17, S11))
|
||||
T(0x1e720933, FMUL(D19, D9, D18))
|
||||
T(0x6e7edfa3, FMUL(V3.D2(), V29.D2(), V30.D2()))
|
||||
T(0x5e32dee6, FMULX(S6, S23, S18))
|
||||
T(0x0e27deec, FMULX(V12.S2(), V23.S2(), V7.S2()))
|
||||
T(0x7f879a1f, FMULX(S31, S16, V7.S()[2]))
|
||||
T(0x6fce9836, FMULX(V22.D2(), V1.D2(), V14.D()[1]))
|
||||
T(0x1e2142cc, FNEG(S12, S22))
|
||||
T(0x1e61434b, FNEG(D11, D26))
|
||||
T(0x6ea0fb90, FNEG(V16.S4(), V28.S4()))
|
||||
T(0x1f361be5, FNMADD(S5, S31, S22, S6))
|
||||
T(0x1f7a316d, FNMADD(D13, D11, D26, D12))
|
||||
T(0x1f3e9957, FNMSUB(S23, S10, S30, S6))
|
||||
T(0x1f79da66, FNMSUB(D6, D19, D25, D22))
|
||||
T(0x1e208ab5, FNMUL(S21, S21, S0))
|
||||
T(0x1e6f89eb, FNMUL(D11, D15, D15))
|
||||
T(0x5ea1da18, FRECPE(S24, S16))
|
||||
T(0x0ea1d9df, FRECPE(V31.S2(), V14.S2()))
|
||||
T(0x5e2dfe37, FRECPS(S23, S17, S13))
|
||||
T(0x0e29fcec, FRECPS(V12.S2(), V7.S2(), V9.S2()))
|
||||
T(0x5ee1f998, FRECPX(D24, D12))
|
||||
T(0x1e264106, FRINTA(S6, S8))
|
||||
T(0x1e664376, FRINTA(D22, D27))
|
||||
T(0x6e6188a9, FRINTA(V9.D2(), V5.D2()))
|
||||
T(0x1e27c216, FRINTI(S22, S16))
|
||||
T(0x1e67c071, FRINTI(D17, D3))
|
||||
T(0x6ea19b9d, FRINTI(V29.S4(), V28.S4()))
|
||||
T(0x1e25413e, FRINTM(S30, S9))
|
||||
T(0x1e6541a1, FRINTM(D1, D13))
|
||||
T(0x4e619ad8, FRINTM(V24.D2(), V22.D2()))
|
||||
T(0x1e244098, FRINTN(S24, S4))
|
||||
T(0x1e6440b4, FRINTN(D20, D5))
|
||||
T(0x4e618835, FRINTN(V21.D2(), V1.D2()))
|
||||
T(0x1e24c188, FRINTP(S8, S12))
|
||||
T(0x1e64c292, FRINTP(D18, D20))
|
||||
T(0x0ea18a69, FRINTP(V9.S2(), V19.S2()))
|
||||
T(0x1e274146, FRINTX(S6, S10))
|
||||
T(0x1e674333, FRINTX(D19, D25))
|
||||
T(0x6e619902, FRINTX(V2.D2(), V8.D2()))
|
||||
T(0x1e25c2b2, FRINTZ(S18, S21))
|
||||
T(0x1e65c008, FRINTZ(D8, D0))
|
||||
T(0x0ea19918, FRINTZ(V24.S2(), V8.S2()))
|
||||
T(0x7ea1dbdb, FRSQRTE(S27, S30))
|
||||
T(0x6ee1d8df, FRSQRTE(V31.D2(), V6.D2()))
|
||||
T(0x5ee0ff40, FRSQRTS(D0, D26, D0))
|
||||
T(0x4eb6fe31, FRSQRTS(V17.S4(), V17.S4(), V22.S4()))
|
||||
T(0x1e21c204, FSQRT(S4, S16))
|
||||
T(0x1e61c31c, FSQRT(D28, D24))
|
||||
T(0x6ea1fa1f, FSQRT(V31.S4(), V16.S4()))
|
||||
T(0x1e273b28, FSUB(S8, S25, S7))
|
||||
T(0x1e6139b9, FSUB(D25, D13, D1))
|
||||
T(0x0eadd6b0, FSUB(V16.S2(), V21.S2(), V13.S2()))
|
||||
// INS
|
||||
// INS
|
||||
T(0x0c407b24, LD1(List{V4.S2()}, X25))
|
||||
T(0x4c40a891, LD1(List{V17.S4(), V18.S4()}, X4))
|
||||
T(0x0c406d31, LD1(List{V17.D1(), V18.D1(), V19.D1()}, X9))
|
||||
T(0x4c402b00, LD1(List{V0.S4(), V1.S4(), V2.S4(), V3.S4()}, X24))
|
||||
T(0x4cdf72c8, LD1(List{V8.B16()}, X22, POST_INDEXED, 16))
|
||||
T(0x0cd67504, LD1(List{V4.H4()}, X8, POST_INDEXED, X22))
|
||||
T(0x0cdfaeb7, LD1(List{V23.D1(), V24.D1()}, X21, POST_INDEXED, 16))
|
||||
T(0x0cd0a837, LD1(List{V23.S2(), V24.S2()}, X1, POST_INDEXED, X16))
|
||||
T(0x4cdf6d36, LD1(List{V22.D2(), V23.D2(), V24.D2()}, X9, POST_INDEXED, 48))
|
||||
T(0x0cdc685b, LD1(List{V27.S2(), V28.S2(), V29.S2()}, X2, POST_INDEXED, X28))
|
||||
T(0x0cdf2ebc, LD1(List{V28.D1(), V29.D1(), V30.D1(), V31.D1()}, X21, POST_INDEXED, 32))
|
||||
T(0x0cc0260c, LD1(List{V12.H4(), V13.H4(), V14.H4(), V15.H4()}, X16, POST_INDEXED, X0))
|
||||
T(0x0d400665, LD1(List{V5.B()}[1], X19))
|
||||
T(0x0d4041da, LD1(List{V26.H()}[0], X14))
|
||||
T(0x0d40815b, LD1(List{V27.S()}[0], X10))
|
||||
T(0x0d408755, LD1(List{V21.D()}[0], X26))
|
||||
T(0x4ddf0966, LD1(List{V6.B()}[10], X11, POST_INDEXED, 1))
|
||||
T(0x4dcc1951, LD1(List{V17.B()}[14], X10, POST_INDEXED, X12))
|
||||
T(0x0ddf58cf, LD1(List{V15.H()}[3], X6, POST_INDEXED, 2))
|
||||
T(0x0dd14a3d, LD1(List{V29.H()}[1], X17, POST_INDEXED, X17))
|
||||
T(0x0ddf8072, LD1(List{V18.S()}[0], X3, POST_INDEXED, 4))
|
||||
T(0x4dcb90bb, LD1(List{V27.S()}[3], X5, POST_INDEXED, X11))
|
||||
T(0x4ddf8537, LD1(List{V23.D()}[1], X9, POST_INDEXED, 8))
|
||||
T(0x0dcf8784, LD1(List{V4.D()}[0], X28, POST_INDEXED, X15))
|
||||
T(0x0d40c0f1, LD1R(List{V17.B8()}, X7))
|
||||
T(0x0ddfceac, LD1R(List{V12.D1()}, X21, POST_INDEXED, 8))
|
||||
T(0x4dd5c9c2, LD1R(List{V2.S4()}, X14, POST_INDEXED, X21))
|
||||
T(0x0c408bc8, LD2(List{V8.S2(), V9.S2()}, X30))
|
||||
T(0x0cdf842a, LD2(List{V10.H4(), V11.H4()}, X1, POST_INDEXED, 16))
|
||||
T(0x0cd58678, LD2(List{V24.H4(), V25.H4()}, X19, POST_INDEXED, X21))
|
||||
T(0x0d60132f, LD2(List{V15.B(), V16.B()}[4], X25))
|
||||
T(0x4d605156, LD2(List{V22.H(), V23.H()}[6], X10))
|
||||
T(0x0d609293, LD2(List{V19.S(), V20.S()}[1], X20))
|
||||
T(0x4d608599, LD2(List{V25.D(), V26.D()}[1], X12))
|
||||
T(0x4dff0bd6, LD2(List{V22.B(), V23.B()}[10], X30, POST_INDEXED, 2))
|
||||
T(0x0df90bab, LD2(List{V11.B(), V12.B()}[2], X29, POST_INDEXED, X25))
|
||||
T(0x4dff42c3, LD2(List{V3.H(), V4.H()}[4], X22, POST_INDEXED, 4))
|
||||
T(0x4dfa5816, LD2(List{V22.H(), V23.H()}[7], X0, POST_INDEXED, X26))
|
||||
T(0x4dff9372, LD2(List{V18.S(), V19.S()}[3], X27, POST_INDEXED, 8))
|
||||
T(0x4de483c0, LD2(List{V0.S(), V1.S()}[2], X30, POST_INDEXED, X4))
|
||||
T(0x4dff8714, LD2(List{V20.D(), V21.D()}[1], X24, POST_INDEXED, 16))
|
||||
T(0x4dfa854d, LD2(List{V13.D(), V14.D()}[1], X10, POST_INDEXED, X26))
|
||||
T(0x4d60ca33, LD2R(List{V19.S4(), V20.S4()}, X17))
|
||||
T(0x0dffc777, LD2R(List{V23.H4(), V24.H4()}, X27, POST_INDEXED, 4))
|
||||
T(0x4de9c3cd, LD2R(List{V13.B16(), V14.B16()}, X30, POST_INDEXED, X9))
|
||||
T(0x0c404032, LD3(List{V18.B8(), V19.B8(), V20.B8()}, X1))
|
||||
T(0x0cdf4bc8, LD3(List{V8.S2(), V9.S2(), V10.S2()}, X30, POST_INDEXED, 24))
|
||||
T(0x4ccb4960, LD3(List{V0.S4(), V1.S4(), V2.S4()}, X11, POST_INDEXED, X11))
|
||||
T(0x0d40217c, LD3(List{V28.B(), V29.B(), V30.B()}[0], X11))
|
||||
T(0x4d407a38, LD3(List{V24.H(), V25.H(), V26.H()}[7], X17))
|
||||
T(0x4d40a119, LD3(List{V25.S(), V26.S(), V27.S()}[2], X8))
|
||||
T(0x0d40a6bb, LD3(List{V27.D(), V28.D(), V29.D()}[0], X21))
|
||||
T(0x4ddf2bb1, LD3(List{V17.B(), V18.B(), V19.B()}[10], X29, POST_INDEXED, 3))
|
||||
T(0x4dc13519, LD3(List{V25.B(), V26.B(), V27.B()}[13], X8, POST_INDEXED, X1))
|
||||
T(0x4ddf6b3f, LD3(List{V31.H(), V0.H(), V1.H()}[5], X25, POST_INDEXED, 6))
|
||||
T(0x4dc16243, LD3(List{V3.H(), V4.H(), V5.H()}[4], X18, POST_INDEXED, X1))
|
||||
T(0x4ddfa329, LD3(List{V9.S(), V10.S(), V11.S()}[2], X25, POST_INDEXED, 12))
|
||||
T(0x4ddab328, LD3(List{V8.S(), V9.S(), V10.S()}[3], X25, POST_INDEXED, X26))
|
||||
T(0x4ddfa4e4, LD3(List{V4.D(), V5.D(), V6.D()}[1], X7, POST_INDEXED, 24))
|
||||
T(0x0ddba58c, LD3(List{V12.D(), V13.D(), V14.D()}[0], X12, POST_INDEXED, X27))
|
||||
T(0x0d40e3b3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X29))
|
||||
T(0x0ddfe2f3, LD3R(List{V19.B8(), V20.B8(), V21.B8()}, X23, POST_INDEXED, 3))
|
||||
T(0x0ddbe8e4, LD3R(List{V4.S2(), V5.S2(), V6.S2()}, X7, POST_INDEXED, X27))
|
||||
T(0x4c400a69, LD4(List{V9.S4(), V10.S4(), V11.S4(), V12.S4()}, X19))
|
||||
T(0x0cdf0bea, LD4(List{V10.S2(), V11.S2(), V12.S2(), V13.S2()}, SP, POST_INDEXED, 32))
|
||||
T(0x4cd705ad, LD4(List{V13.H8(), V14.H8(), V15.H8(), V16.H8()}, X13, POST_INDEXED, X23))
|
||||
T(0x0d603b97, LD4(List{V23.B(), V24.B(), V25.B(), V26.B()}[6], X28))
|
||||
T(0x0d606941, LD4(List{V1.H(), V2.H(), V3.H(), V4.H()}[1], X10))
|
||||
T(0x0d60a039, LD4(List{V25.S(), V26.S(), V27.S(), V28.S()}[0], X1))
|
||||
T(0x4d60a4c5, LD4(List{V5.D(), V6.D(), V7.D(), V8.D()}[1], X6))
|
||||
T(0x0dff2139, LD4(List{V25.B(), V26.B(), V27.B(), V28.B()}[0], X9, POST_INDEXED, 4))
|
||||
T(0x4df32513, LD4(List{V19.B(), V20.B(), V21.B(), V22.B()}[9], X8, POST_INDEXED, X19))
|
||||
T(0x0dff7b45, LD4(List{V5.H(), V6.H(), V7.H(), V8.H()}[3], X26, POST_INDEXED, 8))
|
||||
T(0x0dfa6839, LD4(List{V25.H(), V26.H(), V27.H(), V28.H()}[1], X1, POST_INDEXED, X26))
|
||||
T(0x4dffa176, LD4(List{V22.S(), V23.S(), V24.S(), V25.S()}[2], X11, POST_INDEXED, 16))
|
||||
T(0x4de0a125, LD4(List{V5.S(), V6.S(), V7.S(), V8.S()}[2], X9, POST_INDEXED, X0))
|
||||
T(0x0dffa4ab, LD4(List{V11.D(), V12.D(), V13.D(), V14.D()}[0], X5, POST_INDEXED, 32))
|
||||
T(0x0dfba784, LD4(List{V4.D(), V5.D(), V6.D(), V7.D()}[0], X28, POST_INDEXED, X27))
|
||||
T(0x4d60ef82, LD4R(List{V2.D2(), V3.D2(), V4.D2(), V5.D2()}, X28))
|
||||
T(0x0dffef23, LD4R(List{V3.D1(), V4.D1(), V5.D1(), V6.D1()}, X25, POST_INDEXED, 32))
|
||||
T(0x4df5e36a, LD4R(List{V10.B16(), V11.B16(), V12.B16(), V13.B16()}, X27, POST_INDEXED, X21))
|
||||
T(0x2c6dde58, LDNP(S24, S23, X18, -148))
|
||||
T(0x6c5f8ad5, LDNP(D21, D2, X22, 504))
|
||||
T(0xac793251, LDNP(Q17, Q12, X18, -224))
|
||||
T(0x2cf1b345, LDP(S5, S12, X26, POST_INDEXED, -116))
|
||||
T(0x6cc9489a, LDP(D26, D18, X4, POST_INDEXED, 144))
|
||||
T(0xace34b69, LDP(Q9, Q18, X27, POST_INDEXED, -928))
|
||||
T(0x2dca159f, LDP(S31, S5, X12, PRE_INDEXED, 80))
|
||||
T(0x6df9682d, LDP(D13, D26, X1, PRE_INDEXED, -112))
|
||||
T(0xadc7566f, LDP(Q15, Q21, X19, PRE_INDEXED, 224))
|
||||
T(0x2d4efb01, LDP(S1, S30, X24, 116))
|
||||
T(0x6d710b5a, LDP(D26, D2, X26, -240))
|
||||
T(0xad74fbb0, LDP(Q16, Q30, X29, -368))
|
||||
T(0x3c5b76a9, LDR(B9, X21, POST_INDEXED, -73))
|
||||
T(0x7c5fd798, LDR(H24, X28, POST_INDEXED, -3))
|
||||
T(0xbc4336b6, LDR(S22, X21, POST_INDEXED, 51))
|
||||
T(0xfc53b4d5, LDR(D21, X6, POST_INDEXED, -197))
|
||||
T(0x3cdf571d, LDR(Q29, X24, POST_INDEXED, -11))
|
||||
T(0x3c5baf77, LDR(B23, X27, PRE_INDEXED, -70))
|
||||
T(0x7c41bc79, LDR(H25, X3, PRE_INDEXED, 27))
|
||||
T(0xbc48ecb2, LDR(S18, X5, PRE_INDEXED, 142))
|
||||
T(0xfc4b1dee, LDR(D14, X15, PRE_INDEXED, 177))
|
||||
T(0x3cc31c6a, LDR(Q10, X3, PRE_INDEXED, 49))
|
||||
T(0x3d5a0ef6, LDR(B22, X23, 1667))
|
||||
T(0x7d5d8dd7, LDR(H23, X14, 3782))
|
||||
T(0xbd55d41a, LDR(S26, X0, 5588))
|
||||
T(0xfd58c566, LDR(D6, X11, 12680))
|
||||
T(0x3dce966e, LDR(Q14, X19, 14928))
|
||||
T(0x1c8599c0, LDR(S0, -1002696))
|
||||
T(0x5c8a1ca4, LDR(D4, -965740))
|
||||
T(0x9cfd90fa, LDR(Q26, -19940))
|
||||
T(0x3c634a12, LDR(B18, X16, W3, UXTW))
|
||||
T(0x3c7368e7, LDR(B7, X7, X19, LSL, 0))
|
||||
T(0x7c646a38, LDR(H24, X17, X4))
|
||||
T(0xbc727bda, LDR(S26, X30, X18, LSL, 2))
|
||||
T(0xfc63eb36, LDR(D22, X25, X3, SXTX))
|
||||
T(0x3ce2ca06, LDR(Q6, X16, W2, SXTW))
|
||||
T(0x3c4233e6, LDUR(B6, SP, 35))
|
||||
T(0x7c4d52f1, LDUR(H17, X23, 213))
|
||||
T(0xbc5be12f, LDUR(S15, X9, -66))
|
||||
T(0xfc474197, LDUR(D23, X12, 116))
|
||||
T(0x3cd703db, LDUR(Q27, X30, -144))
|
||||
T(0x2f9a0354, MLA(V20.S2(), V26.S2(), V26.S()[0]))
|
||||
T(0x4e7e9643, MLA(V3.H8(), V18.H8(), V30.H8()))
|
||||
T(0x2f80484e, MLS(V14.S2(), V2.S2(), V0.S()[2]))
|
||||
T(0x6ebb9572, MLS(V18.S4(), V11.S4(), V27.S4()))
|
||||
T(0x6e135ec1, MOV(V1.B()[9], V22.B()[11]))
|
||||
T(0x4e0f1da9, MOV(V9.B()[7], W13))
|
||||
T(0x5e0e045d, MOV(H29, V2.H()[3]))
|
||||
T(0x0e043ca1, MOV(W1, V5.S()[0]))
|
||||
T(0x0e0c3da8, MOV(W8, V13.S()[1]))
|
||||
T(0x0e143da8, MOV(W8, V13.S()[2]))
|
||||
T(0x0e1c3da8, MOV(W8, V13.S()[3]))
|
||||
T(0x4e183d03, MOV(X3, V8.D()[1]))
|
||||
T(0x4e083df7, MOV(X23, V15.D()[0]))
|
||||
// MOV
|
||||
T(0x0f06e58e, MOVI(V14.B8(), 204))
|
||||
T(0x4f058559, MOVI(V25.H8(), 170))
|
||||
T(0x0f030565, MOVI(V5.S2(), 107))
|
||||
T(0x0f05c4dc, MOVI(V28.S2(), 166, MSL, 8))
|
||||
T(0x2f07e47e, MOVI(D30, RepImm{0b11100011})) //
|
||||
T(0x6f03e65b, MOVI(V27.D2(), RepImm{0b01110010})) //
|
||||
T(0x0f9e813e, MUL(V30.S2(), V9.S2(), V30.S()[0]))
|
||||
T(0x4ea59f8e, MUL(V14.S4(), V28.S4(), V5.S4()))
|
||||
T(0x2e205acd, MVN(V13.B8(), V22.B8()))
|
||||
T(0x2f0084e1, MVNI(V1.H4(), 7))
|
||||
T(0x6f026602, MVNI(V2.S4(), 80, LSL, 24))
|
||||
T(0x2f03c71a, MVNI(V26.S2(), 120, MSL, 8))
|
||||
T(0x7ee0ba9e, NEG(D30, D20))
|
||||
T(0x2ea0b9f7, NEG(V23.S2(), V15.S2()))
|
||||
// NOT
|
||||
T(0x4ef81f0f, ORN(V15.B16(), V24.B16(), V24.B16()))
|
||||
T(0x4f03b4e0, ORR(V0.H8(), 103, LSL, 8))
|
||||
T(0x4f043508, ORR(V8.S4(), 136, LSL, 8))
|
||||
T(0x4eb21c9c, ORR(V28.B16(), V4.B16(), V18.B16()))
|
||||
T(0x2e279d77, PMUL(V23.B8(), V11.B8(), V7.B8()))
|
||||
T(0x4e27e299, PMULL2(V25.H8(), V20.B16(), V7.B16()))
|
||||
T(0x2eab4048, RADDHN(V8.S2(), V2.D2(), V11.D2()))
|
||||
T(0x6e605b7e, RBIT(V30.B16(), V27.B16()))
|
||||
T(0x0e201b37, REV16(V23.B8(), V25.B8()))
|
||||
T(0x6e60098a, REV32(V10.H8(), V12.H8()))
|
||||
T(0x0e2009de, REV64(V30.B8(), V14.B8()))
|
||||
T(0x4f218e4e, RSHRN2(V14.S4(), V18.D2(), 31))
|
||||
T(0x6e7460f2, RSUBHN2(V18.H8(), V7.S4(), V20.S4()))
|
||||
T(0x0e377f74, SABA(V20.B8(), V27.B8(), V23.B8()))
|
||||
T(0x4ea851f6, SABAL2(V22.D2(), V15.S4(), V8.S4()))
|
||||
T(0x0e777752, SABD(V18.H4(), V26.H4(), V23.H4()))
|
||||
T(0x0eba7005, SABDL(V5.D2(), V0.S2(), V26.S2()))
|
||||
T(0x4e2069c4, SADALP(V4.H8(), V14.B16()))
|
||||
T(0x4e270017, SADDL2(V23.H8(), V0.B16(), V7.B16()))
|
||||
T(0x0ea028ca, SADDLP(V10.D1(), V6.S2()))
|
||||
T(0x4e703b2a, SADDLV(S10, V25.H8()))
|
||||
T(0x0e6311d2, SADDW(V18.S4(), V14.S4(), V3.H4()))
|
||||
T(0x1e02c782, SCVTF(S2, W28, 15))
|
||||
T(0x1e42d0e2, SCVTF(D2, W7, 12))
|
||||
T(0x9e02e80e, SCVTF(S14, X0, 6))
|
||||
T(0x9e423dda, SCVTF(D26, X14, 49))
|
||||
T(0x1e2202f3, SCVTF(S19, W23))
|
||||
T(0x1e6201e7, SCVTF(D7, W15))
|
||||
T(0x9e22016c, SCVTF(S12, X11))
|
||||
T(0x9e620316, SCVTF(D22, X24))
|
||||
T(0x5f34e509, SCVTF(S9, S8, 12))
|
||||
T(0x4f5ae716, SCVTF(V22.D2(), V24.D2(), 38))
|
||||
T(0x5e61d946, SCVTF(D6, D10))
|
||||
T(0x4e61d86b, SCVTF(V11.D2(), V3.D2()))
|
||||
// SHA1C
|
||||
// SHA1H
|
||||
// SHA1M
|
||||
// SHA1P
|
||||
// SHA1SU0
|
||||
// SHA1SU1
|
||||
// SHA256H
|
||||
// SHA256H2
|
||||
// SHA256SU0
|
||||
// SHA256SU1
|
||||
T(0x4eb90506, SHADD(V6.S4(), V8.S4(), V25.S4()))
|
||||
T(0x5f4d5767, SHL(D7, D27, 13))
|
||||
T(0x4f1f542f, SHL(V15.H8(), V1.H8(), 15))
|
||||
T(0x2ea13a71, SHLL(V17.D2(), V19.S2(), 32))
|
||||
T(0x4f0885fd, SHRN2(V29.B16(), V15.H8(), 8))
|
||||
T(0x0eb42794, SHSUB(V20.S2(), V28.S2(), V20.S2()))
|
||||
T(0x7f5f54ad, SLI(D13, D5, 31))
|
||||
T(0x6f09554e, SLI(V14.B16(), V10.B16(), 1))
|
||||
T(0x0e316452, SMAX(V18.B8(), V2.B8(), V17.B8()))
|
||||
T(0x4e66a478, SMAXP(V24.H8(), V3.H8(), V6.H8()))
|
||||
T(0x0e30a9e6, SMAXV(B6, V15.B8()))
|
||||
T(0x4e276e2a, SMIN(V10.B16(), V17.B16(), V7.B16()))
|
||||
T(0x4e29ad73, SMINP(V19.B16(), V11.B16(), V9.B16()))
|
||||
T(0x0e71aac5, SMINV(H5, V22.H4()))
|
||||
T(0x4f9f2b00, SMLAL2(V0.D2(), V24.S4(), V31.S()[2]))
|
||||
T(0x4e788037, SMLAL2(V23.S4(), V1.H8(), V24.H8()))
|
||||
T(0x4f7362b9, SMLSL2(V25.S4(), V21.H8(), V3.H()[3]))
|
||||
T(0x0e31a0d5, SMLSL(V21.H8(), V6.B8(), V17.B8()))
|
||||
T(0x0e162fc3, SMOV(W3, V30.H()[5]))
|
||||
T(0x4e0a2cf2, SMOV(X18, V7.H()[2]))
|
||||
T(0x0f6ba85c, SMULL(V28.S4(), V2.H4(), V11.H()[6]))
|
||||
T(0x4e61c2a1, SMULL2(V1.S4(), V21.H8(), V1.H8()))
|
||||
T(0x5e20794c, SQABS(B12, B10))
|
||||
T(0x4e607b9b, SQABS(V27.H8(), V28.H8()))
|
||||
T(0x5eb50df4, SQADD(S20, S15, S21))
|
||||
T(0x0e370ff4, SQADD(V20.B8(), V31.B8(), V23.B8()))
|
||||
T(0x5fab3a4e, SQDMLAL(D14, S18, V11.S()[3]))
|
||||
T(0x4f5b3805, SQDMLAL2(V5.S4(), V0.H8(), V11.H()[5]))
|
||||
T(0x5e7f90ed, SQDMLAL(S13, H7, H31))
|
||||
T(0x0ea992b2, SQDMLAL(V18.D2(), V21.S2(), V9.S2()))
|
||||
T(0x5f867ba2, SQDMLSL(D2, S29, V6.S()[2]))
|
||||
T(0x4f997118, SQDMLSL2(V24.D2(), V8.S4(), V25.S()[0]))
|
||||
T(0x5e62b0b2, SQDMLSL(S18, H5, H2))
|
||||
T(0x0e74b089, SQDMLSL(V9.S4(), V4.H4(), V20.H4()))
|
||||
T(0x5f5acb3c, SQDMULH(H28, H25, V10.H()[5]))
|
||||
T(0x4f7bc13d, SQDMULH(V29.H8(), V9.H8(), V11.H()[3]))
|
||||
T(0x5e6ab724, SQDMULH(H4, H25, H10))
|
||||
T(0x4ea6b543, SQDMULH(V3.S4(), V10.S4(), V6.S4()))
|
||||
T(0x5f89b899, SQDMULL(D25, S4, V9.S()[2]))
|
||||
T(0x0f53b2ee, SQDMULL(V14.S4(), V23.H4(), V3.H()[1]))
|
||||
T(0x5e60d01a, SQDMULL(S26, H0, H0))
|
||||
T(0x0eb4d146, SQDMULL(V6.D2(), V10.S2(), V20.S2()))
|
||||
T(0x7ee07b81, SQNEG(D1, D28))
|
||||
T(0x2e607a04, SQNEG(V4.H4(), V16.H4()))
|
||||
T(0x5f47dac8, SQRDMULH(H8, H22, V7.H()[4]))
|
||||
T(0x0f45db93, SQRDMULH(V19.H4(), V28.H4(), V5.H()[4]))
|
||||
T(0x7ea3b621, SQRDMULH(S1, S17, S3))
|
||||
T(0x6ea2b672, SQRDMULH(V18.S4(), V19.S4(), V2.S4()))
|
||||
T(0x5e7c5ee7, SQRSHL(H7, H23, H28))
|
||||
T(0x4e655e4b, SQRSHL(V11.H8(), V18.H8(), V5.H8()))
|
||||
T(0x5f0c9c10, SQRSHRN(B16, H0, 4))
|
||||
T(0x4f309e99, SQRSHRN2(V25.S4(), V20.D2(), 16))
|
||||
T(0x7f1f8de7, SQRSHRUN(H7, S15, 1))
|
||||
T(0x6f178f67, SQRSHRUN2(V7.H8(), V27.S4(), 9))
|
||||
T(0x5f7977b8, SQSHL(D24, D29, 57))
|
||||
T(0x4f1e75f3, SQSHL(V19.H8(), V15.H8(), 14))
|
||||
T(0x5eb24f5d, SQSHL(S29, S26, S18))
|
||||
T(0x4e7c4c93, SQSHL(V19.H8(), V4.H8(), V28.H8()))
|
||||
T(0x7f2e66a1, SQSHLU(S1, S21, 14))
|
||||
T(0x6f4c65a2, SQSHLU(V2.D2(), V13.D2(), 12))
|
||||
T(0x5f3f950b, SQSHRN(S11, D8, 1))
|
||||
T(0x4f329646, SQSHRN2(V6.S4(), V18.D2(), 14))
|
||||
T(0x7f188469, SQSHRUN(H9, S3, 8))
|
||||
T(0x6f328478, SQSHRUN2(V24.S4(), V3.D2(), 14))
|
||||
T(0x5e362dae, SQSUB(B14, B13, B22))
|
||||
T(0x0e3c2c86, SQSUB(V6.B8(), V4.B8(), V28.B8()))
|
||||
T(0x5ea149fc, SQXTN(S28, D15))
|
||||
T(0x4e214b24, SQXTN2(V4.B16(), V25.H8()))
|
||||
T(0x7e61290e, SQXTUN(H14, S8))
|
||||
T(0x6ea12b96, SQXTUN2(V22.S4(), V28.D2()))
|
||||
T(0x4eae1673, SRHADD(V19.S4(), V19.S4(), V14.S4()))
|
||||
T(0x7f794647, SRI(D7, D18, 7))
|
||||
T(0x6f654787, SRI(V7.D2(), V28.D2(), 27))
|
||||
T(0x5ee0549e, SRSHL(D30, D4, D0))
|
||||
T(0x4eba55d2, SRSHL(V18.S4(), V14.S4(), V26.S4()))
|
||||
T(0x5f712744, SRSHR(D4, D26, 15))
|
||||
T(0x4f2025f5, SRSHR(V21.S4(), V15.S4(), 32))
|
||||
T(0x5f7734a9, SRSRA(D9, D5, 9))
|
||||
T(0x0f3a371a, SRSRA(V26.S2(), V24.S2(), 6))
|
||||
T(0x5eed44ee, SSHL(D14, D7, D13))
|
||||
T(0x0e704683, SSHL(V3.H4(), V20.H4(), V16.H4()))
|
||||
T(0x4f2aa7c3, SSHLL2(V3.D2(), V30.S4(), 10))
|
||||
T(0x5f5e058d, SSHR(D13, D12, 34))
|
||||
T(0x4f730496, SSHR(V22.D2(), V4.D2(), 13))
|
||||
T(0x5f5e152a, SSRA(D10, D9, 34))
|
||||
T(0x0f21172b, SSRA(V11.S2(), V25.S2(), 31))
|
||||
T(0x4e24220f, SSUBL2(V15.H8(), V16.B16(), V4.B16()))
|
||||
T(0x4e3f32a2, SSUBW2(V2.H8(), V21.H8(), V31.B16()))
|
||||
T(0x0c007a62, ST1(List{V2.S2()}, X19))
|
||||
T(0x4c00adb7, ST1(List{V23.D2(), V24.D2()}, X13))
|
||||
T(0x0c006b92, ST1(List{V18.S2(), V19.S2(), V20.S2()}, X28))
|
||||
T(0x4c0029b8, ST1(List{V24.S4(), V25.S4(), V26.S4(), V27.S4()}, X13))
|
||||
T(0x0c9f7f60, ST1(List{V0.D1()}, X27, POST_INDEXED, 8))
|
||||
T(0x0c9f7ebc, ST1(List{V28.D1()}, X21, POST_INDEXED, 8))
|
||||
T(0x0c9faf06, ST1(List{V6.D1(), V7.D1()}, X24, POST_INDEXED, 16))
|
||||
T(0x4c93aff5, ST1(List{V21.D2(), V22.D2()}, SP, POST_INDEXED, X19))
|
||||
T(0x4c9f6398, ST1(List{V24.B16(), V25.B16(), V26.B16()}, X28, POST_INDEXED, 48))
|
||||
T(0x4c8162ff, ST1(List{V31.B16(), V0.B16(), V1.B16()}, X23, POST_INDEXED, X1))
|
||||
T(0x0c9f23ee, ST1(List{V14.B8(), V15.B8(), V16.B8(), V17.B8()}, SP, POST_INDEXED, 32))
|
||||
T(0x4c862148, ST1(List{V8.B16(), V9.B16(), V10.B16(), V11.B16()}, X10, POST_INDEXED, X6))
|
||||
T(0x0d001c7a, ST1(List{V26.B()}[7], X3))
|
||||
T(0x0d005b54, ST1(List{V20.H()}[3], X26))
|
||||
T(0x4d009392, ST1(List{V18.S()}[3], X28))
|
||||
T(0x4d008509, ST1(List{V9.D()}[1], X8))
|
||||
T(0x4d9f1246, ST1(List{V6.B()}[12], X18, POST_INDEXED, 1))
|
||||
T(0x0d8c17f5, ST1(List{V21.B()}[5], SP, POST_INDEXED, X12))
|
||||
T(0x4d9f53ee, ST1(List{V14.H()}[6], SP, POST_INDEXED, 2))
|
||||
T(0x0d8f48c4, ST1(List{V4.H()}[1], X6, POST_INDEXED, X15))
|
||||
T(0x4d9f8185, ST1(List{V5.S()}[2], X12, POST_INDEXED, 4))
|
||||
T(0x0d8c92bc, ST1(List{V28.S()}[1], X21, POST_INDEXED, X12))
|
||||
T(0x4d9f86b3, ST1(List{V19.D()}[1], X21, POST_INDEXED, 8))
|
||||
T(0x4d9c8442, ST1(List{V2.D()}[1], X2, POST_INDEXED, X28))
|
||||
T(0x4c008a69, ST2(List{V9.S4(), V10.S4()}, X19))
|
||||
T(0x4c9f8930, ST2(List{V16.S4(), V17.S4()}, X9, POST_INDEXED, 32))
|
||||
T(0x0c9a8993, ST2(List{V19.S2(), V20.S2()}, X12, POST_INDEXED, X26))
|
||||
T(0x0d2001ac, ST2(List{V12.B(), V13.B()}[0], X13))
|
||||
T(0x4d20495c, ST2(List{V28.H(), V29.H()}[5], X10))
|
||||
T(0x4d2093e4, ST2(List{V4.S(), V5.S()}[3], SP))
|
||||
T(0x4d208482, ST2(List{V2.D(), V3.D()}[1], X4))
|
||||
T(0x4dbf0e40, ST2(List{V0.B(), V1.B()}[11], X18, POST_INDEXED, 2))
|
||||
T(0x0db8085f, ST2(List{V31.B(), V0.B()}[2], X2, POST_INDEXED, X24))
|
||||
T(0x0dbf4a2d, ST2(List{V13.H(), V14.H()}[1], X17, POST_INDEXED, 4))
|
||||
T(0x4db1417e, ST2(List{V30.H(), V31.H()}[4], X11, POST_INDEXED, X17))
|
||||
T(0x0dbf81af, ST2(List{V15.S(), V16.S()}[0], X13, POST_INDEXED, 8))
|
||||
T(0x0dbf831c, ST2(List{V28.S(), V29.S()}[0], X24, POST_INDEXED, 8))
|
||||
T(0x0dbf846a, ST2(List{V10.D(), V11.D()}[0], X3, POST_INDEXED, 16))
|
||||
T(0x0dab85dc, ST2(List{V28.D(), V29.D()}[0], X14, POST_INDEXED, X11))
|
||||
T(0x0c004a09, ST3(List{V9.S2(), V10.S2(), V11.S2()}, X16))
|
||||
T(0x4c9f4768, ST3(List{V8.H8(), V9.H8(), V10.H8()}, X27, POST_INDEXED, 48))
|
||||
T(0x0c944918, ST3(List{V24.S2(), V25.S2(), V26.S2()}, X8, POST_INDEXED, X20))
|
||||
T(0x0d003f80, ST3(List{V0.B(), V1.B(), V2.B()}[7], X28))
|
||||
T(0x0d007306, ST3(List{V6.H(), V7.H(), V8.H()}[2], X24))
|
||||
T(0x0d00b131, ST3(List{V17.S(), V18.S(), V19.S()}[1], X9))
|
||||
T(0x4d00a5f8, ST3(List{V24.D(), V25.D(), V26.D()}[1], X15))
|
||||
T(0x0d9f27c1, ST3(List{V1.B(), V2.B(), V3.B()}[1], X30, POST_INDEXED, 3))
|
||||
T(0x4d992bb2, ST3(List{V18.B(), V19.B(), V20.B()}[10], X29, POST_INDEXED, X25))
|
||||
T(0x0d9f785d, ST3(List{V29.H(), V30.H(), V31.H()}[3], X2, POST_INDEXED, 6))
|
||||
T(0x4d8b726b, ST3(List{V11.H(), V12.H(), V13.H()}[6], X19, POST_INDEXED, X11))
|
||||
T(0x4d9fa342, ST3(List{V2.S(), V3.S(), V4.S()}[2], X26, POST_INDEXED, 12))
|
||||
T(0x4d80b206, ST3(List{V6.S(), V7.S(), V8.S()}[3], X16, POST_INDEXED, X0))
|
||||
T(0x4d9fa5de, ST3(List{V30.D(), V31.D(), V0.D()}[1], X14, POST_INDEXED, 24))
|
||||
T(0x4d8ba6d7, ST3(List{V23.D(), V24.D(), V25.D()}[1], X22, POST_INDEXED, X11))
|
||||
T(0x0c00034f, ST4(List{V15.B8(), V16.B8(), V17.B8(), V18.B8()}, X26))
|
||||
T(0x4c9f038c, ST4(List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, X28, POST_INDEXED, 64))
|
||||
T(0x4c800719, ST4(List{V25.H8(), V26.H8(), V27.H8(), V28.H8()}, X24, POST_INDEXED, X0))
|
||||
T(0x0d2021a8, ST4(List{V8.B(), V9.B(), V10.B(), V11.B()}[0], X13))
|
||||
T(0x4d2062cd, ST4(List{V13.H(), V14.H(), V15.H(), V16.H()}[4], X22))
|
||||
T(0x0d20b146, ST4(List{V6.S(), V7.S(), V8.S(), V9.S()}[1], X10))
|
||||
T(0x4d20a6f5, ST4(List{V21.D(), V22.D(), V23.D(), V24.D()}[1], X23))
|
||||
T(0x0dbf2d56, ST4(List{V22.B(), V23.B(), V24.B(), V25.B()}[3], X10, POST_INDEXED, 4))
|
||||
T(0x4da631df, ST4(List{V31.B(), V0.B(), V1.B(), V2.B()}[12], X14, POST_INDEXED, X6))
|
||||
T(0x0dbf7a76, ST4(List{V22.H(), V23.H(), V24.H(), V25.H()}[3], X19, POST_INDEXED, 8))
|
||||
T(0x0dbb698e, ST4(List{V14.H(), V15.H(), V16.H(), V17.H()}[1], X12, POST_INDEXED, X27))
|
||||
T(0x4dbfb37f, ST4(List{V31.S(), V0.S(), V1.S(), V2.S()}[3], X27, POST_INDEXED, 16))
|
||||
T(0x4dadb3d1, ST4(List{V17.S(), V18.S(), V19.S(), V20.S()}[3], X30, POST_INDEXED, X13))
|
||||
T(0x4dbfa5b3, ST4(List{V19.D(), V20.D(), V21.D(), V22.D()}[1], X13, POST_INDEXED, 32))
|
||||
T(0x4db5a7cf, ST4(List{V15.D(), V16.D(), V17.D(), V18.D()}[1], X30, POST_INDEXED, X21))
|
||||
T(0x2c29149a, STNP(S26, S5, X4, -184))
|
||||
T(0x6c229316, STNP(D22, D4, X24, -472))
|
||||
T(0xac3bc3c8, STNP(Q8, Q16, X30, -144))
|
||||
T(0x2cacdf66, STP(S6, S23, X27, POST_INDEXED, -156))
|
||||
T(0x6c826f4f, STP(D15, D27, X26, POST_INDEXED, 32))
|
||||
T(0xac97955a, STP(Q26, Q5, X10, POST_INDEXED, 752))
|
||||
T(0x2da7ba37, STP(S23, S14, X17, PRE_INDEXED, -196))
|
||||
T(0x6d8bcbce, STP(D14, D18, X30, PRE_INDEXED, 184))
|
||||
T(0xad8b4ba6, STP(Q6, Q18, X29, PRE_INDEXED, 352))
|
||||
T(0x2d1f7434, STP(S20, S29, X1, 248))
|
||||
T(0x6d3bb5d8, STP(D24, D13, X14, -72))
|
||||
T(0xad09088a, STP(Q10, Q2, X4, 288))
|
||||
T(0x3c066467, STR(B7, X3, POST_INDEXED, 102))
|
||||
T(0x7c070723, STR(H3, X25, POST_INDEXED, 112))
|
||||
T(0xbc13175a, STR(S26, X26, POST_INDEXED, -207))
|
||||
T(0xfc1be536, STR(D22, X9, POST_INDEXED, -66))
|
||||
T(0x3c99b56b, STR(Q11, X11, POST_INDEXED, -101))
|
||||
T(0x3c002d49, STR(B9, X10, PRE_INDEXED, 2))
|
||||
T(0x7c158e09, STR(H9, X16, PRE_INDEXED, -168))
|
||||
T(0xbc06bc8d, STR(S13, X4, PRE_INDEXED, 107))
|
||||
T(0xfc080eae, STR(D14, X21, PRE_INDEXED, 128))
|
||||
T(0x3c8e7ed9, STR(Q25, X22, PRE_INDEXED, 231))
|
||||
T(0x3d275492, STR(B18, X4, 2517))
|
||||
T(0x7d0b4265, STR(H5, X19, 1440))
|
||||
T(0xbd0d2595, STR(S21, X12, 3364))
|
||||
T(0xfd237a73, STR(D19, X19, 18160))
|
||||
T(0x3db4a5f5, STR(Q21, X15, 53904))
|
||||
T(0x3c3e693c, STR(B28, X9, X30, LSL, 0))
|
||||
T(0x3c3b6ac5, STR(B5, X22, X27, LSL, 0))
|
||||
T(0x7c36faf0, STR(H16, X23, X22, SXTX, 1))
|
||||
T(0xbc27f838, STR(S24, X1, X7, SXTX, 2))
|
||||
T(0xfc29db51, STR(D17, X26, W9, SXTW, 3))
|
||||
T(0x3cbfea8f, STR(Q15, X20, XZR, SXTX))
|
||||
T(0x3c0441c8, STUR(B8, X14, 68))
|
||||
T(0x7c00b0d7, STUR(H23, X6, 11))
|
||||
T(0xbc0d117d, STUR(S29, X11, 209))
|
||||
T(0xfc1f03c0, STUR(D0, X30, -16))
|
||||
T(0x3c9753f0, STUR(Q16, SP, -139))
|
||||
T(0x7eeb84f9, SUB(D25, D7, D11))
|
||||
T(0x6e708714, SUB(V20.H8(), V24.H8(), V16.H8()))
|
||||
T(0x4e766323, SUBHN2(V3.H8(), V25.S4(), V22.S4()))
|
||||
T(0x5e203935, SUQADD(B21, B9))
|
||||
T(0x4e203b33, SUQADD(V19.B16(), V25.B16()))
|
||||
// SXTL
|
||||
T(0x0e0c20db, TBL(V27.B8(), List{V6.B16(), V7.B16()}, V12.B8()))
|
||||
T(0x4e1d43ab, TBL(V11.B16(), List{V29.B16(), V30.B16(), V31.B16()}, V29.B16()))
|
||||
T(0x0e07634f, TBL(V15.B8(), List{V26.B16(), V27.B16(), V28.B16(), V29.B16()}, V7.B8()))
|
||||
T(0x0e0603b9, TBL(V25.B8(), List{V29.B16()}, V6.B8()))
|
||||
T(0x0e05317a, TBX(V26.B8(), List{V11.B16(), V12.B16()}, V5.B8()))
|
||||
T(0x4e0150ca, TBX(V10.B16(), List{V6.B16(), V7.B16(), V8.B16()}, V1.B16()))
|
||||
T(0x4e0e7190, TBX(V16.B16(), List{V12.B16(), V13.B16(), V14.B16(), V15.B16()}, V14.B16()))
|
||||
T(0x4e1b1333, TBX(V19.B16(), List{V25.B16()}, V27.B16()))
|
||||
T(0x4e0829e3, TRN1(V3.B16(), V15.B16(), V8.B16()))
|
||||
T(0x4ecc6b24, TRN2(V4.D2(), V25.D2(), V12.D2()))
|
||||
T(0x2e697f5d, UABA(V29.H4(), V26.H4(), V9.H4()))
|
||||
T(0x2e36519e, UABAL(V30.H8(), V12.B8(), V22.B8()))
|
||||
T(0x6e6975e0, UABD(V0.H8(), V15.H8(), V9.H8()))
|
||||
T(0x2e2e718a, UABDL(V10.H8(), V12.B8(), V14.B8()))
|
||||
T(0x6ea069b1, UADALP(V17.D2(), V13.S4()))
|
||||
T(0x2e6d0349, UADDL(V9.S4(), V26.H4(), V13.H4()))
|
||||
T(0x6e602bfc, UADDLP(V28.S4(), V31.H8()))
|
||||
T(0x6e703b6d, UADDLV(S13, V27.H8()))
|
||||
T(0x2e781352, UADDW(V18.S4(), V26.S4(), V24.H4()))
|
||||
T(0x1e03ec95, UCVTF(S21, W4, 5))
|
||||
T(0x1e43fd36, UCVTF(D22, W9, 1))
|
||||
T(0x9e03a27b, UCVTF(S27, X19, 24))
|
||||
T(0x9e43e9c4, UCVTF(D4, X14, 6))
|
||||
T(0x1e230096, UCVTF(S22, W4))
|
||||
T(0x1e630076, UCVTF(D22, W3))
|
||||
T(0x9e2302c8, UCVTF(S8, X22))
|
||||
T(0x9e6302cd, UCVTF(D13, X22))
|
||||
T(0x7f2ce5a2, UCVTF(S2, S13, 20))
|
||||
T(0x6f4be788, UCVTF(V8.D2(), V28.D2(), 53))
|
||||
T(0x7e21d87f, UCVTF(S31, S3))
|
||||
T(0x2e21da7d, UCVTF(V29.S2(), V19.S2()))
|
||||
T(0x2e7b0674, UHADD(V20.H4(), V19.H4(), V27.H4()))
|
||||
T(0x6ea9277f, UHSUB(V31.S4(), V27.S4(), V9.S4()))
|
||||
T(0x6e7a6658, UMAX(V24.H8(), V18.H8(), V26.H8()))
|
||||
T(0x2e23a513, UMAXP(V19.B8(), V8.B8(), V3.B8()))
|
||||
T(0x2e70a9b5, UMAXV(H21, V13.H4()))
|
||||
T(0x6e7d6ef2, UMIN(V18.H8(), V23.H8(), V29.H8()))
|
||||
T(0x2e6eae4e, UMINP(V14.H4(), V18.H4(), V14.H4()))
|
||||
T(0x2e71abe6, UMINV(H6, V31.H4()))
|
||||
T(0x6fb820fa, UMLAL2(V26.D2(), V7.S4(), V24.S()[1]))
|
||||
T(0x6ebc83ab, UMLAL2(V11.D2(), V29.S4(), V28.S4()))
|
||||
T(0x2f5c61cf, UMLSL(V15.S4(), V14.H4(), V12.H()[1]))
|
||||
T(0x6e6aa2e2, UMLSL2(V2.S4(), V23.H8(), V10.H8()))
|
||||
T(0x0e0f3fb8, UMOV(W24, V29.B()[7]))
|
||||
// UMOV
|
||||
T(0x6f62a05c, UMULL2(V28.S4(), V2.H8(), V2.H()[2]))
|
||||
T(0x6e6cc3b0, UMULL2(V16.S4(), V29.H8(), V12.H8()))
|
||||
T(0x7ea40f68, UQADD(S8, S27, S4))
|
||||
T(0x6eac0e8f, UQADD(V15.S4(), V20.S4(), V12.S4()))
|
||||
T(0x7e2a5df5, UQRSHL(B21, B15, B10))
|
||||
T(0x6ef55fc9, UQRSHL(V9.D2(), V30.D2(), V21.D2()))
|
||||
T(0x7f0b9db4, UQRSHRN(B20, H13, 5))
|
||||
T(0x2f159d7d, UQRSHRN(V29.H4(), V11.S4(), 11))
|
||||
T(0x7f6c755c, UQSHL(D28, D10, 44))
|
||||
T(0x6f6175ec, UQSHL(V12.D2(), V15.D2(), 33))
|
||||
T(0x7eef4ff4, UQSHL(D20, D31, D15))
|
||||
T(0x6e3d4f2e, UQSHL(V14.B16(), V25.B16(), V29.B16()))
|
||||
T(0x7f1f94d2, UQSHRN(H18, S6, 1))
|
||||
T(0x6f3397e4, UQSHRN2(V4.S4(), V31.D2(), 13))
|
||||
T(0x7ee12cad, UQSUB(D13, D5, D1))
|
||||
T(0x2e712ff3, UQSUB(V19.H4(), V31.H4(), V17.H4()))
|
||||
T(0x7e614b06, UQXTN(H6, S24))
|
||||
T(0x6e2149ec, UQXTN2(V12.B16(), V15.H8()))
|
||||
T(0x0ea1c849, URECPE(V9.S2(), V2.S2()))
|
||||
T(0x6eb51740, URHADD(V0.S4(), V26.S4(), V21.S4()))
|
||||
T(0x7eeb57f8, URSHL(D24, D31, D11))
|
||||
T(0x6e335531, URSHL(V17.B16(), V9.B16(), V19.B16()))
|
||||
T(0x7f65253d, URSHR(D29, D9, 27))
|
||||
T(0x2f102566, URSHR(V6.H4(), V11.H4(), 16))
|
||||
T(0x2ea1cb59, URSQRTE(V25.S2(), V26.S2()))
|
||||
T(0x7f54345f, URSRA(D31, D2, 44))
|
||||
T(0x2f1b345f, URSRA(V31.H4(), V2.H4(), 5))
|
||||
T(0x7ef94448, USHL(D8, D2, D25))
|
||||
T(0x6ea14621, USHL(V1.S4(), V17.S4(), V1.S4()))
|
||||
T(0x2f33a5a1, USHLL(V1.D2(), V13.S2(), 19))
|
||||
T(0x7f5405d0, USHR(D16, D14, 44))
|
||||
T(0x6f450505, USHR(V5.D2(), V8.D2(), 59))
|
||||
T(0x7ea038c1, USQADD(S1, S6))
|
||||
T(0x2e203b60, USQADD(V0.B8(), V27.B8()))
|
||||
T(0x7f4616d2, USRA(D18, D22, 58))
|
||||
T(0x2f1a1713, USRA(V19.H4(), V24.H4(), 6))
|
||||
T(0x2e3f226e, USUBL(V14.H8(), V19.B8(), V31.B8()))
|
||||
T(0x6e7a33a0, USUBW2(V0.S4(), V29.S4(), V26.H8()))
|
||||
// UXTL
|
||||
T(0x4e1b1a1f, UZP1(V31.B16(), V16.B16(), V27.B16()))
|
||||
T(0x4ecc597b, UZP2(V27.D2(), V11.D2(), V12.D2()))
|
||||
T(0x0e212af7, XTN(V23.B8(), V23.H8()))
|
||||
T(0x4e853928, ZIP1(V8.S4(), V9.S4(), V5.S4()))
|
||||
T(0x0e977a78, ZIP2(V24.S2(), V19.S2(), V23.S2()))
|
1149
externals/oaknut/tests/general.cpp
vendored
Normal file
1149
externals/oaknut/tests/general.cpp
vendored
Normal file
File diff suppressed because it is too large
Load diff
20
externals/oaknut/tests/rand_int.hpp
vendored
Normal file
20
externals/oaknut/tests/rand_int.hpp
vendored
Normal file
|
@ -0,0 +1,20 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <random>
|
||||
#include <type_traits>
|
||||
|
||||
/// Returns a uniformly distributed random integer in the closed range [min, max].
///
/// @tparam T  Integral type of the result. Character types and bool are rejected
///            at compile time because std::uniform_int_distribution is not
///            specified for them.
/// @param min Lower bound (inclusive).
/// @param max Upper bound (inclusive). std::uniform_int_distribution requires
///            min <= max; this is not checked here.
/// @return A value drawn from a std::mt19937 that is seeded once from
///         std::random_device on first use.
template<typename T>
T RandInt(T min, T max)
{
    static_assert(std::is_integral_v<T>, "T must be an integral type.");
    // Plain char is a distinct type from signed char and unsigned char, so it has
    // to be listed explicitly or it slips past the check.
    static_assert(!std::is_same_v<T, char> && !std::is_same_v<T, signed char> && !std::is_same_v<T, unsigned char>,
                  "Using char with uniform_int_distribution is undefined behavior.");
    // bool also satisfies is_integral_v but is not a supported distribution type.
    static_assert(!std::is_same_v<T, bool>,
                  "Using bool with uniform_int_distribution is undefined behavior.");

    // Function-local statics: the engine is seeded once and then reused by later calls.
    static std::random_device rd;
    static std::mt19937 mt(rd());
    std::uniform_int_distribution<T> rand(min, max);
    return rand(mt);
}
|
91
externals/oaknut/tests/vector_code_gen.cpp
vendored
Normal file
91
externals/oaknut/tests/vector_code_gen.cpp
vendored
Normal file
|
@ -0,0 +1,91 @@
|
|||
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
|
||||
// SPDX-License-Identifier: MIT
|
||||
|
||||
#include <cstdint>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
#include <vector>
|
||||
|
||||
#include <catch2/catch_test_macros.hpp>
|
||||
|
||||
#include "architecture.hpp"
|
||||
|
||||
#ifdef ON_ARM64
|
||||
|
||||
# include "oaknut/code_block.hpp"
|
||||
# include "oaknut/oaknut.hpp"
|
||||
# include "rand_int.hpp"
|
||||
|
||||
using namespace oaknut;
|
||||
using namespace oaknut::util;
|
||||
|
||||
// Emits a two-instruction routine (MOV W0, 42; RET) into a std::vector through
// VectorCodeGenerator, copies the words into a CodeBlock, and checks that calling
// the copied code returns 42.
TEST_CASE("Basic Test (VectorCodeGenerator)")
{
    std::vector<std::uint32_t> instructions;
    VectorCodeGenerator gen{instructions};

    gen.MOV(W0, 42);
    gen.RET();

    CodeBlock block{4096};

    // The copy is bracketed by unprotect()/protect(); presumably these toggle the
    // block between writable and executable -- confirm against CodeBlock.
    block.unprotect();
    const std::size_t byte_count = instructions.size() * sizeof(std::uint32_t);
    std::memcpy(block.ptr(), instructions.data(), byte_count);
    block.protect();
    block.invalidate_all();

    using EntryPoint = int (*)();
    const auto entry = reinterpret_cast<EntryPoint>(block.ptr());

    int result = entry();
    REQUIRE(result == 42);
}
|
||||
|
||||
// Emits a recursive Fibonacci routine through VectorCodeGenerator, copies it into
// a CodeBlock, and checks a handful of known values.
TEST_CASE("Fibonacci (VectorCodeGenerator)")
{
    std::vector<std::uint32_t> instructions;
    VectorCodeGenerator gen{instructions};

    Label fn_entry;
    Label fn_exit;
    Label ret_zero;
    Label do_recurse;

    // Prologue: save X29/X30 with a 32-byte pre-indexed push, then X20/X19 at SP+16.
    gen.l(fn_entry);
    gen.STP(X29, X30, SP, PRE_INDEXED, -32);
    gen.STP(X20, X19, SP, 16);
    gen.MOV(X29, SP);
    gen.MOV(W19, W0);       // W19 = n, kept for the second recursive call
    gen.SUBS(W0, W0, 1);    // W0 = n - 1, flags set from the subtraction
    gen.B(LT, ret_zero);    // n - 1 < 0  -> result is 0
    gen.B(NE, do_recurse);  // n - 1 != 0 -> recurse
    gen.MOV(W0, 1);         // otherwise n == 1 -> result is 1
    gen.B(fn_exit);

    gen.l(ret_zero);
    gen.MOV(W0, WZR);
    gen.B(fn_exit);

    gen.l(do_recurse);
    gen.BL(fn_entry);       // call with W0 = n - 1
    gen.MOV(W20, W0);       // W20 = first recursive result
    gen.SUB(W0, W19, 2);
    gen.BL(fn_entry);       // call with W0 = n - 2
    gen.ADD(W0, W0, W20);

    // Epilogue: reload the saved registers in reverse order and return.
    gen.l(fn_exit);
    gen.LDP(X20, X19, SP, 16);
    gen.LDP(X29, X30, SP, POST_INDEXED, 32);
    gen.RET();

    CodeBlock block{4096};

    block.unprotect();
    const std::size_t byte_count = instructions.size() * sizeof(std::uint32_t);
    std::memcpy(block.ptr(), instructions.data(), byte_count);
    block.protect();
    block.invalidate_all();

    using FibFn = int (*)(int);
    auto fib = reinterpret_cast<FibFn>(block.ptr());

    REQUIRE(fib(0) == 0);
    REQUIRE(fib(1) == 1);
    REQUIRE(fib(5) == 5);
    REQUIRE(fib(9) == 34);
}
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue