Move dead submodules in-tree

Signed-off-by: swurl <swurl@swurl.xyz>
This commit is contained in:
swurl 2025-05-31 02:33:02 -04:00
parent c0cceff365
commit 6c655321e6
No known key found for this signature in database
GPG key ID: A5A7629F109C8FD1
4081 changed files with 1185566 additions and 45 deletions

View file

@ -0,0 +1,143 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>
#if defined(_WIN32)
# define NOMINMAX
# include <windows.h>
#elif defined(__APPLE__)
# include <TargetConditionals.h>
# include <libkern/OSCacheControl.h>
# include <pthread.h>
# include <sys/mman.h>
# include <unistd.h>
#else
# include <sys/mman.h>
#endif
namespace oaknut {
/// Owns a single region of memory intended to hold generated machine code.
///
/// The constructor maps `size` bytes with the platform's allocation call and the
/// destructor releases them again. The type is neither copyable nor movable.
class CodeBlock {
public:
    /// Maps `size` bytes of memory.
    ///
    /// The initial page protection depends on the platform (see the individual
    /// branches below); use protect()/unprotect() to switch where required.
    ///
    /// @throws std::bad_alloc if the platform allocation call fails.
    explicit CodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_memory = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
        if (m_memory == nullptr)
            throw std::bad_alloc{};
#else
# if defined(__APPLE__)
#  if TARGET_OS_IPHONE
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
#  else
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE | MAP_JIT, -1, 0);
#  endif
# elif defined(__NetBSD__)
        void* const mapping = mmap(nullptr, size, PROT_MPROTECT(PROT_READ | PROT_WRITE | PROT_EXEC), MAP_ANON | MAP_PRIVATE, -1, 0);
# elif defined(__OpenBSD__)
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
# else
        void* const mapping = mmap(nullptr, size, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_ANON | MAP_PRIVATE, -1, 0);
# endif
        // mmap reports failure with MAP_FAILED, not with a null pointer.
        if (mapping == MAP_FAILED)
            throw std::bad_alloc{};
        m_memory = static_cast<std::uint32_t*>(mapping);
#endif
    }

    /// Releases the mapping created by the constructor.
    ~CodeBlock()
    {
        if (m_memory == nullptr)
            return;
#if defined(_WIN32)
        VirtualFree((void*)m_memory, 0, MEM_RELEASE);
#else
        munmap(m_memory, m_size);
#endif
    }

    CodeBlock(const CodeBlock&) = delete;
    CodeBlock& operator=(const CodeBlock&) = delete;
    CodeBlock(CodeBlock&&) = delete;
    CodeBlock& operator=(CodeBlock&&) = delete;

    /// Returns the start of the mapped region.
    std::uint32_t* ptr() const
    {
        return m_memory;
    }

    /// Switches the block to its read+execute state.
    ///
    /// macOS (non-iPhone): calls pthread_jit_write_protect_np(1).
    /// iPhone targets, NetBSD, OpenBSD: mprotect to PROT_READ | PROT_EXEC.
    /// All other platforms: no-op.
    void protect()
    {
#if defined(__APPLE__) && !TARGET_OS_IPHONE
        pthread_jit_write_protect_np(1);
#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
        mprotect(m_memory, m_size, PROT_READ | PROT_EXEC);
#endif
    }

    /// Switches the block to its writable state.
    ///
    /// macOS (non-iPhone): calls pthread_jit_write_protect_np(0).
    /// iPhone targets, NetBSD, OpenBSD: mprotect to PROT_READ | PROT_WRITE.
    /// All other platforms: no-op.
    void unprotect()
    {
#if defined(__APPLE__) && !TARGET_OS_IPHONE
        pthread_jit_write_protect_np(0);
#elif defined(__APPLE__) || defined(__NetBSD__) || defined(__OpenBSD__)
        mprotect(m_memory, m_size, PROT_READ | PROT_WRITE);
#endif
    }

    /// Makes instruction fetches observe code written to [mem, mem + size).
    ///
    /// Apple and Windows delegate to the OS call. Elsewhere the cache maintenance
    /// is performed by hand with AArch64 instructions.
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
        FlushInstructionCache(GetCurrentProcess(), mem, size);
#else
        // Smallest line sizes observed so far; they only ever shrink across calls.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // Line sizes are derived from CTR_EL0 bits [3:0] and [19:16] as 4 << field.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Step 1: `dc cvau` on every data-cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Step 2: `ic ivau` on every instruction-cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Runs invalidate() over the whole block.
    void invalidate_all()
    {
        invalidate(m_memory, m_size);
    }

protected:
    std::uint32_t* m_memory = nullptr;
    std::size_t m_size = 0;
};
} // namespace oaknut

View file

@ -0,0 +1,165 @@
// SPDX-FileCopyrightText: Copyright (c) 2024 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <new>
#if defined(_WIN32)
# define NOMINMAX
# include <windows.h>
#elif defined(__APPLE__)
# include <mach/mach.h>
# include <mach/vm_map.h>
# include <TargetConditionals.h>
# include <libkern/OSCacheControl.h>
# include <pthread.h>
# include <sys/mman.h>
# include <unistd.h>
#else
# if !defined(_GNU_SOURCE)
# define _GNU_SOURCE
# endif
# include <sys/mman.h>
# include <sys/types.h>
# include <unistd.h>
#endif
namespace oaknut {
/// Owns a block of code memory exposed through two pointers: a writable view
/// (wptr) and an executable view (xptr).
///
/// Windows: both pointers refer to one PAGE_EXECUTE_READWRITE allocation.
/// Apple: an anonymous RW mapping is remapped with vm_remap and the second
///        mapping is changed to R-X.
/// Other platforms: a shared-memory file descriptor is mapped twice, once RW
///        and once R-X.
///
/// The type is neither copyable nor movable.
class DualCodeBlock {
public:
    /// Creates the two views over `size` bytes.
    ///
    /// Every resource acquired before a failure is released again before the
    /// exception leaves, because the destructor does not run for an object whose
    /// constructor throws.
    ///
    /// @throws std::bad_alloc if any platform call fails.
    explicit DualCodeBlock(std::size_t size)
        : m_size(size)
    {
#if defined(_WIN32)
        m_wmem = m_xmem = (std::uint32_t*)VirtualAlloc(nullptr, size, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
        if (m_wmem == nullptr)
            throw std::bad_alloc{};
#elif defined(__APPLE__)
        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
        if (wmem == MAP_FAILED)
            throw std::bad_alloc{};
        m_wmem = static_cast<std::uint32_t*>(wmem);

        vm_prot_t cur_prot, max_prot;
        kern_return_t ret = vm_remap(mach_task_self(), (vm_address_t*)&m_xmem, size, 0, VM_FLAGS_ANYWHERE | VM_FLAGS_RANDOM_ADDR, mach_task_self(), (mach_vm_address_t)m_wmem, false, &cur_prot, &max_prot, VM_INHERIT_NONE);
        if (ret != KERN_SUCCESS) {
            munmap(m_wmem, size);
            throw std::bad_alloc{};
        }

        // Without R-X on the second view the block cannot serve its purpose.
        if (mprotect(m_xmem, size, PROT_READ | PROT_EXEC) != 0) {
            munmap(m_xmem, size);
            munmap(m_wmem, size);
            throw std::bad_alloc{};
        }
#else
# if defined(__OpenBSD__)
        char tmpl[] = "oaknut_dual_code_block.XXXXXXXXXX";
        fd = shm_mkstemp(tmpl);
        if (fd < 0)
            throw std::bad_alloc{};
        shm_unlink(tmpl);
# else
        fd = memfd_create("oaknut_dual_code_block", 0);
        if (fd < 0)
            throw std::bad_alloc{};
# endif

        if (ftruncate(fd, size) != 0) {
            close(fd);
            throw std::bad_alloc{};
        }

        void* const wmem = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        void* const xmem = mmap(nullptr, size, PROT_READ | PROT_EXEC, MAP_SHARED, fd, 0);
        if (wmem == MAP_FAILED || xmem == MAP_FAILED) {
            // Undo whichever half succeeded.
            if (wmem != MAP_FAILED)
                munmap(wmem, size);
            if (xmem != MAP_FAILED)
                munmap(xmem, size);
            close(fd);
            throw std::bad_alloc{};
        }
        m_wmem = static_cast<std::uint32_t*>(wmem);
        m_xmem = static_cast<std::uint32_t*>(xmem);
#endif
    }

    /// Releases both views (and the backing file descriptor where one exists).
    ~DualCodeBlock()
    {
#if defined(_WIN32)
        VirtualFree((void*)m_xmem, 0, MEM_RELEASE);
#elif defined(__APPLE__)
        munmap(m_xmem, m_size);
        munmap(m_wmem, m_size);
#else
        munmap(m_wmem, m_size);
        munmap(m_xmem, m_size);
        close(fd);
#endif
    }

    DualCodeBlock(const DualCodeBlock&) = delete;
    DualCodeBlock& operator=(const DualCodeBlock&) = delete;
    DualCodeBlock(DualCodeBlock&&) = delete;
    DualCodeBlock& operator=(DualCodeBlock&&) = delete;

    /// Pointer to executable mirror of memory (permissions: R-X)
    std::uint32_t* xptr() const
    {
        return m_xmem;
    }

    /// Pointer to writeable mirror of memory (permissions: RW-)
    std::uint32_t* wptr() const
    {
        return m_wmem;
    }

    /// Invalidate should be used with executable memory pointers.
    ///
    /// Apple and Windows delegate to the OS call. Elsewhere the cache maintenance
    /// is performed by hand with AArch64 instructions over [mem, mem + size).
    void invalidate(std::uint32_t* mem, std::size_t size)
    {
#if defined(__APPLE__)
        sys_icache_invalidate(mem, size);
#elif defined(_WIN32)
        FlushInstructionCache(GetCurrentProcess(), mem, size);
#else
        // Smallest line sizes observed so far; they only ever shrink across calls.
        static std::size_t icache_line_size = 0x10000, dcache_line_size = 0x10000;

        std::uint64_t ctr;
        __asm__ volatile("mrs %0, ctr_el0"
                         : "=r"(ctr));

        // Line sizes are derived from CTR_EL0 bits [3:0] and [19:16] as 4 << field.
        const std::size_t isize = icache_line_size = std::min<std::size_t>(icache_line_size, 4 << ((ctr >> 0) & 0xf));
        const std::size_t dsize = dcache_line_size = std::min<std::size_t>(dcache_line_size, 4 << ((ctr >> 16) & 0xf));

        const std::uintptr_t end = (std::uintptr_t)mem + size;

        // Step 1: `dc cvau` on every data-cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(dsize - 1); addr < end; addr += dsize) {
            __asm__ volatile("dc cvau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\n"
                         :
                         :
                         : "memory");

        // Step 2: `ic ivau` on every instruction-cache line covering the range.
        for (std::uintptr_t addr = ((std::uintptr_t)mem) & ~(isize - 1); addr < end; addr += isize) {
            __asm__ volatile("ic ivau, %0"
                             :
                             : "r"(addr)
                             : "memory");
        }
        __asm__ volatile("dsb ish\nisb\n"
                         :
                         :
                         : "memory");
#endif
    }

    /// Runs invalidate() over the whole executable view.
    void invalidate_all()
    {
        invalidate(m_xmem, m_size);
    }

protected:
#if !defined(_WIN32) && !defined(__APPLE__)
    int fd = -1;
#endif
    std::uint32_t* m_xmem = nullptr;
    std::uint32_t* m_wmem = nullptr;
    std::size_t m_size = 0;
};
} // namespace oaknut

View file

@ -0,0 +1,107 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bitset>
#include <cstddef>
#include <initializer_list>
#if defined(__cpp_lib_constexpr_bitset) && __cpp_lib_constexpr_bitset >= 202207L
# define OAKNUT_CPU_FEATURES_CONSTEXPR constexpr
#else
# define OAKNUT_CPU_FEATURES_CONSTEXPR
#endif
namespace oaknut {
// NOTE: This file contains code that can be compiled on non-arm64 systems.
// For run-time CPU feature detection, include feature_detection.hpp
// Enumeration of CPU features.
// The enumerators are not written out here: OAKNUT_CPU_FEATURE(name) is defined to
// expand to `name,` and the list file is then included, so each
// OAKNUT_CPU_FEATURE(...) entry in that file (presumably one per feature) becomes
// one enumerator. The macro is undefined again right afterwards.
enum class CpuFeature {
#define OAKNUT_CPU_FEATURE(name) name,
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
};
// Number of OAKNUT_CPU_FEATURE(...) entries in the list file.
// Computed by re-including the same list with OAKNUT_CPU_FEATURE(name) expanding to
// `+1`, which turns the initializer into `0 +1 +1 ...`.
constexpr std::size_t cpu_feature_count = 0
#define OAKNUT_CPU_FEATURE(name) +1
#include "oaknut/impl/cpu_feature.inc.hpp"
#undef OAKNUT_CPU_FEATURE
;
/// A set of CpuFeature values, stored as a std::bitset with one bit per feature.
///
/// Supports the usual set-like bitwise operators (&, |, ^, ~ and their compound
/// forms). Members marked OAKNUT_CPU_FEATURES_CONSTEXPR are constexpr only when
/// that macro expands to `constexpr`.
class CpuFeatures final {
public:
    /// Creates an empty feature set.
    constexpr CpuFeatures() = default;

    /// Creates a set containing exactly the listed features.
    OAKNUT_CPU_FEATURES_CONSTEXPR explicit CpuFeatures(std::initializer_list<CpuFeature> features)
    {
        for (const CpuFeature feature : features)
            m_bitset.set(static_cast<std::size_t>(feature));
    }

    /// Returns true when `feature` is in the set.
    /// A value whose index is not below cpu_feature_count is reported as absent.
    constexpr bool has(CpuFeature feature) const
    {
        const auto index = static_cast<std::size_t>(feature);
        return index < cpu_feature_count ? m_bitset[index] : false;
    }

    /// Keeps only the features that are also in `other`.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator&=(const CpuFeatures& other) noexcept
    {
        m_bitset &= other.m_bitset;
        return *this;
    }

    /// Adds every feature of `other` to this set.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator|=(const CpuFeatures& other) noexcept
    {
        m_bitset |= other.m_bitset;
        return *this;
    }

    /// Toggles every feature that is in `other`.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures& operator^=(const CpuFeatures& other) noexcept
    {
        m_bitset ^= other.m_bitset;
        return *this;
    }

    /// Returns the complement of this set.
    OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator~() const noexcept
    {
        CpuFeatures inverted;
        inverted.m_bitset = ~m_bitset;
        return inverted;
    }

private:
    using bitset = std::bitset<cpu_feature_count>;

    // The binary operators are free functions that need direct access to m_bitset.
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept;
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept;
    friend OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept;

    bitset m_bitset;
};
/// Returns the features present in both `a` and `b`.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this would otherwise be a non-inline function defined in a header.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator&(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset & b.m_bitset;
    return result;
}
/// Returns the features present in `a`, in `b`, or in both.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this would otherwise be a non-inline function defined in a header.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator|(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset | b.m_bitset;
    return result;
}
/// Returns the features present in exactly one of `a` and `b`.
///
/// Marked `inline` explicitly: when OAKNUT_CPU_FEATURES_CONSTEXPR expands to
/// nothing this would otherwise be a non-inline function defined in a header.
inline OAKNUT_CPU_FEATURES_CONSTEXPR CpuFeatures operator^(const CpuFeatures& a, const CpuFeatures& b) noexcept
{
    CpuFeatures result;
    result.m_bitset = a.m_bitset ^ b.m_bitset;
    return result;
}
} // namespace oaknut

View file

@ -0,0 +1,35 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
// Platform dispatch for run-time CPU feature detection.
//
// Exactly one backend header is included, chosen by the predefined OS macro.
// Two macros are defined alongside it:
//   OAKNUT_CPU_FEATURE_DETECTION          1 when an OS-specific backend was selected,
//                                         0 when the generic fallback is used.
//   OAKNUT_SUPPORTS_READING_ID_REGISTERS  0, 1 or 2 depending on the platform.
//                                         NOTE(review): the meaning of 1 vs 2 is not
//                                         stated here; presumably 2 marks backends whose
//                                         read_id_registers takes a core index — confirm.
#if defined(__APPLE__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
# include "oaknut/feature_detection/feature_detection_apple.hpp"
#elif defined(__FreeBSD__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
# include "oaknut/feature_detection/feature_detection_freebsd.hpp"
#elif defined(__linux__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
# include "oaknut/feature_detection/feature_detection_linux.hpp"
#elif defined(__NetBSD__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
# include "oaknut/feature_detection/feature_detection_netbsd.hpp"
#elif defined(__OpenBSD__)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 1
# include "oaknut/feature_detection/feature_detection_openbsd.hpp"
#elif defined(_WIN32)
# define OAKNUT_CPU_FEATURE_DETECTION 1
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 2
# include "oaknut/feature_detection/feature_detection_w32.hpp"
#else
// Unknown OS: fall back to the generic backend and warn at compile time.
# define OAKNUT_CPU_FEATURE_DETECTION 0
# define OAKNUT_SUPPORTS_READING_ID_REGISTERS 0
# warning "Unsupported operating system for CPU feature detection"
# include "oaknut/feature_detection/feature_detection_generic.hpp"
#endif

View file

@ -0,0 +1,112 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <optional>
#include <sys/sysctl.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
// Ref: https://developer.apple.com/documentation/kernel/1387446-sysctlbyname/determining_instruction_set_characteristics
namespace detail {
/// Queries the integer sysctl `sysctl_name` and reports whether it is non-zero.
/// Returns false when the sysctlbyname call itself fails.
inline bool detect_feature(const char* const sysctl_name)
{
    int value = 0;
    std::size_t value_size = sizeof(value);
    const bool query_ok = ::sysctlbyname(sysctl_name, &value, &value_size, nullptr, 0) == 0;
    return query_ok && value != 0;
}
} // namespace detail
/// Builds a CpuFeatures set by probing `hw.optional.*` sysctl entries.
///
/// Each feature has a primary sysctl name and, for some, an alternative name that
/// is only queried when the primary one does not report the feature.
inline CpuFeatures detect_features_via_sysctlbyname()
{
    struct Probe {
        CpuFeature feature;
        const char* name;
        const char* fallback_name;  // nullptr when there is no alternative name
    };

    static constexpr Probe probes[] = {
        {CpuFeature::ASIMD, "hw.optional.AdvSIMD", "hw.optional.neon"},
        {CpuFeature::FP, "hw.optional.floatingpoint", nullptr},
        {CpuFeature::FP16Conv, "hw.optional.AdvSIMD_HPFPCvt", "hw.optional.neon_hpfp"},
        {CpuFeature::BF16, "hw.optional.arm.FEAT_BF16", nullptr},
        {CpuFeature::DotProd, "hw.optional.arm.FEAT_DotProd", nullptr},
        {CpuFeature::FCMA, "hw.optional.arm.FEAT_FCMA", "hw.optional.armv8_3_compnum"},
        {CpuFeature::FHM, "hw.optional.arm.FEAT_FHM", "hw.optional.armv8_2_fhm"},
        {CpuFeature::FP16, "hw.optional.arm.FEAT_FP16", "hw.optional.neon_fp16"},
        {CpuFeature::FRINTTS, "hw.optional.arm.FEAT_FRINTTS", nullptr},
        {CpuFeature::I8MM, "hw.optional.arm.FEAT_I8MM", nullptr},
        {CpuFeature::JSCVT, "hw.optional.arm.FEAT_JSCVT", nullptr},
        {CpuFeature::RDM, "hw.optional.arm.FEAT_RDM", nullptr},
        {CpuFeature::FlagM, "hw.optional.arm.FEAT_FlagM", nullptr},
        {CpuFeature::FlagM2, "hw.optional.arm.FEAT_FlagM2", nullptr},
        {CpuFeature::CRC32, "hw.optional.armv8_crc32", nullptr},
        {CpuFeature::LRCPC, "hw.optional.arm.FEAT_LRCPC", nullptr},
        {CpuFeature::LRCPC2, "hw.optional.arm.FEAT_LRCPC2", nullptr},
        {CpuFeature::LSE, "hw.optional.arm.FEAT_LSE", "hw.optional.armv8_1_atomics"},
        {CpuFeature::LSE2, "hw.optional.arm.FEAT_LSE2", nullptr},
        {CpuFeature::AES, "hw.optional.arm.FEAT_AES", nullptr},
        {CpuFeature::PMULL, "hw.optional.arm.FEAT_PMULL", nullptr},
        {CpuFeature::SHA1, "hw.optional.arm.FEAT_SHA1", nullptr},
        {CpuFeature::SHA256, "hw.optional.arm.FEAT_SHA256", nullptr},
        {CpuFeature::SHA512, "hw.optional.arm.FEAT_SHA512", "hw.optional.armv8_2_sha512"},
        {CpuFeature::SHA3, "hw.optional.arm.FEAT_SHA3", "hw.optional.armv8_2_sha3"},
        {CpuFeature::BTI, "hw.optional.arm.FEAT_BTI", nullptr},
        {CpuFeature::DPB, "hw.optional.arm.FEAT_DPB", nullptr},
        {CpuFeature::DPB2, "hw.optional.arm.FEAT_DPB2", nullptr},
        {CpuFeature::ECV, "hw.optional.arm.FEAT_ECV", nullptr},
        {CpuFeature::SB, "hw.optional.arm.FEAT_SB", nullptr},
        {CpuFeature::SSBS, "hw.optional.arm.FEAT_SSBS", nullptr},
    };

    CpuFeatures result;
    for (const Probe& probe : probes) {
        // The alternative name is consulted only if the primary probe came back false.
        const bool present = detail::detect_feature(probe.name)
                          || (probe.fallback_name != nullptr && detail::detect_feature(probe.fallback_name));
        if (present)
            result |= CpuFeatures{probe.feature};
    }
    return result;
}
/// Detects CPU features on this platform; forwards to the sysctlbyname-based probe.
inline CpuFeatures detect_features()
{
    const CpuFeatures features = detect_features_via_sysctlbyname();
    return features;
}
/// This backend does not read ID registers: always returns an empty optional.
inline std::optional<id::IdRegisters> read_id_registers()
{
    return std::optional<id::IdRegisters>{};
}
} // namespace oaknut

View file

@ -0,0 +1,62 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstdint>
#include <optional>
#include <sys/auxv.h>
#include <sys/param.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
#include "oaknut/feature_detection/read_id_registers_directly.hpp"
#ifndef AT_HWCAP
# define AT_HWCAP 16
#endif
#ifndef AT_HWCAP2
# define AT_HWCAP2 26
#endif
#if __FreeBSD_version < 1300114
# error "Incompatible ABI change (incorrect HWCAP definitions on earlier FreeBSD versions)"
#endif
namespace oaknut {
namespace detail {
inline unsigned long getauxval(int aux)
{
unsigned long result = 0;
if (::elf_aux_info(aux, &result, static_cast<int>(sizeof result)) == 0) {
return result;
}
return 0;
}
} // namespace detail
/// Reads AT_HWCAP and AT_HWCAP2 from the auxiliary vector and decodes them into
/// a CpuFeatures set via the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const unsigned long caps{detail::getauxval(AT_HWCAP)};
    const unsigned long caps2{detail::getauxval(AT_HWCAP2)};
    const CpuFeatures features = detect_features_via_hwcap(caps, caps2);
    return features;
}
inline std::optional<id::IdRegisters> read_id_registers()
{
// HWCAP_CPUID is falsely not set on many FreeBSD kernel versions,
// so we don't bother checking it.
return id::read_id_registers_directly();
}
/// Detects CPU features on this platform; forwards to the HWCAP-based probe.
inline CpuFeatures detect_features()
{
    const CpuFeatures features = detect_features_via_hwcap();
    return features;
}
} // namespace oaknut

View file

@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <optional>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
/// Fallback backend: performs no run-time query and reports a fixed set
/// containing FP and ASIMD.
inline CpuFeatures detect_features()
{
    CpuFeatures baseline{CpuFeature::FP, CpuFeature::ASIMD};
    return baseline;
}
/// Fallback backend: ID registers are not read, so the result is always empty.
inline std::optional<id::IdRegisters> read_id_registers()
{
    return std::optional<id::IdRegisters>{};
}
} // namespace oaknut

View file

@ -0,0 +1,120 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include "oaknut/feature_detection/cpu_feature.hpp"
namespace oaknut {
namespace detail {
template<std::size_t... bits>
constexpr bool bit_test(unsigned long value)
{
return (((value >> bits) & 1) && ...);
}
} // namespace detail
// Decodes the two auxiliary-vector capability words into a CpuFeatures set.
//
// hwcap  - value of AT_HWCAP
// hwcap2 - value of AT_HWCAP2
//
// Each OAKNUT_DETECT_CAP / OAKNUT_DETECT_CAP2 line below tests one or more bit
// indices of hwcap / hwcap2 respectively (all listed bits must be set) and, if
// they are, adds the named CpuFeature to the result. The trailing comment on each
// line names the corresponding HWCAP constant. Both helper macros are local to
// this function and are undefined again before it returns.
inline CpuFeatures detect_features_via_hwcap(unsigned long hwcap, unsigned long hwcap2)
{
CpuFeatures result;
// Adds FEAT when all of the given bit indices are set in hwcap.
#define OAKNUT_DETECT_CAP(FEAT, ...) \
if (detail::bit_test<__VA_ARGS__>(hwcap)) { \
result |= CpuFeatures{CpuFeature::FEAT}; \
}
// Adds FEAT when all of the given bit indices are set in hwcap2.
#define OAKNUT_DETECT_CAP2(FEAT, ...) \
if (detail::bit_test<__VA_ARGS__>(hwcap2)) { \
result |= CpuFeatures{CpuFeature::FEAT}; \
}
OAKNUT_DETECT_CAP(FP, 0) // HWCAP_FP
OAKNUT_DETECT_CAP(ASIMD, 1) // HWCAP_ASIMD
// HWCAP_EVTSTRM (2)
OAKNUT_DETECT_CAP(AES, 3) // HWCAP_AES
OAKNUT_DETECT_CAP(PMULL, 4) // HWCAP_PMULL
OAKNUT_DETECT_CAP(SHA1, 5) // HWCAP_SHA1
OAKNUT_DETECT_CAP(SHA256, 6) // HWCAP_SHA2
OAKNUT_DETECT_CAP(CRC32, 7) // HWCAP_CRC32
OAKNUT_DETECT_CAP(LSE, 8) // HWCAP_ATOMICS
OAKNUT_DETECT_CAP(FP16Conv, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
OAKNUT_DETECT_CAP(FP16, 9, 10) // HWCAP_FPHP && HWCAP_ASIMDHP
// HWCAP_CPUID (11)
OAKNUT_DETECT_CAP(RDM, 12) // HWCAP_ASIMDRDM
OAKNUT_DETECT_CAP(JSCVT, 13) // HWCAP_JSCVT
OAKNUT_DETECT_CAP(FCMA, 14) // HWCAP_FCMA
OAKNUT_DETECT_CAP(LRCPC, 15) // HWCAP_LRCPC
OAKNUT_DETECT_CAP(DPB, 16) // HWCAP_DCPOP
OAKNUT_DETECT_CAP(SHA3, 17) // HWCAP_SHA3
OAKNUT_DETECT_CAP(SM3, 18) // HWCAP_SM3
OAKNUT_DETECT_CAP(SM4, 19) // HWCAP_SM4
OAKNUT_DETECT_CAP(DotProd, 20) // HWCAP_ASIMDDP
OAKNUT_DETECT_CAP(SHA512, 21) // HWCAP_SHA512
OAKNUT_DETECT_CAP(SVE, 22) // HWCAP_SVE
OAKNUT_DETECT_CAP(FHM, 23) // HWCAP_ASIMDFHM
OAKNUT_DETECT_CAP(DIT, 24) // HWCAP_DIT
OAKNUT_DETECT_CAP(LSE2, 25) // HWCAP_USCAT
OAKNUT_DETECT_CAP(LRCPC2, 26) // HWCAP_ILRCPC
OAKNUT_DETECT_CAP(FlagM, 27) // HWCAP_FLAGM
OAKNUT_DETECT_CAP(SSBS, 28) // HWCAP_SSBS
OAKNUT_DETECT_CAP(SB, 29) // HWCAP_SB
OAKNUT_DETECT_CAP(PACA, 30) // HWCAP_PACA
OAKNUT_DETECT_CAP(PACG, 31) // HWCAP_PACG
OAKNUT_DETECT_CAP2(DPB2, 0) // HWCAP2_DCPODP
OAKNUT_DETECT_CAP2(SVE2, 1) // HWCAP2_SVE2
OAKNUT_DETECT_CAP2(SVE_AES, 2) // HWCAP2_SVEAES
OAKNUT_DETECT_CAP2(SVE_PMULL128, 3) // HWCAP2_SVEPMULL
OAKNUT_DETECT_CAP2(SVE_BITPERM, 4) // HWCAP2_SVEBITPERM
OAKNUT_DETECT_CAP2(SVE_SHA3, 5) // HWCAP2_SVESHA3
OAKNUT_DETECT_CAP2(SVE_SM4, 6) // HWCAP2_SVESM4
OAKNUT_DETECT_CAP2(FlagM2, 7) // HWCAP2_FLAGM2
OAKNUT_DETECT_CAP2(FRINTTS, 8) // HWCAP2_FRINT
OAKNUT_DETECT_CAP2(SVE_I8MM, 9) // HWCAP2_SVEI8MM
OAKNUT_DETECT_CAP2(SVE_F32MM, 10) // HWCAP2_SVEF32MM
OAKNUT_DETECT_CAP2(SVE_F64MM, 11) // HWCAP2_SVEF64MM
OAKNUT_DETECT_CAP2(SVE_BF16, 12) // HWCAP2_SVEBF16
OAKNUT_DETECT_CAP2(I8MM, 13) // HWCAP2_I8MM
OAKNUT_DETECT_CAP2(BF16, 14) // HWCAP2_BF16
OAKNUT_DETECT_CAP2(DGH, 15) // HWCAP2_DGH
OAKNUT_DETECT_CAP2(RNG, 16) // HWCAP2_RNG
OAKNUT_DETECT_CAP2(BTI, 17) // HWCAP2_BTI
OAKNUT_DETECT_CAP2(MTE, 18) // HWCAP2_MTE
OAKNUT_DETECT_CAP2(ECV, 19) // HWCAP2_ECV
OAKNUT_DETECT_CAP2(AFP, 20) // HWCAP2_AFP
OAKNUT_DETECT_CAP2(RPRES, 21) // HWCAP2_RPRES
OAKNUT_DETECT_CAP2(MTE3, 22) // HWCAP2_MTE3
OAKNUT_DETECT_CAP2(SME, 23) // HWCAP2_SME
OAKNUT_DETECT_CAP2(SME_I16I64, 24) // HWCAP2_SME_I16I64
OAKNUT_DETECT_CAP2(SME_F64F64, 25) // HWCAP2_SME_F64F64
OAKNUT_DETECT_CAP2(SME_I8I32, 26) // HWCAP2_SME_I8I32
OAKNUT_DETECT_CAP2(SME_F16F32, 27) // HWCAP2_SME_F16F32
OAKNUT_DETECT_CAP2(SME_B16F32, 28) // HWCAP2_SME_B16F32
OAKNUT_DETECT_CAP2(SME_F32F32, 29) // HWCAP2_SME_F32F32
OAKNUT_DETECT_CAP2(SME_FA64, 30) // HWCAP2_SME_FA64
OAKNUT_DETECT_CAP2(WFxT, 31) // HWCAP2_WFXT
OAKNUT_DETECT_CAP2(EBF16, 32) // HWCAP2_EBF16
OAKNUT_DETECT_CAP2(SVE_EBF16, 33) // HWCAP2_SVE_EBF16
OAKNUT_DETECT_CAP2(CSSC, 34) // HWCAP2_CSSC
OAKNUT_DETECT_CAP2(RPRFM, 35) // HWCAP2_RPRFM
OAKNUT_DETECT_CAP2(SVE2p1, 36) // HWCAP2_SVE2P1
OAKNUT_DETECT_CAP2(SME2, 37) // HWCAP2_SME2
OAKNUT_DETECT_CAP2(SME2p1, 38) // HWCAP2_SME2P1
OAKNUT_DETECT_CAP2(SME_I16I32, 39) // HWCAP2_SME_I16I32
OAKNUT_DETECT_CAP2(SME_BI32I32, 40) // HWCAP2_SME_BI32I32
OAKNUT_DETECT_CAP2(SME_B16B16, 41) // HWCAP2_SME_B16B16
OAKNUT_DETECT_CAP2(SME_F16F16, 42) // HWCAP2_SME_F16F16
OAKNUT_DETECT_CAP2(MOPS, 43) // HWCAP2_MOPS
OAKNUT_DETECT_CAP2(HBC, 44) // HWCAP2_HBC
#undef OAKNUT_DETECT_CAP
#undef OAKNUT_DETECT_CAP2
return result;
}
} // namespace oaknut

View file

@ -0,0 +1,167 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
/// Derives a CpuFeatures set from a snapshot of ID register values.
///
/// Each check compares one register field against the minimum (or exact) value
/// at which the corresponding feature is reported, and adds the feature on match.
///
/// Declared `inline` because this function is defined in a header; without it,
/// including the header from more than one translation unit produces duplicate
/// definitions at link time.
///
/// @param regs ID register values to decode.
/// @return the set of features indicated by `regs`.
inline CpuFeatures detect_features_via_id_registers(id::IdRegisters regs)
{
    CpuFeatures result;

    if (regs.pfr0.FP() >= 0)
        result |= CpuFeatures{CpuFeature::FP};
    if (regs.pfr0.AdvSIMD() >= 0)
        result |= CpuFeatures{CpuFeature::ASIMD};
    if (regs.isar0.AES() >= 1)
        result |= CpuFeatures{CpuFeature::AES};
    if (regs.isar0.AES() >= 2)
        result |= CpuFeatures{CpuFeature::PMULL};
    if (regs.isar0.SHA1() >= 1)
        result |= CpuFeatures{CpuFeature::SHA1};
    if (regs.isar0.SHA2() >= 1)
        result |= CpuFeatures{CpuFeature::SHA256};
    if (regs.isar0.CRC32() >= 1)
        result |= CpuFeatures{CpuFeature::CRC32};
    if (regs.isar0.Atomic() >= 2)
        result |= CpuFeatures{CpuFeature::LSE};
    if (regs.pfr0.FP() >= 1 && regs.pfr0.AdvSIMD() >= 1)
        result |= CpuFeatures{CpuFeature::FP16Conv, CpuFeature::FP16};
    if (regs.isar0.RDM() >= 1)
        result |= CpuFeatures{CpuFeature::RDM};
    if (regs.isar1.JSCVT() >= 1)
        result |= CpuFeatures{CpuFeature::JSCVT};
    if (regs.isar1.FCMA() >= 1)
        result |= CpuFeatures{CpuFeature::FCMA};
    if (regs.isar1.LRCPC() >= 1)
        result |= CpuFeatures{CpuFeature::LRCPC};
    if (regs.isar1.DPB() >= 1)
        result |= CpuFeatures{CpuFeature::DPB};
    if (regs.isar0.SHA3() >= 1)
        result |= CpuFeatures{CpuFeature::SHA3};
    if (regs.isar0.SM3() >= 1)
        result |= CpuFeatures{CpuFeature::SM3};
    if (regs.isar0.SM4() >= 1)
        result |= CpuFeatures{CpuFeature::SM4};
    if (regs.isar0.DP() >= 1)
        result |= CpuFeatures{CpuFeature::DotProd};
    if (regs.isar0.SHA2() >= 2)
        result |= CpuFeatures{CpuFeature::SHA512};
    if (regs.pfr0.SVE() >= 1)
        result |= CpuFeatures{CpuFeature::SVE};
    if (regs.isar0.FHM() >= 1)
        result |= CpuFeatures{CpuFeature::FHM};
    if (regs.pfr0.DIT() >= 1)
        result |= CpuFeatures{CpuFeature::DIT};
    if (regs.mmfr2.AT() >= 1)
        result |= CpuFeatures{CpuFeature::LSE2};
    if (regs.isar1.LRCPC() >= 2)
        result |= CpuFeatures{CpuFeature::LRCPC2};
    if (regs.isar0.TS() >= 1)
        result |= CpuFeatures{CpuFeature::FlagM};
    if (regs.pfr1.SSBS() >= 2)
        result |= CpuFeatures{CpuFeature::SSBS};
    if (regs.isar1.SB() >= 1)
        result |= CpuFeatures{CpuFeature::SB};
    if (regs.isar1.APA() >= 1 || regs.isar1.API() >= 1)
        result |= CpuFeatures{CpuFeature::PACA};
    if (regs.isar1.GPA() >= 1 || regs.isar1.GPI() >= 1)
        result |= CpuFeatures{CpuFeature::PACG};
    if (regs.isar1.DPB() >= 2)
        result |= CpuFeatures{CpuFeature::DPB2};
    if (regs.zfr0.SVEver() >= 1)
        result |= CpuFeatures{CpuFeature::SVE2};
    if (regs.zfr0.AES() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_AES};
    if (regs.zfr0.AES() >= 2)
        result |= CpuFeatures{CpuFeature::SVE_PMULL128};
    if (regs.zfr0.BitPerm() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_BITPERM};
    if (regs.zfr0.SHA3() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_SHA3};
    if (regs.zfr0.SM4() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_SM4};
    if (regs.isar0.TS() >= 2)
        result |= CpuFeatures{CpuFeature::FlagM2};
    if (regs.isar1.FRINTTS() >= 1)
        result |= CpuFeatures{CpuFeature::FRINTTS};
    if (regs.zfr0.I8MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_I8MM};
    if (regs.zfr0.F32MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_F32MM};
    if (regs.zfr0.F64MM() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_F64MM};
    if (regs.zfr0.BF16() >= 1)
        result |= CpuFeatures{CpuFeature::SVE_BF16};
    if (regs.isar1.I8MM() >= 1)
        result |= CpuFeatures{CpuFeature::I8MM};
    if (regs.isar1.BF16() >= 1)
        result |= CpuFeatures{CpuFeature::BF16};
    if (regs.isar1.DGH() >= 1)
        result |= CpuFeatures{CpuFeature::DGH};
    if (regs.isar0.RNDR() >= 1)
        result |= CpuFeatures{CpuFeature::RNG};
    if (regs.pfr1.BT() >= 1)
        result |= CpuFeatures{CpuFeature::BTI};
    if (regs.pfr1.MTE() >= 2)
        result |= CpuFeatures{CpuFeature::MTE};
    if (regs.mmfr0.ECV() >= 1)
        result |= CpuFeatures{CpuFeature::ECV};
    if (regs.mmfr1.AFP() >= 1)
        result |= CpuFeatures{CpuFeature::AFP};
    if (regs.isar2.RPRES() >= 1)
        result |= CpuFeatures{CpuFeature::RPRES};
    if (regs.pfr1.MTE() >= 3)
        result |= CpuFeatures{CpuFeature::MTE3};
    if (regs.pfr1.SME() >= 1)
        result |= CpuFeatures{CpuFeature::SME};
    if (regs.smfr0.I16I64() == 0b1111)
        result |= CpuFeatures{CpuFeature::SME_I16I64};
    if (regs.smfr0.F64F64() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F64F64};
    if (regs.smfr0.I8I32() == 0b1111)
        result |= CpuFeatures{CpuFeature::SME_I8I32};
    if (regs.smfr0.F16F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F16F32};
    if (regs.smfr0.B16F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_B16F32};
    if (regs.smfr0.F32F32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F32F32};
    if (regs.smfr0.FA64() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_FA64};
    if (regs.isar2.WFxT() >= 2)
        result |= CpuFeatures{CpuFeature::WFxT};
    if (regs.isar1.BF16() >= 2)
        result |= CpuFeatures{CpuFeature::EBF16};
    if (regs.zfr0.BF16() >= 2)
        result |= CpuFeatures{CpuFeature::SVE_EBF16};
    if (regs.isar2.CSSC() >= 1)
        result |= CpuFeatures{CpuFeature::CSSC};
    if (regs.isar2.RPRFM() >= 1)
        result |= CpuFeatures{CpuFeature::RPRFM};
    if (regs.zfr0.SVEver() >= 2)
        result |= CpuFeatures{CpuFeature::SVE2p1};
    if (regs.smfr0.SMEver() >= 1)
        result |= CpuFeatures{CpuFeature::SME2};
    if (regs.smfr0.SMEver() >= 2)
        result |= CpuFeatures{CpuFeature::SME2p1};
    if (regs.smfr0.I16I32() == 0b0101)
        result |= CpuFeatures{CpuFeature::SME_I16I32};
    if (regs.smfr0.BI32I32() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_BI32I32};
    if (regs.smfr0.B16B16() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_B16B16};
    if (regs.smfr0.F16F16() == 0b1)
        result |= CpuFeatures{CpuFeature::SME_F16F16};
    if (regs.isar2.MOPS() >= 1)
        result |= CpuFeatures{CpuFeature::MOPS};
    if (regs.isar2.BC() >= 1)
        result |= CpuFeatures{CpuFeature::HBC};

    return result;
}
} // namespace oaknut

View file

@ -0,0 +1,45 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <optional>
#include <sys/auxv.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
#include "oaknut/feature_detection/read_id_registers_directly.hpp"
#ifndef AT_HWCAP
# define AT_HWCAP 16
#endif
#ifndef AT_HWCAP2
# define AT_HWCAP2 26
#endif
namespace oaknut {
/// Reads AT_HWCAP and AT_HWCAP2 with getauxval and decodes them into a
/// CpuFeatures set via the two-argument overload.
inline CpuFeatures detect_features_via_hwcap()
{
    const unsigned long caps{::getauxval(AT_HWCAP)};
    const unsigned long caps2{::getauxval(AT_HWCAP2)};
    const CpuFeatures features = detect_features_via_hwcap(caps, caps2);
    return features;
}
/// Detects CPU features on this platform; forwards to the HWCAP-based probe.
inline CpuFeatures detect_features()
{
    const CpuFeatures features = detect_features_via_hwcap();
    return features;
}
/// Reads the ID registers directly, but only when bit 11 of AT_HWCAP is set.
/// Returns an empty optional otherwise.
inline std::optional<id::IdRegisters> read_id_registers()
{
    constexpr unsigned long hwcap_cpuid = 1ul << 11;

    const bool cpuid_bit_set = (::getauxval(AT_HWCAP) & hwcap_cpuid) != 0;
    if (!cpuid_bit_set)
        return std::nullopt;

    return id::read_id_registers_directly();
}
} // namespace oaknut

View file

@ -0,0 +1,81 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <string>
#include <aarch64/armreg.h>
#include <sys/param.h>
#include <sys/sysctl.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
#include "oaknut/feature_detection/feature_detection_idregs.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
{
const std::string path = "machdep.cpu" + std::to_string(core_index) + ".cpu_id";
aarch64_sysctl_cpu_id id;
std::size_t id_len = sizeof id;
if (sysctlbyname(path.c_str(), &id, &id_len, nullptr, 0) < 0)
return std::nullopt;
return id::IdRegisters{
id.ac_midr,
id::Pfr0Register{id.ac_aa64pfr0},
id::Pfr1Register{id.ac_aa64pfr1},
id::Pfr2Register{0},
id::Zfr0Register{id.ac_aa64zfr0},
id::Smfr0Register{0},
id::Isar0Register{id.ac_aa64isar0},
id::Isar1Register{id.ac_aa64isar1},
id::Isar2Register{0},
id::Isar3Register{0},
id::Mmfr0Register{id.ac_aa64mmfr0},
id::Mmfr1Register{id.ac_aa64mmfr1},
id::Mmfr2Register{id.ac_aa64mmfr2},
id::Mmfr3Register{0},
id::Mmfr4Register{0},
};
}
/// Returns the value of the {CTL_HW, HW_NCPU} sysctl, or 0 when the query fails.
inline std::size_t get_core_count()
{
    const std::array<int, 2> mib{CTL_HW, HW_NCPU};

    int ncpu = 0;
    size_t ncpu_size = sizeof(ncpu);
    const int status = sysctl(mib.data(), mib.size(), &ncpu, &ncpu_size, nullptr, 0);
    return status < 0 ? 0 : ncpu;
}
/// Detects the features common to every core whose ID registers can be read.
///
/// Cores for which read_id_registers returns nothing are skipped. When no core
/// yields registers (or the core count is 0) the result is an empty set.
inline CpuFeatures detect_features()
{
    CpuFeatures common;
    bool have_any = false;

    const std::size_t core_count = get_core_count();
    for (std::size_t core_index = 0; core_index != core_count; ++core_index) {
        const std::optional<id::IdRegisters> id_regs = read_id_registers(core_index);
        if (!id_regs)
            continue;

        const CpuFeatures core_features = detect_features_via_id_registers(*id_regs);
        if (have_any) {
            // Intersect with what earlier cores reported.
            common &= core_features;
        } else {
            common = core_features;
            have_any = true;
        }
    }

    return common;
}
} // namespace oaknut

View file

@ -0,0 +1,63 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <array>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <sys/sysctl.h>
#include <sys/types.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/feature_detection_hwcaps.hpp"
#include "oaknut/feature_detection/feature_detection_idregs.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
namespace detail {
/// Reads one 64-bit value through the {CTL_MACHDEP, index} sysctl.
///
/// @param index  Second-level sysctl name under CTL_MACHDEP.
/// @return The value written by sysctl(), or 0 when the call fails.
inline std::uint64_t read_id_register(int index)
{
    std::array<int, 2> name{CTL_MACHDEP, index};

    uint64_t reg_value = 0;
    size_t reg_value_size = sizeof(reg_value);
    const int rc = sysctl(name.data(), name.size(), &reg_value, &reg_value_size, nullptr, 0);

    return rc < 0 ? 0 : reg_value;
}
} // namespace detail
/// Collects the ID registers that are queried through CTL_MACHDEP sysctls.
///
/// MIDR is left empty and PFR2, ISAR3, MMFR3 and MMFR4 are reported as zero
/// because no sysctl is queried for them here. The returned optional is
/// always engaged.
inline std::optional<id::IdRegisters> read_id_registers()
{
    // Second-level CTL_MACHDEP names.
    // See OpenBSD source: sys/arch/arm64/include/cpu.h
    constexpr int cpu_id_aa64isar0 = 2;
    constexpr int cpu_id_aa64isar1 = 3;
    constexpr int cpu_id_aa64isar2 = 4;
    constexpr int cpu_id_aa64mmfr0 = 5;
    constexpr int cpu_id_aa64mmfr1 = 6;
    constexpr int cpu_id_aa64mmfr2 = 7;
    constexpr int cpu_id_aa64pfr0 = 8;
    constexpr int cpu_id_aa64pfr1 = 9;
    constexpr int cpu_id_aa64smfr0 = 10;
    constexpr int cpu_id_aa64zfr0 = 11;

    return id::IdRegisters{
        std::nullopt,  // No easy way of getting MIDR_EL1 other than reading /proc/cpu
        id::Pfr0Register{detail::read_id_register(cpu_id_aa64pfr0)},
        id::Pfr1Register{detail::read_id_register(cpu_id_aa64pfr1)},
        id::Pfr2Register{0},
        id::Zfr0Register{detail::read_id_register(cpu_id_aa64zfr0)},
        id::Smfr0Register{detail::read_id_register(cpu_id_aa64smfr0)},
        id::Isar0Register{detail::read_id_register(cpu_id_aa64isar0)},
        id::Isar1Register{detail::read_id_register(cpu_id_aa64isar1)},
        id::Isar2Register{detail::read_id_register(cpu_id_aa64isar2)},
        id::Isar3Register{0},
        id::Mmfr0Register{detail::read_id_register(cpu_id_aa64mmfr0)},
        id::Mmfr1Register{detail::read_id_register(cpu_id_aa64mmfr1)},
        id::Mmfr2Register{detail::read_id_register(cpu_id_aa64mmfr2)},
        id::Mmfr3Register{0},
        id::Mmfr4Register{0},
    };
}
/// Derives the CPU feature set from the ID registers returned by read_id_registers().
inline CpuFeatures detect_features()
{
    const std::optional<id::IdRegisters> id_regs = read_id_registers();
    return detect_features_via_id_registers(*id_regs);
}
} // namespace oaknut

View file

@ -0,0 +1,99 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#ifndef WIN32_LEAN_AND_MEAN
# define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <processthreadsapi.h>
#include "oaknut/feature_detection/cpu_feature.hpp"
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut {
namespace detail {
inline std::optional<std::uint64_t> read_registry_hklm(const std::string& subkey, const std::string& name)
{
std::uint64_t value;
DWORD value_len = sizeof(value);
if (::RegGetValueA(HKEY_LOCAL_MACHINE, subkey.c_str(), name.c_str(), RRF_RT_REG_QWORD, nullptr, &value, &value_len) == ERROR_SUCCESS) {
return value;
}
return std::nullopt;
}
/// Reads the registry value "CP <name>" from the CentralProcessor key of one core.
///
/// @param core_index  Index appended to the CentralProcessor key path.
/// @param name        Suffix appended to "CP " to form the value name.
/// @return The stored value, or 0 when the registry value cannot be read.
inline std::uint64_t read_id_register(std::size_t core_index, const std::string& name)
{
    const std::string processor_key = "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\" + std::to_string(core_index);
    const std::string value_name = "CP " + name;

    const std::optional<std::uint64_t> stored = read_registry_hklm(processor_key, value_name);
    return stored.value_or(0);
}
} // namespace detail
// Ref: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent
/// Builds a feature set from the answers of IsProcessorFeaturePresent().
///
/// The feature identifiers are spelled numerically; the matching PF_* name is
/// given next to each constant.
inline CpuFeatures detect_features_via_IsProcessorFeaturePresent()
{
    constexpr DWORD pf_crypto = 30;  // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_crc32 = 31;   // PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_atomic = 34;  // PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_dp = 43;      // PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_jscvt = 44;   // PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE
    constexpr DWORD pf_lrcpc = 45;   // PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE

    CpuFeatures detected;

    if (::IsProcessorFeaturePresent(pf_crypto)) {
        detected |= CpuFeatures{CpuFeature::AES, CpuFeature::PMULL, CpuFeature::SHA1, CpuFeature::SHA256};
    }
    if (::IsProcessorFeaturePresent(pf_crc32)) {
        detected |= CpuFeatures{CpuFeature::CRC32};
    }
    if (::IsProcessorFeaturePresent(pf_atomic)) {
        detected |= CpuFeatures{CpuFeature::LSE};
    }
    if (::IsProcessorFeaturePresent(pf_dp)) {
        detected |= CpuFeatures{CpuFeature::DotProd};
    }
    if (::IsProcessorFeaturePresent(pf_jscvt)) {
        detected |= CpuFeatures{CpuFeature::JSCVT};
    }
    if (::IsProcessorFeaturePresent(pf_lrcpc)) {
        detected |= CpuFeatures{CpuFeature::LRCPC};
    }

    return detected;
}
/// Returns FP and ASIMD plus whatever detect_features_via_IsProcessorFeaturePresent() reports.
inline CpuFeatures detect_features()
{
    CpuFeatures features{CpuFeature::FP, CpuFeature::ASIMD};
    features |= detect_features_via_IsProcessorFeaturePresent();
    return features;
}
inline std::size_t get_core_count()
{
::SYSTEM_INFO sys_info;
::GetSystemInfo(&sys_info);
return sys_info.dwNumberOfProcessors;
}
/// Assembles the ID register set of one core from its "CP xxxx" registry values.
///
/// @param core_index  Core whose CentralProcessor registry key is consulted.
/// @return Always an engaged optional; a value that cannot be read is reported
///         as 0 by detail::read_id_register().
inline std::optional<id::IdRegisters> read_id_registers(std::size_t core_index)
{
    // Shorthand for "read value `CP <encoding>` of this core".
    const auto cp = [core_index](const char* encoding) -> std::uint64_t {
        return detail::read_id_register(core_index, encoding);
    };

    return id::IdRegisters{
        cp("4000"),
        id::Pfr0Register{cp("4020")},
        id::Pfr1Register{cp("4021")},
        id::Pfr2Register{cp("4022")},
        id::Zfr0Register{cp("4024")},
        id::Smfr0Register{cp("4025")},
        id::Isar0Register{cp("4030")},
        id::Isar1Register{cp("4031")},
        id::Isar2Register{cp("4032")},
        id::Isar3Register{cp("4033")},
        id::Mmfr0Register{cp("4038")},
        id::Mmfr1Register{cp("4039")},
        id::Mmfr2Register{cp("403A")},
        id::Mmfr3Register{cp("403B")},
        id::Mmfr4Register{cp("403C")},
    };
}
} // namespace oaknut

View file

@ -0,0 +1,318 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <cstdint>
#include <optional>
namespace oaknut::id {
namespace detail {
/// Extracts the single bit at position `lsb` of `value`.
///
/// @tparam lsb   Bit position, 0..63. Larger positions are rejected at compile
///               time because shifting a 64-bit value by 64 or more is undefined.
/// @param value  Raw 64-bit register value.
/// @return 0 or 1.
template<std::size_t lsb>
constexpr unsigned extract_bit(std::uint64_t value)
{
    static_assert(lsb < 64, "bit position must lie within a 64-bit register");
    return static_cast<unsigned>((value >> lsb) & 1);
}
/// Extracts the unsigned 4-bit field occupying bits [lsb, lsb+3] of `value`.
///
/// @tparam lsb   Position of the field's least significant bit, 0..60. Larger
///               positions are rejected at compile time: the field would not
///               fit, and a shift by 64 or more is undefined.
/// @param value  Raw 64-bit register value.
/// @return The field value, 0..15.
template<std::size_t lsb>
constexpr unsigned extract_field(std::uint64_t value)
{
    static_assert(lsb <= 60, "a 4-bit field must fit inside a 64-bit register");
    return static_cast<unsigned>((value >> lsb) & 0xf);
}
/// Extracts the 4-bit field occupying bits [lsb, lsb+3] of `value` and
/// interprets it as a two's-complement number.
///
/// @tparam lsb   Position of the field's least significant bit, 0..60. Larger
///               positions are rejected at compile time.
/// @param value  Raw 64-bit register value.
/// @return The field value, -8..7 (an all-ones field yields -1).
template<std::size_t lsb>
constexpr signed extract_signed_field(std::uint64_t value)
{
    static_assert(lsb <= 60, "a 4-bit field must fit inside a 64-bit register");
    // Sign-extend with (x ^ 8) - 8 instead of shifting a negative value right,
    // so the result does not depend on implementation-defined shift behaviour.
    return (static_cast<signed>((value >> lsb) & 0xf) ^ 0x8) - 0x8;
}
} // namespace detail
// Raw 64-bit value of the AArch64 processor feature register 0 (AA64PFR0)
// with named accessors for its 4-bit fields.
// FP and AdvSIMD are extracted as signed fields, so an all-ones field reads as -1;
// every other accessor returns the unsigned field value (0..15).
// The template argument of each extract call is the field's lowest bit position.
struct Pfr0Register {
std::uint64_t value;
constexpr signed FP() const { return detail::extract_signed_field<16>(value); }
constexpr signed AdvSIMD() const { return detail::extract_signed_field<20>(value); }
constexpr unsigned GIC() const { return detail::extract_field<24>(value); }
constexpr unsigned RAS() const { return detail::extract_field<28>(value); }
constexpr unsigned SVE() const { return detail::extract_field<32>(value); }
constexpr unsigned SEL2() const { return detail::extract_field<36>(value); }
constexpr unsigned MPAM() const { return detail::extract_field<40>(value); }
constexpr unsigned AMU() const { return detail::extract_field<44>(value); }
constexpr unsigned DIT() const { return detail::extract_field<48>(value); }
constexpr unsigned RME() const { return detail::extract_field<52>(value); }
constexpr unsigned CSV2() const { return detail::extract_field<56>(value); }
constexpr unsigned CSV3() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of the AArch64 processor feature register 1 (AA64PFR1)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Pfr1Register {
std::uint64_t value;
constexpr unsigned BT() const { return detail::extract_field<0>(value); }
constexpr unsigned SSBS() const { return detail::extract_field<4>(value); }
constexpr unsigned MTE() const { return detail::extract_field<8>(value); }
constexpr unsigned RAS_frac() const { return detail::extract_field<12>(value); }
constexpr unsigned MPAM_frac() const { return detail::extract_field<16>(value); }
// [20:23] - reserved
constexpr unsigned SME() const { return detail::extract_field<24>(value); }
constexpr unsigned RNDR_trap() const { return detail::extract_field<28>(value); }
constexpr unsigned CSV2_frac() const { return detail::extract_field<32>(value); }
constexpr unsigned NMI() const { return detail::extract_field<36>(value); }
constexpr unsigned MTE_frac() const { return detail::extract_field<40>(value); }
constexpr unsigned GCS() const { return detail::extract_field<44>(value); }
constexpr unsigned THE() const { return detail::extract_field<48>(value); }
constexpr unsigned MTEX() const { return detail::extract_field<52>(value); }
constexpr unsigned DF2() const { return detail::extract_field<56>(value); }
constexpr unsigned PFAR() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of a processor feature register with named accessors for its
// 4-bit fields (presumably ID_AA64PFR2_EL1, by analogy with the sibling structs).
// The template argument of each extract call is the field's lowest bit position.
struct Pfr2Register {
std::uint64_t value;
constexpr unsigned MTEPERM() const { return detail::extract_field<0>(value); }
constexpr unsigned MTESTOREONLY() const { return detail::extract_field<4>(value); }
constexpr unsigned MTEFAR() const { return detail::extract_field<8>(value); }
// [12:31] reserved
constexpr unsigned FPMR() const { return detail::extract_field<32>(value); }
// [36:63] reserved
};
// Raw 64-bit value of the AA64ZFR0 ID register with named accessors for its
// 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Zfr0Register {
std::uint64_t value;
constexpr unsigned SVEver() const { return detail::extract_field<0>(value); }
constexpr unsigned AES() const { return detail::extract_field<4>(value); }
// [8:15] reserved
constexpr unsigned BitPerm() const { return detail::extract_field<16>(value); }
constexpr unsigned BF16() const { return detail::extract_field<20>(value); }
constexpr unsigned B16B16() const { return detail::extract_field<24>(value); }
// [28:31] reserved
constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
// [36:39] reserved
constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
constexpr unsigned I8MM() const { return detail::extract_field<44>(value); }
// [48:51] reserved
constexpr unsigned F32MM() const { return detail::extract_field<52>(value); }
constexpr unsigned F64MM() const { return detail::extract_field<56>(value); }
// [60:63] reserved
};
// Raw 64-bit value of the AA64SMFR0 ID register with named accessors.
// Unlike the sibling structs this register mixes two field widths:
// accessors built on extract_bit return a single bit (0 or 1), while
// accessors built on extract_field return a 4-bit value (0..15).
// The template argument of each extract call is the field's lowest bit position.
struct Smfr0Register {
std::uint64_t value;
// [0:27] reserved
constexpr unsigned SF8DP2() const { return detail::extract_bit<28>(value); }
constexpr unsigned SF8DP4() const { return detail::extract_bit<29>(value); }
constexpr unsigned SF8FMA() const { return detail::extract_bit<30>(value); }
// [31] reserved
constexpr unsigned F32F32() const { return detail::extract_bit<32>(value); }
constexpr unsigned BI32I32() const { return detail::extract_bit<33>(value); }
constexpr unsigned B16F32() const { return detail::extract_bit<34>(value); }
constexpr unsigned F16F32() const { return detail::extract_bit<35>(value); }
constexpr unsigned I8I32() const { return detail::extract_field<36>(value); }
constexpr unsigned F8F32() const { return detail::extract_bit<40>(value); }
constexpr unsigned F8F16() const { return detail::extract_bit<41>(value); }
constexpr unsigned F16F16() const { return detail::extract_bit<42>(value); }
constexpr unsigned B16B16() const { return detail::extract_bit<43>(value); }
constexpr unsigned I16I32() const { return detail::extract_field<44>(value); }
constexpr unsigned F64F64() const { return detail::extract_bit<48>(value); }
// [49:51] reserved
constexpr unsigned I16I64() const { return detail::extract_field<52>(value); }
constexpr unsigned SMEver() const { return detail::extract_field<56>(value); }
constexpr unsigned LUTv2() const { return detail::extract_bit<60>(value); }
// [61:62] reserved
constexpr unsigned FA64() const { return detail::extract_bit<63>(value); }
};
// Raw 64-bit value of the AArch64 instruction set attribute register 0 (AA64ISAR0)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Isar0Register {
std::uint64_t value;
// [0:3] reserved
constexpr unsigned AES() const { return detail::extract_field<4>(value); }
constexpr unsigned SHA1() const { return detail::extract_field<8>(value); }
constexpr unsigned SHA2() const { return detail::extract_field<12>(value); }
constexpr unsigned CRC32() const { return detail::extract_field<16>(value); }
constexpr unsigned Atomic() const { return detail::extract_field<20>(value); }
constexpr unsigned TME() const { return detail::extract_field<24>(value); }
constexpr unsigned RDM() const { return detail::extract_field<28>(value); }
constexpr unsigned SHA3() const { return detail::extract_field<32>(value); }
constexpr unsigned SM3() const { return detail::extract_field<36>(value); }
constexpr unsigned SM4() const { return detail::extract_field<40>(value); }
constexpr unsigned DP() const { return detail::extract_field<44>(value); }
constexpr unsigned FHM() const { return detail::extract_field<48>(value); }
constexpr unsigned TS() const { return detail::extract_field<52>(value); }
constexpr unsigned TLB() const { return detail::extract_field<56>(value); }
constexpr unsigned RNDR() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of the AArch64 instruction set attribute register 1 (AA64ISAR1)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Isar1Register {
std::uint64_t value;
constexpr unsigned DPB() const { return detail::extract_field<0>(value); }
constexpr unsigned APA() const { return detail::extract_field<4>(value); }
constexpr unsigned API() const { return detail::extract_field<8>(value); }
constexpr unsigned JSCVT() const { return detail::extract_field<12>(value); }
constexpr unsigned FCMA() const { return detail::extract_field<16>(value); }
constexpr unsigned LRCPC() const { return detail::extract_field<20>(value); }
constexpr unsigned GPA() const { return detail::extract_field<24>(value); }
constexpr unsigned GPI() const { return detail::extract_field<28>(value); }
constexpr unsigned FRINTTS() const { return detail::extract_field<32>(value); }
constexpr unsigned SB() const { return detail::extract_field<36>(value); }
constexpr unsigned SPECRES() const { return detail::extract_field<40>(value); }
constexpr unsigned BF16() const { return detail::extract_field<44>(value); }
constexpr unsigned DGH() const { return detail::extract_field<48>(value); }
constexpr unsigned I8MM() const { return detail::extract_field<52>(value); }
constexpr unsigned XS() const { return detail::extract_field<56>(value); }
constexpr unsigned LS64() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of the AArch64 instruction set attribute register 2 (AA64ISAR2)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Isar2Register {
std::uint64_t value;
constexpr unsigned WFxT() const { return detail::extract_field<0>(value); }
constexpr unsigned RPRES() const { return detail::extract_field<4>(value); }
constexpr unsigned GPA3() const { return detail::extract_field<8>(value); }
constexpr unsigned APA3() const { return detail::extract_field<12>(value); }
constexpr unsigned MOPS() const { return detail::extract_field<16>(value); }
constexpr unsigned BC() const { return detail::extract_field<20>(value); }
constexpr unsigned PAC_frac() const { return detail::extract_field<24>(value); }
constexpr unsigned CLRBHB() const { return detail::extract_field<28>(value); }
constexpr unsigned SYSREG_128() const { return detail::extract_field<32>(value); }
constexpr unsigned SYSINSTR_128() const { return detail::extract_field<36>(value); }
constexpr unsigned PRFMSLC() const { return detail::extract_field<40>(value); }
// [44:47] reserved
constexpr unsigned RPRFM() const { return detail::extract_field<48>(value); }
constexpr unsigned CSSC() const { return detail::extract_field<52>(value); }
constexpr unsigned LUT() const { return detail::extract_field<56>(value); }
constexpr unsigned ATS1A() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of an instruction set attribute register with named accessors
// for its 4-bit fields (presumably ID_AA64ISAR3_EL1, by analogy with the sibling structs).
// The template argument of each extract call is the field's lowest bit position.
struct Isar3Register {
std::uint64_t value;
constexpr unsigned CPA() const { return detail::extract_field<0>(value); }
constexpr unsigned FAMINMAX() const { return detail::extract_field<4>(value); }
constexpr unsigned TLBIW() const { return detail::extract_field<8>(value); }
// [12:63] reserved
};
// Raw 64-bit value of the AArch64 memory model feature register 0 (AA64MMFR0)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Mmfr0Register {
std::uint64_t value;
constexpr unsigned PARange() const { return detail::extract_field<0>(value); }
constexpr unsigned ASIDBits() const { return detail::extract_field<4>(value); }
constexpr unsigned BigEnd() const { return detail::extract_field<8>(value); }
constexpr unsigned SNSMem() const { return detail::extract_field<12>(value); }
constexpr unsigned BigEndEL0() const { return detail::extract_field<16>(value); }
constexpr unsigned TGran16() const { return detail::extract_field<20>(value); }
constexpr unsigned TGran64() const { return detail::extract_field<24>(value); }
constexpr unsigned TGran4() const { return detail::extract_field<28>(value); }
constexpr unsigned TGran16_2() const { return detail::extract_field<32>(value); }
constexpr unsigned TGran64_2() const { return detail::extract_field<36>(value); }
constexpr unsigned TGran4_2() const { return detail::extract_field<40>(value); }
constexpr unsigned ExS() const { return detail::extract_field<44>(value); }
// [48:55] reserved
constexpr unsigned FGT() const { return detail::extract_field<56>(value); }
constexpr unsigned ECV() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of the AArch64 memory model feature register 1 (AA64MMFR1)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Mmfr1Register {
std::uint64_t value;
constexpr unsigned HAFDBS() const { return detail::extract_field<0>(value); }
constexpr unsigned VMIDBits() const { return detail::extract_field<4>(value); }
constexpr unsigned VH() const { return detail::extract_field<8>(value); }
constexpr unsigned HPDS() const { return detail::extract_field<12>(value); }
constexpr unsigned LO() const { return detail::extract_field<16>(value); }
constexpr unsigned PAN() const { return detail::extract_field<20>(value); }
constexpr unsigned SpecSEI() const { return detail::extract_field<24>(value); }
constexpr unsigned XNX() const { return detail::extract_field<28>(value); }
constexpr unsigned TWED() const { return detail::extract_field<32>(value); }
constexpr unsigned ETS() const { return detail::extract_field<36>(value); }
constexpr unsigned HCX() const { return detail::extract_field<40>(value); }
constexpr unsigned AFP() const { return detail::extract_field<44>(value); }
constexpr unsigned nTLBPA() const { return detail::extract_field<48>(value); }
constexpr unsigned TIDCP1() const { return detail::extract_field<52>(value); }
constexpr unsigned CMOW() const { return detail::extract_field<56>(value); }
constexpr unsigned ECBHB() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of the AArch64 memory model feature register 2 (AA64MMFR2)
// with named accessors for its 4-bit fields.
// The template argument of each extract call is the field's lowest bit position.
struct Mmfr2Register {
std::uint64_t value;
constexpr unsigned CnP() const { return detail::extract_field<0>(value); }
constexpr unsigned UAO() const { return detail::extract_field<4>(value); }
constexpr unsigned LSM() const { return detail::extract_field<8>(value); }
constexpr unsigned IESB() const { return detail::extract_field<12>(value); }
constexpr unsigned VARange() const { return detail::extract_field<16>(value); }
constexpr unsigned CCIDX() const { return detail::extract_field<20>(value); }
constexpr unsigned NV() const { return detail::extract_field<24>(value); }
constexpr unsigned ST() const { return detail::extract_field<28>(value); }
constexpr unsigned AT() const { return detail::extract_field<32>(value); }
constexpr unsigned IDS() const { return detail::extract_field<36>(value); }
constexpr unsigned FWB() const { return detail::extract_field<40>(value); }
// [44:47] reserved
constexpr unsigned TTL() const { return detail::extract_field<48>(value); }
constexpr unsigned BBM() const { return detail::extract_field<52>(value); }
constexpr unsigned EVT() const { return detail::extract_field<56>(value); }
constexpr unsigned E0PD() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of a memory model feature register with named accessors for
// its 4-bit fields (presumably ID_AA64MMFR3_EL1, by analogy with the sibling structs).
// The template argument of each extract call is the field's lowest bit position.
struct Mmfr3Register {
std::uint64_t value;
constexpr unsigned TCRX() const { return detail::extract_field<0>(value); }
constexpr unsigned SCTLRX() const { return detail::extract_field<4>(value); }
constexpr unsigned S1PIE() const { return detail::extract_field<8>(value); }
constexpr unsigned S2PIE() const { return detail::extract_field<12>(value); }
constexpr unsigned S1POE() const { return detail::extract_field<16>(value); }
constexpr unsigned S2POE() const { return detail::extract_field<20>(value); }
constexpr unsigned AIE() const { return detail::extract_field<24>(value); }
constexpr unsigned MEC() const { return detail::extract_field<28>(value); }
constexpr unsigned D128() const { return detail::extract_field<32>(value); }
constexpr unsigned D128_2() const { return detail::extract_field<36>(value); }
constexpr unsigned SNERR() const { return detail::extract_field<40>(value); }
constexpr unsigned ANERR() const { return detail::extract_field<44>(value); }
// [48:51] reserved
constexpr unsigned SDERR() const { return detail::extract_field<52>(value); }
constexpr unsigned ADERR() const { return detail::extract_field<56>(value); }
constexpr unsigned Spec_FPACC() const { return detail::extract_field<60>(value); }
};
// Raw 64-bit value of a memory model feature register with named accessors for
// its 4-bit fields (presumably ID_AA64MMFR4_EL1, by analogy with the sibling structs).
// The template argument of each extract call is the field's lowest bit position.
struct Mmfr4Register {
std::uint64_t value;
// [0:3] reserved
constexpr unsigned EIESB() const { return detail::extract_field<4>(value); }
constexpr unsigned ASID2() const { return detail::extract_field<8>(value); }
constexpr unsigned HACDBS() const { return detail::extract_field<12>(value); }
constexpr unsigned FGWTE3() const { return detail::extract_field<16>(value); }
constexpr unsigned NV_frac() const { return detail::extract_field<20>(value); }
constexpr unsigned E2H0() const { return detail::extract_field<24>(value); }
// [28:35] reserved
constexpr unsigned E3DSE() const { return detail::extract_field<36>(value); }
// [40:63] reserved
};
// Bundle of all ID register values gathered for one core.
// Member order is significant: the platform-specific readers construct this
// struct with positional aggregate initialization, so members must not be
// reordered or inserted in the middle without updating every reader.
struct IdRegisters {
// Optional because not every reader can supply it (the sysctl-by-index reader passes std::nullopt).
std::optional<std::uint64_t> midr;
Pfr0Register pfr0;
Pfr1Register pfr1;
Pfr2Register pfr2;
Zfr0Register zfr0;
Smfr0Register smfr0;
Isar0Register isar0;
Isar1Register isar1;
Isar2Register isar2;
Isar3Register isar3;
Mmfr0Register mmfr0;
Mmfr1Register mmfr1;
Mmfr2Register mmfr2;
Mmfr3Register mmfr3;
Mmfr4Register mmfr4;
};
} // namespace oaknut::id

View file

@ -0,0 +1,52 @@
#include <cstdint>
#include "oaknut/feature_detection/id_registers.hpp"
namespace oaknut::id {
/// Reads every ID register of the executing core with `mrs` instructions and
/// returns them as an IdRegisters bundle.
///
/// The registers are named by their generic `s3_<op1>_<CRn>_<CRm>_<op2>`
/// encodings rather than by mnemonic.
/// NOTE(review): whether these reads are permitted at the caller's privilege
/// level depends on the platform; callers must only use this where that holds.
inline IdRegisters read_id_registers_directly()
{
    std::uint64_t midr, pfr0, pfr1, pfr2, isar0, isar1, isar2, isar3, mmfr0, mmfr1, mmfr2, mmfr3, mmfr4, zfr0, smfr0;

// Function-local helper macro: emits one `mrs` into `var`.
#define OAKNUT_READ_REGISTER(reg, var) \
    __asm__("mrs %0, " #reg            \
            : "=r"(var))

    OAKNUT_READ_REGISTER(s3_0_c0_c0_0, midr);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_0, pfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_1, pfr1);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_2, pfr2);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_4, zfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c4_5, smfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_0, isar0);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_1, isar1);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_2, isar2);
    OAKNUT_READ_REGISTER(s3_0_c0_c6_3, isar3);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_0, mmfr0);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_1, mmfr1);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_2, mmfr2);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_3, mmfr3);
    OAKNUT_READ_REGISTER(s3_0_c0_c7_4, mmfr4);

// Undefine the macro that was actually defined above, so it does not leak
// into every translation unit that includes this header.
#undef OAKNUT_READ_REGISTER

    return IdRegisters{
        midr,
        Pfr0Register{pfr0},
        Pfr1Register{pfr1},
        Pfr2Register{pfr2},
        Zfr0Register{zfr0},
        Smfr0Register{smfr0},
        Isar0Register{isar0},
        Isar1Register{isar1},
        Isar2Register{isar2},
        Isar3Register{isar3},
        Mmfr0Register{mmfr0},
        Mmfr1Register{mmfr1},
        Mmfr2Register{mmfr2},
        Mmfr3Register{mmfr3},
        Mmfr4Register{mmfr4},
    };
}
} // namespace oaknut::id

View file

@ -0,0 +1,211 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
/// Software parallel-bit-deposit: scatters the low bits of `val` into the
/// positions of the set bits of `mask_`, lowest set bit first.
///
/// @tparam mask_  Destination bit positions.
/// @param val     Source bits; bit i of `val` is placed at the i-th lowest set
///                bit of `mask_`. Source bits beyond the mask's population count
///                are ignored.
/// @return The scattered value; bits outside `mask_` are zero.
template<std::uint32_t mask_>
static constexpr std::uint32_t pdep(std::uint32_t val)
{
    std::uint32_t deposited = 0;
    std::uint32_t remaining = mask_;
    std::uint32_t source_bit = 1;

    while (remaining != 0) {
        // Isolate the lowest destination position still unassigned.
        const std::uint32_t lowest_set = remaining & (~remaining + 1);
        if ((val & source_bit) != 0) {
            deposited |= lowest_set;
        }
        remaining ^= lowest_set;
        source_bit <<= 1;
    }

    return deposited;
}
// Generates an encode<splat>(TYPE v) overload for an operand type.
//   TYPE   - operand type accepted by the overload
//   ACCESS - expression in terms of `v` producing the raw value to encode
//            (it is converted to std::uint32_t)
//   SIZE   - number of bits the operand occupies; the overload static_asserts
//            that `splat` has exactly this many bits set
// The raw value is scattered into the set bits of `splat` via pdep.
#define OAKNUT_STD_ENCODE(TYPE, ACCESS, SIZE) \
template<std::uint32_t splat> \
std::uint32_t encode(TYPE v) \
{ \
static_assert(std::popcount(splat) == SIZE); \
return pdep<splat>(static_cast<std::uint32_t>(ACCESS)); \
}
// Register operands: the low five bits of the register index are encoded.
OAKNUT_STD_ENCODE(RReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VReg, v.index() & 31, 5)
OAKNUT_STD_ENCODE(VRegArranged, v.index() & 31, 5)
// Immediate and shift wrappers: their m_encoded member is encoded.
OAKNUT_STD_ENCODE(AddSubImm, v.m_encoded, 13)
OAKNUT_STD_ENCODE(BitImm32, v.m_encoded, 12)
OAKNUT_STD_ENCODE(BitImm64, v.m_encoded, 13)
OAKNUT_STD_ENCODE(LslShift<32>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(LslShift<64>, v.m_encoded, 12)
OAKNUT_STD_ENCODE(FImm8, v.m_encoded, 8)
OAKNUT_STD_ENCODE(RepImm, v.m_encoded, 8)
// Operands passed as `v` itself: the value converted to std::uint32_t is the encoding.
OAKNUT_STD_ENCODE(Cond, v, 4)
OAKNUT_STD_ENCODE(Rot, v, 2)
OAKNUT_STD_ENCODE(AddSubExt, v, 3)
OAKNUT_STD_ENCODE(IndexExt, v, 3)
OAKNUT_STD_ENCODE(AddSubShift, v, 2)
OAKNUT_STD_ENCODE(LogShift, v, 2)
OAKNUT_STD_ENCODE(PstateField, v, 6)
OAKNUT_STD_ENCODE(SystemReg, v, 15)
OAKNUT_STD_ENCODE(AtOp, v, 7)
OAKNUT_STD_ENCODE(BarrierOp, v, 4)
OAKNUT_STD_ENCODE(DcOp, v, 10)
OAKNUT_STD_ENCODE(IcOp, v, 10)
OAKNUT_STD_ENCODE(PrfOp, v, 5)
OAKNUT_STD_ENCODE(TlbiOp, v, 10)
/// Encodes a MovImm16 operand into the set bits of `splat`.
///
/// `splat` must have 17 or 18 bits set. With 17 bits, a value whose encoding
/// needs more than 17 bits cannot be represented and is rejected.
///
/// @throws OaknutException (ExceptionType::InvalidMovImm16) when the encoded
///         value does not fit a 17-bit field.
template<std::uint32_t splat>
std::uint32_t encode(MovImm16 v)
{
    constexpr auto field_bits = std::popcount(splat);
    static_assert(field_bits == 17 || field_bits == 18);

    if constexpr (field_bits == 17) {
        constexpr std::uint32_t representable = (1 << field_bits) - 1;
        const bool fits = (v.m_encoded & representable) == v.m_encoded;
        if (!fits)
            throw OaknutException{ExceptionType::InvalidMovImm16};
    }

    return pdep<splat>(v.m_encoded);
}
/// Encodes an Imm<imm_size> operand into the set bits of `splat`.
/// `splat` must provide at least `imm_size` bits.
template<std::uint32_t splat, std::size_t imm_size>
std::uint32_t encode(Imm<imm_size> v)
{
    static_assert(std::popcount(splat) >= imm_size);

    const auto raw = v.value();
    return pdep<splat>(raw);
}
/// Encodes a two-way ImmChoice operand; `splat` must select exactly one bit.
template<std::uint32_t splat, int A, int B>
std::uint32_t encode(ImmChoice<A, B> v)
{
    constexpr auto field_bits = std::popcount(splat);
    static_assert(field_bits == 1);

    return pdep<splat>(v.m_encoded);
}
/// Encodes a four-way ImmChoice operand; `splat` must select exactly two bits.
template<std::uint32_t splat, int A, int B, int C, int D>
std::uint32_t encode(ImmChoice<A, B, C, D> v)
{
    constexpr auto field_bits = std::popcount(splat);
    static_assert(field_bits == 2);

    return pdep<splat>(v.m_encoded);
}
/// Encodes an SOffset operand; `splat` must select exactly `size - align` bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(SOffset<size, align> v)
{
    constexpr auto field_bits = std::popcount(splat);
    static_assert(field_bits == size - align);

    return pdep<splat>(v.m_encoded);
}
/// Encodes a POffset operand; `splat` must select exactly `size - align` bits.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(POffset<size, align> v)
{
    constexpr auto field_bits = std::popcount(splat);
    static_assert(field_bits == size - align);

    return pdep<splat>(v.m_encoded);
}
/// Encodes a raw integer by scattering its low bits into the set bits of `splat`.
/// No width check is performed for this overload.
template<std::uint32_t splat>
std::uint32_t encode(std::uint32_t v)
{
    const std::uint32_t scattered = pdep<splat>(v);
    return scattered;
}
/// Encodes a List operand by encoding its `m_base` member with the same `splat`.
template<std::uint32_t splat, typename T, size_t N>
std::uint32_t encode(List<T, N> v)
{
    const auto& base = v.m_base;
    return encode<splat>(base);
}
// Encodes an AddrOffset operand into the set bits of `splat`
// (`splat` must select exactly `size - align` bits).
// The operand's payload is a variant; each alternative is handled separately:
//   - std::uint32_t: an already computed encoding, only scattered into `splat`.
//   - Label*:        if the label's offset is known, the distance from the
//                    current emission offset is encoded immediately; otherwise
//                    a writeback is queued on the label and 0 is returned for now.
//   - const void*:   the distance from the current emission address to `p` is encoded.
template<std::uint32_t splat, std::size_t size, std::size_t align>
std::uint32_t encode(AddrOffset<size, align> v)
{
static_assert(std::popcount(splat) == size - align);
// Capture-less so that it can be converted to Label::EmitFunctionType below
// and stored for a deferred writeback.
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
const std::ptrdiff_t diff = target_offset - current_offset;
return pdep<splat>(AddrOffset<size, align>::encode(diff));
};
return std::visit(detail::overloaded{
[&](std::uint32_t encoding) -> std::uint32_t {
return pdep<splat>(encoding);
},
[&](Label* label) -> std::uint32_t {
if (label->m_offset) {
return encode_fn(Policy::offset(), *label->m_offset);
}
// Label offset not known yet: record where we are, the mask ~splat and the
// encoder, and leave the operand bits zero for now.
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&](const void* p) -> std::uint32_t {
const std::ptrdiff_t diff = reinterpret_cast<std::uintptr_t>(p) - Policy::template xptr<std::uintptr_t>();
return pdep<splat>(AddrOffset<size, align>::encode(diff));
},
},
v.m_payload);
}
// Encodes a PageOffset operand into the set bits of `splat`
// (`splat` must select exactly `size` bits).
// The operand's payload is a variant; each alternative is handled separately:
//   - Label*:      if the label's offset is known, it is encoded against the
//                  current emission offset immediately; otherwise a writeback is
//                  queued on the label and 0 is returned for now.
//   - const void*: encoded against the current emission address.
template<std::uint32_t splat, std::size_t size, std::size_t shift_amount>
std::uint32_t encode(PageOffset<size, shift_amount> v)
{
static_assert(std::popcount(splat) == size);
// Capture-less so that it can be converted to Label::EmitFunctionType below
// and stored for a deferred writeback.
const auto encode_fn = [](std::ptrdiff_t current_offset, std::ptrdiff_t target_offset) {
return pdep<splat>(PageOffset<size, shift_amount>::encode(static_cast<std::uintptr_t>(current_offset), static_cast<std::uintptr_t>(target_offset)));
};
return std::visit(detail::overloaded{
[&](Label* label) -> std::uint32_t {
if (label->m_offset) {
return encode_fn(Policy::offset(), *label->m_offset);
}
// Label offset not known yet: record where we are, the mask ~splat and the
// encoder, and leave the operand bits zero for now.
label->m_wbs.emplace_back(Label::Writeback{Policy::offset(), ~splat, static_cast<Label::EmitFunctionType>(encode_fn)});
return 0u;
},
[&](const void* p) -> std::uint32_t {
// NOTE(review): the target is cast to std::ptrdiff_t here while encode_fn casts
// to std::uintptr_t - confirm against PageOffset::encode's parameter types.
return pdep<splat>(PageOffset<size, shift_amount>::encode(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::ptrdiff_t>(p)));
},
},
v.m_payload);
}
#undef OAKNUT_STD_ENCODE
/// Replaces the AddSubExt::LSL placeholder with UXTX; any other value is left untouched.
/// The register argument only selects the overload.
void addsubext_lsl_correction(AddSubExt& ext, XRegSp)
{
    if (ext != AddSubExt::LSL) {
        return;
    }
    ext = AddSubExt::UXTX;
}

/// Replaces the AddSubExt::LSL placeholder with UXTW; any other value is left untouched.
/// The register argument only selects the overload.
void addsubext_lsl_correction(AddSubExt& ext, WRegWsp)
{
    if (ext != AddSubExt::LSL) {
        return;
    }
    ext = AddSubExt::UXTW;
}

/// Replaces the AddSubExt::LSL placeholder with UXTX; any other value is left untouched.
/// The register argument only selects the overload.
void addsubext_lsl_correction(AddSubExt& ext, XReg)
{
    if (ext != AddSubExt::LSL) {
        return;
    }
    ext = AddSubExt::UXTX;
}

/// Replaces the AddSubExt::LSL placeholder with UXTW; any other value is left untouched.
/// The register argument only selects the overload.
void addsubext_lsl_correction(AddSubExt& ext, WReg)
{
    if (ext != AddSubExt::LSL) {
        return;
    }
    ext = AddSubExt::UXTW;
}
/// Checks that the extend option matches the width of `rm`.
///
/// Options whose two low bits are both set require a 64-bit register; every
/// other option requires a 32-bit register.
///
/// @throws OaknutException (ExceptionType::InvalidAddSubExt) on a mismatch, or
///         when `rm` is neither 32 nor 64 bits wide.
void addsubext_verify_reg_size(AddSubExt ext, RReg rm)
{
    const bool wants_64_bit = (static_cast<int>(ext) & 0b011) == 0b011;

    const bool ok_32 = rm.bitsize() == 32 && !wants_64_bit;
    const bool ok_64 = rm.bitsize() == 64 && wants_64_bit;
    if (!ok_32 && !ok_64) {
        throw OaknutException{ExceptionType::InvalidAddSubExt};
    }
}
/// Checks that the index extend option matches the width of `rm`.
///
/// Options with the lowest bit set require a 64-bit register; options with it
/// clear require a 32-bit register.
///
/// @throws OaknutException (ExceptionType::InvalidIndexExt) on a mismatch, or
///         when `rm` is neither 32 nor 64 bits wide.
void indexext_verify_reg_size(IndexExt ext, RReg rm)
{
    const bool wants_64_bit = (static_cast<int>(ext) & 1) == 1;

    const bool ok_32 = rm.bitsize() == 32 && !wants_64_bit;
    const bool ok_64 = rm.bitsize() == 64 && wants_64_bit;
    if (!ok_32 && !ok_64) {
        throw OaknutException{ExceptionType::InvalidIndexExt};
    }
}
/// Rejects a bit position of 32 or more when the tested register is 32 bits wide.
///
/// @throws OaknutException (ExceptionType::BitPositionOutOfRange) in that case.
void tbz_verify_reg_size(RReg rt, Imm<6> imm)
{
    const bool is_32_bit_reg = rt.bitsize() == 32;
    if (is_32_bit_reg && imm.value() >= 32) {
        throw OaknutException{ExceptionType::BitPositionOutOfRange};
    }
}

View file

@ -0,0 +1,78 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// List of CPU feature names, one OAKNUT_CPU_FEATURE(name) invocation per feature.
// OAKNUT_CPU_FEATURE is not defined in this file; the including site is
// expected to define it before inclusion - TODO confirm.
// NOTE(review): if an includer derives enumerator values or bit positions from
// this list, the order is significant - append new entries rather than reordering.
OAKNUT_CPU_FEATURE(FP)
OAKNUT_CPU_FEATURE(ASIMD)
OAKNUT_CPU_FEATURE(AES)
OAKNUT_CPU_FEATURE(PMULL)
OAKNUT_CPU_FEATURE(SHA1)
OAKNUT_CPU_FEATURE(SHA256)
OAKNUT_CPU_FEATURE(CRC32)
OAKNUT_CPU_FEATURE(LSE)
OAKNUT_CPU_FEATURE(FP16Conv)
OAKNUT_CPU_FEATURE(FP16)
OAKNUT_CPU_FEATURE(RDM)
OAKNUT_CPU_FEATURE(JSCVT)
OAKNUT_CPU_FEATURE(FCMA)
OAKNUT_CPU_FEATURE(LRCPC)
OAKNUT_CPU_FEATURE(DPB)
OAKNUT_CPU_FEATURE(SHA3)
OAKNUT_CPU_FEATURE(SM3)
OAKNUT_CPU_FEATURE(SM4)
OAKNUT_CPU_FEATURE(DotProd)
OAKNUT_CPU_FEATURE(SHA512)
OAKNUT_CPU_FEATURE(SVE)
OAKNUT_CPU_FEATURE(FHM)
OAKNUT_CPU_FEATURE(DIT)
OAKNUT_CPU_FEATURE(LSE2)
OAKNUT_CPU_FEATURE(LRCPC2)
OAKNUT_CPU_FEATURE(FlagM)
OAKNUT_CPU_FEATURE(SSBS)
OAKNUT_CPU_FEATURE(SB)
OAKNUT_CPU_FEATURE(PACA)
OAKNUT_CPU_FEATURE(PACG)
OAKNUT_CPU_FEATURE(DPB2)
OAKNUT_CPU_FEATURE(SVE2)
OAKNUT_CPU_FEATURE(SVE_AES)
OAKNUT_CPU_FEATURE(SVE_PMULL128)
OAKNUT_CPU_FEATURE(SVE_BITPERM)
OAKNUT_CPU_FEATURE(SVE_SHA3)
OAKNUT_CPU_FEATURE(SVE_SM4)
OAKNUT_CPU_FEATURE(FlagM2)
OAKNUT_CPU_FEATURE(FRINTTS)
OAKNUT_CPU_FEATURE(SVE_I8MM)
OAKNUT_CPU_FEATURE(SVE_F32MM)
OAKNUT_CPU_FEATURE(SVE_F64MM)
OAKNUT_CPU_FEATURE(SVE_BF16)
OAKNUT_CPU_FEATURE(I8MM)
OAKNUT_CPU_FEATURE(BF16)
OAKNUT_CPU_FEATURE(DGH)
OAKNUT_CPU_FEATURE(RNG)
OAKNUT_CPU_FEATURE(BTI)
OAKNUT_CPU_FEATURE(MTE)
OAKNUT_CPU_FEATURE(ECV)
OAKNUT_CPU_FEATURE(AFP)
OAKNUT_CPU_FEATURE(RPRES)
OAKNUT_CPU_FEATURE(MTE3)
OAKNUT_CPU_FEATURE(SME)
OAKNUT_CPU_FEATURE(SME_I16I64)
OAKNUT_CPU_FEATURE(SME_F64F64)
OAKNUT_CPU_FEATURE(SME_I8I32)
OAKNUT_CPU_FEATURE(SME_F16F32)
OAKNUT_CPU_FEATURE(SME_B16F32)
OAKNUT_CPU_FEATURE(SME_F32F32)
OAKNUT_CPU_FEATURE(SME_FA64)
OAKNUT_CPU_FEATURE(WFxT)
OAKNUT_CPU_FEATURE(EBF16)
OAKNUT_CPU_FEATURE(SVE_EBF16)
OAKNUT_CPU_FEATURE(CSSC)
OAKNUT_CPU_FEATURE(RPRFM)
OAKNUT_CPU_FEATURE(SVE2p1)
OAKNUT_CPU_FEATURE(SME2)
OAKNUT_CPU_FEATURE(SME2p1)
OAKNUT_CPU_FEATURE(SME_I16I32)
OAKNUT_CPU_FEATURE(SME_BI32I32)
OAKNUT_CPU_FEATURE(SME_B16B16)
OAKNUT_CPU_FEATURE(SME_F16F16)
OAKNUT_CPU_FEATURE(MOPS)
OAKNUT_CPU_FEATURE(HBC)

View file

@ -0,0 +1,310 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {
// Empty marker types with no members.
// NOTE(review): presumably used as tags to select post-/pre-indexed addressing
// overloads - confirm against the call sites.
struct PostIndexed {};
struct PreIndexed {};
/// Single-valued enumeration naming the `LSL` keyword.
enum class LslSymbol {
    LSL = 0,
};
/// Single-valued enumeration naming the `MSL` keyword.
enum class MslSymbol {
    MSL = 0,
};
/// Condition codes. The enumerator value is the 4-bit condition encoding;
/// HS and LO are alternative names for CS and CC.
enum class Cond {
    EQ = 0b0000,
    NE = 0b0001,
    CS = 0b0010,
    CC = 0b0011,
    MI = 0b0100,
    PL = 0b0101,
    VS = 0b0110,
    VC = 0b0111,
    HI = 0b1000,
    LS = 0b1001,
    GE = 0b1010,
    LT = 0b1011,
    GT = 0b1100,
    LE = 0b1101,
    AL = 0b1110,
    NV = 0b1111,
    HS = CS,
    LO = CC,
};

/// Returns the condition whose encoding differs from `c` only in the lowest bit
/// (EQ <-> NE, CS <-> CC, GE <-> LT, ...). Note that AL maps to NV and vice versa.
constexpr Cond invert(Cond c)
{
    const unsigned flipped = static_cast<unsigned>(c) ^ 1u;
    return static_cast<Cond>(flipped);
}
/// Rotation selector. The enumerator value counts quarter turns (0..3),
/// which is also the value that gets encoded.
enum class Rot {
    DEG_0 = 0,
    DEG_90 = 1,
    DEG_180 = 2,
    DEG_270 = 3,
};
/// Extend options for add/subtract operands. Values 0..7 are the real options;
/// LSL is a placeholder that stands for one of them depending on register width.
enum class AddSubExt {
    UXTB = 0,
    UXTH = 1,
    UXTW = 2,
    UXTX = 3,
    SXTB = 4,
    SXTH = 5,
    SXTW = 6,
    SXTX = 7,
    LSL = 8,  // UXTW (32-bit) or UXTX (64-bit)
};
// Extend options for an index register. The enumerator value is the 3-bit
// value that gets encoded; options with the lowest bit set are the ones
// accepted for 64-bit index registers, the others for 32-bit index registers.
enum class IndexExt {
UXTW = 0b010,
LSL = 0b011,
SXTW = 0b110,
SXTX = 0b111,
};
/// Shift kinds accepted by add/subtract operands; the enumerator value is the 2-bit encoding.
enum class AddSubShift {
    LSL = 0,
    LSR = 1,
    ASR = 2,
};
/// Shift kinds accepted by logical operands; the enumerator value is the 2-bit encoding.
enum class LogShift {
    LSL = 0,
    LSR = 1,
    ASR = 2,
    ROR = 3,
};
// PSTATE field selectors. Each value is a 6-bit encoding written as two
// 3-bit groups separated by a digit separator.
// NOTE(review): the groups presumably correspond to the op1 and op2 fields of
// the MSR (immediate) encoding - confirm against the architecture manual.
enum class PstateField {
UAO = 0b000'011, // ARMv8.2-UAO
PAN = 0b000'100, // ARMv8.1-PAN
SPSel = 0b000'101,
DIT = 0b011'010, // ARMv8.4-DIT
DAIFSet = 0b011'110,
DAIFClr = 0b011'111,
};
// System register identifiers. Every value is a 16-bit literal written as
// digit groups of 2'3'4'4'3 bits -- presumably op0'op1'CRn'CRm'op2 of the
// MRS/MSR encoding; TODO confirm against the emitter.
// Names containing `_n_` are base values for a numbered family; the trailing
// comment gives the range of n. How n is folded into the value is not shown in
// this header.
// DBGDTRRX_EL0 and DBGDTRTX_EL0 deliberately carry the same value.
enum class SystemReg {
AMCFGR_EL0 = 0b11'011'1101'0010'001,
AMCGCR_EL0 = 0b11'011'1101'0010'010,
AMCNTENCLR0_EL0 = 0b11'011'1101'0010'100,
AMCNTENCLR1_EL0 = 0b11'011'1101'0011'000,
AMCNTENSET0_EL0 = 0b11'011'1101'0010'101,
AMCNTENSET1_EL0 = 0b11'011'1101'0011'001,
AMCR_EL0 = 0b11'011'1101'0010'000,
AMEVCNTR0_n_EL0 = 0b11'011'1101'0100'000, // n = 0-3
AMEVCNTR1_n_EL0 = 0b11'011'1101'1100'000, // n = 0-15
AMEVTYPER0_n_EL0 = 0b11'011'1101'0110'000, // n = 0-3
AMEVTYPER1_n_EL0 = 0b11'011'1101'1110'000, // n = 0-15
AMUSERENR_EL0 = 0b11'011'1101'0010'011,
CNTFRQ_EL0 = 0b11'011'1110'0000'000,
CNTP_CTL_EL0 = 0b11'011'1110'0010'001,
CNTP_CVAL_EL0 = 0b11'011'1110'0010'010,
CNTP_TVAL_EL0 = 0b11'011'1110'0010'000,
CNTPCT_EL0 = 0b11'011'1110'0000'001,
CNTV_CTL_EL0 = 0b11'011'1110'0011'001,
CNTV_CVAL_EL0 = 0b11'011'1110'0011'010,
CNTV_TVAL_EL0 = 0b11'011'1110'0011'000,
CNTVCT_EL0 = 0b11'011'1110'0000'010,
CTR_EL0 = 0b11'011'0000'0000'001,
CurrentEL = 0b11'000'0100'0010'010,
DAIF = 0b11'011'0100'0010'001,
DBGDTR_EL0 = 0b10'011'0000'0100'000,
DBGDTRRX_EL0 = 0b10'011'0000'0101'000,
DBGDTRTX_EL0 = 0b10'011'0000'0101'000,
DCZID_EL0 = 0b11'011'0000'0000'111,
DIT = 0b11'011'0100'0010'101,
DLR_EL0 = 0b11'011'0100'0101'001,
DSPSR_EL0 = 0b11'011'0100'0101'000,
FPCR = 0b11'011'0100'0100'000,
FPSR = 0b11'011'0100'0100'001,
MDCCSR_EL0 = 0b10'011'0000'0001'000,
NZCV = 0b11'011'0100'0010'000,
PAN = 0b11'000'0100'0010'011,
PMCCFILTR_EL0 = 0b11'011'1110'1111'111,
PMCCNTR_EL0 = 0b11'011'1001'1101'000,
PMCEID0_EL0 = 0b11'011'1001'1100'110,
PMCEID1_EL0 = 0b11'011'1001'1100'111,
PMCNTENCLR_EL0 = 0b11'011'1001'1100'010,
PMCNTENSET_EL0 = 0b11'011'1001'1100'001,
PMCR_EL0 = 0b11'011'1001'1100'000,
PMEVCNTR_n_EL0 = 0b11'011'1110'1000'000, // n = 0-30
PMEVTYPER_n_EL0 = 0b11'011'1110'1100'000, // n = 0-30
PMOVSCLR_EL0 = 0b11'011'1001'1100'011,
PMOVSSET_EL0 = 0b11'011'1001'1110'011,
PMSELR_EL0 = 0b11'011'1001'1100'101,
PMSWINC_EL0 = 0b11'011'1001'1100'100,
PMUSERENR_EL0 = 0b11'011'1001'1110'000,
PMXEVCNTR_EL0 = 0b11'011'1001'1101'010,
PMXEVTYPER_EL0 = 0b11'011'1001'1101'001,
SP_EL0 = 0b11'000'0100'0001'000,
SPSel = 0b11'000'0100'0010'000,
SPSR_abt = 0b11'100'0100'0011'001,
SPSR_fiq = 0b11'100'0100'0011'011,
SPSR_irq = 0b11'100'0100'0011'000,
SPSR_und = 0b11'100'0100'0011'010,
TPIDR_EL0 = 0b11'011'1101'0000'010,
TPIDRRO_EL0 = 0b11'011'1101'0000'011,
UAO = 0b11'000'0100'0010'100,
};
// Operation selectors for the AT instruction family. Every value is a
// seven-bit literal written as 3'1'3 digit groups -- presumably
// op1'CRm<0>'op2; TODO confirm against the emitter. Trailing comments name the
// architecture extension that introduced the operation.
enum class AtOp {
S1E1R = 0b000'0'000,
S1E1W = 0b000'0'001,
S1E0R = 0b000'0'010,
S1E0W = 0b000'0'011,
S1E1RP = 0b000'1'000, // ARMv8.2-ATS1E1
S1E1WP = 0b000'1'001, // ARMv8.2-ATS1E1
S1E2R = 0b100'0'000,
S1E2W = 0b100'0'001,
S12E1R = 0b100'0'100,
S12E1W = 0b100'0'101,
S12E0R = 0b100'0'110,
S12E0W = 0b100'0'111,
S1E3R = 0b110'0'000,
S1E3W = 0b110'0'001,
};
// Barrier option selectors, each a four-bit value. The values 0x0, 0x4, 0x8
// and 0xC have no enumerator here.
enum class BarrierOp {
    SY = 0xF,
    ST = 0xE,
    LD = 0xD,
    ISH = 0xB,
    ISHST = 0xA,
    ISHLD = 0x9,
    NSH = 0x7,
    NSHST = 0x6,
    NSHLD = 0x5,
    OSH = 0x3,
    OSHST = 0x2,
    OSHLD = 0x1,
};
// Operation selectors for the DC instruction family. Every value is a ten-bit
// literal written as 3'4'3 digit groups -- presumably op1'CRm'op2; TODO
// confirm against the emitter.
enum class DcOp {
IVAC = 0b000'0110'001,
ISW = 0b000'0110'010,
CSW = 0b000'1010'010,
CISW = 0b000'1110'010,
ZVA = 0b011'0100'001,
CVAC = 0b011'1010'001,
CVAU = 0b011'1011'001,
CVAP = 0b011'1100'001, // ARMv8.2-DCPoP
CIVAC = 0b011'1110'001,
};
// Operation selectors for the IC instruction family, using the same 3'4'3
// digit grouping as DcOp (presumably op1'CRm'op2 -- TODO confirm).
enum class IcOp {
IALLUIS = 0b000'0001'000,
IALLU = 0b000'0101'000,
IVAU = 0b011'0101'001,
};
// Prefetch operation selectors. Every value is a five-bit literal written as
// 2'2'1 digit groups. From the names and values: the first group distinguishes
// PLD (00), PLI (01) and PST (10); the second group is the level minus one
// (L1 = 00, L2 = 01, L3 = 10); the last bit is 0 for KEEP and 1 for STRM.
enum class PrfOp {
PLDL1KEEP = 0b00'00'0,
PLDL1STRM = 0b00'00'1,
PLDL2KEEP = 0b00'01'0,
PLDL2STRM = 0b00'01'1,
PLDL3KEEP = 0b00'10'0,
PLDL3STRM = 0b00'10'1,
PLIL1KEEP = 0b01'00'0,
PLIL1STRM = 0b01'00'1,
PLIL2KEEP = 0b01'01'0,
PLIL2STRM = 0b01'01'1,
PLIL3KEEP = 0b01'10'0,
PLIL3STRM = 0b01'10'1,
PSTL1KEEP = 0b10'00'0,
PSTL1STRM = 0b10'00'1,
PSTL2KEEP = 0b10'01'0,
PSTL2STRM = 0b10'01'1,
PSTL3KEEP = 0b10'10'0,
PSTL3STRM = 0b10'10'1,
};
// Operation selectors for the TLBI instruction family. Every value is a
// ten-bit literal written as 3'4'3 digit groups -- presumably op1'CRm'op2;
// TODO confirm against the emitter. Entries are listed in ascending numeric
// order; a trailing "ARMv8.4-TLBI" comment marks operations attributed to that
// extension.
enum class TlbiOp {
VMALLE1OS = 0b000'0001'000, // ARMv8.4-TLBI
VAE1OS = 0b000'0001'001, // ARMv8.4-TLBI
ASIDE1OS = 0b000'0001'010, // ARMv8.4-TLBI
VAAE1OS = 0b000'0001'011, // ARMv8.4-TLBI
VALE1OS = 0b000'0001'101, // ARMv8.4-TLBI
VAALE1OS = 0b000'0001'111, // ARMv8.4-TLBI
RVAE1IS = 0b000'0010'001, // ARMv8.4-TLBI
RVAAE1IS = 0b000'0010'011, // ARMv8.4-TLBI
RVALE1IS = 0b000'0010'101, // ARMv8.4-TLBI
RVAALE1IS = 0b000'0010'111, // ARMv8.4-TLBI
VMALLE1IS = 0b000'0011'000,
VAE1IS = 0b000'0011'001,
ASIDE1IS = 0b000'0011'010,
VAAE1IS = 0b000'0011'011,
VALE1IS = 0b000'0011'101,
VAALE1IS = 0b000'0011'111,
RVAE1OS = 0b000'0101'001, // ARMv8.4-TLBI
RVAAE1OS = 0b000'0101'011, // ARMv8.4-TLBI
RVALE1OS = 0b000'0101'101, // ARMv8.4-TLBI
RVAALE1OS = 0b000'0101'111, // ARMv8.4-TLBI
RVAE1 = 0b000'0110'001, // ARMv8.4-TLBI
RVAAE1 = 0b000'0110'011, // ARMv8.4-TLBI
RVALE1 = 0b000'0110'101, // ARMv8.4-TLBI
RVAALE1 = 0b000'0110'111, // ARMv8.4-TLBI
VMALLE1 = 0b000'0111'000,
VAE1 = 0b000'0111'001,
ASIDE1 = 0b000'0111'010,
VAAE1 = 0b000'0111'011,
VALE1 = 0b000'0111'101,
VAALE1 = 0b000'0111'111,
IPAS2E1IS = 0b100'0000'001,
RIPAS2E1IS = 0b100'0000'010, // ARMv8.4-TLBI
IPAS2LE1IS = 0b100'0000'101,
RIPAS2LE1IS = 0b100'0000'110, // ARMv8.4-TLBI
ALLE2OS = 0b100'0001'000, // ARMv8.4-TLBI
VAE2OS = 0b100'0001'001, // ARMv8.4-TLBI
ALLE1OS = 0b100'0001'100, // ARMv8.4-TLBI
VALE2OS = 0b100'0001'101, // ARMv8.4-TLBI
VMALLS12E1OS = 0b100'0001'110, // ARMv8.4-TLBI
RVAE2IS = 0b100'0010'001, // ARMv8.4-TLBI
RVALE2IS = 0b100'0010'101, // ARMv8.4-TLBI
ALLE2IS = 0b100'0011'000,
VAE2IS = 0b100'0011'001,
ALLE1IS = 0b100'0011'100,
VALE2IS = 0b100'0011'101,
VMALLS12E1IS = 0b100'0011'110,
IPAS2E1OS = 0b100'0100'000, // ARMv8.4-TLBI
IPAS2E1 = 0b100'0100'001,
RIPAS2E1 = 0b100'0100'010, // ARMv8.4-TLBI
RIPAS2E1OS = 0b100'0100'011, // ARMv8.4-TLBI
IPAS2LE1OS = 0b100'0100'100, // ARMv8.4-TLBI
IPAS2LE1 = 0b100'0100'101,
RIPAS2LE1 = 0b100'0100'110, // ARMv8.4-TLBI
RIPAS2LE1OS = 0b100'0100'111, // ARMv8.4-TLBI
RVAE2OS = 0b100'0101'001, // ARMv8.4-TLBI
RVALE2OS = 0b100'0101'101, // ARMv8.4-TLBI
RVAE2 = 0b100'0110'001, // ARMv8.4-TLBI
RVALE2 = 0b100'0110'101, // ARMv8.4-TLBI
ALLE2 = 0b100'0111'000,
VAE2 = 0b100'0111'001,
ALLE1 = 0b100'0111'100,
VALE2 = 0b100'0111'101,
VMALLS12E1 = 0b100'0111'110,
ALLE3OS = 0b110'0001'000, // ARMv8.4-TLBI
VAE3OS = 0b110'0001'001, // ARMv8.4-TLBI
VALE3OS = 0b110'0001'101, // ARMv8.4-TLBI
RVAE3IS = 0b110'0010'001, // ARMv8.4-TLBI
RVALE3IS = 0b110'0010'101, // ARMv8.4-TLBI
ALLE3IS = 0b110'0011'000,
VAE3IS = 0b110'0011'001,
VALE3IS = 0b110'0011'101,
RVAE3OS = 0b110'0101'001, // ARMv8.4-TLBI
RVALE3OS = 0b110'0101'101, // ARMv8.4-TLBI
RVAE3 = 0b110'0110'001, // ARMv8.4-TLBI
RVALE3 = 0b110'0110'101, // ARMv8.4-TLBI
ALLE3 = 0b110'0111'000,
VAE3 = 0b110'0111'001,
VALE3 = 0b110'0111'101,
};
} // namespace oaknut

View file

@ -0,0 +1,319 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bit>
#include <compare>
#include <cstddef>
#include <cstdint>
#include <optional>
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
// Unsigned immediate that must fit in `bit_size_` bits (1..32).
//
// Construction from a std::uint32_t is implicit and throws
// OaknutException{ExceptionType::ImmOutOfRange} when the value has any bit set
// above the low `bit_size_` bits.
template<std::size_t bit_size_>
struct Imm {
public:
    static_assert(bit_size_ != 0 && bit_size_ <= 32, "Invalid bit_size");

    static constexpr std::size_t bit_size = bit_size_;

    // All-ones in the low `bit_size` bits. Computed in 64 bits: the previous
    // `(1 << bit_size) - 1` shifted an int by its full width for bit_size == 32
    // and overflowed a signed int for bit_size == 31, both of which the
    // static_assert above allows.
    static constexpr std::uint32_t mask = static_cast<std::uint32_t>((std::uint64_t{1} << bit_size) - 1);

    // Throws ImmOutOfRange if `value_` does not fit in `bit_size` bits.
    constexpr /* implicit */ Imm(std::uint32_t value_)
        : m_value(value_)
    {
        if (!is_valid(value_))
            throw OaknutException{ExceptionType::ImmOutOfRange};
    }

    constexpr auto operator<=>(const Imm& other) const { return m_value <=> other.m_value; }
    // Converts `other` to an Imm first, so comparing against an out-of-range
    // integer throws rather than returning an ordering.
    constexpr auto operator<=>(std::uint32_t other) const { return operator<=>(Imm{other}); }

    constexpr std::uint32_t value() const { return m_value; }

    // True when `value_` has no bits set outside `mask`. constexpr so that the
    // constexpr constructor above can actually be evaluated at compile time.
    static constexpr bool is_valid(std::uint32_t value_)
    {
        return (value_ & mask) == value_;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_value;
};
// Selects whether an add/sub immediate is used as-is (SHL_0 = 0) or shifted
// left by 12 (SHL_12 = 1).
enum class AddSubImmShift {
    SHL_0 = 0,
    SHL_12 = 1,
};
// Immediate operand for add/sub style instructions: a 12-bit value plus a
// one-bit "shifted left by 12" flag stored at bit 12 of the encoded word.
struct AddSubImm {
public:
    // Explicit (value, shift) form. `value_` must fit in 12 bits, otherwise
    // InvalidAddSubImm is thrown.
    constexpr AddSubImm(std::uint32_t value_, AddSubImmShift shift_)
        : m_encoded(value_ | (shift_ == AddSubImmShift::SHL_12 ? std::uint32_t{0x1000} : std::uint32_t{0}))
    {
        const bool fits_in_12_bits = (value_ & 0xFFF) == value_;
        if (!fits_in_12_bits)
            throw OaknutException{ExceptionType::InvalidAddSubImm};
    }

    // Implicit form: accepts a plain integer and picks the shift automatically.
    // Values with bits only in 0..11 are stored unshifted; values with bits only
    // in 12..23 are stored as (value >> 12) with the shift flag set; anything
    // else throws InvalidAddSubImm.
    constexpr /* implicit */ AddSubImm(std::uint64_t value_)
    {
        if (!is_valid(value_))
            throw OaknutException{ExceptionType::InvalidAddSubImm};

        const bool unshifted = (value_ & 0xFFF) == value_;
        m_encoded = unshifted
                      ? static_cast<std::uint32_t>(value_)
                      : static_cast<std::uint32_t>((value_ >> 12) | 0x1000);
    }

    // True when the implicit constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        const bool low_only = (value_ & 0xFFF) == value_;
        const bool high_only = (value_ & 0xFFF000) == value_;
        return low_only || high_only;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Position of a 16-bit immediate inside a 64-bit value, numbered 0..3 in
// steps of 16 bits.
enum class MovImm16Shift {
    SHL_0 = 0,
    SHL_16 = 1,
    SHL_32 = 2,
    SHL_48 = 3,
};
// 16-bit immediate together with a halfword position (0..3). The encoded word
// keeps the 16-bit payload in bits 0..15 and the position in the bits above.
struct MovImm16 {
public:
    // Explicit (value, shift) form; cannot fail. Now constexpr, matching the
    // other immediate types in this header.
    constexpr MovImm16(std::uint16_t value_, MovImm16Shift shift_)
        : m_encoded(static_cast<std::uint32_t>(value_) | (static_cast<std::uint32_t>(shift_) << 16))
    {}

    // Implicit form: accepts a 64-bit value that has at most one non-zero
    // 16-bit halfword and works out its position. Zero encodes as 0 (the
    // default member initialiser). Throws InvalidMovImm16 when more than one
    // halfword is non-zero.
    constexpr /* implicit */ MovImm16(std::uint64_t value_)
    {
        std::uint32_t shift = 0;
        while (value_ != 0) {
            const std::uint32_t lsw = static_cast<std::uint16_t>(value_ & 0xFFFF);
            if (value_ == lsw) {
                // Only the current lowest halfword remains: done.
                m_encoded = lsw | (shift << 16);
                return;
            } else if (lsw != 0) {
                // Non-zero low halfword with more bits above it.
                throw OaknutException{ExceptionType::InvalidMovImm16};
            }
            value_ >>= 16;
            shift++;
        }
    }

    // True when the implicit constructor would accept `value_`.
    static constexpr bool is_valid(std::uint64_t value_)
    {
        return ((value_ & 0xFFFF) == value_) || ((value_ & 0xFFFF0000) == value_) || ((value_ & 0xFFFF00000000) == value_) || ((value_ & 0xFFFF000000000000) == value_);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded = 0;
};
namespace detail {
// Tries to express `value` as a rotated run of ones repeated with a
// power-of-two period, and packs the result as
//   bits 0..5  : S (low six bits)
//   bits 6..11 : R
//   bit  12    : N
// Returns std::nullopt for 0, for all-ones, and for any value that is not
// periodic with the detected element size.
// NOTE(review): this looks like the AArch64 logical-immediate N:immr:imms
// format -- confirm against the emitter that consumes the result.
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint64_t value)
{
if (value == 0 || (~value) == 0)
return std::nullopt;
// value & (value + 1) clears the trailing run of ones, so countr_zero yields
// the position of the next set bit above that run (64 if there is none).
const int rotation = std::countr_zero(value & (value + 1));
const std::uint64_t rot_value = std::rotr(value, rotation);
// Same trick on the rotated value gives the candidate element size; the
// trailing-ones count of the rotated value is the run length.
const int esize = std::countr_zero(rot_value & (rot_value + 1));
const int ones = std::countr_one(rot_value);
// The pattern must repeat every `esize` bits.
if (std::rotr(value, esize) != value)
return std::nullopt;
const int S = ((-esize) << 1) | (ones - 1);
const int R = (esize - rotation) & (esize - 1);
// N is the inverse of bit 6 of S.
const int N = (~S >> 6) & 1;
return static_cast<std::uint32_t>((S & 0b111111) | (R << 6) | (N << 12));
}
// 32-bit overload: duplicates `value` into both halves of a 64-bit word,
// encodes that, and rejects any result that has bit 12 (N) set.
constexpr std::optional<std::uint32_t> encode_bit_imm(std::uint32_t value)
{
const std::uint64_t value_u64 = (static_cast<std::uint64_t>(value) << 32) | static_cast<std::uint64_t>(value);
const auto result = encode_bit_imm(value_u64);
if (result && (*result & 0b0'111111'111111) != *result)
return std::nullopt;
return result;
}
} // namespace detail
// Bitmask immediate for 32-bit operations, stored as a 12-bit word with
// immr in bits 6..11 and imms in bits 0..5.
struct BitImm32 {
public:
    // Builds the encoding from explicit fields.
    //
    // The fields are packed as (immr << 6) | imms so that this constructor
    // produces the same layout as the value-based constructor below, whose
    // encoder returns S in bits 0..5 and R in bits 6..11. The previous body
    // had the two fields swapped, so the two constructors disagreed on where
    // each field lives.
    constexpr BitImm32(Imm<6> imms, Imm<6> immr)
        : m_encoded((immr.value() << 6) | imms.value())
    {}

    // Encodes `value` automatically. Throws InvalidBitImm32 when the value is
    // not encodable or when the encoder reports bit 12 (N) set.
    constexpr /* implicit */ BitImm32(std::uint32_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded || (*encoded & 0x1000) != 0)
            throw OaknutException{ExceptionType::InvalidBitImm32};
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Bitmask immediate for 64-bit operations, stored as a 13-bit word with N in
// bit 12, immr in bits 6..11 and imms in bits 0..5.
struct BitImm64 {
public:
    // Builds the encoding from explicit fields.
    //
    // The fields are packed as N:immr:imms so that this constructor produces
    // the same layout as the value-based constructor below, whose encoder
    // returns S in bits 0..5, R in bits 6..11 and N in bit 12. The previous
    // body had immr and imms swapped.
    constexpr BitImm64(bool N, Imm<6> imms, Imm<6> immr)
        : m_encoded((N ? 1 << 12 : 0) | (immr.value() << 6) | imms.value())
    {}

    // Encodes `value` automatically. Throws InvalidBitImm64 when the value is
    // not encodable (including 0 and all-ones).
    constexpr /* implicit */ BitImm64(std::uint64_t value)
    {
        const auto encoded = detail::encode_bit_imm(value);
        if (!encoded)
            throw OaknutException{ExceptionType::InvalidBitImm64};
        m_encoded = *encoded;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Eight-bit immediate built either from a raw byte or from
// sign / 3-bit exponent / 4-bit mantissa fields.
struct FImm8 {
public:
    // Stores the raw byte unchanged.
    constexpr explicit FImm8(std::uint8_t encoded)
        : m_encoded(encoded)
    {}

    // Packs the fields as sign (bit 7), exp (bits 4..6), mantissa (bits 0..3).
    constexpr FImm8(bool sign, Imm<3> exp, Imm<4> mantissa)
        : m_encoded((static_cast<std::uint32_t>(sign) << 7) | (exp.value() << 4) | mantissa.value())
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Wrapper around a raw eight-bit immediate; the byte is stored unchanged and
// can only be supplied explicitly.
struct RepImm {
public:
    constexpr explicit RepImm(std::uint8_t encoded)
        : m_encoded{encoded}
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Operand type that accepts exactly one integer, `A`. It stores nothing; its
// only job is to throw InvalidImmConst for any other value.
template<int A>
struct ImmConst {
    constexpr /* implicit */ ImmConst(int value)
    {
        if (value == A)
            return;
        throw OaknutException{ExceptionType::InvalidImmConst};
    }
};
// Operand type that accepts only a floating-point value comparing equal to
// zero. Anything else (including NaN, which never compares equal) throws
// InvalidImmConstFZero. Nothing is stored.
struct ImmConstFZero {
    constexpr /* implicit */ ImmConstFZero(double value)
    {
        if (value == 0)
            return;
        throw OaknutException{ExceptionType::InvalidImmConstFZero};
    }
};
// Operand type that accepts one of a fixed list of integers and stores the
// zero-based position of the match. Only the two- and four-option forms are
// defined. The first matching option wins; no match throws InvalidImmChoice.
template<int...>
struct ImmChoice;

// Two options: A -> 0, B -> 1.
template<int A, int B>
struct ImmChoice<A, B> {
    constexpr /* implicit */ ImmChoice(int value)
        : m_encoded(select(value))
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    static constexpr std::uint32_t select(int value)
    {
        if (value == A)
            return 0;
        if (value == B)
            return 1;
        throw OaknutException{ExceptionType::InvalidImmChoice};
    }

    std::uint32_t m_encoded;
};

// Four options: A -> 0, B -> 1, C -> 2, D -> 3.
template<int A, int B, int C, int D>
struct ImmChoice<A, B, C, D> {
    constexpr /* implicit */ ImmChoice(int value)
        : m_encoded(select(value))
    {}

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    static constexpr std::uint32_t select(int value)
    {
        constexpr int options[4] = {A, B, C, D};
        for (std::uint32_t slot = 0; slot < 4; ++slot) {
            if (value == options[slot])
                return slot;
        }
        throw OaknutException{ExceptionType::InvalidImmChoice};
    }

    std::uint32_t m_encoded;
};
// Unsigned operand restricted to the closed interval [Start, End]. Values
// outside the interval throw InvalidImmRange.
template<unsigned Start, unsigned End>
struct ImmRange {
    constexpr /* implicit */ ImmRange(unsigned value_)
        : m_value(value_)
    {
        const bool in_range = !(value_ < Start) && !(value_ > End);
        if (in_range)
            return;
        throw OaknutException{ExceptionType::InvalidImmRange};
    }

    constexpr unsigned value() const { return m_value; }

private:
    unsigned m_value;
};
// Left-shift amount in [0, max_value), stored pre-packed as two fields:
//   bits 6 and up : (-amount) masked with (max_value - 1)
//   low bits      : max_value - amount - 1
// Amounts of max_value or more throw LslShiftOutOfRange.
template<std::size_t max_value>
struct LslShift {
    constexpr /* implicit */ LslShift(std::size_t amount)
        : m_encoded(pack(amount))
    {
        if (amount >= max_value)
            throw OaknutException{ExceptionType::LslShiftOutOfRange};
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    // Arithmetic is done in std::size_t (so the negation wraps) and the result
    // is then truncated to 32 bits.
    static constexpr std::uint32_t pack(std::size_t amount)
    {
        const std::size_t upper = (std::size_t{0} - amount) & (max_value - 1);
        const std::size_t lower = max_value - amount - 1;
        return static_cast<std::uint32_t>((upper << 6) | lower);
    }

    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,82 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <tuple>
#include <type_traits>
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
// Forward declarations of the operand kinds a List may hold.
struct Elem;

template<typename>
struct ElemSelector;

struct VRegArranged;

namespace detail {

// Type trait: true only when T is some specialisation ElemSelector<E>.
template<typename T>
struct is_instance_of_ElemSelector : std::bool_constant<false> {};

template<typename Inner>
struct is_instance_of_ElemSelector<ElemSelector<Inner>> : std::bool_constant<true> {};

template<class T>
constexpr bool is_instance_of_ElemSelector_v = is_instance_of_ElemSelector<T>::value;

// Empty tag type used to select a constructor overload.
struct BaseOnlyTag {};

}  // namespace detail
// A list of N operands of type T, represented by its first element only.
// T must derive from VRegArranged or Elem, or be an ElemSelector<...>
// specialisation (enforced by static_assert in the constructor).
template<typename T, std::size_t N>
struct List {
// Builds the list from exactly N arguments, all of type T. The first argument
// becomes the stored base. Throws OaknutException{ExceptionType::InvalidList}
// when verify() rejects the arguments.
template<typename... U>
constexpr explicit List(U... args)
: m_base(std::get<0>(std::tie(args...)))
{
static_assert((std::is_same_v<T, U> && ...));
static_assert(sizeof...(args) == N);
static_assert(std::is_base_of_v<VRegArranged, T> || std::is_base_of_v<Elem, T> || detail::is_instance_of_ElemSelector_v<T>);
if (!verify(std::index_sequence_for<U...>{}, args...))
throw OaknutException{ExceptionType::InvalidList};
}
// Applies operator[] to the stored base and returns a list of the same
// length whose base is the result. No re-verification is done.
constexpr auto operator[](unsigned elem_index) const
{
using S = decltype(m_base[elem_index]);
return List<S, N>(detail::BaseOnlyTag{}, m_base[elem_index]);
}
private:
template<typename>
friend class BasicCodeGenerator;
template<typename, std::size_t>
friend struct List;
// Tag-dispatched constructor used by operator[]: stores the base as-is.
constexpr explicit List(detail::BaseOnlyTag, T base_)
: m_base(base_)
{}
// Checks that argument i has register index (base index + i) modulo 32, i.e.
// the registers are consecutive with wrap-around. For Elem-derived types every
// argument must also share the base's elem_index(). VRegArranged-derived types
// are queried through index(); the other two kinds through reg_index().
template<typename... U, std::size_t... indexes>
constexpr bool verify(std::index_sequence<indexes...>, U... args)
{
if constexpr (std::is_base_of_v<VRegArranged, T>) {
return (((m_base.index() + indexes) % 32 == static_cast<std::size_t>(args.index())) && ...);
} else if constexpr (std::is_base_of_v<Elem, T>) {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index()) && m_base.elem_index() == args.elem_index()) && ...);
} else {
return (((m_base.reg_index() + indexes) % 32 == static_cast<std::size_t>(args.reg_index())) && ...);
}
}
T m_base;
};
// Deduction guide: List{a, b, c} deduces T as the common type of the arguments
// and N as their count.
template<typename... U>
List(U...) -> List<std::common_type_t<U...>, sizeof...(U)>;
} // namespace oaknut

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,111 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// SQRDMLAH overloads. Each one forwards its operands to emit<> with the
// 32-character encoding template for that operand arrangement.
//
// By-element forms with an HElem: the template has only four "m" bits for the
// element register, so a register index of 16 or more is rejected with
// InvalidCombination. The element index is split across three one-bit fields:
// H = index >> 2, L = bit 1, M = bit 0.
//
// By-element forms with an SElem: the register index fills the five-bit "Mm"
// field, H = index >> 1 and L = bit 0. No range check is performed here.
void SQRDMLAH(HReg rd, HReg rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0111111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLAH(SReg rd, SReg rn, SElem em)
{
emit<"0111111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void SQRDMLAH(VReg_4H rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLAH(VReg_8H rd, VReg_8H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111101LMmmmm1101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLAH(VReg_2S rd, VReg_2S rn, SElem em)
{
emit<"0010111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void SQRDMLAH(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"0110111110LMmmmm1101H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
// Three-register forms: rd, rn and rm fill the "d", "n" and "m" fields.
void SQRDMLAH(HReg rd, HReg rn, HReg rm)
{
emit<"01111110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(SReg rd, SReg rn, SReg rm)
{
emit<"01111110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
{
emit<"00101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLAH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"01101110100mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SQRDMLSH overloads. Each one forwards its operands to emit<> with the
// 32-character encoding template for that operand arrangement.
//
// By-element forms with an HElem: the template has only four "m" bits for the
// element register, so a register index of 16 or more is rejected with
// InvalidCombination. The element index is split across three one-bit fields:
// H = index >> 2, L = bit 1, M = bit 0.
//
// By-element forms with an SElem: the register index fills the five-bit "Mm"
// field, H = index >> 1 and L = bit 0. No range check is performed here.
void SQRDMLSH(HReg rd, HReg rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0111111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLSH(SReg rd, SReg rn, SElem em)
{
emit<"0111111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void SQRDMLSH(VReg_4H rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLSH(VReg_8H rd, VReg_8H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111101LMmmmm1111H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void SQRDMLSH(VReg_2S rd, VReg_2S rn, SElem em)
{
emit<"0010111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void SQRDMLSH(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"0110111110LMmmmm1111H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
// Three-register forms: rd, rn and rm fill the "d", "n" and "m" fields.
void SQRDMLSH(HReg rd, HReg rn, HReg rm)
{
emit<"01111110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(SReg rd, SReg rn, SReg rm)
{
emit<"01111110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_2S rd, VReg_2S rn, VReg_2S rm)
{
emit<"00101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SQRDMLSH(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"01101110100mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}

View file

@ -0,0 +1,855 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// BCAX on 16B vectors: four register operands fill the "d", "n", "m" and "a"
// fields of the encoding template.
void BCAX(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
emit<"11001110001mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// EOR3 on 16B vectors: four register operands fill the "d", "n", "m" and "a"
// fields. The template differs from BCAX in a single fixed bit.
void EOR3(VReg_16B rd, VReg_16B rn, VReg_16B rm, VReg_16B ra)
{
emit<"11001110000mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// FABD for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements. The three templates differ only in their first four bits.
void FABD(HReg rd, HReg rn, HReg rm)
{
emit<"01111110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FABD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FABD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FABS for the 4H and 8H vector arrangements (two-operand form). The two
// templates differ in one leading bit.
void FABS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FABS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FACGE for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements, all three-register.
void FACGE(HReg rd, HReg rn, HReg rm)
{
emit<"01111110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FACGT for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements. Each template differs from the matching FACGE one in a single
// fixed bit.
void FACGT(HReg rd, HReg rn, HReg rm)
{
emit<"01111110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FACGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110110mmmmm001011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FADD for the 4H and 8H vector arrangements (three-register form).
void FADD(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FADD(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FADDP for H-sized operands. The first overload takes a 2H source and an
// HReg destination (two operands); the other two are three-register forms on
// the 4H and 8H arrangements.
void FADDP(HReg rd, VReg_2H rn)
{
emit<"0101111000110000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FADDP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FADDP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FCMEQ for H-sized operands, in two flavours:
//  - register compare: rd, rn, rm (HReg, 4H, 8H);
//  - compare against zero: the trailing ImmConstFZero parameter is unnamed and
//    contributes no bits to the encoding; it only selects this overload.
void FCMEQ(HReg rd, HReg rn, HReg rm)
{
emit<"01011110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMEQ(HReg rd, HReg rn, ImmConstFZero)
{
emit<"0101111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMEQ(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
emit<"0000111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMEQ(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
emit<"0100111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
// FCMGE for H-sized operands, in two flavours:
//  - register compare: rd, rn, rm (HReg, 4H, 8H);
//  - compare against zero: the trailing ImmConstFZero parameter is unnamed and
//    contributes no bits to the encoding; it only selects this overload.
void FCMGE(HReg rd, HReg rn, HReg rm)
{
emit<"01111110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGE(HReg rd, HReg rn, ImmConstFZero)
{
emit<"0111111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
emit<"0010111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
emit<"0110111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCMGT for H-sized operands, in two flavours:
//  - register compare: rd, rn, rm (HReg, 4H, 8H);
//  - compare against zero: the trailing ImmConstFZero parameter is unnamed and
//    contributes no bits to the encoding; it only selects this overload.
void FCMGT(HReg rd, HReg rn, HReg rm)
{
emit<"01111110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110110mmmmm001001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FCMGT(HReg rd, HReg rn, ImmConstFZero)
{
emit<"0101111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
emit<"0000111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMGT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
emit<"0100111011111000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCMLE for H-sized operands. Only compare-against-zero forms are provided;
// the unnamed ImmConstFZero parameter contributes no bits to the encoding.
void FCMLE(HReg rd, HReg rn, ImmConstFZero)
{
emit<"0111111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLE(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
emit<"0010111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLE(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
emit<"0110111011111000110110nnnnnddddd", "d", "n">(rd, rn);
}
// FCMLT for H-sized operands. Only compare-against-zero forms are provided;
// the unnamed ImmConstFZero parameter contributes no bits to the encoding.
void FCMLT(HReg rd, HReg rn, ImmConstFZero)
{
emit<"0101111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLT(VReg_4H rd, VReg_4H rn, ImmConstFZero)
{
emit<"0000111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
void FCMLT(VReg_8H rd, VReg_8H rn, ImmConstFZero)
{
emit<"0100111011111000111010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTAS for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). The templates differ only in their first four
// bits.
void FCVTAS(HReg rd, HReg rn)
{
emit<"0101111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTAU for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). Each template differs from the matching FCVTAS
// one in a single fixed bit.
void FCVTAU(HReg rd, HReg rn)
{
emit<"0111111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAU(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTAU(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001110010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTMS for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands).
void FCVTMS(HReg rd, HReg rn)
{
emit<"0101111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTMU for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). Each template differs from the matching FCVTMS
// one in a single fixed bit.
void FCVTMU(HReg rd, HReg rn)
{
emit<"0111111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMU(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTMU(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTNS for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands).
void FCVTNS(HReg rd, HReg rn)
{
emit<"0101111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTNU for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). Each template differs from the matching FCVTNS
// one in a single fixed bit.
void FCVTNU(HReg rd, HReg rn)
{
emit<"0111111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNU(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTNU(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTPS for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands).
void FCVTPS(HReg rd, HReg rn)
{
emit<"0101111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTPU for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). Each template differs from the matching FCVTPS
// one in a single fixed bit.
void FCVTPU(HReg rd, HReg rn)
{
emit<"0111111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPU(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTPU(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001101010nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTZS for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands).
void FCVTZS(HReg rd, HReg rn)
{
emit<"0101111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZS(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZS(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FCVTZU for H-sized operands: scalar HReg form plus the 4H and 8H vector
// arrangements (two operands). Each template differs from the matching FCVTZS
// one in a single fixed bit.
void FCVTZU(HReg rd, HReg rn)
{
emit<"0111111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZU(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
void FCVTZU(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001101110nnnnnddddd", "d", "n">(rd, rn);
}
// FDIV for the 4H and 8H vector arrangements (three-register form).
void FDIV(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FDIV(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAX for the 4H and 8H vector arrangements (three-register form).
void FMAX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNM for the 4H and 8H vector arrangements (three-register form).
void FMAXNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNMP for H-sized operands. The first overload takes a 2H source and an
// HReg destination (two operands); the other two are three-register forms on
// the 4H and 8H arrangements.
void FMAXNMP(HReg rd, VReg_2H rn)
{
emit<"0101111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXNMV: HReg destination with a 4H or 8H source vector. The two templates
// differ in one leading bit.
void FMAXNMV(HReg rd, VReg_4H rn)
{
emit<"0000111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXNMV(HReg rd, VReg_8H rn)
{
emit<"0100111000110000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FMAXP for H-sized operands. The first overload takes a 2H source and an
// HReg destination (two operands); the other two are three-register forms on
// the 4H and 8H arrangements.
void FMAXP(HReg rd, VReg_2H rn)
{
emit<"0101111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMAXP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMAXV: HReg destination with a 4H or 8H source vector. The two templates
// differ in one leading bit.
void FMAXV(HReg rd, VReg_4H rn)
{
emit<"0000111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMAXV(HReg rd, VReg_8H rn)
{
emit<"0100111000110000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FMIN for the 4H and 8H vector arrangements (three-register form). Each
// template differs from the matching FMAX one in a single fixed bit.
void FMIN(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMIN(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNM for the 4H and 8H vector arrangements (three-register form). Each
// template differs from the matching FMAXNM one in a single fixed bit.
void FMINNM(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINNM(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNMP for H-sized operands. The first overload takes a 2H source and an
// HReg destination (two operands); the other two are three-register forms on
// the 4H and 8H arrangements.
void FMINNMP(HReg rd, VReg_2H rn)
{
emit<"0101111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMINNMP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINNMP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110110mmmmm000001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINNMV: HReg destination with a 4H or 8H source vector. The two templates
// differ in one leading bit.
void FMINNMV(HReg rd, VReg_4H rn)
{
emit<"0000111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
void FMINNMV(HReg rd, VReg_8H rn)
{
emit<"0100111010110000110010nnnnnddddd", "d", "n">(rd, rn);
}
// FMINP for H-sized operands. The first overload takes a 2H source and an
// HReg destination (two operands); the other two are three-register forms on
// the 4H and 8H arrangements.
void FMINP(HReg rd, VReg_2H rn)
{
emit<"0101111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMINP(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMINP(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110110mmmmm001101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMINV: HReg destination with a 4H or 8H source vector. The two templates
// differ in one leading bit.
void FMINV(HReg rd, VReg_4H rn)
{
emit<"0000111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FMINV(HReg rd, VReg_8H rn)
{
emit<"0100111010110000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FMLA for H-sized operands.
//
// By-element forms (third operand is an HElem): the template has only four
// "m" bits for the element register, so a register index of 16 or more is
// rejected with InvalidCombination. The element index is split across three
// one-bit fields: H = index >> 2, L = bit 1, M = bit 0.
void FMLA(HReg rd, HReg rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw OaknutException{ExceptionType::InvalidCombination};
    emit<"0101111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}

// Vector by-element form, 4H arrangement. Every other H-element instruction in
// this file (e.g. SQRDMLAH, and the three-register FMLA below) takes 4H/8H
// vectors, but this encoding was previously reachable only through the
// VReg_8B overload, so 4H operands could not select it.
void FMLA(VReg_4H rd, VReg_4H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw OaknutException{ExceptionType::InvalidCombination};
    emit<"0000111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}

// Vector by-element form, 8H arrangement (see the 4H overload above).
void FMLA(VReg_8H rd, VReg_8H rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw OaknutException{ExceptionType::InvalidCombination};
    emit<"0100111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}

// Legacy spelling of the 4H by-element form, typed on VReg_8B. Kept unchanged
// so existing callers continue to compile; it emits the same bits as the
// VReg_4H overload.
void FMLA(VReg_8B rd, VReg_8B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw OaknutException{ExceptionType::InvalidCombination};
    emit<"0000111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}

// Legacy spelling of the 8H by-element form, typed on VReg_16B. Kept unchanged
// so existing callers continue to compile; it emits the same bits as the
// VReg_8H overload.
void FMLA(VReg_16B rd, VReg_16B rn, HElem em)
{
    if (em.reg_index() >= 16)
        throw OaknutException{ExceptionType::InvalidCombination};
    emit<"0100111100LMmmmm0001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}

// Three-register vector forms: rd, rn and rm fill the "d", "n" and "m" fields.
void FMLA(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
    emit<"00001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}

void FMLA(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
    emit<"01001110010mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLAL / FMLAL2 overloads: S-arranged destination, H-arranged first source.
//
// By-element forms (third operand is an HElem):
//  - throw InvalidCombination when em.reg_index() >= 16 (the register number
//    goes into a four-bit "mmmm" field);
//  - encode em.elem_index() as "H" = index >> 2, "L" = bit 1, "M" = bit 0.
void FMLAL(VReg_2S rd, VReg_2H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0000111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLAL(VReg_4S rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0100111110LMmmmm0000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLAL2(VReg_2S rd, VReg_2H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLAL2(VReg_4S rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111110LMmmmm1000H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// Vector forms: the second source is a full H-arranged register with a five-bit "m" field.
void FMLAL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
emit<"00001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
emit<"01001110001mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
emit<"00101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLAL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
emit<"01101110001mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLS overloads for H-sized operands.
//
// By-element forms (third operand is an HElem):
//  - throw InvalidCombination when em.reg_index() >= 16 (four-bit "mmmm" field);
//  - encode em.elem_index() as "H" = index >> 2, "L" = bit 1, "M" = bit 0.
void FMLS(HReg rd, HReg rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0101111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// NOTE(review): this overload and the next take VReg_8B / VReg_16B although the
// element operand is an HElem and the plain vector overloads below use
// VReg_4H / VReg_8H — confirm the arrangement types are intended.
void FMLS(VReg_8B rd, VReg_8B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0000111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLS(VReg_16B rd, VReg_16B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0100111100LMmmmm0101H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// Vector forms: all three operands share the 4H or 8H arrangement.
void FMLS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm000011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMLSL / FMLSL2 overloads: S-arranged destination, H-arranged first source.
//
// By-element forms (third operand is an HElem):
//  - throw InvalidCombination when em.reg_index() >= 16 (four-bit "mmmm" field);
//  - encode em.elem_index() as "H" = index >> 2, "L" = bit 1, "M" = bit 0.
void FMLSL(VReg_2S rd, VReg_2H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0000111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLSL(VReg_4S rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0100111110LMmmmm0100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLSL2(VReg_2S rd, VReg_2H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMLSL2(VReg_4S rd, VReg_4H rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111110LMmmmm1100H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// Vector forms: the second source is a full H-arranged register with a five-bit "m" field.
void FMLSL(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
emit<"00001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
emit<"01001110101mmmmm111011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL2(VReg_2S rd, VReg_2H rn, VReg_2H rm)
{
emit<"00101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMLSL2(VReg_4S rd, VReg_4H rn, VReg_4H rm)
{
emit<"01101110101mmmmm110011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMOV with an FImm8 immediate into a 4H or 8H destination.
// The immediate is bound to the "v" field, which the template splits into a
// three-bit and a five-bit run.
void FMOV(VReg_4H rd, FImm8 imm)
{
emit<"0000111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
}
void FMOV(VReg_8H rd, FImm8 imm)
{
emit<"0100111100000vvv111111vvvvvddddd", "d", "v">(rd, imm);
}
// FMUL overloads for H-sized operands.
//
// By-element forms (third operand is an HElem):
//  - throw InvalidCombination when em.reg_index() >= 16 (four-bit "mmmm" field);
//  - encode em.elem_index() as "H" = index >> 2, "L" = bit 1, "M" = bit 0.
void FMUL(HReg rd, HReg rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0101111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// NOTE(review): this overload and the next take VReg_8B / VReg_16B although the
// element operand is an HElem and the plain vector overloads below use
// VReg_4H / VReg_8H — confirm the arrangement types are intended.
void FMUL(VReg_8B rd, VReg_8B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0000111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMUL(VReg_16B rd, VReg_16B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0100111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// Vector forms: all three operands share the 4H or 8H arrangement.
void FMUL(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMUL(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01101110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FMULX overloads for H-sized operands.
// Three-register forms: HReg, 4H and 8H operands with a five-bit "m" field.
void FMULX(HReg rd, HReg rn, HReg rm)
{
emit<"01011110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMULX(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FMULX(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm000111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// By-element forms (third operand is an HElem):
//  - throw InvalidCombination when em.reg_index() >= 16 (four-bit "mmmm" field);
//  - encode em.elem_index() as "H" = index >> 2, "L" = bit 1, "M" = bit 0.
void FMULX(HReg rd, HReg rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0111111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// NOTE(review): this overload and the next take VReg_8B / VReg_16B although the
// element operand is an HElem and the three-register overloads above use
// VReg_4H / VReg_8H — confirm the arrangement types are intended.
void FMULX(VReg_8B rd, VReg_8B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FMULX(VReg_16B rd, VReg_16B rn, HElem em)
{
if (em.reg_index() >= 16)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111100LMmmmm1001H0nnnnnddddd", "d", "n", "m", "H", "L", "M">(rd, rn, em.reg_index(), em.elem_index() >> 2, (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// FNEG, 4H and 8H arrangements: rd and rn fill the "d" and "n" fields.
void FNEG(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
void FNEG(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111000111110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPE / FRECPS / FRECPX overloads for H-sized operands.
// Each forwards its registers to emit<>; argument order matches the field
// names listed after the bit template.

// FRECPE: two-operand forms (HReg, 4H, 8H).
void FRECPE(HReg rd, HReg rn)
{
emit<"0101111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void FRECPE(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void FRECPE(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// FRECPS: three-operand forms (HReg, 4H, 8H).
void FRECPS(HReg rd, HReg rn, HReg rm)
{
emit<"01011110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FRECPS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FRECPS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110010mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FRECPX: only an HReg form is provided here.
void FRECPX(HReg rd, HReg rn)
{
emit<"0101111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
// FRINTA / FRINTI / FRINTM / FRINTN / FRINTP / FRINTX / FRINTZ.
// Each mnemonic has a 4H and an 8H overload; within a pair the bit templates
// differ only in bit 30. rd and rn fill the "d" and "n" fields.
void FRINTA(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTA(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTI(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTI(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTM(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTM(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTN(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTN(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTP(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTP(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001100010nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTX(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTX(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTZ(VReg_4H rd, VReg_4H rn)
{
emit<"0000111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
void FRINTZ(VReg_8H rd, VReg_8H rn)
{
emit<"0100111011111001100110nnnnnddddd", "d", "n">(rd, rn);
}
// FRSQRTE (two operands) and FRSQRTS (three operands) for HReg, 4H and 8H.
// Operands are forwarded to emit<> in the order of the listed field names.
void FRSQRTE(HReg rd, HReg rn)
{
emit<"0111111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void FRSQRTE(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void FRSQRTE(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void FRSQRTS(HReg rd, HReg rn, HReg rm)
{
emit<"01011110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FRSQRTS(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FRSQRTS(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm001111nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// FSQRT, 4H and 8H arrangements: rd and rn fill the "d" and "n" fields.
void FSQRT(VReg_4H rd, VReg_4H rn)
{
emit<"0010111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
void FSQRT(VReg_8H rd, VReg_8H rn)
{
emit<"0110111011111001111110nnnnnddddd", "d", "n">(rd, rn);
}
// FSUB, 4H and 8H arrangements: rd, rn, rm fill the "d", "n", "m" fields.
void FSUB(VReg_4H rd, VReg_4H rn, VReg_4H rm)
{
emit<"00001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void FSUB(VReg_8H rd, VReg_8H rn, VReg_8H rm)
{
emit<"01001110110mmmmm000101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// RAX1 on 2D operands: rd, rn, rm fill the "d", "n", "m" fields of the template.
void RAX1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
{
emit<"11001110011mmmmm100011nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SCVTF for HReg, 4H and 8H operands: rd and rn fill the "d" and "n" fields.
void SCVTF(HReg rd, HReg rn)
{
emit<"0101111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void SCVTF(VReg_4H rd, VReg_4H rn)
{
emit<"0000111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void SCVTF(VReg_8H rd, VReg_8H rn)
{
emit<"0100111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// SDOT overloads: S-arranged destination, B-arranged sources.
//
// By-element forms (third operand is an SElem): em.reg_index() is bound to the
// combined five-bit "Mm" field, and em.elem_index() is split into
// "H" = index >> 1 and "L" = bit 0. No range check is performed here.
void SDOT(VReg_2S rd, VReg_8B rn, SElem em)
{
emit<"0000111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void SDOT(VReg_4S rd, VReg_16B rn, SElem em)
{
emit<"0100111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
// Vector forms: the second source is a full B-arranged register.
void SDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
{
emit<"00001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
{
emit<"01001110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SHA512H / SHA512H2 / SHA512SU0 / SHA512SU1.
// SHA512H and SHA512H2 take QReg rd/rn and a 2D rm; the SU0/SU1 forms take 2D
// operands throughout. Operands fill the "d", "n" (and "m") fields in order.
void SHA512H(QReg rd, QReg rn, VReg_2D rm)
{
emit<"11001110011mmmmm100000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SHA512H2(QReg rd, QReg rn, VReg_2D rm)
{
emit<"11001110011mmmmm100001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SHA512SU0(VReg_2D rd, VReg_2D rn)
{
emit<"1100111011000000100000nnnnnddddd", "d", "n">(rd, rn);
}
void SHA512SU1(VReg_2D rd, VReg_2D rn, VReg_2D rm)
{
emit<"11001110011mmmmm100010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SM3* and SM4* mnemonics, all on 4S operands.
void SM3PARTW1(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110000nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void SM3PARTW2(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110001nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// SM3SS1 takes a fourth register, ra, bound to the five-bit "a" field.
void SM3SS1(VReg_4S rd, VReg_4S rn, VReg_4S rm, VReg_4S ra)
{
emit<"11001110010mmmmm0aaaaannnnnddddd", "d", "n", "m", "a">(rd, rn, rm, ra);
}
// SM3TT1A/1B/2A/2B: the SElem operand supplies the register number ("m", five
// bits) and the element index ("i", two bits). The four templates differ only
// in bits 11:10.
void SM3TT1A(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"11001110010mmmmm10ii00nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
}
void SM3TT1B(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"11001110010mmmmm10ii01nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
}
void SM3TT2A(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"11001110010mmmmm10ii10nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
}
void SM3TT2B(VReg_4S rd, VReg_4S rn, SElem em)
{
emit<"11001110010mmmmm10ii11nnnnnddddd", "d", "n", "m", "i">(rd, rn, em.reg_index(), em.elem_index());
}
void SM4E(VReg_4S rd, VReg_4S rn)
{
emit<"1100111011000000100001nnnnnddddd", "d", "n">(rd, rn);
}
void SM4EKEY(VReg_4S rd, VReg_4S rn, VReg_4S rm)
{
emit<"11001110011mmmmm110010nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// UCVTF for HReg, 4H and 8H operands: rd and rn fill the "d" and "n" fields.
void UCVTF(HReg rd, HReg rn)
{
emit<"0111111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void UCVTF(VReg_4H rd, VReg_4H rn)
{
emit<"0010111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
void UCVTF(VReg_8H rd, VReg_8H rn)
{
emit<"0110111001111001110110nnnnnddddd", "d", "n">(rd, rn);
}
// UDOT overloads: S-arranged destination, B-arranged sources.
//
// By-element forms (third operand is an SElem): em.reg_index() is bound to the
// combined five-bit "Mm" field, and em.elem_index() is split into
// "H" = index >> 1 and "L" = bit 0. No range check is performed here.
void UDOT(VReg_2S rd, VReg_8B rn, SElem em)
{
emit<"0010111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
void UDOT(VReg_4S rd, VReg_16B rn, SElem em)
{
emit<"0110111110LMmmmm1110H0nnnnnddddd", "d", "n", "Mm", "H", "L">(rd, rn, em.reg_index(), em.elem_index() >> 1, em.elem_index() & 1);
}
// Vector forms: the second source is a full B-arranged register.
void UDOT(VReg_2S rd, VReg_8B rn, VReg_8B rm)
{
emit<"00101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
void UDOT(VReg_4S rd, VReg_16B rn, VReg_16B rm)
{
emit<"01101110100mmmmm100101nnnnnddddd", "d", "n", "m">(rd, rn, rm);
}
// XAR on 2D operands. rotate_amount is a 6-bit immediate (Imm<6>) bound to the
// six-bit "i" field; the registers fill "d", "n" and "m".
void XAR(VReg_2D rd, VReg_2D rn, VReg_2D rm, Imm<6> rotate_amount)
{
emit<"11001110100mmmmmiiiiiinnnnnddddd", "d", "n", "m", "i">(rd, rn, rm, rotate_amount);
}

View file

@ -0,0 +1,75 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// FCADD overloads for the 4H, 8H, 2S, 4S and 2D arrangements.
// Only Rot::DEG_90 and Rot::DEG_270 are accepted; any other rotation throws
// InvalidRotation. The single-bit "r" field receives the numeric value of
// `rot` shifted right by one.
void FCADD(VReg_4H rd, VReg_4H rn, VReg_4H rm, Rot rot)
{
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
throw OaknutException{ExceptionType::InvalidRotation};
emit<"00101110010mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
}
void FCADD(VReg_8H rd, VReg_8H rn, VReg_8H rm, Rot rot)
{
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
throw OaknutException{ExceptionType::InvalidRotation};
emit<"01101110010mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
}
void FCADD(VReg_2S rd, VReg_2S rn, VReg_2S rm, Rot rot)
{
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
throw OaknutException{ExceptionType::InvalidRotation};
emit<"00101110100mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
}
void FCADD(VReg_4S rd, VReg_4S rn, VReg_4S rm, Rot rot)
{
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
throw OaknutException{ExceptionType::InvalidRotation};
emit<"01101110100mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
}
void FCADD(VReg_2D rd, VReg_2D rn, VReg_2D rm, Rot rot)
{
if (rot != Rot::DEG_90 && rot != Rot::DEG_270)
throw OaknutException{ExceptionType::InvalidRotation};
emit<"01101110110mmmmm111r01nnnnnddddd", "r", "d", "n", "m">(static_cast<std::uint32_t>(rot) >> 1, rd, rn, rm);
}
// FCMLA, by-element forms.
// `rot` is bound to the two-bit "rr" field and em.reg_index() to the combined
// five-bit "Mm" field. InvalidCombination is thrown when em.reg_index() >= 16
// or when em.elem_index() reaches the per-overload limit (2, 4, 2).
// NOTE(review): "Mm" is five bits wide, so the reg_index() >= 16 restriction
// may be stricter than the encoding requires — confirm against the Arm ARM.
void FCMLA(VReg_4H rd, VReg_4H rn, HElem em, Rot rot)
{
if (em.reg_index() >= 16 || em.elem_index() >= 2)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0010111101LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
void FCMLA(VReg_8H rd, VReg_8H rn, HElem em, Rot rot)
{
if (em.reg_index() >= 16 || em.elem_index() >= 4)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111101LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), (em.elem_index() >> 1) & 1, em.elem_index() & 1);
}
// 4S form: the element index (0 or 1) goes into "H" and "L" is always 0.
void FCMLA(VReg_4S rd, VReg_4S rn, SElem em, Rot rot)
{
if (em.reg_index() >= 16 || em.elem_index() >= 2)
throw OaknutException{ExceptionType::InvalidCombination};
emit<"0110111110LMmmmm0rr1H0nnnnnddddd", "r", "d", "n", "Mm", "H", "L">(rot, rd, rn, em.reg_index(), em.elem_index() & 1, 0);
}
// FCMLA, vector forms for the 4H, 8H, 2S, 4S and 2D arrangements.
// `rot` is passed straight to the two-bit "rr" field; no rotation value is
// rejected here. The registers fill "d", "n" and "m".
void FCMLA(VReg_4H rd, VReg_4H rn, VReg_4H rm, Rot rot)
{
emit<"00101110010mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
}
void FCMLA(VReg_8H rd, VReg_8H rn, VReg_8H rm, Rot rot)
{
emit<"01101110010mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
}
void FCMLA(VReg_2S rd, VReg_2S rn, VReg_2S rm, Rot rot)
{
emit<"00101110100mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
}
void FCMLA(VReg_4S rd, VReg_4S rn, VReg_4S rm, Rot rot)
{
emit<"01101110100mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
}
void FCMLA(VReg_2D rd, VReg_2D rn, VReg_2D rm, Rot rot)
{
emit<"01101110110mmmmm110rr1nnnnnddddd", "r", "d", "n", "m">(rot, rd, rn, rm);
}
// FJCVTZS: WReg destination ("d" field), DReg source ("n" field).
void FJCVTZS(WReg wd, DReg rn)
{
emit<"0001111001111110000000nnnnnddddd", "d", "n">(wd, rn);
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,23 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// BFC for W (32-bit) and X (64-bit) destinations.
// Throws InvalidBitWidth when `width` is zero or when lsb + width would exceed
// the register size. The "r" field is the two's-complement negation of `lsb`
// reduced modulo the register size; the "s" field is width - 1.
void BFC(WReg wd, Imm<5> lsb, Imm<5> width)
{
if (width.value() == 0 || width.value() > (32 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
emit<"0011001100rrrrrrssssss11111ddddd", "d", "r", "s">(wd, (~lsb.value() + 1) & 31, width.value() - 1);
}
void BFC(XReg xd, Imm<6> lsb, Imm<6> width)
{
if (width.value() == 0 || width.value() > (64 - lsb.value()))
throw OaknutException{ExceptionType::InvalidBitWidth};
emit<"1011001101rrrrrrssssss11111ddddd", "d", "r", "s">(xd, (~lsb.value() + 1) & 63, width.value() - 1);
}
// ESB and PSB take no operands: each emits a fixed 32-bit pattern.
// The two patterns differ only in bit 5.
void ESB()
{
emit<"11010101000000110010001000011111">();
}
void PSB()
{
emit<"11010101000000110010001000111111">();
}

View file

@ -0,0 +1,159 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// AUT* family.
// Two-operand forms (AUTDA/AUTDB/AUTIA/AUTIB) pass Z = 0 and take the modifier
// register xn (XRegSp) in the "n" field. The one-operand *Z* forms pass Z = 1
// and use a template whose "n" field is hard-wired to 11111.
void AUTDA(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z110nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void AUTDB(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z111nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void AUTDZA(XReg xd)
{
emit<"110110101100000100Z11011111ddddd", "Z", "d">(1, xd);
}
void AUTDZB(XReg xd)
{
emit<"110110101100000100Z11111111ddddd", "Z", "d">(1, xd);
}
void AUTIA(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z100nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void AUTIB(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z101nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void AUTIZA(XReg xd)
{
emit<"110110101100000100Z10011111ddddd", "Z", "d">(1, xd);
}
void AUTIZB(XReg xd)
{
emit<"110110101100000100Z10111111ddddd", "Z", "d">(1, xd);
}
// BLRA* and BRA* families.
// "M" is 0 for the ...A variants and 1 for the ...B variants.
// Forms with a modifier register (xm, XRegSp) pass Z = 1; the ...Z forms pass
// Z = 0 and use a template whose low five bits are hard-wired to 11111.
void BLRAA(XReg xn, XRegSp xm)
{
emit<"1101011Z0011111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 0, xn, xm);
}
void BLRAAZ(XReg xn)
{
emit<"1101011Z0011111100001Mnnnnn11111", "Z", "M", "n">(0, 0, xn);
}
void BLRAB(XReg xn, XRegSp xm)
{
emit<"1101011Z0011111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 1, xn, xm);
}
void BLRABZ(XReg xn)
{
emit<"1101011Z0011111100001Mnnnnn11111", "Z", "M", "n">(0, 1, xn);
}
void BRAA(XReg xn, XRegSp xm)
{
emit<"1101011Z0001111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 0, xn, xm);
}
void BRAAZ(XReg xn)
{
emit<"1101011Z0001111100001Mnnnnn11111", "Z", "M", "n">(0, 0, xn);
}
void BRAB(XReg xn, XRegSp xm)
{
emit<"1101011Z0001111100001Mnnnnnmmmmm", "Z", "M", "n", "m">(1, 1, xn, xm);
}
void BRABZ(XReg xn)
{
emit<"1101011Z0001111100001Mnnnnn11111", "Z", "M", "n">(0, 1, xn);
}
// ERETAA / ERETAB: no operands; the same template is used with M = 0 for the
// A variant and M = 1 for the B variant.
void ERETAA()
{
emit<"110101101001111100001M1111111111", "M">(0);
}
void ERETAB()
{
emit<"110101101001111100001M1111111111", "M">(1);
}
// LDAPR (W and X destinations), LDAPRB and LDAPRH (W destination).
// The target register fills the "t" field and the base register xn (XRegSp)
// the "n" field; the four templates differ only in their top two bits.
void LDAPR(WReg wt, XRegSp xn)
{
emit<"1011100010111111110000nnnnnttttt", "t", "n">(wt, xn);
}
void LDAPR(XReg xt, XRegSp xn)
{
emit<"1111100010111111110000nnnnnttttt", "t", "n">(xt, xn);
}
void LDAPRB(WReg wt, XRegSp xn)
{
emit<"0011100010111111110000nnnnnttttt", "t", "n">(wt, xn);
}
void LDAPRH(WReg wt, XRegSp xn)
{
emit<"0111100010111111110000nnnnnttttt", "t", "n">(wt, xn);
}
// LDRAA / LDRAB.
// "M" is 0 for LDRAA and 1 for LDRAB. The overloads without a PreIndexed tag
// pass W = 0 and default the offset to 0; the PreIndexed overloads pass W = 1
// and require an explicit offset. The offset is an SOffset<13, 3> bound to the
// ten-bit "i" field (one bit at position 22 plus nine bits at 20:12).
void LDRAA(XReg xt, XRegSp xn, SOffset<13, 3> simm = 0)
{
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(0, 0, xt, xn, simm);
}
void LDRAB(XReg xt, XRegSp xn, SOffset<13, 3> simm = 0)
{
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(1, 0, xt, xn, simm);
}
void LDRAA(XReg xt, XRegSp xn, PreIndexed, SOffset<13, 3> simm)
{
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(0, 1, xt, xn, simm);
}
void LDRAB(XReg xt, XRegSp xn, PreIndexed, SOffset<13, 3> simm)
{
emit<"11111000Mi1iiiiiiiiiW1nnnnnttttt", "M", "W", "t", "n", "i">(1, 1, xt, xn, simm);
}
// PAC* family.
// Two-operand forms (PACDA/PACDB/PACIA/PACIB) pass Z = 0 and take the modifier
// register xn (XRegSp) in the "n" field. The one-operand *Z* forms pass Z = 1
// and use a template whose "n" field is hard-wired to 11111.
// PACGA is a separate three-register encoding with no Z field.
void PACDA(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z010nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void PACDB(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z011nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void PACDZA(XReg xd)
{
emit<"110110101100000100Z01011111ddddd", "Z", "d">(1, xd);
}
void PACDZB(XReg xd)
{
emit<"110110101100000100Z01111111ddddd", "Z", "d">(1, xd);
}
void PACGA(XReg xd, XReg xn, XRegSp xm)
{
emit<"10011010110mmmmm001100nnnnnddddd", "d", "n", "m">(xd, xn, xm);
}
void PACIA(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z000nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void PACIB(XReg xd, XRegSp xn)
{
emit<"110110101100000100Z001nnnnnddddd", "Z", "d", "n">(0, xd, xn);
}
void PACIZA(XReg xd)
{
emit<"110110101100000100Z00011111ddddd", "Z", "d">(1, xd);
}
void PACIZB(XReg xd)
{
emit<"110110101100000100Z00111111ddddd", "Z", "d">(1, xd);
}
// RETAA / RETAB: no operands; the same template is used with M = 0 for the
// A variant and M = 1 for the B variant.
void RETAA()
{
emit<"110101100101111100001M1111111111", "M">(0);
}
void RETAB()
{
emit<"110101100101111100001M1111111111", "M">(1);
}
// XPACD / XPACI: single register operand. The shared template names the low
// five-bit register field "n" (xd is bound to it); D = 1 selects XPACD and
// D = 0 selects XPACI.
void XPACD(XReg xd)
{
emit<"110110101100000101000D11111nnnnn", "D", "n">(1, xd);
}
void XPACI(XReg xd)
{
emit<"110110101100000101000D11111nnnnn", "D", "n">(0, xd);
}

View file

@ -0,0 +1,19 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut {

// A compile-time bundle of values. An object of MultiTypedName<A, B, ...> is
// implicitly convertible to decltype(A), decltype(B), ... and each conversion
// yields the corresponding value.
template<auto... Values>
struct MultiTypedName;

// Recursion terminator: an empty bundle offers no conversions.
template<>
struct MultiTypedName<> {};

// Recursive case: contribute the conversion for the first value and inherit
// the conversions for the remaining ones.
template<auto Head, auto... Tail>
struct MultiTypedName<Head, Tail...> : MultiTypedName<Tail...> {
    constexpr operator decltype(Head)() const
    {
        return Head;
    }
};

}  // namespace oaknut

View file

@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
// List of exception kinds, written as OAKNUT_EXCEPTION(identifier, message)
// invocations. The macro itself is defined by whichever file includes this
// list; the comments below group the entries by the header that uses them.

// reg.hpp: register conversion and element-index errors
OAKNUT_EXCEPTION(InvalidWSPConversion, "toW: cannot convert WSP to WReg")
OAKNUT_EXCEPTION(InvalidXSPConversion, "toX: cannot convert XSP to XReg")
OAKNUT_EXCEPTION(InvalidWZRConversion, "unexpected WZR passed into an WRegWsp")
OAKNUT_EXCEPTION(InvalidXZRConversion, "unexpected XZR passed into an XRegSp")
OAKNUT_EXCEPTION(InvalidDElem_1, "invalid DElem_1")
OAKNUT_EXCEPTION(InvalidElementIndex, "elem_index is out of range")
// imm.hpp / offset.hpp / list.hpp: immediate, offset and register-list validation
OAKNUT_EXCEPTION(InvalidAddSubImm, "invalid AddSubImm")
OAKNUT_EXCEPTION(InvalidBitImm32, "invalid BitImm32")
OAKNUT_EXCEPTION(InvalidBitImm64, "invalid BitImm64")
OAKNUT_EXCEPTION(InvalidImmChoice, "invalid ImmChoice")
OAKNUT_EXCEPTION(InvalidImmConst, "invalid ImmConst")
OAKNUT_EXCEPTION(InvalidImmConstFZero, "invalid ImmConstFZero")
OAKNUT_EXCEPTION(InvalidImmRange, "invalid ImmRange")
OAKNUT_EXCEPTION(InvalidList, "invalid List")
OAKNUT_EXCEPTION(InvalidMovImm16, "invalid MovImm16")
OAKNUT_EXCEPTION(InvalidBitWidth, "invalid width")
OAKNUT_EXCEPTION(LslShiftOutOfRange, "LslShift out of range")
OAKNUT_EXCEPTION(OffsetMisaligned, "misalignment")
OAKNUT_EXCEPTION(OffsetOutOfRange, "out of range")
OAKNUT_EXCEPTION(ImmOutOfRange, "outsized Imm value")
// arm64_encode_helpers.inc.hpp: errors raised while encoding operands
OAKNUT_EXCEPTION(InvalidAddSubExt, "invalid AddSubExt choice for rm size")
OAKNUT_EXCEPTION(InvalidIndexExt, "invalid IndexExt choice for rm size")
OAKNUT_EXCEPTION(BitPositionOutOfRange, "bit position exceeds size of rt")
OAKNUT_EXCEPTION(RequiresAbsoluteAddressesContext, "absolute addresses required")
// mnemonics_*.inc.hpp: operand combinations rejected by individual mnemonics
OAKNUT_EXCEPTION(InvalidCombination, "InvalidCombination")
OAKNUT_EXCEPTION(InvalidCond, "Cond cannot be AL or NV here")
OAKNUT_EXCEPTION(InvalidPairFirst, "Requires even register")
OAKNUT_EXCEPTION(InvalidPairSecond, "Invalid second register in pair")
OAKNUT_EXCEPTION(InvalidOperandXZR, "xzr invalid here")
OAKNUT_EXCEPTION(InvalidRotation, "Invalid rotation operand")
// oaknut.hpp: code-generator level errors
OAKNUT_EXCEPTION(InvalidAlignment, "invalid alignment")
OAKNUT_EXCEPTION(LabelRedefinition, "label already resolved")

View file

@ -0,0 +1,138 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cstddef>
#include <cstdint>
#include <variant>
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
struct Label;
namespace detail {
// Returns a 64-bit value with the low `size` bits cleared and every higher
// bit set. For size >= 64 no bits remain, so the result is 0; this is handled
// explicitly because shifting a 64-bit value by 64 or more is undefined.
constexpr std::uint64_t inverse_mask_from_size(std::size_t size)
{
    if (size >= 64)
        return 0;
    return (~std::uint64_t{0}) << size;
}
// Returns a 64-bit value with the low `size` bits set and every higher bit
// cleared. size == 0 yields 0 and size >= 64 yields all ones; both are handled
// explicitly because the plain shift `>> (64 - size)` would shift by 64 (or
// by a wrapped-around count), which is undefined.
constexpr std::uint64_t mask_from_size(std::size_t size)
{
    if (size == 0)
        return 0;
    if (size >= 64)
        return ~std::uint64_t{0};
    return (~std::uint64_t{0}) >> (64 - size);
}
// Sign-extends the low `bit_count` bits of `value` to 64 bits: bit
// (bit_count - 1) is copied into all higher bits.
// bit_count must be in [1, 64]; both bounds are enforced at compile time.
template<std::size_t bit_count>
constexpr std::uint64_t sign_extend(std::uint64_t value)
{
    static_assert(bit_count != 0, "cannot sign-extend zero-sized value");
    static_assert(bit_count <= 64, "cannot sign-extend a value wider than 64 bits");
    // Move the field's top bit into bit 63, then shift back as a signed value
    // so the sign bit is replicated on the way down.
    constexpr std::size_t shift_amount = 64 - bit_count;
    return static_cast<std::uint64_t>(static_cast<std::int64_t>(value << shift_amount) >> shift_amount);
}
} // namespace detail
// Offset operand that can be given as a byte difference, a Label, or a raw
// pointer. `bitsize` is the width of the signed byte offset that must fit;
// `alignment` is the number of low bits that must be zero (they are dropped
// from the encoded value).
template<std::size_t bitsize, std::size_t alignment>
struct AddrOffset {
    // Byte difference: validated and encoded immediately (may throw).
    AddrOffset(std::ptrdiff_t diff)
        : m_payload(encode(diff))
    {}

    // Label: a pointer to it is stored.
    AddrOffset(Label& label)
        : m_payload(&label)
    {}

    // Raw pointer: stored as given.
    AddrOffset(const void* ptr)
        : m_payload(ptr)
    {}

    // Validates `diff` and returns its encoded field value.
    // Throws OffsetOutOfRange if `diff` does not fit in a signed `bitsize`-bit
    // value, and OffsetMisaligned if any of its low `alignment` bits is set.
    static std::uint32_t encode(std::ptrdiff_t diff)
    {
        const auto raw = static_cast<std::uint64_t>(diff);

        const bool fits = detail::sign_extend<bitsize>(raw) == raw;
        if (!fits)
            throw OaknutException{ExceptionType::OffsetOutOfRange};

        const std::uint64_t aligned_part = raw & detail::inverse_mask_from_size(alignment);
        if (aligned_part != raw)
            throw OaknutException{ExceptionType::OffsetMisaligned};

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        return static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::variant<std::uint32_t, Label*, const void*> m_payload;
};
// Page-granular offset operand, given as a raw pointer or a Label.
// Addresses are compared after being shifted right by `shift_amount`; the
// resulting difference must fit in a signed `bitsize`-bit value.
template<std::size_t bitsize, std::size_t shift_amount>
struct PageOffset {
    PageOffset(const void* ptr)
        : m_payload(ptr)
    {}

    PageOffset(Label& label)
        : m_payload(&label)
    {}

    // Returns the encoded field for the page difference between `target` and
    // `current_addr`: the two lowest bits of the difference are moved to the
    // top of the `bitsize`-bit field and the remaining bits sit below them.
    // Throws OffsetOutOfRange if the difference does not fit.
    static std::uint32_t encode(std::uintptr_t current_addr, std::uintptr_t target)
    {
        const std::uint64_t delta = page_delta(current_addr, target);
        if (detail::sign_extend<bitsize>(delta) != delta)
            throw OaknutException{ExceptionType::OffsetOutOfRange};

        const std::uint64_t field = delta & detail::mask_from_size(bitsize);
        const std::uint64_t low_two = field & 3;
        const std::uint64_t upper = field >> 2;
        return static_cast<std::uint32_t>((low_two << (bitsize - 2)) | upper);
    }

    // True when encode() would accept the same pair of addresses.
    static bool valid(std::uintptr_t current_addr, std::uintptr_t target)
    {
        const std::uint64_t delta = page_delta(current_addr, target);
        return detail::sign_extend<bitsize>(delta) == delta;
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    // Signed difference of the two page numbers, returned as raw 64 bits.
    static std::uint64_t page_delta(std::uintptr_t current_addr, std::uintptr_t target)
    {
        const std::int64_t target_page = static_cast<std::int64_t>(target) >> shift_amount;
        const std::int64_t current_page = static_cast<std::int64_t>(current_addr) >> shift_amount;
        return static_cast<std::uint64_t>(target_page - current_page);
    }

    std::variant<Label*, const void*> m_payload;
};
// Signed immediate offset. The value must fit in a signed `bitsize`-bit
// integer and have its low `alignment` bits clear; the stored encoding is the
// low `bitsize` bits shifted right by `alignment`.
template<std::size_t bitsize, std::size_t alignment>
struct SOffset {
    // Throws OffsetOutOfRange or OffsetMisaligned when `offset` is not encodable.
    SOffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);

        const bool in_range = detail::sign_extend<bitsize>(raw) == raw;
        if (!in_range)
            throw OaknutException{ExceptionType::OffsetOutOfRange};

        const bool is_aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!is_aligned)
            throw OaknutException{ExceptionType::OffsetMisaligned};

        const std::uint64_t field = raw & detail::mask_from_size(bitsize);
        m_encoded = static_cast<std::uint32_t>(field >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
// Non-negative immediate offset. The value, viewed as unsigned, must not
// exceed the largest `bitsize`-bit number (so negative inputs are rejected)
// and must have its low `alignment` bits clear; the stored encoding is the
// value shifted right by `alignment`.
template<std::size_t bitsize, std::size_t alignment>
struct POffset {
    // Throws OffsetOutOfRange or OffsetMisaligned when `offset` is not encodable.
    POffset(std::int64_t offset)
    {
        const auto raw = static_cast<std::uint64_t>(offset);
        const std::uint64_t field_mask = detail::mask_from_size(bitsize);

        if (raw > field_mask)
            throw OaknutException{ExceptionType::OffsetOutOfRange};

        const bool is_aligned = (raw & detail::inverse_mask_from_size(alignment)) == raw;
        if (!is_aligned)
            throw OaknutException{ExceptionType::OffsetMisaligned};

        m_encoded = static_cast<std::uint32_t>((raw & field_mask) >> alignment);
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    std::uint32_t m_encoded;
};
} // namespace oaknut

View file

@ -0,0 +1,16 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
namespace oaknut::detail {

// Combines several callables into one object whose operator() is the overload
// set of all of them.
template<typename... Callables>
struct overloaded : Callables... {
    using Callables::operator()...;
};

// Deduction guide: lets `overloaded{f, g, ...}` deduce the callable types.
template<typename... Callables>
overloaded(Callables...) -> overloaded<Callables...>;

}  // namespace oaknut::detail

View file

@ -0,0 +1,475 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <cassert>
#include <cstddef>
#include <cstdint>
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
struct Reg;
struct RReg;
struct ZrReg;
struct WzrReg;
struct XReg;
struct WReg;
struct SpReg;
struct WspReg;
struct XRegSp;
struct XRegWsp;
struct VReg;
struct VRegArranged;
struct BReg;
struct HReg;
struct SReg;
struct DReg;
struct QReg;
struct VReg_2H;
struct VReg_8B;
struct VReg_4H;
struct VReg_2S;
struct VReg_1D;
struct VReg_16B;
struct VReg_8H;
struct VReg_4S;
struct VReg_2D;
struct VReg_1Q;
struct VRegSelector;
template<typename Elem>
struct ElemSelector;
struct BElem;
struct HElem;
struct SElem;
struct DElem;
// Common base of all register types: stores a register index, the register
// width in bits, and whether it is a vector register.
struct Reg {
private:
    std::int8_t m_index;
    std::uint8_t m_bitsize;
    bool m_is_vector;

public:
    // index_ must lie in [-1, 31] and bitsize_ must be a non-zero power of
    // two; both conditions are checked with assert only.
    constexpr explicit Reg(bool is_vector_, unsigned bitsize_, int index_)
        : m_index{static_cast<std::int8_t>(index_)}
        , m_bitsize{static_cast<std::uint8_t>(bitsize_)}
        , m_is_vector{is_vector_}
    {
        assert(index_ >= -1 && index_ <= 31);
        assert(bitsize_ != 0 && (bitsize_ & (bitsize_ - 1)) == 0 && "Bitsize must be a power of two");
    }

    constexpr int index() const
    {
        return m_index;
    }

    constexpr unsigned bitsize() const
    {
        return m_bitsize;
    }

    constexpr bool is_vector() const
    {
        return m_is_vector;
    }
};
// Non-vector register: a Reg constructed with is_vector = false whose bitsize
// is asserted to be 32 or 64.
struct RReg : public Reg {
constexpr explicit RReg(unsigned bitsize_, int index_)
: Reg(false, bitsize_, index_)
{
assert(bitsize_ == 32 || bitsize_ == 64);
}
// Conversions to the concrete X / W register types; defined out of line
// after XReg and WReg are complete.
XReg toX() const;
WReg toW() const;
template<typename Policy>
friend class BasicCodeGenerator;
};
// Tag type for the 64-bit register with index 31; XReg has an implicit
// converting constructor that accepts it.
struct ZrReg : public RReg {
constexpr explicit ZrReg()
: RReg(64, 31) {}
};
// Tag type for the 32-bit register with index 31; WReg has an implicit
// converting constructor that accepts it.
struct WzrReg : public RReg {
constexpr explicit WzrReg()
: RReg(32, 31) {}
};
// 64-bit general-purpose register. Construction from an index is explicit;
// conversion from ZrReg is implicit and yields index 31.
struct XReg : public RReg {
constexpr explicit XReg(int index_)
: RReg(64, index_) {}
constexpr /* implicit */ XReg(ZrReg)
: RReg(64, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
// 32-bit general-purpose register. Construction from an index is explicit;
// conversion from WzrReg is implicit and yields index 31.
struct WReg : public RReg {
constexpr explicit WReg(int index_)
: RReg(32, index_) {}
constexpr /* implicit */ WReg(WzrReg)
: RReg(32, 31) {}
template<typename Policy>
friend class BasicCodeGenerator;
};
/// Returns the 64-bit register with the same number as this one.
/// @throws OaknutException (InvalidXSPConversion) when the index is -1, the
///         value the stack-pointer operand types use.
inline XReg RReg::toX() const
{
    const int idx = index();
    if (idx == -1) {
        throw OaknutException{ExceptionType::InvalidXSPConversion};
    }
    return XReg{idx};
}
/// Returns the 32-bit register with the same number as this one.
/// @throws OaknutException (InvalidWSPConversion) when the index is -1, the
///         value the stack-pointer operand types use.
inline WReg RReg::toW() const
{
    const int idx = index();
    if (idx == -1) {
        throw OaknutException{ExceptionType::InvalidWSPConversion};
    }
    return WReg{idx};
}
/// Tag type for the 64-bit stack-pointer operand: a 64-bit RReg with index -1.
struct SpReg : public RReg {
    constexpr explicit SpReg()
        : RReg{64, -1}
    {}
};
/// Tag type for the 32-bit stack-pointer operand: a 32-bit RReg with index -1.
///
/// The width is 32 so that bitsize() agrees with WRegWsp, which represents
/// the same operand as RReg(32, -1) when converted from a WspReg.
struct WspReg : public RReg {
    constexpr explicit WspReg()
        : RReg(32, -1) {}
};
struct XRegSp : public RReg {
constexpr /* implict */ XRegSp(SpReg)
: RReg(64, -1) {}
constexpr /* implict */ XRegSp(XReg xr)
: RReg(64, xr.index())
{
if (xr.index() == 31)
throw OaknutException{ExceptionType::InvalidXZRConversion};
}
template<typename Policy>
friend class BasicCodeGenerator;
};
struct WRegWsp : public RReg {
constexpr /* implict */ WRegWsp(WspReg)
: RReg(32, -1) {}
constexpr /* implict */ WRegWsp(WReg wr)
: RReg(32, wr.index())
{
if (wr.index() == 31)
throw OaknutException{ExceptionType::InvalidWZRConversion};
}
template<typename Policy>
friend class BasicCodeGenerator;
};
/// A vector-file register operand viewed at a single width of 8, 16, 32, 64
/// or 128 bits.
struct VReg : public Reg {
    /// @param bitsize_ one of 8, 16, 32, 64, 128 (asserted)
    /// @param index_   register number, forwarded to Reg
    constexpr explicit VReg(unsigned bitsize_, int index_)
        : Reg{true, bitsize_, index_}
    {
        assert(bitsize_ == 128 || bitsize_ == 64 || bitsize_ == 32 || bitsize_ == 16 || bitsize_ == 8);
    }

    // Same register number viewed at another width (defined out of line).
    constexpr BReg toB() const;
    constexpr HReg toH() const;
    constexpr SReg toS() const;
    constexpr DReg toD() const;
    constexpr QReg toQ() const;

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Base of the "arranged" vector operands (VReg_8B, VReg_4S, ...): a vector
/// register together with the width of each of its lanes.
struct VRegArranged : public Reg {
protected:
    /// @param bitsize_ total width of the arrangement in bits
    /// @param index_   register number
    /// @param esize_   lane width in bits; a non-zero power of two no larger
    ///                 than bitsize_ (asserted)
    constexpr explicit VRegArranged(unsigned bitsize_, int index_, unsigned esize_)
        : Reg{true, bitsize_, index_}
        , m_esize{static_cast<std::uint8_t>(esize_)}
    {
        assert(esize_ != 0 && (esize_ & (esize_ - 1)) == 0 && "esize must be a power of two");
        assert(esize_ <= bitsize_);
    }

    template<typename Policy>
    friend class BasicCodeGenerator;

private:
    // No public accessor; only friends can read it.
    std::uint8_t m_esize;
};
/// Arrangement "2H": a 32-bit vector split into two 16-bit lanes.
struct VReg_2H : public VRegArranged {
    constexpr explicit VReg_2H(int reg_index_)
        : VRegArranged{32, reg_index_, 16}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "8B": a 64-bit vector split into eight 8-bit lanes.
struct VReg_8B : public VRegArranged {
    constexpr explicit VReg_8B(int reg_index_)
        : VRegArranged{64, reg_index_, 8}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "4H": a 64-bit vector split into four 16-bit lanes.
struct VReg_4H : public VRegArranged {
    constexpr explicit VReg_4H(int reg_index_)
        : VRegArranged{64, reg_index_, 16}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "2S": a 64-bit vector split into two 32-bit lanes.
struct VReg_2S : public VRegArranged {
    constexpr explicit VReg_2S(int reg_index_)
        : VRegArranged{64, reg_index_, 32}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "1D": a 64-bit vector holding a single 64-bit lane.
struct VReg_1D : public VRegArranged {
    constexpr explicit VReg_1D(int reg_index_)
        : VRegArranged{64, reg_index_, 64}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "16B": a 128-bit vector split into sixteen 8-bit lanes.
struct VReg_16B : public VRegArranged {
    constexpr explicit VReg_16B(int reg_index_)
        : VRegArranged{128, reg_index_, 8}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "8H": a 128-bit vector split into eight 16-bit lanes.
struct VReg_8H : public VRegArranged {
    constexpr explicit VReg_8H(int reg_index_)
        : VRegArranged{128, reg_index_, 16}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "4S": a 128-bit vector split into four 32-bit lanes.
struct VReg_4S : public VRegArranged {
    constexpr explicit VReg_4S(int reg_index_)
        : VRegArranged{128, reg_index_, 32}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "2D": a 128-bit vector split into two 64-bit lanes.
struct VReg_2D : public VRegArranged {
    constexpr explicit VReg_2D(int reg_index_)
        : VRegArranged{128, reg_index_, 64}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// Arrangement "1Q": a 128-bit vector holding a single 128-bit lane.
struct VReg_1Q : public VRegArranged {
    constexpr explicit VReg_1Q(int reg_index_)
        : VRegArranged{128, reg_index_, 128}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// One lane of a vector register: the register number, the lane width and the
/// lane index.
///
/// The derived element types pass the lane width in bytes (B = 1, H = 2,
/// S = 4, D = 8), matching the 8/16/32/64-bit widths of BReg/HReg/SReg/DReg.
struct Elem {
    /// @param esize_      lane width in bytes; must be non-zero
    /// @param reg_        vector register number
    /// @param elem_index_ lane index; must address a lane inside a 128-bit
    ///                    (16-byte) register, i.e. be below 16 / esize_
    /// @throws OaknutException (InvalidElementIndex) if the lane index is out
    ///         of range or esize_ is zero.
    constexpr explicit Elem(unsigned esize_, int reg_, unsigned elem_index_)
        : m_esize(esize_), m_reg(reg_), m_elem_index(elem_index_)
    {
        // esize_ is in bytes, so the lane count of a 128-bit register is
        // 16 / esize_ (16 B lanes, 8 H lanes, 4 S lanes, 2 D lanes).
        // The zero check also keeps the division well defined.
        if (esize_ == 0 || elem_index_ >= 16 / esize_)
            throw OaknutException{ExceptionType::InvalidElementIndex};
    }

    /// Lane width in bytes.
    constexpr unsigned esize() const { return m_esize; }
    /// Vector register number.
    constexpr int reg_index() const { return m_reg; }
    /// Lane index within the register.
    constexpr unsigned elem_index() const { return m_elem_index; }

private:
    unsigned m_esize;
    int m_reg;
    unsigned m_elem_index;
};

/// A byte (8-bit) lane; valid lane indices are 0..15.
struct BElem : public Elem {
    constexpr explicit BElem(int reg_, unsigned elem_index_)
        : Elem(1, reg_, elem_index_)
    {}
};

/// A halfword (16-bit) lane; valid lane indices are 0..7.
struct HElem : public Elem {
    constexpr explicit HElem(int reg_, unsigned elem_index_)
        : Elem(2, reg_, elem_index_)
    {}
};

/// A word (32-bit) lane; valid lane indices are 0..3.
struct SElem : public Elem {
    constexpr explicit SElem(int reg_, unsigned elem_index_)
        : Elem(4, reg_, elem_index_)
    {}
};

/// A doubleword (64-bit) lane; valid lane indices are 0..1.
struct DElem : public Elem {
    constexpr explicit DElem(int reg_, unsigned elem_index_)
        : Elem(8, reg_, elem_index_)
    {}
};
struct DElem_1 : public DElem {
constexpr /* implict */ DElem_1(DElem inner)
: DElem(inner)
{
if (inner.elem_index() != 1)
throw OaknutException{ExceptionType::InvalidDElem_1};
}
};
/// Remembers a register number and produces an element operand of type E
/// when indexed, so that `selector[i]` yields `E{reg, i}`.
template<typename E>
struct ElemSelector {
    constexpr explicit ElemSelector(int reg_index_)
        : m_reg_index{reg_index_}
    {}

    /// Register number this selector was created for.
    constexpr int reg_index() const
    {
        return m_reg_index;
    }

    /// Builds the element operand for lane `elem_index`; any range check is
    /// whatever E's constructor performs.
    constexpr E operator[](unsigned elem_index) const
    {
        return E{m_reg_index, elem_index};
    }

private:
    int m_reg_index;
};
/// An 8-bit view of a vector register.
struct BReg : public VReg {
    constexpr explicit BReg(int index_)
        : VReg{8, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// A 16-bit view of a vector register.
struct HReg : public VReg {
    constexpr explicit HReg(int index_)
        : VReg{16, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// A 32-bit view of a vector register.
struct SReg : public VReg {
    constexpr explicit SReg(int index_)
        : VReg{32, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;
};
/// A 64-bit view of a vector register, with helpers that produce lane
/// selectors and the 64-bit arrangements of the same register number.
struct DReg : public VReg {
    constexpr explicit DReg(int index_)
        : VReg{64, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;

    // Lane selectors: index the result to obtain a B/H/S/D element operand.
    constexpr ElemSelector<BElem> Belem() const { return ElemSelector<BElem>{index()}; }
    constexpr ElemSelector<HElem> Helem() const { return ElemSelector<HElem>{index()}; }
    constexpr ElemSelector<SElem> Selem() const { return ElemSelector<SElem>{index()}; }
    constexpr ElemSelector<DElem> Delem() const { return ElemSelector<DElem>{index()}; }

    // 64-bit arrangements of this register.
    constexpr VReg_8B B8() const { return VReg_8B(index()); }
    constexpr VReg_4H H4() const { return VReg_4H(index()); }
    constexpr VReg_2S S2() const { return VReg_2S(index()); }
    constexpr VReg_1D D1() const { return VReg_1D(index()); }
};
/// A 128-bit view of a vector register, with helpers that produce lane
/// selectors and the 128-bit arrangements of the same register number.
struct QReg : public VReg {
    constexpr explicit QReg(int index_)
        : VReg{128, index_}
    {}

    template<typename Policy>
    friend class BasicCodeGenerator;

    // Lane selectors: index the result to obtain a B/H/S/D element operand.
    constexpr ElemSelector<BElem> Belem() const { return ElemSelector<BElem>{index()}; }
    constexpr ElemSelector<HElem> Helem() const { return ElemSelector<HElem>{index()}; }
    constexpr ElemSelector<SElem> Selem() const { return ElemSelector<SElem>{index()}; }
    constexpr ElemSelector<DElem> Delem() const { return ElemSelector<DElem>{index()}; }

    // 128-bit arrangements of this register.
    constexpr VReg_16B B16() const { return VReg_16B(index()); }
    constexpr VReg_8H H8() const { return VReg_8H(index()); }
    constexpr VReg_4S S4() const { return VReg_4S(index()); }
    constexpr VReg_2D D2() const { return VReg_2D(index()); }
    constexpr VReg_1Q Q1() const { return VReg_1Q(index()); }
};
/// Returns the 8-bit view of the same register number.
constexpr BReg VReg::toB() const
{
    const int idx = index();
    return BReg(idx);
}
/// Returns the 16-bit view of the same register number.
constexpr HReg VReg::toH() const
{
    const int idx = index();
    return HReg(idx);
}
/// Returns the 32-bit view of the same register number.
constexpr SReg VReg::toS() const
{
    const int idx = index();
    return SReg(idx);
}
/// Returns the 64-bit view of the same register number.
constexpr DReg VReg::toD() const
{
    const int idx = index();
    return DReg(idx);
}
/// Returns the 128-bit view of the same register number.
constexpr QReg VReg::toQ() const
{
    const int idx = index();
    return QReg(idx);
}
/// A bare vector register number from which lane selectors (`B()`, `H()`, ...)
/// and arranged operands (`B8()`, `S4()`, ...) are derived.
struct VRegSelector {
    constexpr explicit VRegSelector(int reg_index)
        : m_reg_index{reg_index}
    {}

    /// Register number this selector was created for.
    constexpr int index() const
    {
        return m_reg_index;
    }

    // Lane selectors: index the result to obtain a B/H/S/D element operand.
    constexpr ElemSelector<BElem> B() const { return ElemSelector<BElem>{index()}; }
    constexpr ElemSelector<HElem> H() const { return ElemSelector<HElem>{index()}; }
    constexpr ElemSelector<SElem> S() const { return ElemSelector<SElem>{index()}; }
    constexpr ElemSelector<DElem> D() const { return ElemSelector<DElem>{index()}; }

    // 32-bit arrangement.
    constexpr VReg_2H H2() const { return VReg_2H(index()); }

    // 64-bit arrangements.
    constexpr VReg_8B B8() const { return VReg_8B(index()); }
    constexpr VReg_4H H4() const { return VReg_4H(index()); }
    constexpr VReg_2S S2() const { return VReg_2S(index()); }
    constexpr VReg_1D D1() const { return VReg_1D(index()); }

    // 128-bit arrangements.
    constexpr VReg_16B B16() const { return VReg_16B(index()); }
    constexpr VReg_8H H8() const { return VReg_8H(index()); }
    constexpr VReg_4S S4() const { return VReg_4S(index()); }
    constexpr VReg_2D D2() const { return VReg_2D(index()); }
    constexpr VReg_1Q Q1() const { return VReg_1Q(index()); }

private:
    int m_reg_index;
};
} // namespace oaknut

View file

@ -0,0 +1,42 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <algorithm>
#include <cstddef>
#include <cstdint>
namespace oaknut {
/// A fixed-size copy of a string literal (terminator included), with all
/// members public so a value can be used as a non-type template argument.
template<std::size_t N>
struct StringLiteral {
    /// Copies all N characters of `str`, including the trailing '\0'.
    constexpr StringLiteral(const char (&str)[N])
    {
        for (std::size_t pos = 0; pos < N; ++pos) {
            value[pos] = str[pos];
        }
    }

    /// Number of characters excluding the terminator.
    static constexpr std::size_t strlen = N - 1;
    /// Number of characters including the terminator.
    static constexpr std::size_t size = N;

    char value[N];
};
namespace detail {
/// Builds a 32-bit mask from a 32-character pattern string.
///
/// Character i of `haystack` (i = 0 is the leftmost) corresponds to bit
/// (31 - i) of the result; that bit is set when the character equals any of
/// the characters in `needles`.
///
/// @tparam haystack pattern of exactly 32 characters plus terminator
/// @tparam needles  set of characters to look for
/// @return the mask of matching positions
template<StringLiteral<33> haystack, StringLiteral needles>
consteval std::uint32_t find()
{
    std::uint32_t result = 0;
    for (std::size_t i = 0; i < 32; i++) {
        for (std::size_t a = 0; a < needles.strlen; a++) {
            if (haystack.value[i] == needles.value[a]) {
                // Shift an unsigned one so that setting bit 31 never goes
                // through a signed int.
                result |= std::uint32_t{1} << (31 - i);
                break;  // one match is enough to set the bit
            }
        }
    }
    return result;
}
} // namespace detail
} // namespace oaknut

View file

@ -0,0 +1,358 @@
// SPDX-FileCopyrightText: Copyright (c) 2022 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <bit>
#include <cstddef>
#include <cstdint>
#include <optional>
#include <tuple>
#include <type_traits>
#include <variant>
#include <vector>
#include "oaknut/impl/enum.hpp"
#include "oaknut/impl/imm.hpp"
#include "oaknut/impl/list.hpp"
#include "oaknut/impl/multi_typed_name.hpp"
#include "oaknut/impl/offset.hpp"
#include "oaknut/impl/overloaded.hpp"
#include "oaknut/impl/reg.hpp"
#include "oaknut/impl/string_literal.hpp"
#include "oaknut/oaknut_exception.hpp"
namespace oaknut {
/// A position in the generated code that may be referenced before it is
/// known. A label starts unbound; binding it and patching earlier references
/// is done by BasicCodeGenerator, which is a friend.
struct Label {
public:
    Label() = default;

    /// True once the label has been given an offset.
    bool is_bound() const
    {
        return static_cast<bool>(m_offset);
    }

    /// Offset the label is bound to.
    /// @throws std::bad_optional_access if the label is not bound yet.
    std::ptrdiff_t offset() const
    {
        return m_offset.value();
    }

private:
    template<typename Policy>
    friend class BasicCodeGenerator;

    /// Creates a label that is already bound to `offset`.
    explicit Label(std::ptrdiff_t offset)
        : m_offset{offset}
    {}

    /// Computes the bits to merge into an instruction word, given the offset
    /// of that word and the offset the label resolved to.
    using EmitFunctionType = std::uint32_t (*)(std::ptrdiff_t wb_offset, std::ptrdiff_t resolved_offset);

    /// One pending patch: where to write, which existing bits to keep, and
    /// how to compute the new bits.
    struct Writeback {
        std::ptrdiff_t m_wb_offset;
        std::uint32_t m_mask;
        EmitFunctionType m_fn;
    };

    std::optional<std::ptrdiff_t> m_offset;
    std::vector<Writeback> m_wbs;
};
// Instruction emitter parameterised on a storage Policy.
//
// The class derives from Policy and relies on it for offset(), append(),
// set_at_offset() and xptr<T>(). The instruction mnemonics themselves are
// pulled in textually from the *.inc.hpp files included inside the class body.
template<typename Policy>
class BasicCodeGenerator : public Policy {
public:
// Forwards both arguments unchanged to the Policy constructor.
BasicCodeGenerator(typename Policy::constructor_argument_type arg, std::uint32_t* xmem)
: Policy(arg, xmem)
{}
// Returns a new label that is already bound to the current offset.
Label l() const
{
return Label{Policy::offset()};
}
// Binds `label` to the current offset and applies every writeback recorded
// on it. Throws OaknutException (LabelRedefinition) if it is already bound.
void l(Label& label) const
{
if (label.is_bound())
throw OaknutException{ExceptionType::LabelRedefinition};
const auto target_offset = Policy::offset();
label.m_offset = target_offset;
// Each writeback computes its bits from (patch location, target) and the
// policy merges them into the stored word under the writeback's mask.
for (auto& wb : label.m_wbs) {
const std::uint32_t value = wb.m_fn(wb.m_wb_offset, target_offset);
Policy::set_at_offset(wb.m_wb_offset, value, wb.m_mask);
}
label.m_wbs.clear();
}
// Mnemonic member functions, included as text so they become members of
// this class.
#include "oaknut/impl/mnemonics_fpsimd_v8.0.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.2.inc.hpp"
#include "oaknut/impl/mnemonics_fpsimd_v8.3.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.0.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.1.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.2.inc.hpp"
#include "oaknut/impl/mnemonics_generic_v8.3.inc.hpp"
// RET with no operand: uses X30.
void RET()
{
return RET(XReg{30});
}
// Two-instruction address load: ADRP for `addr`, then ADD of the low 12
// bits of `addr`.
void ADRL(XReg xd, const void* addr)
{
ADRP(xd, addr);
ADD(xd, xd, reinterpret_cast<uint64_t>(addr) & 0xFFF);
}
// Loads a 32-bit immediate using the first form that accepts it: MOVZ,
// MOVN of the complement, ORR with a bitmask immediate, and otherwise a
// MOVZ/MOVK pair. Emits nothing when the destination index is 31.
void MOV(WReg wd, uint32_t imm)
{
if (wd.index() == 31)
return;
if (MovImm16::is_valid(imm))
return MOVZ(wd, imm);
if (MovImm16::is_valid(static_cast<std::uint32_t>(~imm)))
return MOVN(wd, static_cast<std::uint32_t>(~imm));
if (detail::encode_bit_imm(imm))
return ORR(wd, WzrReg{}, imm);
// Fallback: low halfword with MOVZ, high halfword with MOVK.
MOVZ(wd, {static_cast<std::uint16_t>(imm >> 0), MovImm16Shift::SHL_0});
MOVK(wd, {static_cast<std::uint16_t>(imm >> 16), MovImm16Shift::SHL_16});
}
// Loads a 64-bit immediate. Emits nothing when the destination index is 31.
// Values whose upper 32 bits are zero are delegated to the 32-bit overload;
// otherwise single-instruction forms are tried before falling back to a
// halfword-by-halfword sequence.
void MOV(XReg xd, uint64_t imm)
{
if (xd.index() == 31)
return;
if (imm >> 32 == 0)
return MOV(xd.toW(), static_cast<std::uint32_t>(imm));
if (MovImm16::is_valid(imm))
return MOVZ(xd, imm);
if (MovImm16::is_valid(~imm))
return MOVN(xd, ~imm);
if (detail::encode_bit_imm(imm))
return ORR(xd, ZrReg{}, imm);
// movz_done records whether the register already holds a defined value,
// so later halfwords must be inserted with MOVK instead of MOVZ.
bool movz_done = false;
int shift_count = 0;
// If the low 32 bits form a bitmask immediate, set them with one 32-bit
// ORR and continue with the upper half (halfword position 2 onwards).
if (detail::encode_bit_imm(static_cast<std::uint32_t>(imm))) {
ORR(xd.toW(), WzrReg{}, static_cast<std::uint32_t>(imm));
imm >>= 32;
movz_done = true;
shift_count = 2;
}
// Walk the remaining halfwords from low to high; zero halfwords are
// skipped.
while (imm != 0) {
const uint16_t hw = static_cast<uint16_t>(imm);
if (hw != 0) {
if (movz_done) {
MOVK(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
} else {
MOVZ(xd, {hw, static_cast<MovImm16Shift>(shift_count)});
movz_done = true;
}
}
imm >>= 16;
shift_count++;
}
}
// Convenience function for moving pointers to registers
// Chooses ADR when the target is within +/-0xFFFFF bytes of the current
// execution pointer, ADRL when PageOffset<21, 12>::valid accepts the pair,
// and a full 64-bit MOV otherwise.
void MOVP2R(XReg xd, const void* addr)
{
const int64_t diff = reinterpret_cast<std::uint64_t>(addr) - Policy::template xptr<std::uintptr_t>();
if (diff >= -0xF'FFFF && diff <= 0xF'FFFF) {
ADR(xd, addr);
} else if (PageOffset<21, 12>::valid(Policy::template xptr<std::uintptr_t>(), reinterpret_cast<std::uintptr_t>(addr))) {
ADRL(xd, addr);
} else {
MOV(xd, reinterpret_cast<uint64_t>(addr));
}
}
// Emits NOPs until the current offset is a multiple of `alignment`.
// Throws OaknutException (InvalidAlignment) unless `alignment` is a power
// of two that is at least 4.
void align(std::size_t alignment)
{
if (alignment < 4 || (alignment & (alignment - 1)) != 0)
throw OaknutException{ExceptionType::InvalidAlignment};
while (Policy::offset() & (alignment - 1)) {
NOP();
}
}
// Appends one raw 32-bit word.
void dw(std::uint32_t value)
{
Policy::append(value);
}
// Appends a raw 64-bit value as two words, low half first.
void dx(std::uint64_t value)
{
Policy::append(static_cast<std::uint32_t>(value));
Policy::append(static_cast<std::uint32_t>(value >> 32));
}
private:
// encode<...>() helpers used by emit(), included as text.
#include "oaknut/impl/arm64_encode_helpers.inc.hpp"
// Assembles and appends one instruction word.
// `bs` is the 32-character bit pattern: `base` has a bit set wherever `bs`
// contains '1'. Each name in `bargs` selects the positions in `bs` holding
// any of its characters, and the matching argument is encoded into those
// positions; all pieces are OR-ed together.
template<StringLiteral bs, StringLiteral... bargs, typename... Ts>
void emit(Ts... args)
{
constexpr std::uint32_t base = detail::find<bs, "1">();
std::uint32_t encoding = (base | ... | encode<detail::find<bs, bargs>()>(std::forward<Ts>(args)));
Policy::append(encoding);
}
};
/// Storage policy that writes instruction words through a raw pointer.
///
/// Two base addresses are tracked: `wmem`, where words are written, and
/// `xmem`, the address the same words are reported at by xptr(). Both views
/// share one cursor.
struct PointerCodeGeneratorPolicy {
public:
    /// Byte distance of the cursor from the start of the write buffer.
    std::ptrdiff_t offset() const
    {
        const auto words_from_start = m_ptr - m_wmem;
        return words_from_start * sizeof(std::uint32_t);
    }

    /// Moves the cursor to a byte offset from the start of the write buffer.
    /// @throws OaknutException (InvalidAlignment) if `offset` is not a
    ///         multiple of the word size.
    void set_offset(std::ptrdiff_t offset)
    {
        const bool word_aligned = (offset % sizeof(std::uint32_t)) == 0;
        if (!word_aligned) {
            throw OaknutException{ExceptionType::InvalidAlignment};
        }
        m_ptr = m_wmem + offset / sizeof(std::uint32_t);
    }

    /// Cursor position in the write view, as a pointer or pointer-sized integer.
    template<typename T>
    T wptr() const
    {
        static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
        return reinterpret_cast<T>(m_ptr);
    }

    /// Cursor position translated into the execute view.
    template<typename T>
    T xptr() const
    {
        static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
        const auto words_from_start = m_ptr - m_wmem;
        return reinterpret_cast<T>(m_xmem + words_from_start);
    }

    /// Sets the cursor from a write-view pointer.
    void set_wptr(std::uint32_t* p)
    {
        m_ptr = p;
    }

    /// Sets the cursor from an execute-view pointer.
    void set_xptr(std::uint32_t* p)
    {
        const auto words_from_start = p - m_xmem;
        m_ptr = m_wmem + words_from_start;
    }

protected:
    using constructor_argument_type = std::uint32_t*;

    /// Starts with the cursor at the beginning of `wmem`.
    PointerCodeGeneratorPolicy(std::uint32_t* wmem, std::uint32_t* xmem)
        : m_ptr{wmem}
        , m_wmem{wmem}
        , m_xmem{xmem}
    {}

    /// Stores one word at the cursor and advances it.
    void append(std::uint32_t instruction)
    {
        *m_ptr = instruction;
        ++m_ptr;
    }

    /// Rewrites the word at byte `offset`: bits selected by `mask` are kept
    /// and `value` is OR-ed in.
    void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
    {
        std::uint32_t& word = m_wmem[offset / sizeof(std::uint32_t)];
        word = (word & mask) | value;
    }

private:
    std::uint32_t* m_ptr;
    std::uint32_t* const m_wmem;
    std::uint32_t* const m_xmem;
};
/// Storage policy that appends instruction words to a caller-owned
/// std::vector. `xmem` is the address the first word is reported at by xptr().
struct VectorCodeGeneratorPolicy {
public:
    /// Number of bytes emitted so far.
    std::ptrdiff_t offset() const
    {
        const auto words_emitted = m_vec.size();
        return words_emitted * sizeof(std::uint32_t);
    }

    /// Execute-view address of the next word to be emitted, as a pointer or
    /// pointer-sized integer.
    template<typename T>
    T xptr() const
    {
        static_assert(std::is_pointer_v<T> || std::is_same_v<T, std::uintptr_t> || std::is_same_v<T, std::intptr_t>);
        const auto words_emitted = m_vec.size();
        return reinterpret_cast<T>(m_xmem + words_emitted);
    }

protected:
    using constructor_argument_type = std::vector<std::uint32_t>&;

    /// Keeps a reference to `vec`; the vector must outlive this object.
    VectorCodeGeneratorPolicy(std::vector<std::uint32_t>& vec, std::uint32_t* xmem)
        : m_vec{vec}
        , m_xmem{xmem}
    {}

    /// Appends one word to the vector.
    void append(std::uint32_t instruction)
    {
        m_vec.push_back(instruction);
    }

    /// Rewrites the word at byte `offset`: bits selected by `mask` are kept
    /// and `value` is OR-ed in.
    void set_at_offset(std::ptrdiff_t offset, std::uint32_t value, std::uint32_t mask) const
    {
        std::uint32_t& word = m_vec[offset / sizeof(std::uint32_t)];
        const std::uint32_t kept_bits = word & mask;
        word = kept_bits | value;
    }

private:
    std::vector<std::uint32_t>& m_vec;
    std::uint32_t* const m_xmem;
};
/// Code generator that writes through a raw pointer.
struct CodeGenerator : BasicCodeGenerator<PointerCodeGeneratorPolicy> {
public:
    /// Uses `mem` as both the write address and the execute address.
    CodeGenerator(std::uint32_t* mem)
        : BasicCodeGenerator<PointerCodeGeneratorPolicy>{mem, mem}
    {}

    /// Writes through `wmem` while reporting addresses relative to `xmem`.
    CodeGenerator(std::uint32_t* wmem, std::uint32_t* xmem)
        : BasicCodeGenerator<PointerCodeGeneratorPolicy>{wmem, xmem}
    {}
};
/// Code generator that appends to a caller-owned std::vector.
struct VectorCodeGenerator : BasicCodeGenerator<VectorCodeGeneratorPolicy> {
public:
    /// Appends to `mem` with a null execute base address.
    VectorCodeGenerator(std::vector<std::uint32_t>& mem)
        : BasicCodeGenerator<VectorCodeGeneratorPolicy>{mem, nullptr}
    {}

    /// Appends to `wmem` while reporting addresses relative to `xmem`.
    VectorCodeGenerator(std::vector<std::uint32_t>& wmem, std::uint32_t* xmem)
        : BasicCodeGenerator<VectorCodeGeneratorPolicy>{wmem, xmem}
    {}
};
// Ready-made operand constants, kept in their own namespace so that users
// opt in to the short names.
namespace util {
// Numbered general-purpose registers. The lists stop at 30: index 31 is only
// reachable through the zero-register constants below, and the stack pointer
// has its own types.
inline constexpr WReg W0{0}, W1{1}, W2{2}, W3{3}, W4{4}, W5{5}, W6{6}, W7{7}, W8{8}, W9{9}, W10{10}, W11{11}, W12{12}, W13{13}, W14{14}, W15{15}, W16{16}, W17{17}, W18{18}, W19{19}, W20{20}, W21{21}, W22{22}, W23{23}, W24{24}, W25{25}, W26{26}, W27{27}, W28{28}, W29{29}, W30{30};
inline constexpr XReg X0{0}, X1{1}, X2{2}, X3{3}, X4{4}, X5{5}, X6{6}, X7{7}, X8{8}, X9{9}, X10{10}, X11{11}, X12{12}, X13{13}, X14{14}, X15{15}, X16{16}, X17{17}, X18{18}, X19{19}, X20{20}, X21{21}, X22{22}, X23{23}, X24{24}, X25{25}, X26{26}, X27{27}, X28{28}, X29{29}, X30{30};
// Zero-register and stack-pointer operands (two spellings each for the
// 64-bit forms).
inline constexpr ZrReg ZR{}, XZR{};
inline constexpr WzrReg WZR{};
inline constexpr SpReg SP{}, XSP{};
inline constexpr WspReg WSP{};
// Vector registers: V<n> is a selector from which arrangements and lanes are
// derived; Q/D/S/H/B<n> are the 128/64/32/16/8-bit views.
inline constexpr VRegSelector V0{0}, V1{1}, V2{2}, V3{3}, V4{4}, V5{5}, V6{6}, V7{7}, V8{8}, V9{9}, V10{10}, V11{11}, V12{12}, V13{13}, V14{14}, V15{15}, V16{16}, V17{17}, V18{18}, V19{19}, V20{20}, V21{21}, V22{22}, V23{23}, V24{24}, V25{25}, V26{26}, V27{27}, V28{28}, V29{29}, V30{30}, V31{31};
inline constexpr QReg Q0{0}, Q1{1}, Q2{2}, Q3{3}, Q4{4}, Q5{5}, Q6{6}, Q7{7}, Q8{8}, Q9{9}, Q10{10}, Q11{11}, Q12{12}, Q13{13}, Q14{14}, Q15{15}, Q16{16}, Q17{17}, Q18{18}, Q19{19}, Q20{20}, Q21{21}, Q22{22}, Q23{23}, Q24{24}, Q25{25}, Q26{26}, Q27{27}, Q28{28}, Q29{29}, Q30{30}, Q31{31};
inline constexpr DReg D0{0}, D1{1}, D2{2}, D3{3}, D4{4}, D5{5}, D6{6}, D7{7}, D8{8}, D9{9}, D10{10}, D11{11}, D12{12}, D13{13}, D14{14}, D15{15}, D16{16}, D17{17}, D18{18}, D19{19}, D20{20}, D21{21}, D22{22}, D23{23}, D24{24}, D25{25}, D26{26}, D27{27}, D28{28}, D29{29}, D30{30}, D31{31};
inline constexpr SReg S0{0}, S1{1}, S2{2}, S3{3}, S4{4}, S5{5}, S6{6}, S7{7}, S8{8}, S9{9}, S10{10}, S11{11}, S12{12}, S13{13}, S14{14}, S15{15}, S16{16}, S17{17}, S18{18}, S19{19}, S20{20}, S21{21}, S22{22}, S23{23}, S24{24}, S25{25}, S26{26}, S27{27}, S28{28}, S29{29}, S30{30}, S31{31};
inline constexpr HReg H0{0}, H1{1}, H2{2}, H3{3}, H4{4}, H5{5}, H6{6}, H7{7}, H8{8}, H9{9}, H10{10}, H11{11}, H12{12}, H13{13}, H14{14}, H15{15}, H16{16}, H17{17}, H18{18}, H19{19}, H20{20}, H21{21}, H22{22}, H23{23}, H24{24}, H25{25}, H26{26}, H27{27}, H28{28}, H29{29}, H30{30}, H31{31};
inline constexpr BReg B0{0}, B1{1}, B2{2}, B3{3}, B4{4}, B5{5}, B6{6}, B7{7}, B8{8}, B9{9}, B10{10}, B11{11}, B12{12}, B13{13}, B14{14}, B15{15}, B16{16}, B17{17}, B18{18}, B19{19}, B20{20}, B21{21}, B22{22}, B23{23}, B24{24}, B25{25}, B26{26}, B27{27}, B28{28}, B29{29}, B30{30}, B31{31};
// Condition codes.
inline constexpr Cond EQ{Cond::EQ}, NE{Cond::NE}, CS{Cond::CS}, CC{Cond::CC}, MI{Cond::MI}, PL{Cond::PL}, VS{Cond::VS}, VC{Cond::VC}, HI{Cond::HI}, LS{Cond::LS}, GE{Cond::GE}, LT{Cond::LT}, GT{Cond::GT}, LE{Cond::LE}, AL{Cond::AL}, NV{Cond::NV}, HS{Cond::HS}, LO{Cond::LO};
// Extend and shift names. Each is a MultiTypedName listing every enum value
// the one spelling can stand for, so the same name can be used with
// different operand kinds.
inline constexpr auto UXTB{MultiTypedName<AddSubExt::UXTB>{}};
inline constexpr auto UXTH{MultiTypedName<AddSubExt::UXTH>{}};
inline constexpr auto UXTW{MultiTypedName<AddSubExt::UXTW, IndexExt::UXTW>{}};
inline constexpr auto UXTX{MultiTypedName<AddSubExt::UXTX>{}};
inline constexpr auto SXTB{MultiTypedName<AddSubExt::SXTB>{}};
inline constexpr auto SXTH{MultiTypedName<AddSubExt::SXTH>{}};
inline constexpr auto SXTW{MultiTypedName<AddSubExt::SXTW, IndexExt::SXTW>{}};
inline constexpr auto SXTX{MultiTypedName<AddSubExt::SXTX, IndexExt::SXTX>{}};
inline constexpr auto LSL{MultiTypedName<AddSubExt::LSL, IndexExt::LSL, AddSubShift::LSL, LogShift::LSL, LslSymbol::LSL>{}};
inline constexpr auto LSR{MultiTypedName<AddSubShift::LSR, LogShift::LSR>{}};
inline constexpr auto ASR{MultiTypedName<AddSubShift::ASR, LogShift::ASR>{}};
inline constexpr auto ROR{MultiTypedName<LogShift::ROR>{}};
// Tag values selecting post-/pre-indexed addressing overloads.
inline constexpr PostIndexed POST_INDEXED{};
inline constexpr PreIndexed PRE_INDEXED{};
inline constexpr MslSymbol MSL{MslSymbol::MSL};
} // namespace util
} // namespace oaknut

View file

@ -0,0 +1,44 @@
// SPDX-FileCopyrightText: Copyright (c) 2023 merryhime <https://mary.rs>
// SPDX-License-Identifier: MIT
#pragma once
#include <exception>
namespace oaknut {
// Every error kind an OaknutException can carry.
// The enumerators are generated from oaknut_exception.inc.hpp: each
// OAKNUT_EXCEPTION(tag, str) entry in that file contributes one enumerator
// named `tag` (the message `str` is ignored here and used by to_string).
enum class ExceptionType {
#define OAKNUT_EXCEPTION(tag, str) tag,
#include "oaknut/impl/oaknut_exception.inc.hpp"
#undef OAKNUT_EXCEPTION
};
// Returns the message string associated with `et`.
// The switch cases are generated from the same oaknut_exception.inc.hpp list
// as the enum, one `case tag: return str;` per OAKNUT_EXCEPTION entry; values
// outside that list yield "unknown ExceptionType".
inline const char* to_string(ExceptionType et)
{
switch (et) {
#define OAKNUT_EXCEPTION(tag, str) \
case ExceptionType::tag: \
return str;
#include "oaknut/impl/oaknut_exception.inc.hpp"
#undef OAKNUT_EXCEPTION
default:
return "unknown ExceptionType";
}
}
/// Exception thrown by oaknut; carries an ExceptionType and reports its
/// message through what().
class OaknutException : public std::exception {
public:
    /// @param et the error kind to report
    explicit OaknutException(ExceptionType et)
        : type(et)
    {}

    /// Message for the stored error kind, as produced by to_string().
    const char* what() const noexcept override
    {
        const char* const message = to_string(type);
        return message;
    }

private:
    ExceptionType type;
};
} // namespace oaknut