Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial support for compiling on ARM64. #788

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 27 additions & 4 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,22 @@ endif()

option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)

# Determine the architecture being targeted. On Apple platforms an explicitly requested
# CMAKE_OSX_ARCHITECTURES takes precedence over the processor CMake detected.
if (APPLE AND CMAKE_OSX_ARCHITECTURES)
    # CMAKE_OSX_ARCHITECTURES is a list. The architecture-specific source lists and linker
    # options in this file are chosen for exactly one architecture, so reject universal
    # (multi-architecture) requests instead of silently configuring for whichever name
    # happens to match first.
    list(LENGTH CMAKE_OSX_ARCHITECTURES OSX_ARCHITECTURE_COUNT)
    if (OSX_ARCHITECTURE_COUNT GREATER 1)
        message(FATAL_ERROR "Building for multiple architectures at once is not supported: ${CMAKE_OSX_ARCHITECTURES}")
    endif()
    set(BASE_ARCHITECTURE "${CMAKE_OSX_ARCHITECTURES}")
else()
    set(BASE_ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
endif()

# Next, match common architecture strings down to a known common value.
# ARCHITECTURE ends up as either "x86_64" or "arm64"; anything else aborts configuration.
if (BASE_ARCHITECTURE MATCHES "(x86)|(X86)|(amd64)|(AMD64)")
    set(ARCHITECTURE "x86_64")
elseif (BASE_ARCHITECTURE MATCHES "(aarch64)|(AARCH64)|(arm64)|(ARM64)")
    set(ARCHITECTURE "arm64")
else()
    message(FATAL_ERROR "Unsupported CPU architecture: ${BASE_ARCHITECTURE}")
endif()

# This function should be passed a list of all files in a target. It will automatically generate file groups
# following the directory hierarchy, so that the layout of the files in IDEs matches the one in the filesystem.
function(create_target_directory_groups target_name)
Expand Down Expand Up @@ -308,6 +324,7 @@ set(COMMON src/common/logging/backend.cpp
src/common/logging/text_formatter.h
src/common/logging/types.h
src/common/alignment.h
src/common/arch.h
src/common/assert.cpp
src/common/assert.h
src/common/bit_field.h
Expand Down Expand Up @@ -356,8 +373,6 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/aerolib/aerolib.h
src/core/address_space.cpp
src/core/address_space.h
src/core/cpu_patches.cpp
src/core/cpu_patches.h
src/core/crypto/crypto.cpp
src/core/crypto/crypto.h
src/core/crypto/keys.h
Expand Down Expand Up @@ -415,6 +430,12 @@ set(CORE src/core/aerolib/stubs.cpp
src/core/virtual_memory.h
)

# The CPU patching sources are only part of the build when targeting x86_64.
if (ARCHITECTURE STREQUAL "x86_64")
    list(APPEND CORE
         src/core/cpu_patches.cpp
         src/core/cpu_patches.h)
endif()

set(SHADER_RECOMPILER src/shader_recompiler/exception.h
src/shader_recompiler/profile.h
src/shader_recompiler/recompiler.cpp
Expand Down Expand Up @@ -658,8 +679,10 @@ if (APPLE)
target_link_libraries(shadps4 PRIVATE ${MOLTENVK})
endif()

# Reserve system-managed memory space.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
if (ARCHITECTURE STREQUAL "x86_64")
# Reserve system-managed memory space.
# The linker is asked to use a 0x4000-byte page-zero segment, to place the TCB_SPACE and
# GUEST_SYSTEM segments at 0x4000 and 0x400000 respectively, and to load the image itself at
# 0x20000000000. These options are only applied when building for x86_64.
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
endif()

# Replacement for std::chrono::time_zone
target_link_libraries(shadps4 PRIVATE date::date-tz)
Expand Down
1 change: 0 additions & 1 deletion externals/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@ else()
endif()

if (NOT TARGET FFmpeg::ffmpeg)
set(ARCHITECTURE "x86_64")
add_subdirectory(ffmpeg-core)
add_library(FFmpeg::ffmpeg ALIAS ffmpeg)
endif()
Expand Down
10 changes: 10 additions & 0 deletions src/common/arch.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once

// Exactly one ARCH_* macro is defined (to 1) for a recognised target CPU, based on the
// compiler's predefined macros (GCC/Clang spellings and MSVC spellings). Nothing is defined
// for any other target, so users must provide their own fallback or #error.
#if defined(_M_X64) || defined(__x86_64__)
#define ARCH_X86_64 1
#elif defined(_M_ARM64) || defined(__aarch64__)
#define ARCH_ARM64 1
#endif
7 changes: 7 additions & 0 deletions src/common/assert.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/backend.h"

// Crash() expands to the target architecture's breakpoint instruction.
#if defined(ARCH_X86_64)
// x86-64: software breakpoint interrupt.
#define Crash() __asm__ __volatile__("int $3")
#elif defined(ARCH_ARM64)
// AArch64: breakpoint instruction with immediate 0.
#define Crash() __asm__ __volatile__("brk 0")
#else
// No implementation for any other architecture: fail the build.
#error "Missing Crash() implementation for target CPU architecture."
#endif

void assert_fail_impl() {
Common::Log::Stop();
Expand Down
17 changes: 17 additions & 0 deletions src/common/rdtsc.h
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@

#pragma once

#include "common/arch.h"

#ifdef _MSC_VER
#include <intrin.h>
#endif
Expand All @@ -13,22 +15,37 @@ namespace Common {

// FencedRDTSC() returns a 64-bit hardware counter value, with barriers placed around the read.
// Two variants are provided: one using MSVC intrinsics and one using GCC/Clang inline assembly.
#ifdef _MSC_VER
// MSVC variant. Only x86-64 is implemented; any other target fails to compile.
__forceinline static u64 FencedRDTSC() {
#ifdef ARCH_X86_64
// LFENCE before and after the counter read, each followed by a compiler barrier
// (_ReadWriteBarrier) so the compiler does not move memory accesses across the read.
_mm_lfence();
_ReadWriteBarrier();
const u64 result = __rdtsc();
_mm_lfence();
_ReadWriteBarrier();
return result;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#else
// GCC/Clang variant, implemented for x86-64 and ARM64.
static inline u64 FencedRDTSC() {
#ifdef ARCH_X86_64
// RDTSC delivers the low half of the counter in EAX and the high half in EDX (bound via the
// "=a" and "=d" constraints); the two halves are combined into one 64-bit value below.
u64 eax;
u64 edx;
asm volatile("lfence\n\t"
"rdtsc\n\t"
"lfence\n\t"
: "=a"(eax), "=d"(edx));
return (edx << 32) | eax;
#elif defined(ARCH_ARM64)
// Reads the CNTVCT_EL0 counter register, bracketed by instruction synchronization barriers.
// NOTE(review): this is the generic timer's virtual count rather than an x86 TSC — confirm that
// callers derive the tick frequency appropriately on ARM64.
u64 ret;
asm volatile("isb\n\t"
"mrs %0, cntvct_el0\n\t"
"isb\n\t"
: "=r"(ret)::"memory");
return ret;
#else
#error "Missing FencedRDTSC() implementation for target CPU architecture."
#endif
}
#endif

Expand Down
39 changes: 29 additions & 10 deletions src/core/address_space.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,22 @@

#include <boost/icl/separate_interval_set.hpp>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "core/address_space.h"
#include "core/libraries/kernel/memory_management.h"
#include "core/memory.h"
#include "libraries/error_codes.h"

#ifdef _WIN32
#include <windows.h>
#else
#include <fcntl.h>
#include <sys/mman.h>
#endif
#include "libraries/error_codes.h"

#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Reserve space for the system address space using a zerofill section.
asm(".zerofill GUEST_SYSTEM,GUEST_SYSTEM,__guest_system,0xFBFC00000");
#endif
Expand Down Expand Up @@ -308,12 +309,12 @@ struct AddressSpace::Impl {

constexpr int protection_flags = PROT_READ | PROT_WRITE;
constexpr int base_map_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
#ifdef __APPLE__
// On ARM64 Macs, we run into limitations due to the commpage from 0xFC0000000 - 0xFFFFFFFFF
// and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF. We can allocate the system
// managed region, as well as system reserved if reduced in size slightly, but we cannot map
// the user region where we want, so we must let the OS put it wherever possible and hope
// the game won't rely on its location.
#if defined(__APPLE__) && defined(ARCH_X86_64)
// On ARM64 Macs under Rosetta 2, we run into limitations due to the commpage from
// 0xFC0000000 - 0xFFFFFFFFF and the GPU carveout region from 0x1000000000 - 0x6FFFFFFFFF.
// We can allocate the system managed region, as well as system reserved if reduced in size
// slightly, but we cannot map the user region where we want, so we must let the OS put it
// wherever possible and hope the game won't rely on its location.
system_managed_base = reinterpret_cast<u8*>(
mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), system_managed_size, protection_flags,
base_map_flags | MAP_FIXED, -1, 0));
Expand All @@ -325,12 +326,22 @@ struct AddressSpace::Impl {
protection_flags, base_map_flags, -1, 0));
#else
const auto virtual_size = system_managed_size + system_reserved_size + user_size;
#if defined(ARCH_X86_64)
const auto virtual_base =
reinterpret_cast<u8*>(mmap(reinterpret_cast<void*>(SYSTEM_MANAGED_MIN), virtual_size,
protection_flags, base_map_flags | MAP_FIXED, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = reinterpret_cast<u8*>(SYSTEM_RESERVED_MIN);
user_base = reinterpret_cast<u8*>(USER_MIN);
#else
// Map memory wherever possible and instruction translation can handle offsetting to the
// base.
const auto virtual_base = reinterpret_cast<u8*>(
mmap(nullptr, virtual_size, protection_flags, base_map_flags, -1, 0));
system_managed_base = virtual_base;
system_reserved_base = virtual_base + SYSTEM_RESERVED_MIN - SYSTEM_MANAGED_MIN;
user_base = virtual_base + USER_MIN - SYSTEM_MANAGED_MIN;
#endif
#endif
if (system_managed_base == MAP_FAILED || system_reserved_base == MAP_FAILED ||
user_base == MAP_FAILED) {
Expand Down Expand Up @@ -430,9 +441,11 @@ struct AddressSpace::Impl {
if (write) {
flags |= PROT_WRITE;
}
#ifdef ARCH_X86_64
if (execute) {
flags |= PROT_EXEC;
}
#endif
int ret = mprotect(reinterpret_cast<void*>(virtual_addr), size, flags);
ASSERT_MSG(ret == 0, "mprotect failed: {}", strerror(errno));
}
Expand Down Expand Up @@ -463,8 +476,14 @@ AddressSpace::~AddressSpace() = default;

// Maps `size` bytes at `virtual_addr`, backed by `phys_addr`, by forwarding to the platform
// implementation and returning its result.
//
// `is_exec` requests an executable mapping and is only honoured on x86_64 hosts (see below).
// `alignment` is accepted for interface compatibility but is not used by this function.
void* AddressSpace::Map(VAddr virtual_addr, size_t size, u64 alignment, PAddr phys_addr,
                        bool is_exec) {
#ifdef ARCH_X86_64
    const auto prot = is_exec ? PAGE_EXECUTE_READWRITE : PAGE_READWRITE;
#else
    // On non-native architectures, we can simplify things by ignoring the execute flag for the
    // canonical copy of the memory and rely on the JIT to map translated code as executable.
    constexpr auto prot = PAGE_READWRITE;
#endif
    return impl->Map(virtual_addr, phys_addr, size, prot);
}

void* AddressSpace::MapFile(VAddr virtual_addr, size_t size, size_t offset, u32 prot,
Expand Down
3 changes: 2 additions & 1 deletion src/core/address_space.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#pragma once

#include <memory>
#include "common/arch.h"
#include "common/enum.h"
#include "common/types.h"

Expand All @@ -23,7 +24,7 @@ constexpr VAddr CODE_BASE_OFFSET = 0x100000000ULL;
constexpr VAddr SYSTEM_MANAGED_MIN = 0x00000400000ULL;
constexpr VAddr SYSTEM_MANAGED_MAX = 0x07FFFFBFFFULL;
constexpr VAddr SYSTEM_RESERVED_MIN = 0x07FFFFC000ULL;
#ifdef __APPLE__
#if defined(__APPLE__) && defined(ARCH_X86_64)
// Can only comfortably reserve the first 0x7C0000000 of system reserved space.
constexpr VAddr SYSTEM_RESERVED_MAX = 0xFBFFFFFFFULL;
#else
Expand Down
3 changes: 3 additions & 0 deletions src/core/libraries/kernel/thread_management.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include <thread>

#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/logging/log.h"
Expand Down Expand Up @@ -995,7 +996,9 @@ static void cleanup_thread(void* arg) {
static void* run_thread(void* arg) {
auto* thread = static_cast<ScePthread>(arg);
Common::SetCurrentThreadName(thread->name.c_str());
#ifdef ARCH_X86_64
Core::InitializeThreadPatchStack();
#endif
auto* linker = Common::Singleton<Core::Linker>::Instance();
linker->InitTlsForThread(false);
void* ret = nullptr;
Expand Down
7 changes: 7 additions & 0 deletions src/core/linker.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/config.h"
#include "common/logging/log.h"
Expand All @@ -27,6 +28,7 @@ static PS4_SYSV_ABI void ProgramExitFunc() {
}

static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
#ifdef ARCH_X86_64
// reinterpret_cast<entry_func_t>(addr)(params, exit_func); // can't be used, stack has to have
// a specific layout
asm volatile("andq $-16, %%rsp\n" // Align to 16 bytes
Expand All @@ -46,6 +48,9 @@ static void RunMainEntry(VAddr addr, EntryParams* params, ExitFunc exit_func) {
:
: "r"(addr), "r"(params), "r"(exit_func)
: "rax", "rsi", "rdi");
#else
UNIMPLEMENTED_MSG("Missing RunMainEntry() implementation for target CPU architecture.");
#endif
}

Linker::Linker() : memory{Memory::Instance()} {}
Expand Down Expand Up @@ -85,7 +90,9 @@ void Linker::Execute() {

// Init primary thread.
Common::SetCurrentThreadName("GAME_MainThread");
#ifdef ARCH_X86_64
InitializeThreadPatchStack();
#endif
Libraries::Kernel::pthreadInitSelfMainThread();
InitTlsForThread(true);

Expand Down
3 changes: 3 additions & 0 deletions src/core/module.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

#include <xbyak/xbyak.h>
#include "common/alignment.h"
#include "common/arch.h"
#include "common/assert.h"
#include "common/logging/log.h"
#ifdef ENABLE_QT_GUI
Expand Down Expand Up @@ -134,9 +135,11 @@ void Module::LoadModuleToMemory(u32& max_tls_index) {
LOG_INFO(Core_Linker, "segment_mode ..........: {}", segment_mode);

add_segment(elf_pheader[i]);
#ifdef ARCH_X86_64
if (elf_pheader[i].p_flags & PF_EXEC) {
PatchInstructions(segment_addr, segment_file_size, c);
}
#endif
break;
}
case PT_DYNAMIC:
Expand Down
Loading
Loading