Mirror of https://github.com/xenia-project/xenia.git (synced 2025-12-06 07:12:03 +01:00)

Merge pull request #60 from chrisps/canary_experimental
"Superbig boost in performance?"
This commit: 6bc3191b97

.gitmodules (vendored): 4 lines changed
@@ -48,7 +48,7 @@
 url = https://github.com/fmtlib/fmt.git
 [submodule "third_party/disruptorplus"]
 path = third_party/disruptorplus
-url = https://github.com/xenia-project/disruptorplus.git
+url = https://github.com/chrisps/disruptorpus.git
 [submodule "third_party/DirectXShaderCompiler"]
 path = third_party/DirectXShaderCompiler
 url = https://github.com/microsoft/DirectXShaderCompiler.git
@@ -63,7 +63,7 @@
 url = https://github.com/Cyan4973/xxHash.git
 [submodule "third_party/FFmpeg"]
 path = third_party/FFmpeg
-url = https://github.com/xenia-project/FFmpeg.git
+url = https://github.com/chrisps/FFmpeg_radixsplit.git
 [submodule "third_party/premake-androidndk"]
 path = third_party/premake-androidndk
 url = https://github.com/Triang3l/premake-androidndk.git
@@ -46,7 +46,9 @@ static_assert((std::endian::native == std::endian::big) ||

 namespace xe {

-#if XE_COMPILER_MSVC
+// chrispy: added workaround for clang, otherwise byteswap_ulong becomes calls
+// to ucrtbase
+#if XE_COMPILER_MSVC == 1 && !defined(__clang__)
 #define XENIA_BASE_BYTE_SWAP_16 _byteswap_ushort
 #define XENIA_BASE_BYTE_SWAP_32 _byteswap_ulong
 #define XENIA_BASE_BYTE_SWAP_64 _byteswap_uint64
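For context on the hunk above: under clang-cl, `_byteswap_ulong` lowers to a real call into ucrtbase instead of an intrinsic, while `__builtin_bswap32` compiles to a single bswap instruction. A minimal standalone sketch of that dispatch (illustrative helper name, not the macro set from the diff):

#include <cstdint>
#include <cstdlib>  // declares _byteswap_ulong on MSVC

static inline uint32_t byte_swap_u32(uint32_t v) {
#if defined(__clang__) || defined(__GNUC__)
  return __builtin_bswap32(v);  // one bswap/rev instruction
#elif defined(_MSC_VER)
  return _byteswap_ulong(v);  // genuine MSVC intrinsic
#else
  // portable fallback
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) |
         (v << 24);
#endif
}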
@@ -28,7 +28,8 @@ namespace xe {
 class Win32MappedMemory : public MappedMemory {
  public:
   // CreateFile returns INVALID_HANDLE_VALUE in case of failure.
-  static constexpr HANDLE kFileHandleInvalid = INVALID_HANDLE_VALUE;
+  // chrispy: made inline const to get around clang error
+  static inline const HANDLE kFileHandleInvalid = INVALID_HANDLE_VALUE;
   // CreateFileMapping returns nullptr in case of failure.
   static constexpr HANDLE kMappingHandleInvalid = nullptr;
@@ -15,7 +15,15 @@
                       WINAPI_PARTITION_SYSTEM | WINAPI_PARTITION_GAMES)
 #define XE_BASE_MEMORY_WIN_USE_DESKTOP_FUNCTIONS
 #endif
+/*
+  these two don't bypass much MS garbage compared to the threading ones,
+  but Protect is used by PhysicalHeap::EnableAccessCallbacks, which eats a lot
+  of cpu time, so every bit counts
+*/
+XE_NTDLL_IMPORT(NtProtectVirtualMemory, cls_NtProtectVirtualMemory,
+                NtProtectVirtualMemoryPointer);
+XE_NTDLL_IMPORT(NtQueryVirtualMemory, cls_NtQueryVirtualMemory,
+                NtQueryVirtualMemoryPointer);
 namespace xe {
 namespace memory {
@@ -139,6 +147,18 @@ bool Protect(void* base_address, size_t length, PageAccess access,
     *out_old_access = PageAccess::kNoAccess;
   }
   DWORD new_protect = ToWin32ProtectFlags(access);

+#if XE_USE_NTDLL_FUNCTIONS == 1
+
+  DWORD old_protect = 0;
+  SIZE_T MemoryLength = length;
+  PVOID MemoryCache = base_address;
+
+  BOOL result = NtProtectVirtualMemoryPointer.invoke<NTSTATUS>(
+                    (HANDLE)0xFFFFFFFFFFFFFFFFLL, &MemoryCache, &MemoryLength,
+                    new_protect, &old_protect) >= 0;
+
+#else
 #ifdef XE_BASE_MEMORY_WIN_USE_DESKTOP_FUNCTIONS
   DWORD old_protect = 0;
   BOOL result = VirtualProtect(base_address, length, new_protect, &old_protect);
@@ -146,6 +166,7 @@ bool Protect(void* base_address, size_t length, PageAccess access,
   ULONG old_protect = 0;
   BOOL result = VirtualProtectFromApp(base_address, length, ULONG(new_protect),
                                       &old_protect);
+#endif
 #endif
   if (!result) {
     return false;
@@ -161,8 +182,17 @@ bool QueryProtect(void* base_address, size_t& length, PageAccess& access_out) {

   MEMORY_BASIC_INFORMATION info;
   ZeroMemory(&info, sizeof(info));
+#if XE_USE_NTDLL_FUNCTIONS == 1
+  ULONG_PTR ResultLength;
+
+  NTSTATUS query_result = NtQueryVirtualMemoryPointer.invoke<NTSTATUS>(
+      (HANDLE)0xFFFFFFFFFFFFFFFFLL, (PVOID)base_address,
+      0 /* MemoryBasicInformation */, &info, length, &ResultLength);
+  SIZE_T result = query_result >= 0 ? ResultLength : 0;
+#else
   SIZE_T result = VirtualQuery(base_address, &info, length);

+#endif
   if (!result) {
     return false;
   }
@@ -10,10 +10,9 @@
 #include "xenia/base/mutex.h"

 namespace xe {
-std::recursive_mutex& global_critical_region::mutex() {
+// chrispy: moved this out of the function body to eliminate the
+// initialization guards
 static std::recursive_mutex global_mutex;
-  return global_mutex;
-}
+std::recursive_mutex& global_critical_region::mutex() { return global_mutex; }

 }  // namespace xe
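The tradeoff the comment refers to, in a minimal standalone sketch (assuming C++11 "magic statics"): a function-local static is lazily initialized, so the compiler emits a guarded one-time init check that runs on every call; a namespace-scope object is initialized up front and the accessor becomes a plain address load.

#include <mutex>

std::mutex& local_static() {
  static std::mutex m;  // guarded init check executed on every call
  return m;
}

static std::mutex g_mutex;  // initialized before use, no per-call guard
std::mutex& namespace_scope() { return g_mutex; }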
@@ -41,19 +41,33 @@
 #error Unsupported target OS.
 #endif

-#if defined(__clang__)
+#if defined(__clang__) && !defined(_MSC_VER)  // chrispy: support clang-cl
 #define XE_COMPILER_CLANG 1
+#define XE_COMPILER_HAS_CLANG_EXTENSIONS 1
 #elif defined(__GNUC__)
 #define XE_COMPILER_GNUC 1
+#define XE_COMPILER_HAS_GNU_EXTENSIONS 1
 #elif defined(_MSC_VER)
 #define XE_COMPILER_MSVC 1
+#define XE_COMPILER_HAS_MSVC_EXTENSIONS 1
 #elif defined(__MINGW32)
 #define XE_COMPILER_MINGW32 1
+#define XE_COMPILER_HAS_GNU_EXTENSIONS 1
 #elif defined(__INTEL_COMPILER)
 #define XE_COMPILER_INTEL 1
 #else
 #define XE_COMPILER_UNKNOWN 1
 #endif
+// chrispy: had to place this here.
+#if defined(__clang__) && defined(_MSC_VER)
+#define XE_COMPILER_CLANG_CL 1
+#define XE_COMPILER_HAS_CLANG_EXTENSIONS 1
+#endif
+
+// clang extensions == superset of gnu extensions
+#if XE_COMPILER_HAS_CLANG_EXTENSIONS == 1
+#define XE_COMPILER_HAS_GNU_EXTENSIONS 1
+#endif

 #if defined(_M_AMD64) || defined(__amd64__)
 #define XE_ARCH_AMD64 1
@@ -93,6 +107,29 @@
 #define XEPACKEDSTRUCTANONYMOUS(value) _XEPACKEDSCOPE(struct value)
 #define XEPACKEDUNION(name, value) _XEPACKEDSCOPE(union name value)

+#if XE_COMPILER_HAS_MSVC_EXTENSIONS == 1
+#define XE_FORCEINLINE __forceinline
+#define XE_NOINLINE __declspec(noinline)
+// can't properly emulate "cold" in msvc, but can still segregate the function
+// into its own section
+#define XE_COLD __declspec(code_seg(".cold"))
+#define XE_LIKELY(...) (!!(__VA_ARGS__))
+#define XE_UNLIKELY(...) (!!(__VA_ARGS__))
+
+#elif XE_COMPILER_HAS_GNU_EXTENSIONS == 1
+#define XE_FORCEINLINE __attribute__((always_inline))
+#define XE_NOINLINE __attribute__((noinline))
+#define XE_COLD __attribute__((cold))
+#define XE_LIKELY(...) __builtin_expect(!!(__VA_ARGS__), true)
+#define XE_UNLIKELY(...) __builtin_expect(!!(__VA_ARGS__), false)
+#else
+#define XE_FORCEINLINE inline
+#define XE_NOINLINE
+#define XE_COLD
+#define XE_LIKELY(...) (!!(__VA_ARGS__))
+#define XE_UNLIKELY(...) (!!(__VA_ARGS__))
+#endif

 namespace xe {

 #if XE_PLATFORM_WIN32
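A usage sketch for the new hint macros (hypothetical function, not from the diff): on GNU-family compilers XE_UNLIKELY expands to __builtin_expect and steers the cold path off the hot fallthrough; on MSVC it degrades to a plain boolean test.

#include <cstddef>

bool consume(const unsigned char* data, size_t length) {
  if (XE_UNLIKELY(data == nullptr || length == 0)) {
    return false;  // cold path: laid out out-of-line on GCC/clang
  }
  // hot path continues here
  return true;
}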
@@ -34,4 +34,31 @@
 #undef DeleteFile
 #undef GetFirstChild

+#define XE_USE_NTDLL_FUNCTIONS 1
+#if XE_USE_NTDLL_FUNCTIONS == 1
+/*
+  ntdll versions of functions often skip through a lot of extra garbage in
+  KernelBase
+*/
+#define XE_NTDLL_IMPORT(name, cls, clsvar)                                   \
+  static class cls {                                                         \
+   public:                                                                   \
+    FARPROC fn;                                                              \
+    cls() : fn(nullptr) {                                                    \
+      auto ntdll = GetModuleHandleA("ntdll.dll");                            \
+      if (ntdll) {                                                           \
+        fn = GetProcAddress(ntdll, #name);                                   \
+      }                                                                      \
+    }                                                                        \
+    template <typename TRet = void, typename... TArgs>                       \
+    inline TRet invoke(TArgs... args) {                                      \
+      return reinterpret_cast<NTSYSAPI TRet(NTAPI*)(TArgs...)>(fn)(args...); \
+    }                                                                        \
+    inline operator bool() const { return fn != nullptr; }                   \
+  } clsvar
+#else
+#define XE_NTDLL_IMPORT(name, cls, clsvar) static constexpr bool clsvar = false
+#endif
 #endif  // XENIA_BASE_PLATFORM_WIN_H_
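What one expansion of XE_NTDLL_IMPORT boils down to, written out by hand as a sketch (the helper names here are illustrative, not the emulator's): resolve the export from ntdll once at static-init time, then call through the raw pointer, bypassing the KernelBase wrappers.

#include <windows.h>
#include <winternl.h>

// resolved once, exactly like the macro's constructor does
static FARPROC g_nt_yield =
    GetProcAddress(GetModuleHandleA("ntdll.dll"), "NtYieldExecution");

NTSTATUS CallNtYieldExecution() {
  using Fn = NTSTATUS(NTAPI*)();
  return reinterpret_cast<Fn>(g_nt_yield)();  // direct ntdll entry point
}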
@@ -20,7 +20,7 @@
 #include "xenia/ui/virtual_key.h"
 #include "xenia/ui/window_listener.h"

-#if XE_PLATFORM_WIN32
+#if XE_PLATFORM_WIN32 && 0
 #define XE_OPTION_PROFILING 1
 #define XE_OPTION_PROFILING_UI 1
 #else
@@ -19,7 +19,26 @@
 #include "xenia/base/byte_order.h"

 namespace xe {
+/*
+  todo: this class is CRITICAL to the performance of the entire emulator.
+  currently, about 0.74% of cpu time is still taken up by ReadAndSwap, and
+  0.23% is used by read_count. I believe part of the issue is that smaller
+  ringbuffers are kicking off an automatic prefetcher stream that ends up
+  reading ahead of the end of the ring (it can only go in a straight line),
+  and it then gets a cache miss when it eventually wraps around to the start
+  of the ring? really hard to tell what's going on there honestly; maybe we
+  can occasionally prefetch the first line of the ring to L1? for the
+  automatic prefetching i don't think there are any good options. I don't know
+  if we have any control over where these buffers will be (they seem to be in
+  guest memory :/), but if we did we could right-justify the buffer so that
+  the final byte of the ring ends at the end of a page; i think most automatic
+  prefetchers cannot cross page boundaries. it does feel like something isn't
+  right here, though.
+
+  todo: microoptimization, we can change our size members to be uint32 so
+  that the registers no longer need the REX prefix, shrinking the generated
+  code a bit... like i said, every bit helps in this class
+*/
 class RingBuffer {
  public:
  RingBuffer(uint8_t* buffer, size_t capacity);
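One of the ideas floated in the comment above (occasionally pulling the ring's first cache line back into L1 so the wraparound read does not miss), as a sketch; whether it actually helps is exactly the open question raised there.

#include <cstdint>
#include <xmmintrin.h>

inline void prefetch_ring_start(const uint8_t* buffer) {
  // hint the wraparound target into L1 ahead of time
  _mm_prefetch(reinterpret_cast<const char*>(buffer), _MM_HINT_T0);
}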
@@ -32,6 +51,8 @@ class RingBuffer {
   uintptr_t read_ptr() const { return uintptr_t(buffer_) + read_offset_; }
   void set_read_offset(size_t offset) { read_offset_ = offset % capacity_; }
   size_t read_count() const {
+// chrispy: these branches are unpredictable
+#if 0
     if (read_offset_ == write_offset_) {
       return 0;
     } else if (read_offset_ < write_offset_) {
@@ -39,6 +60,33 @@ class RingBuffer {
     } else {
       return (capacity_ - read_offset_) + write_offset_;
     }
+#else
+    size_t read_offs = read_offset_;
+    size_t write_offs = write_offset_;
+    size_t cap = capacity_;
+
+    size_t offset_delta = write_offs - read_offs;
+    size_t wrap_read_count = (cap - read_offs) + write_offs;
+
+    size_t comparison_value = read_offs <= write_offs;
+#if 0
+    size_t selector =
+        static_cast<size_t>(-static_cast<ptrdiff_t>(comparison_value));
+    offset_delta &= selector;
+
+    wrap_read_count &= ~selector;
+    return offset_delta | wrap_read_count;
+#else
+    if (XE_LIKELY(read_offs <= write_offs)) {
+      return offset_delta;  // will be 0 if they are equal; semantically
+                            // identical to the old code (i checked the asm,
+                            // msvc does not automatically do this)
+    } else {
+      return wrap_read_count;
+    }
+#endif
+#endif
   }

   size_t write_offset() const { return write_offset_; }
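The disabled `#if 0` variant above selects between the two candidate counts with a mask instead of a conditional jump. A self-contained sketch of the same trick (two's-complement size_t assumed): the boolean is widened to an all-ones or all-zeros mask, which picks one operand and zeroes the other.

#include <cstddef>

size_t read_count_branchless(size_t read_offs, size_t write_offs, size_t cap) {
  size_t no_wrap = write_offs - read_offs;          // valid when read <= write
  size_t wrapped = (cap - read_offs) + write_offs;  // valid when read > write
  // (read_offs <= write_offs) is 0 or 1; unsigned negation widens it to a mask
  size_t mask = 0 - static_cast<size_t>(read_offs <= write_offs);
  return (no_wrap & mask) | (wrapped & ~mask);
}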
@@ -113,6 +161,28 @@ class RingBuffer {
   size_t write_offset_ = 0;
 };

+template <>
+inline uint32_t RingBuffer::ReadAndSwap<uint32_t>() {
+  size_t read_offset = this->read_offset_;
+  xenia_assert(this->capacity_ >= 4);
+
+  size_t next_read_offset = read_offset + 4;
+#if 0
+  size_t zerotest = next_read_offset - this->capacity_;
+  // unpredictable branch, use bit arith instead
+  // todo: it would be faster to use lzcnt, but we need to figure out if all
+  // machines we support support it
+  next_read_offset &= -static_cast<ptrdiff_t>(!!zerotest);
+#else
+  if (XE_UNLIKELY(next_read_offset == this->capacity_)) {
+    next_read_offset = 0;
+    // todo: maybe prefetch next? or should that happen much earlier?
+  }
+#endif
+  this->read_offset_ = next_read_offset;
+  unsigned int ring_value = *(uint32_t*)&this->buffer_[read_offset];
+  return xe::byte_swap(ring_value);
+}
 }  // namespace xe

 #endif  // XENIA_BASE_RING_BUFFER_H_
@@ -10,12 +10,12 @@
 #include <algorithm>
 #include <forward_list>

+#include "third_party/disruptorplus/include/disruptorplus/blocking_wait_strategy.hpp"
 #include "third_party/disruptorplus/include/disruptorplus/multi_threaded_claim_strategy.hpp"
 #include "third_party/disruptorplus/include/disruptorplus/ring_buffer.hpp"
 #include "third_party/disruptorplus/include/disruptorplus/sequence_barrier.hpp"
 #include "third_party/disruptorplus/include/disruptorplus/spin_wait.hpp"
 #include "third_party/disruptorplus/include/disruptorplus/spin_wait_strategy.hpp"

 #include "xenia/base/assert.h"
 #include "xenia/base/threading.h"
 #include "xenia/base/threading_timer_queue.h"
@@ -26,6 +26,17 @@ namespace xe {
 namespace threading {

 using WaitItem = TimerQueueWaitItem;
+/*
+  chrispy: changed this to a blocking wait from a spin-wait; the spin was
+  monopolizing a ton of cpu time (depending on the game, 2-4% of total cpu
+  time) on my 3990X. no complaints since that change.
+*/
+/*
+  edit: actually had to change it back. when i was testing, it only worked
+  because i fixed disruptorplus' code to compile (it gives wrong args to
+  condition_variable::wait_until), but now it builds
+*/
+using WaitStrat = dp::spin_wait_strategy;  // dp::blocking_wait_strategy;

 class TimerQueue {
  public:
@@ -147,9 +158,10 @@ class TimerQueue {
   // This ring buffer will be used to introduce timers queued by the public API
   static constexpr size_t kWaitCount = 512;
   dp::ring_buffer<std::shared_ptr<WaitItem>> buffer_;
-  dp::spin_wait_strategy wait_strategy_;
-  dp::multi_threaded_claim_strategy<dp::spin_wait_strategy> claim_strategy_;
-  dp::sequence_barrier<dp::spin_wait_strategy> consumed_;
+  WaitStrat wait_strategy_;
+  dp::multi_threaded_claim_strategy<WaitStrat> claim_strategy_;
+  dp::sequence_barrier<WaitStrat> consumed_;

   // This is a _sorted_ (ascending due_) list of active timers managed by a
   // dedicated thread
@@ -7,19 +7,49 @@
  ******************************************************************************
  */

+#include <winternl.h>
 #include "xenia/base/assert.h"
 #include "xenia/base/chrono_steady_cast.h"
 #include "xenia/base/logging.h"
 #include "xenia/base/platform_win.h"
 #include "xenia/base/threading.h"
 #include "xenia/base/threading_timer_queue.h"
+#if defined(__clang__)
+// chrispy: i do not understand why this is an error for clang here;
+// something about the quoted __FUNCTION__ freaks it out (clang 14.0.1)
 #define LOG_LASTERROR()                                                       \
-  { XELOGI("Win32 Error 0x{:08X} in " __FUNCTION__ "(...)", GetLastError()); }
+  do {                                                                        \
+    XELOGI("Win32 Error 0x{:08X} in {} (...)", GetLastError(), __FUNCTION__); \
+  } while (false)
+#else
+#define LOG_LASTERROR()                                                       \
+  do {                                                                        \
+    XELOGI("Win32 Error 0x{:08X} in " __FUNCTION__ "(...)", GetLastError());  \
+  } while (false)
+#endif
 typedef HANDLE (*SetThreadDescriptionFn)(HANDLE hThread,
                                          PCWSTR lpThreadDescription);

+// syscall stub for NtYieldExecution; by calling it we sidestep
+// RtlGetCurrentUmsThread
+XE_NTDLL_IMPORT(NtYieldExecution, cls_NtYieldExecution,
+                NtYieldExecutionPointer);
+// sidestep the activation context / remapping of special windows handles
+// like stdout
+XE_NTDLL_IMPORT(NtWaitForSingleObject, cls_NtWaitForSingleObject,
+                NtWaitForSingleObjectPointer);
+
+XE_NTDLL_IMPORT(NtSetEvent, cls_NtSetEvent, NtSetEventPointer);
+// the difference between NtClearEvent and NtResetEvent is that NtResetEvent
+// returns the event's state prior to the call, but we don't need that. might
+// need to check whether one or the other is faster in the kernel, though.
+// yeah, just checked: the code in ntoskrnl is way simpler for clearevent
+// than resetevent
+XE_NTDLL_IMPORT(NtClearEvent, cls_NtClearEvent, NtClearEventPointer);
+XE_NTDLL_IMPORT(NtPulseEvent, cls_NtPulseEvent, NtPulseEventPointer);
+
+// heavily called; we don't skip much garbage by calling this, but every bit
+// counts
+XE_NTDLL_IMPORT(NtReleaseSemaphore, cls_NtReleaseSemaphore,
+                NtReleaseSemaphorePointer);
 namespace xe {
 namespace threading {
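Why the rewritten LOG_LASTERROR wraps its body in do { ... } while (false): a bare braced macro followed by a semicolon breaks when used as the single statement of an if before an else. A generic sketch (log_error/proceed are placeholders, not from the diff):

#include <cstdio>

static void log_error() { std::puts("error"); }
static void proceed() {}

#define LOG_GOOD() do { log_error(); } while (false)

void report(bool failed) {
  if (failed)
    LOG_GOOD();  // expands to exactly one statement, so the else still binds
  else
    proceed();
}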
@@ -80,7 +110,13 @@ void set_name(const std::string_view name) {
 }

 void MaybeYield() {
+#if defined(XE_USE_NTDLL_FUNCTIONS)
+  NtYieldExecutionPointer.invoke();
+#else
   SwitchToThread();
+#endif

+  // a memory barrier is really not necessary here...
   MemoryBarrier();
 }
@@ -134,8 +170,26 @@ class Win32Handle : public T {
 WaitResult Wait(WaitHandle* wait_handle, bool is_alertable,
                 std::chrono::milliseconds timeout) {
   HANDLE handle = wait_handle->native_handle();
-  DWORD result = WaitForSingleObjectEx(handle, DWORD(timeout.count()),
-                                       is_alertable ? TRUE : FALSE);
+  DWORD result;
+  DWORD timeout_dw = DWORD(timeout.count());
+  BOOL bAlertable = is_alertable ? TRUE : FALSE;
+  // todo: we might actually be able to use NtWaitForSingleObject even if it's
+  // alertable; we just need to study whether
+  // RtlDeactivateActivationContextUnsafeFast/RtlActivateActivationContext are
+  // actually needed for us
+#if XE_USE_NTDLL_FUNCTIONS == 1
+  if (bAlertable) {
+    result = WaitForSingleObjectEx(handle, timeout_dw, bAlertable);
+  } else {
+    LARGE_INTEGER timeout_big;
+    timeout_big.QuadPart = -10000LL * static_cast<int64_t>(timeout_dw);
+
+    result = NtWaitForSingleObjectPointer.invoke<NTSTATUS>(
+        handle, bAlertable, timeout_dw == INFINITE ? nullptr : &timeout_big);
+  }
+#else
+  result = WaitForSingleObjectEx(handle, timeout_dw, bAlertable);
+#endif
   switch (result) {
     case WAIT_OBJECT_0:
       return WaitResult::kSuccess;
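The unit conversion in the non-alertable path above: the NT kernel expresses wait timeouts in 100 ns ticks, and a negative value means "relative to now". 1 ms is 10,000 ticks, which is where the -10000LL multiplier comes from. Spelled out as a helper (hypothetical name):

#include <cstdint>

// 1 ms = 10,000 ticks of 100 ns; negative = relative timeout for NT waits
int64_t RelativeNtTimeoutFromMs(uint32_t milliseconds) {
  return -10000LL * static_cast<int64_t>(milliseconds);
}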
@@ -178,7 +232,9 @@ std::pair<WaitResult, size_t> WaitMultiple(WaitHandle* wait_handles[],
                                            size_t wait_handle_count,
                                            bool wait_all, bool is_alertable,
                                            std::chrono::milliseconds timeout) {
-  std::vector<HANDLE> handles(wait_handle_count);
+  std::vector<HANDLE> handles(
+      wait_handle_count);  // max handles is like 64, so it would make more
+                           // sense to just do a fixed-size array here
   for (size_t i = 0; i < wait_handle_count; ++i) {
     handles[i] = wait_handles[i]->native_handle();
   }
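The comment's fixed-size-array suggestion, sketched as a hypothetical wrapper (not the emulator's code): WaitForMultipleObjects caps the handle count at MAXIMUM_WAIT_OBJECTS (64), so a stack array sidesteps the heap allocation entirely.

#include <windows.h>

#include <array>
#include <cstddef>

DWORD WaitAllOnStack(HANDLE* in, size_t count, DWORD timeout_ms) {
  std::array<HANDLE, MAXIMUM_WAIT_OBJECTS> handles;  // stack, no allocation
  if (count > handles.size()) {
    count = handles.size();
  }
  for (size_t i = 0; i < count; ++i) {
    handles[i] = in[i];
  }
  return WaitForMultipleObjects(static_cast<DWORD>(count), handles.data(),
                                TRUE, timeout_ms);
}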
@@ -208,9 +264,16 @@ class Win32Event : public Win32Handle<Event> {
  public:
  explicit Win32Event(HANDLE handle) : Win32Handle(handle) {}
  ~Win32Event() override = default;
+#if XE_USE_NTDLL_FUNCTIONS == 1
+  void Set() override { NtSetEventPointer.invoke(handle_, nullptr); }
+  void Reset() override { NtClearEventPointer.invoke(handle_); }
+  void Pulse() override { NtPulseEventPointer.invoke(handle_, nullptr); }
+#else
   void Set() override { SetEvent(handle_); }
   void Reset() override { ResetEvent(handle_); }
   void Pulse() override { PulseEvent(handle_); }
+#endif
 };

 std::unique_ptr<Event> Event::CreateManualResetEvent(bool initial_state) {
@@ -220,6 +283,7 @@ std::unique_ptr<Event> Event::CreateManualResetEvent(bool initial_state) {
     return std::make_unique<Win32Event>(handle);
   } else {
     LOG_LASTERROR();
+
     return nullptr;
   }
 }
@@ -240,10 +304,15 @@ class Win32Semaphore : public Win32Handle<Semaphore> {
   explicit Win32Semaphore(HANDLE handle) : Win32Handle(handle) {}
   ~Win32Semaphore() override = default;
   bool Release(int release_count, int* out_previous_count) override {
+#if XE_USE_NTDLL_FUNCTIONS == 1
+    return NtReleaseSemaphorePointer.invoke<NTSTATUS>(handle_, release_count,
+                                                      out_previous_count) >= 0;
+#else
     return ReleaseSemaphore(handle_, release_count,
                             reinterpret_cast<LPLONG>(out_previous_count))
                ? true
                : false;
+#endif
   }
 };
@@ -82,8 +82,9 @@ std::string upper_ascii(const std::string_view view) {
 template <bool LOWER>
 inline size_t hash_fnv1a(const std::string_view view) {
   const size_t offset_basis = 0xCBF29CE484222325ull;
+  // chrispy: constant capture errors on clang
+  auto work = [](size_t hash, uint8_t byte_of_data) {
   const size_t prime = 0x00000100000001B3ull;
-  auto work = [&prime](size_t hash, uint8_t byte_of_data) {
     hash ^= byte_of_data;
     hash *= prime;
     return hash;
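The constants in this hunk are the standard 64-bit FNV-1a offset basis and prime. A self-contained reference version of the loop the lambda implements:

#include <cstdint>
#include <string_view>

uint64_t fnv1a_64(std::string_view view) {
  uint64_t hash = 0xCBF29CE484222325ull;  // offset basis
  for (unsigned char c : view) {
    hash ^= c;                      // xor first: that is the "1a" ordering
    hash *= 0x00000100000001B3ull;  // FNV prime
  }
  return hash;
}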
@@ -25,7 +25,7 @@
 #include "xenia/cpu/breakpoint.h"
 #include "xenia/cpu/processor.h"
 #include "xenia/cpu/stack_walker.h"
+#include "xenia/cpu/xex_module.h"
 DEFINE_int32(x64_extension_mask, -1,
              "Allow the detection and utilization of specific instruction set "
              "features.\n"
@@ -45,6 +45,12 @@ DEFINE_int32(x64_extension_mask, -1,
              " -1 = Detect and utilize all possible processor features\n",
              "x64");

+DEFINE_bool(record_mmio_access_exceptions, true,
+            "For guest addresses, records whether we caught any MMIO accesses "
+            "for them. This info can then be used on a subsequent run to "
+            "instruct the recompiler to emit checks",
+            "CPU");

 namespace xe {
 namespace cpu {
 namespace backend {
@@ -86,6 +92,11 @@ X64Backend::~X64Backend() {
   ExceptionHandler::Uninstall(&ExceptionCallbackThunk, this);
 }

+static void ForwardMMIOAccessForRecording(void* context, void* hostaddr) {
+  reinterpret_cast<X64Backend*>(context)
+      ->RecordMMIOExceptionForGuestInstruction(hostaddr);
+}

 bool X64Backend::Initialize(Processor* processor) {
   if (!Backend::Initialize(processor)) {
     return false;
@@ -146,6 +157,8 @@ bool X64Backend::Initialize(Processor* processor) {
   // Setup exception callback
   ExceptionHandler::Install(&ExceptionCallbackThunk, this);

+  processor->memory()->SetMMIOExceptionRecordingCallback(
+      ForwardMMIOAccessForRecording, (void*)this);
   return true;
 }
@@ -390,7 +403,28 @@ bool X64Backend::ExceptionCallbackThunk(Exception* ex, void* data) {
   auto backend = reinterpret_cast<X64Backend*>(data);
   return backend->ExceptionCallback(ex);
 }
+void X64Backend::RecordMMIOExceptionForGuestInstruction(void* host_address) {
+  uint64_t host_addr_u64 = (uint64_t)host_address;
+
+  auto fnfor = code_cache()->LookupFunction(host_addr_u64);
+  if (fnfor) {
+    uint32_t guestaddr = fnfor->MapMachineCodeToGuestAddress(host_addr_u64);
+
+    Module* guest_module = fnfor->module();
+    if (guest_module) {
+      XexModule* xex_guest_module = dynamic_cast<XexModule*>(guest_module);
+
+      if (xex_guest_module) {
+        cpu::InfoCacheFlags* icf =
+            xex_guest_module->GetInstructionAddressFlags(guestaddr);
+
+        if (icf) {
+          icf->accessed_mmio = true;
+        }
+      }
+    }
+  }
+}
 bool X64Backend::ExceptionCallback(Exception* ex) {
   if (ex->code() != Exception::Code::kIllegalInstruction) {
     // We only care about illegal instructions. Other things will be handled by
@@ -399,6 +433,8 @@ bool X64Backend::ExceptionCallback(Exception* ex) {
     return false;
   }

+  // processor_->memory()->LookupVirtualMappedRange()

   // Verify an expected illegal instruction.
   auto instruction_bytes =
       xe::load_and_swap<uint16_t>(reinterpret_cast<void*>(ex->pc()));
@@ -92,6 +92,8 @@ class X64Backend : public Backend {
   }
   virtual void SetGuestRoundingMode(void* ctx, unsigned int mode) override;

+  void RecordMMIOExceptionForGuestInstruction(void* host_address);

  private:
   static bool ExceptionCallbackThunk(Exception* ex, void* data);
   bool ExceptionCallback(Exception* ex);
@@ -156,7 +156,7 @@ bool X64Emitter::Emit(GuestFunction* function, HIRBuilder* builder,
                       void** out_code_address, size_t* out_code_size,
                       std::vector<SourceMapEntry>* out_source_map) {
   SCOPE_profile_cpu_f("cpu");
+  guest_module_ = dynamic_cast<XexModule*>(function->module());
   // Reset.
   debug_info_ = debug_info;
   debug_info_flags_ = debug_info_flags;
@@ -18,8 +18,8 @@
 #include "xenia/cpu/hir/hir_builder.h"
 #include "xenia/cpu/hir/instr.h"
 #include "xenia/cpu/hir/value.h"
+#include "xenia/cpu/xex_module.h"
 #include "xenia/memory.h"

 // NOTE: must be included last as it expects windows.h to already be included.
 #include "third_party/xbyak/xbyak/xbyak.h"
 #include "third_party/xbyak/xbyak/xbyak_util.h"
@@ -65,11 +65,7 @@ enum class SimdDomain : uint32_t {
   // CONFLICTING means its used in multiple domains)
 };

-enum class MXCSRMode : uint32_t {
-  Unknown,
-  Fpu,
-  Vmx
-};
+enum class MXCSRMode : uint32_t { Unknown, Fpu, Vmx };

 static SimdDomain PickDomain2(SimdDomain dom1, SimdDomain dom2) {
   if (dom1 == dom2) {
@@ -326,16 +322,21 @@ class X64Emitter : public Xbyak::CodeGenerator {
   size_t stack_size() const { return stack_size_; }
   SimdDomain DeduceSimdDomain(const hir::Value* for_value);

-  void ForgetMxcsrMode() {
-    mxcsr_mode_ = MXCSRMode::Unknown;
-  }
+  void ForgetMxcsrMode() { mxcsr_mode_ = MXCSRMode::Unknown; }
   /*
-    returns true if had to load mxcsr. DOT_PRODUCT can use this to skip clearing the overflow flag, as it will never be set in the vmx fpscr
+    returns true if had to load mxcsr. DOT_PRODUCT can use this to skip
+    clearing the overflow flag, as it will never be set in the vmx fpscr
   */
-  bool ChangeMxcsrMode(MXCSRMode new_mode, bool already_set=false);//already_set means that the caller already did vldmxcsr, used for SET_ROUNDING_MODE
+  bool ChangeMxcsrMode(
+      MXCSRMode new_mode,
+      bool already_set = false);  // already_set means that the caller already
+                                  // did vldmxcsr, used for SET_ROUNDING_MODE
+
+  void LoadFpuMxcsrDirect();  // unsafe, does not change mxcsr_mode_
+  void LoadVmxMxcsrDirect();  // unsafe, does not change mxcsr_mode_
+
+  XexModule* GuestModule() { return guest_module_; }

-  void LoadFpuMxcsrDirect(); //unsafe, does not change mxcsr_mode_
-  void LoadVmxMxcsrDirect(); //unsafe, does not change mxcsr_mode_
  protected:
   void* Emplace(const EmitFunctionInfo& func_info,
                 GuestFunction* function = nullptr);
@@ -348,6 +349,7 @@ class X64Emitter : public Xbyak::CodeGenerator {
   X64Backend* backend_ = nullptr;
   X64CodeCache* code_cache_ = nullptr;
   XbyakAllocator* allocator_ = nullptr;
+  XexModule* guest_module_ = nullptr;
   Xbyak::util::Cpu cpu_;
   uint32_t feature_flags_ = 0;
@@ -60,23 +60,46 @@ union InstrKey {

   InstrKey() : value(0) { static_assert_size(*this, sizeof(value)); }
   InstrKey(uint32_t v) : value(v) {}

+  // this used to take about 1% cpu while precompiling:
+  // it kept reloading opcode, and also constantly repacking and unpacking the
+  // bitfields. instead, we pack the fields at the very end
   InstrKey(const Instr* i) : value(0) {
-    opcode = i->opcode->num;
-    uint32_t sig = i->opcode->signature;
-    dest =
-        GET_OPCODE_SIG_TYPE_DEST(sig) ? OPCODE_SIG_TYPE_V + i->dest->type : 0;
-    src1 = GET_OPCODE_SIG_TYPE_SRC1(sig);
-    if (src1 == OPCODE_SIG_TYPE_V) {
-      src1 += i->src1.value->type;
+    const OpcodeInfo* info = i->GetOpcodeInfo();
+
+    uint32_t sig = info->signature;
+
+    OpcodeSignatureType dest_type, src1_type, src2_type, src3_type;
+
+    UnpackOpcodeSig(sig, dest_type, src1_type, src2_type, src3_type);
+
+    uint32_t out_desttype = (uint32_t)dest_type;
+    uint32_t out_src1type = (uint32_t)src1_type;
+    uint32_t out_src2type = (uint32_t)src2_type;
+    uint32_t out_src3type = (uint32_t)src3_type;
+
+    Value* destv = i->dest;
+    // pre-deref, even if not value
+    Value* src1v = i->src1.value;
+    Value* src2v = i->src2.value;
+    Value* src3v = i->src3.value;
+
+    if (out_src1type == OPCODE_SIG_TYPE_V) {
+      out_src1type += src1v->type;
     }
-    src2 = GET_OPCODE_SIG_TYPE_SRC2(sig);
-    if (src2 == OPCODE_SIG_TYPE_V) {
-      src2 += i->src2.value->type;
+    if (out_src2type == OPCODE_SIG_TYPE_V) {
+      out_src2type += src2v->type;
     }
-    src3 = GET_OPCODE_SIG_TYPE_SRC3(sig);
-    if (src3 == OPCODE_SIG_TYPE_V) {
-      src3 += i->src3.value->type;
+    if (out_src3type == OPCODE_SIG_TYPE_V) {
+      out_src3type += src3v->type;
     }
+    opcode = info->num;
+    dest = out_desttype ? OPCODE_SIG_TYPE_V + destv->type : 0;
+    src1 = out_src1type;
+    src2 = out_src2type;
+    src3 = out_src3type;
   }

   template <Opcode OPCODE, KeyType DEST = KEY_TYPE_X, KeyType SRC1 = KEY_TYPE_X,
@@ -18,7 +18,7 @@
 #include "xenia/cpu/backend/x64/x64_op.h"
 #include "xenia/cpu/backend/x64/x64_tracers.h"
 #include "xenia/cpu/ppc/ppc_context.h"
+#include "xenia/cpu/processor.h"
 DEFINE_bool(
     elide_e0_check, false,
     "Eliminate e0 check on some memory accesses, like to r13(tls) or r1(sp)",
@@ -27,6 +27,10 @@ DEFINE_bool(enable_rmw_context_merging, false,
             "Permit merging read-modify-write HIR instr sequences together "
             "into x86 instructions that use a memory operand.",
             "x64");
+DEFINE_bool(emit_mmio_aware_stores_for_recorded_exception_addresses, true,
+            "Uses info gathered via record_mmio_access_exceptions to emit "
+            "special stores that are faster than trapping the exception",
+            "CPU");

 namespace xe {
 namespace cpu {
@@ -965,6 +969,21 @@ struct STORE_MMIO_I32
   }
 };
 EMITTER_OPCODE_TABLE(OPCODE_STORE_MMIO, STORE_MMIO_I32);
+// according to Triang3l we don't support MMIO reads atm, so no point in
+// implementing this for them
+static bool IsPossibleMMIOInstruction(X64Emitter& e, const hir::Instr* i) {
+  if (!cvars::emit_mmio_aware_stores_for_recorded_exception_addresses) {
+    return false;
+  }
+  uint32_t guestaddr = i->GuestAddressFor();
+  if (!guestaddr) {
+    return false;
+  }
+
+  auto flags = e.GuestModule()->GetInstructionAddressFlags(guestaddr);
+
+  return flags && flags->accessed_mmio;
+}

 // ============================================================================
 // OPCODE_LOAD_OFFSET
@@ -1030,6 +1049,28 @@ struct LOAD_OFFSET_I64
 EMITTER_OPCODE_TABLE(OPCODE_LOAD_OFFSET, LOAD_OFFSET_I8, LOAD_OFFSET_I16,
                      LOAD_OFFSET_I32, LOAD_OFFSET_I64);

+template <typename T, bool swap>
+static void MMIOAwareStore(void* _ctx, unsigned int guestaddr, T value) {
+  if (swap) {
+    value = xe::byte_swap(value);
+  }
+  if (guestaddr >= 0xE0000000) {
+    guestaddr += 0x1000;
+  }
+
+  auto ctx = reinterpret_cast<ppc::PPCContext*>(_ctx);
+
+  auto gaddr = ctx->processor->memory()->LookupVirtualMappedRange(guestaddr);
+  if (!gaddr) {
+    *reinterpret_cast<T*>(ctx->virtual_membase + guestaddr) = value;
+  } else {
+    value = xe::byte_swap(value); /*
+      was having issues; comparing the values used with exceptions to these
+      showed that we had them reversed...
+    */
+    gaddr->write(nullptr, gaddr->callback_context, guestaddr, value);
+  }
+}
 // ============================================================================
 // OPCODE_STORE_OFFSET
 // ============================================================================
@@ -1038,6 +1079,7 @@ struct STORE_OFFSET_I8
     : Sequence<STORE_OFFSET_I8,
                I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I8Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
+
     if (i.src3.is_constant) {
       e.mov(e.byte[addr], i.src3.constant());
     } else {
@@ -1076,6 +1118,30 @@ struct STORE_OFFSET_I32
     : Sequence<STORE_OFFSET_I32,
                I<OPCODE_STORE_OFFSET, VoidOp, I64Op, I64Op, I32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    if (IsPossibleMMIOInstruction(e, i.instr)) {
+      void* addrptr = (void*)&MMIOAwareStore<uint32_t, false>;
+
+      if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
+        addrptr = (void*)&MMIOAwareStore<uint32_t, true>;
+      }
+      if (i.src1.is_constant) {
+        e.mov(e.GetNativeParam(0).cvt32(), i.src1.constant());
+      } else {
+        e.mov(e.GetNativeParam(0).cvt32(), i.src1.reg().cvt32());
+      }
+      if (i.src2.is_constant) {
+        e.add(e.GetNativeParam(0).cvt32(), (uint32_t)i.src2.constant());
+      } else {
+        e.add(e.GetNativeParam(0).cvt32(), i.src2);
+      }
+      if (i.src3.is_constant) {
+        e.mov(e.GetNativeParam(1).cvt32(), i.src3.constant());
+      } else {
+        e.mov(e.GetNativeParam(1).cvt32(), i.src3);
+      }
+      e.CallNativeSafe(addrptr);
+
+    } else {
       auto addr = ComputeMemoryAddressOffset(e, i.src1, i.src2);
       if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
         assert_false(i.src3.is_constant);
@@ -1096,6 +1162,7 @@ struct STORE_OFFSET_I32
       }
     }
   }
+  }
 };

 struct STORE_OFFSET_I64
@@ -1290,6 +1357,25 @@ struct STORE_I16 : Sequence<STORE_I16, I<OPCODE_STORE, VoidOp, I64Op, I16Op>> {
 };
 struct STORE_I32 : Sequence<STORE_I32, I<OPCODE_STORE, VoidOp, I64Op, I32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
+    if (IsPossibleMMIOInstruction(e, i.instr)) {
+      void* addrptr = (void*)&MMIOAwareStore<uint32_t, false>;
+
+      if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
+        addrptr = (void*)&MMIOAwareStore<uint32_t, true>;
+      }
+      if (i.src1.is_constant) {
+        e.mov(e.GetNativeParam(0).cvt32(), (uint32_t)i.src1.constant());
+      } else {
+        e.mov(e.GetNativeParam(0).cvt32(), i.src1.reg().cvt32());
+      }
+      if (i.src2.is_constant) {
+        e.mov(e.GetNativeParam(1).cvt32(), i.src2.constant());
+      } else {
+        e.mov(e.GetNativeParam(1).cvt32(), i.src2);
+      }
+      e.CallNativeSafe(addrptr);
+
+    } else {
       auto addr = ComputeMemoryAddress(e, i.src1);
       if (i.instr->flags & LoadStoreFlags::LOAD_STORE_BYTE_SWAP) {
         assert_false(i.src2.is_constant);
@@ -1305,8 +1391,9 @@ struct STORE_I32 : Sequence<STORE_I32, I<OPCODE_STORE, VoidOp, I64Op, I32Op>> {
         e.mov(e.dword[addr], i.src2);
       }
     }
+    }
     if (IsTracingData()) {
-      addr = ComputeMemoryAddress(e, i.src1);
+      auto addr = ComputeMemoryAddress(e, i.src1);
       e.mov(e.GetNativeParam(1).cvt32(), e.dword[addr]);
       e.lea(e.GetNativeParam(0), e.ptr[addr]);
       e.CallNative(reinterpret_cast<void*>(TraceMemoryStoreI32));
@@ -1683,6 +1683,9 @@ struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
     assert_impossible_sequence(DIV_I16);
   }
 };
+/*
+  TODO: hoist the overflow/zero checks into HIR
+*/
 struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     Xbyak::Label skip;
@@ -1766,6 +1769,9 @@ struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
     e.mov(i.dest, e.eax);
   }
 };
+/*
+  TODO: hoist the overflow/zero checks into HIR
+*/
 struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
   static void Emit(X64Emitter& e, const EmitArgType& i) {
     Xbyak::Label skip;
@@ -1811,7 +1817,7 @@ struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
     } else {
       // check for signed overflow
       if (i.src1.is_constant) {
-        if (i.src1.constant() != (1 << 31)) {
+        if (i.src1.constant() != (1ll << 63)) {
          // we're good, overflow is impossible
         } else {
           e.cmp(i.src2, -1);  // otherwise, if src2 is -1 then we have
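Why this one-constant fix matters: the only operand pair for which x86 idiv raises a signed-overflow fault is (minimum value, -1) at the operand width, so the 64-bit sequence must compare the dividend against INT64_MIN (1ll << 63); the old code had reused the 32-bit constant. The condition, spelled out as a sketch:

#include <cstdint>
#include <limits>

// true exactly when a 64-bit signed division would overflow (idiv faults)
bool div_would_overflow_i64(int64_t dividend, int64_t divisor) {
  return dividend == std::numeric_limits<int64_t>::min() && divisor == -1;
}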
@@ -149,7 +149,20 @@ bool ConstantPropagationPass::Run(HIRBuilder* builder, bool& result) {
             i->Remove();
           }
           result = true;
+        } else if (i->src2.value->IsConstant()) {  // chrispy: fix h3 bug from
+                                                   // const indirect call true
+          auto function = processor_->LookupFunction(
+              uint32_t(i->src2.value->constant.i32));
+          if (!function) {
+            break;
+          }
+          // i->Replace(&OPCODE_CALL_TRUE_info, i->flags);
+          i->opcode = &OPCODE_CALL_TRUE_info;
+          i->set_src2(nullptr);
+          i->src2.symbol = function;
+          result = true;
         }

         break;

       case OPCODE_BRANCH_TRUE:
@@ -796,10 +796,13 @@ bool SimplificationPass::CheckScalarConstCmp(hir::Instr* i,

   if (var_definition) {
     var_definition = var_definition->GetDestDefSkipAssigns();
-    if (var_definition != NULL)
-    {
-      def_opcode = var_definition->opcode->num;
-    }
+    if (!var_definition) {
+      return false;
+    }
+    def_opcode = var_definition->opcode->num;
+  }
+  if (!var_definition) {
+    return false;
   }
   // x == 0 -> !x
   if (cmpop == OPCODE_COMPARE_EQ && constant_unpacked == 0) {
@@ -1231,13 +1234,12 @@ Value* SimplificationPass::CheckValue(Value* value, bool& result) {
     result = false;
     return value;
   }
-bool SimplificationPass::SimplifyAddArith(hir::Instr* i,
+bool SimplificationPass::SimplifyAddWithSHL(hir::Instr* i,
                                           hir::HIRBuilder* builder) {
   /*
       example: (x << 1) + x == (x * 3)
   */
   auto [shlinsn, addend] =
       i->BinaryValueArrangeByDefiningOpcode(&OPCODE_SHL_info);
   if (!shlinsn) {
@ -1278,11 +1280,81 @@ bool SimplificationPass::SimplifyAddArith(hir::Instr* i,
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
bool SimplificationPass::SimplifyAddToSelf(hir::Instr* i,
|
||||||
|
hir::HIRBuilder* builder) {
|
||||||
|
/*
|
||||||
|
heres a super easy one
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (i->src1.value != i->src2.value) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
i->opcode = &OPCODE_SHL_info;
|
||||||
|
|
||||||
|
i->set_src2(builder->LoadConstantUint8(1));
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
bool SimplificationPass::SimplifyAddArith(hir::Instr* i,
|
||||||
|
hir::HIRBuilder* builder) {
|
||||||
|
if (SimplifyAddWithSHL(i, builder)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (SimplifyAddToSelf(i, builder)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool SimplificationPass::SimplifySubArith(hir::Instr* i,
|
bool SimplificationPass::SimplifySubArith(hir::Instr* i,
|
||||||
hir::HIRBuilder* builder) {
|
hir::HIRBuilder* builder) {
|
||||||
|
/*
|
||||||
|
todo: handle expressions like (x*8) - (x*5) == (x*3)...if these can even
|
||||||
|
happen of course */
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
bool SimplificationPass::SimplifySHLArith(hir::Instr* i,
|
||||||
|
hir::HIRBuilder* builder) {
|
||||||
|
Value* sh = i->src2.value;
|
||||||
|
|
||||||
|
Value* shifted = i->src1.value;
|
||||||
|
|
||||||
|
if (!sh->IsConstant()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
hir::Instr* definition = shifted->GetDefSkipAssigns();
|
||||||
|
|
||||||
|
if (!definition) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (definition->GetOpcodeNum() != OPCODE_MUL) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (definition->flags != ARITHMETIC_UNSIGNED) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto [mulconst, mulnonconst] = definition->BinaryValueArrangeAsConstAndVar();
|
||||||
|
|
||||||
|
if (!mulconst) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto newmul = builder->AllocValue(mulconst->type);
|
||||||
|
newmul->set_from(mulconst);
|
||||||
|
|
||||||
|
newmul->Shl(sh);
|
||||||
|
|
||||||
|
i->Replace(&OPCODE_MUL_info, ARITHMETIC_UNSIGNED);
|
||||||
|
i->set_src1(mulnonconst);
|
||||||
|
i->set_src2(newmul);
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
bool SimplificationPass::SimplifyBasicArith(hir::Instr* i,
                                            hir::HIRBuilder* builder) {
  if (!i->dest) {

@@ -1301,6 +1373,9 @@ bool SimplificationPass::SimplifyBasicArith(hir::Instr* i,
    case OPCODE_SUB: {
      return SimplifySubArith(i, builder);
    }
    case OPCODE_SHL: {
      return SimplifySHLArith(i, builder);
    }
  }
  return false;
}

@@ -1317,6 +1392,97 @@ bool SimplificationPass::SimplifyBasicArith(hir::HIRBuilder* builder) {
  }
  return result;
}

/*
  todo: add load-store simplification pass

  do things like load-store byteswap elimination, for instance:

  if a value is loaded, OR'ed with a constant mask, and then stored, we
  simply have to byteswap the mask it will be OR'ed with and then we can
  eliminate the two byteswaps

  the same can be done for and, or, xor, andn with constant masks

  this can also be done for comparisons with 0 for equality and not equal

  another optimization: with ppc you cannot move a floating point register
  directly to a gp one, a gp one directly to a floating point register, or a
  vmx one to either. so guest code will store the result to the stack, and
  then load it to the register it needs. in HIR we can sidestep this. we will
  still need to byteswap and store the result for correctness, but we can
  eliminate the load and byteswap by grabbing the original value from the
  store

  skyth's sanic idb, 0x824D7724
    lis r11,
    lfs f0, flt_8200CBCC@l(r11)
    fmuls f0, time, f0
    fctidz f0, f0 # vcvttss2si
    stfd f0, 0x190+var_138(r1)
    lwz r30, 0x190+var_138+4(r1)
    cmplwi cr6, r30, 0x63 # 'c'
    ble cr6, counter_op
*/
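The byteswap-elimination idea works because a byteswap is a pure byte
permutation, so it distributes over bytewise bitwise operations:
bswap(bswap(v) | m) == v | bswap(m). A standalone sketch of the rewrite
(plain C++, values arbitrary):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t v) {
  return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) |
         (v << 24);
}

int main() {
  uint32_t guest_value = 0x11223344u;  // big-endian word as it sits in memory
  uint32_t mask = 0x000000FFu;
  // Original sequence: byteswap on load, OR, byteswap back on store.
  uint32_t stored_a = bswap32(bswap32(guest_value) | mask);
  // Rewritten: OR with the pre-byteswapped mask; both byteswaps vanish.
  uint32_t stored_b = guest_value | bswap32(mask);
  assert(stored_a == stored_b);
  return 0;
}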

/*
  todo: simple loop unrolling
  skyth sanic 0x831D9908

    mr r30, r4
    mr r29, r5
    mr r11, r7
    li r31, 0

  loc_831D9928:
    slwi r9, r11, 1
    addi r10, r11, 1
    addi r8, r1, 0xD0+var_80
    clrlwi r11, r10, 16
    cmplwi cr6, r11, 0x10
    sthx r31, r9, r8
    ble cr6, loc_831D9928

  v5 = 1;
  do
  {
    v6 = 2 * v5;
    v5 = (unsigned __int16)(v5 + 1);
    *(_WORD *)&v24[v6] = 0;
  }
  while ( v5 <= 0x10 );
  v7 = 0;
  do
  {
    v8 = __ROL4__(*(unsigned __int8 *)(v7 + a2), 1);
    v7 = (unsigned __int16)(v7 + 1);
    ++*(_WORD *)&v24[v8];
  }
  while ( v7 < 8 );
  v9 = 1;
  v25[0] = 0;
  do
  {
    v10 = 2 * v9;
    v11 = 16 - v9;
    v9 = (unsigned __int16)(v9 + 1);
    v25[v10 / 2] = (*(_WORD *)&v24[v10] << v11) + *(_WORD *)&v24[v10 + 48];
  }
  while ( v9 <= 0x10 );

  skyth sanic:
    sub_831BBAE0
    sub_831A41A8
*/
} // namespace passes
} // namespace compiler
} // namespace cpu

@@ -36,9 +36,11 @@ class SimplificationPass : public ConditionalGroupSubpass {
  // handles simple multiplication/addition rules
  bool SimplifyBasicArith(hir::HIRBuilder* builder);
  bool SimplifyBasicArith(hir::Instr* i, hir::HIRBuilder* builder);
  bool SimplifyAddWithSHL(hir::Instr* i, hir::HIRBuilder* builder);
  bool SimplifyAddToSelf(hir::Instr* i, hir::HIRBuilder* builder);
  bool SimplifyAddArith(hir::Instr* i, hir::HIRBuilder* builder);
  bool SimplifySubArith(hir::Instr* i, hir::HIRBuilder* builder);
  bool SimplifySHLArith(hir::Instr* i, hir::HIRBuilder* builder);
  // handle either OR or XOR with 0
  bool CheckOrXorZero(hir::Instr* i);
  bool CheckOr(hir::Instr* i, hir::HIRBuilder* builder);

@@ -200,6 +200,20 @@ const Instr* Instr::GetNonFakePrev() const {
  }
  return curr;
}

uint32_t Instr::GuestAddressFor() const {
  Instr* srch = prev;

  while (srch) {
    if (srch->GetOpcodeNum() == OPCODE_SOURCE_OFFSET) {
      return (uint32_t)srch->src1.offset;
    }
    srch = srch->prev;
  }

  return 0;  // eek.
}

} // namespace hir
} // namespace cpu
} // namespace xe

@@ -169,6 +169,8 @@ if both are constant, return nullptr, nullptr
  // gets previous instr, skipping instrs like COMMENT, OPCODE_CONTEXT_BARRIER,
  // OPCODE_SOURCE_OFFSET
  const hir::Instr* GetNonFakePrev() const;

  uint32_t GuestAddressFor() const;
};

} // namespace hir

@@ -30,7 +30,8 @@ std::unique_ptr<MMIOHandler> MMIOHandler::Install(
    HostToGuestVirtual host_to_guest_virtual,
    const void* host_to_guest_virtual_context,
    AccessViolationCallback access_violation_callback,
    void* access_violation_callback_context,
    MmioAccessRecordCallback record_mmio_callback, void* record_mmio_context) {
  // There can be only one handler at a time.
  assert_null(global_handler_);
  if (global_handler_) {

@@ -40,7 +41,8 @@ std::unique_ptr<MMIOHandler> MMIOHandler::Install(
  auto handler = std::unique_ptr<MMIOHandler>(new MMIOHandler(
      virtual_membase, physical_membase, membase_end, host_to_guest_virtual,
      host_to_guest_virtual_context, access_violation_callback,
      access_violation_callback_context, record_mmio_callback,
      record_mmio_context));

  // Install the exception handler directed at the MMIOHandler.
  ExceptionHandler::Install(ExceptionCallbackThunk, handler.get());

@@ -54,14 +56,18 @@ MMIOHandler::MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase,
                         HostToGuestVirtual host_to_guest_virtual,
                         const void* host_to_guest_virtual_context,
                         AccessViolationCallback access_violation_callback,
                         void* access_violation_callback_context,
                         MmioAccessRecordCallback record_mmio_callback,
                         void* record_mmio_context)
    : virtual_membase_(virtual_membase),
      physical_membase_(physical_membase),
      memory_end_(membase_end),
      host_to_guest_virtual_(host_to_guest_virtual),
      host_to_guest_virtual_context_(host_to_guest_virtual_context),
      access_violation_callback_(access_violation_callback),
      access_violation_callback_context_(access_violation_callback_context),
      record_mmio_callback_(record_mmio_callback),
      record_mmio_context_(record_mmio_context) {}

MMIOHandler::~MMIOHandler() {
  ExceptionHandler::Uninstall(ExceptionCallbackThunk, this);

@@ -412,6 +418,8 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
    // Quick kill anything outside our mapping.
    return false;
  }
  uint64_t hostip = ex->pc();

  void* fault_host_address = reinterpret_cast<void*>(ex->fault_address());

  // Access violations are pretty rare, so we can do a linear search here.

@@ -561,6 +569,13 @@ bool MMIOHandler::ExceptionCallback(Exception* ex) {
  }
#endif  // XE_ARCH_ARM64

  if (record_mmio_callback_) {
    // record that the guest address corresponding to the faulting
    // instruction's host address reads/writes mmio. we can backpropagate this
    // info on future compilations
    record_mmio_callback_(record_mmio_context_, (void*)ex->pc());
  }

  // Advance RIP to the next instruction so that we resume properly.
  ex->set_resume_pc(rip + decoded_load_store.length);

@@ -29,7 +29,8 @@ typedef uint32_t (*MMIOReadCallback)(void* ppc_context, void* callback_context,
                                     uint32_t addr);
typedef void (*MMIOWriteCallback)(void* ppc_context, void* callback_context,
                                  uint32_t addr, uint32_t value);
typedef void (*MmioAccessRecordCallback)(void* context,
                                         void* host_insn_address);
struct MMIORange {
  uint32_t address;
  uint32_t mask;

@@ -58,7 +59,8 @@ class MMIOHandler {
      HostToGuestVirtual host_to_guest_virtual,
      const void* host_to_guest_virtual_context,
      AccessViolationCallback access_violation_callback,
      void* access_violation_callback_context,
      MmioAccessRecordCallback record_mmio_callback, void* record_mmio_context);
  static MMIOHandler* global_handler() { return global_handler_; }

  bool RegisterRange(uint32_t virtual_address, uint32_t mask, uint32_t size,

@@ -68,13 +70,20 @@ class MMIOHandler {

  bool CheckLoad(uint32_t virtual_address, uint32_t* out_value);
  bool CheckStore(uint32_t virtual_address, uint32_t value);
  void SetMMIOExceptionRecordingCallback(MmioAccessRecordCallback callback,
                                         void* context) {
    record_mmio_context_ = context;
    record_mmio_callback_ = callback;
  }

 protected:
  MMIOHandler(uint8_t* virtual_membase, uint8_t* physical_membase,
              uint8_t* membase_end, HostToGuestVirtual host_to_guest_virtual,
              const void* host_to_guest_virtual_context,
              AccessViolationCallback access_violation_callback,
              void* access_violation_callback_context,
              MmioAccessRecordCallback record_mmio_callback,
              void* record_mmio_context);

  static bool ExceptionCallbackThunk(Exception* ex, void* data);
  bool ExceptionCallback(Exception* ex);

@@ -90,7 +99,9 @@ class MMIOHandler {

  AccessViolationCallback access_violation_callback_;
  void* access_violation_callback_context_;
  MmioAccessRecordCallback record_mmio_callback_;
  void* record_mmio_context_;
  static MMIOHandler* global_handler_;

  xe::global_critical_region global_critical_region_;
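A hypothetical consumer of the new recording hook; only MmioAccessRecordCallback
and SetMMIOExceptionRecordingCallback above come from this change, the sink and
function names here are illustrative:

#include <vector>

#include "xenia/cpu/mmio_handler.h"

static void OnMmioFault(void* context, void* host_insn_address) {
  // Remember which host instructions faulted into MMIO so that a later
  // recompilation can treat the corresponding guest code as MMIO-accessing.
  static_cast<std::vector<void*>*>(context)->push_back(host_insn_address);
}

void AttachMmioRecorder(xe::cpu::MMIOHandler* handler,
                        std::vector<void*>* sink) {
  handler->SetMMIOExceptionRecordingCallback(&OnMmioFault, sink);
}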

@@ -1439,11 +1439,23 @@ int InstrEmit_vsel(PPCHIRBuilder& f, const InstrData& i) {
int InstrEmit_vsel128(PPCHIRBuilder& f, const InstrData& i) {
  return InstrEmit_vsel_(f, VX128_VD128, VX128_VA128, VX128_VB128, VX128_VD128);
}
// chrispy: this is test code for checking whether a game takes advantage of the
// VSR/VSL undocumented/undefined variable shift behavior
static void AssertShiftElementsOk(PPCHIRBuilder& f, Value* v) {
#if 0
  Value* splatted = f.Splat(f.Extract(v, (uint8_t)0, INT8_TYPE), VEC128_TYPE);

  Value* checkequal = f.Xor(splatted, v);
  f.DebugBreakTrue(f.IsTrue(checkequal));
#endif
}
int InstrEmit_vsl(PPCHIRBuilder& f, const InstrData& i) {
  Value* va = f.LoadVR(i.VX.VA);
  Value* vb = f.LoadVR(i.VX.VB);

  AssertShiftElementsOk(f, vb);
  Value* v =
      f.Shl(va, f.And(f.Extract(vb, 15, INT8_TYPE), f.LoadConstantInt8(0b111)));
  f.StoreVR(i.VX.VD, v);
  return 0;
}

@@ -1623,9 +1635,13 @@ int InstrEmit_vspltisw128(PPCHIRBuilder& f, const InstrData& i) {
}

int InstrEmit_vsr(PPCHIRBuilder& f, const InstrData& i) {
  Value* va = f.LoadVR(i.VX.VA);
  Value* vb = f.LoadVR(i.VX.VB);

  AssertShiftElementsOk(f, vb);

  Value* v =
      f.Shr(va, f.And(f.Extract(vb, 15, INT8_TYPE), f.LoadConstantInt8(0b111)));
  f.StoreVR(i.VX.VD, v);
  return 0;
}
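Both emitters model vsl/vsr as one shift of the entire 128-bit register by the
low 3 bits of byte 15 of VB (byte-granularity shifts go through vslo/vsro, and
per-byte shift amounts that disagree are the undefined case the disabled
assert probes for). A scalar sketch of the left-shift half, assuming a
big-endian two-uint64 view of the vector:

#include <cstdint>

struct Vec128 {
  uint64_t hi;  // bytes 0..7
  uint64_t lo;  // bytes 8..15
};

Vec128 VslBits(Vec128 va, uint8_t vb_byte15) {
  uint32_t sh = vb_byte15 & 0b111;  // same mask the HIR above applies
  Vec128 r;
  r.hi = (va.hi << sh) | (sh ? (va.lo >> (64 - sh)) : 0);  // carry across
  r.lo = va.lo << sh;
  return r;
}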

@@ -769,8 +769,14 @@ int InstrEmit_mfmsr(PPCHIRBuilder& f, const InstrData& i) {
  // bit 62 = RI; recoverable interrupt
  // return 8000h if unlocked (interrupts enabled), else 0
  f.MemoryBarrier();
  if (cvars::disable_global_lock || true) {
    f.StoreGPR(i.X.RT, f.LoadConstantUint64(0));

  } else {
    f.CallExtern(f.builtins()->check_global_lock);
    f.StoreGPR(i.X.RT,
               f.LoadContext(offsetof(PPCContext, scratch), INT64_TYPE));
  }
  return 0;
}

@@ -782,6 +788,7 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
    f.StoreContext(
        offsetof(PPCContext, scratch),
        f.ZeroExtend(f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE), INT64_TYPE));
#if 0
    if (i.X.RT == 13) {
      // iff storing from r13 we are taking a lock (disable interrupts).
      if (!cvars::disable_global_lock) {

@@ -793,6 +800,7 @@ int InstrEmit_mtmsr(PPCHIRBuilder& f, const InstrData& i) {
        f.CallExtern(f.builtins()->leave_global_lock);
      }
    }
#endif
    return 0;
  } else {
    // L = 0

@@ -807,6 +815,7 @@ int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
    f.MemoryBarrier();
    f.StoreContext(offsetof(PPCContext, scratch),
                   f.ZeroExtend(f.LoadGPR(i.X.RT), INT64_TYPE));
#if 0
    if (i.X.RT == 13) {
      // iff storing from r13 we are taking a lock (disable interrupts).
      if (!cvars::disable_global_lock) {

@@ -818,6 +827,7 @@ int InstrEmit_mtmsrd(PPCHIRBuilder& f, const InstrData& i) {
        f.CallExtern(f.builtins()->leave_global_lock);
      }
    }
#endif
    return 0;
  } else {
    // L = 0

@@ -5406,6 +5406,7 @@ PPCOpcodeDisasmInfo ppc_opcode_disasm_table[] = {
  INSTRUCTION(0x6c000000, "xoris" , kD , kI, kGeneral, "XOR Immediate Shifted" , (PPCOpcodeField::kRS,PPCOpcodeField::kUIMM), (PPCOpcodeField::kRA), PrintDisasm_xoris),
  INSTRUCTION(0x7c000278, "xorx" , kX , kI, kGeneral, "XOR" , (PPCOpcodeField::kRS,PPCOpcodeField::kRB), (PPCOpcodeField::kRA,PPCOpcodeField::kCRcond), PrintDisasm_xorx),
};
#undef INSTRUCTION
static_assert(sizeof(ppc_opcode_disasm_table) / sizeof(PPCOpcodeDisasmInfo) == static_cast<int>(PPCOpcode::kInvalid), "PPC table mismatch - rerun ppc-table-gen");

const PPCOpcodeDisasmInfo& GetOpcodeDisasmInfo(PPCOpcode opcode) {

@@ -470,6 +470,7 @@ PPCOpcodeInfo ppc_opcode_table[] = {
  INSTRUCTION(0x6c000000, "xoris" , kD , kI, kGeneral),
  INSTRUCTION(0x7c000278, "xorx" , kX , kI, kGeneral),
};
#undef INSTRUCTION
static_assert(sizeof(ppc_opcode_table) / sizeof(PPCOpcodeInfo) == static_cast<int>(PPCOpcode::kInvalid), "PPC table mismatch - rerun ppc-table-gen");

const PPCOpcodeInfo& GetOpcodeInfo(PPCOpcode opcode) {

@@ -257,11 +257,22 @@ Function* Processor::ResolveFunction(uint32_t address) {

    // Grab symbol declaration.
    auto function = LookupFunction(address);

    if (!function) {
      entry->status = Entry::STATUS_FAILED;
      return nullptr;
    }

    auto module_for = function->module();

    auto xexmod = dynamic_cast<XexModule*>(module_for);
    if (xexmod) {
      auto addr_flags = xexmod->GetInstructionAddressFlags(address);
      if (addr_flags) {
        addr_flags->was_resolved = 1;
      }
    }

    if (!DemandFunction(function)) {
      entry->status = Entry::STATUS_FAILED;
      return nullptr;

@@ -14,13 +14,16 @@
#include "third_party/fmt/include/fmt/format.h"

#include "xenia/base/byte_order.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/memory.h"

#include "xenia/cpu/cpu_flags.h"
#include "xenia/cpu/export_resolver.h"
#include "xenia/cpu/lzx.h"
#include "xenia/cpu/processor.h"
#include "xenia/emulator.h"
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/xmodule.h"

@@ -29,6 +32,14 @@
#include "third_party/crypto/rijndael-alg-fst.h"
#include "third_party/pe/pe_image.h"

DEFINE_bool(disable_instruction_infocache, false,
            "Disables caching records of called instructions/mmio accesses.",
            "CPU");
DEFINE_bool(disable_function_precompilation, true,
            "Disables pre-compiling guest functions that we know we've called "
            "on previous runs",
            "CPU");

static const uint8_t xe_xex2_retail_key[16] = {
    0x20, 0xB1, 0x85, 0xA5, 0x9D, 0x28, 0xFD, 0xC3,
    0x40, 0x58, 0x3F, 0xBB, 0x08, 0x96, 0xBF, 0x91};

@@ -977,6 +988,7 @@ bool XexModule::LoadContinue() {

  // Scan and find the low/high addresses.
  // All code sections are continuous, so this should be easy.
  // could use a source for the above information
  auto heap = memory()->LookupHeap(base_address_);
  auto page_size = heap->page_size();

@@ -1045,7 +1057,24 @@ bool XexModule::LoadContinue() {
      library_offset += library->size;
    }
  }
  sha1::SHA1 final_image_sha_;

  final_image_sha_.reset();

  unsigned high_code = this->high_address_ - this->low_address_;

  final_image_sha_.processBytes(memory()->TranslateVirtual(this->low_address_),
                                high_code);
  final_image_sha_.finalize(image_sha_bytes_);

  char fmtbuf[16];

  for (unsigned i = 0; i < 16; ++i) {
    sprintf_s(fmtbuf, "%X", image_sha_bytes_[i]);
    image_sha_str_ += &fmtbuf[0];
  }

  info_cache_.Init(this);
  // Find __savegprlr_* and __restgprlr_* and the others.
  // We can flag these for special handling (inlining/etc).
  if (!FindSaveRest()) {

@@ -1288,7 +1317,68 @@ std::unique_ptr<Function> XexModule::CreateFunction(uint32_t address) {
  return std::unique_ptr<Function>(
      processor_->backend()->CreateGuestFunction(this, address));
}
void XexInfoCache::Init(XexModule* xexmod) {
  if (cvars::disable_instruction_infocache) {
    return;
  }
  auto emu = xexmod->kernel_state_->emulator();
  std::filesystem::path infocache_path = emu->cache_root();

  infocache_path.append(L"modules");

  infocache_path.append(xexmod->image_sha_str_);

  std::filesystem::create_directories(infocache_path);
  infocache_path.append("executable_addr_flags.bin");

  unsigned num_codebytes = xexmod->high_address_ - xexmod->low_address_;
  num_codebytes += 3;  // round up to nearest multiple of 4
  num_codebytes &= ~3;
  bool did_exist = true;
  if (!std::filesystem::exists(infocache_path)) {
    xe::filesystem::CreateEmptyFile(infocache_path);
    did_exist = false;
  }

  // todo: prepopulate with stuff from pdata, dll exports
  this->executable_addr_flags_ = std::move(xe::MappedMemory::Open(
      infocache_path, xe::MappedMemory::Mode::kReadWrite, 0,
      sizeof(InfoCacheFlagsHeader) +
          (sizeof(InfoCacheFlags) *
           (num_codebytes /
            4))));  // one infocacheflags entry for each PPC instr-sized addr

  if (did_exist) {
    xexmod->PrecompileKnownFunctions();
  }
}

InfoCacheFlags* XexModule::GetInstructionAddressFlags(uint32_t guest_addr) {
  if (guest_addr < low_address_ || guest_addr > high_address_) {
    return nullptr;
  }

  guest_addr -= low_address_;

  return info_cache_.LookupFlags(guest_addr);
}

void XexModule::PrecompileKnownFunctions() {
  if (cvars::disable_function_precompilation) {
    return;
  }
  uint32_t start = 0;
  uint32_t end = (high_address_ - low_address_) / 4;
  auto flags = info_cache_.LookupFlags(0);
  if (!flags) {
    return;
  }
  for (uint32_t i = 0; i < end; i++) {
    if (flags[i].was_resolved) {
      processor_->ResolveFunction(low_address_ + (i * 4));
    }
  }
}
bool XexModule::FindSaveRest() {
  // Special stack save/restore functions.
  // http://research.microsoft.com/en-us/um/redmond/projects/invisible/src/crt/md/ppc/xxx.s.htm
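The sizing math in Init is a fixed header plus one 4-byte InfoCacheFlags per
4-byte PPC instruction slot, with the code byte count rounded up to a multiple
of 4 first. A standalone illustration with made-up module bounds:

#include <cstdint>
#include <cstdio>

int main() {
  const uint64_t kHeaderSize = 256;  // matches InfoCacheFlagsHeader below
  uint32_t low_address = 0x82000000;   // hypothetical bounds
  uint32_t high_address = 0x82010002;
  uint32_t num_codebytes = (high_address - low_address + 3) & ~3u;  // 0x10004
  uint64_t file_size = kHeaderSize + 4ull * (num_codebytes / 4);
  printf("flags file: %llu bytes\n", (unsigned long long)file_size);
  return 0;
}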

@@ -12,7 +12,7 @@

#include <string>
#include <vector>
#include "xenia/base/mapped_memory.h"
#include "xenia/cpu/module.h"
#include "xenia/kernel/util/xex2_info.h"

@@ -30,6 +30,39 @@ constexpr fourcc_t kXEX2Signature = make_fourcc("XEX2");
constexpr fourcc_t kElfSignature = make_fourcc(0x7F, 'E', 'L', 'F');

class Runtime;
struct InfoCacheFlags {
  uint32_t was_resolved : 1;  // has this address ever been called/requested
                              // via resolvefunction?
  uint32_t accessed_mmio : 1;
  uint32_t reserved : 30;
};
struct XexInfoCache {
  struct InfoCacheFlagsHeader {
    unsigned char reserved[256];  // put xenia version here

    InfoCacheFlags* LookupFlags(unsigned offset) {
      return &reinterpret_cast<InfoCacheFlags*>(&this[1])[offset];
    }
  };
  /*
    for every 4-byte aligned address, records a 4 byte set of flags.
  */
  std::unique_ptr<MappedMemory> executable_addr_flags_;

  void Init(class XexModule*);
  InfoCacheFlags* LookupFlags(unsigned offset) {
    offset /= 4;
    if (!executable_addr_flags_) {
      return nullptr;
    }
    uint8_t* data = executable_addr_flags_->data();

    if (!data) {
      return nullptr;
    }
    return reinterpret_cast<InfoCacheFlagsHeader*>(data)->LookupFlags(offset);
  }
};

class XexModule : public xe::cpu::Module {
 public:

@@ -174,10 +207,14 @@ class XexModule : public xe::cpu::Module {
                        XEX_MODULE_PATCH_FULL));
  }

  InfoCacheFlags* GetInstructionAddressFlags(uint32_t guest_addr);
  void PrecompileKnownFunctions();

 protected:
  std::unique_ptr<Function> CreateFunction(uint32_t address) override;

 private:
  friend struct XexInfoCache;
  void ReadSecurityInfo();

  int ReadImage(const void* xex_addr, size_t xex_length, bool use_dev_key);

@@ -217,6 +254,10 @@ class XexModule : public xe::cpu::Module {

  XexFormat xex_format_ = kFormatUnknown;
  SecurityInfoContext security_info_ = {};

  uint8_t image_sha_bytes_[16];
  std::string image_sha_str_;
  XexInfoCache info_cache_;
};

} // namespace cpu
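The &this[1] in InfoCacheFlagsHeader::LookupFlags works because the flags array
is mapped directly after the 256-byte header: one past the header object is
entry 0. A minimal standalone model of that layout (names are stand-ins):

#include <cassert>
#include <cstdint>

struct Flags {
  uint32_t bits;
};
struct Header {
  unsigned char reserved[256];
  Flags* LookupFlags(unsigned index) {
    // &this[1] == this + 1 == first byte past the header.
    return &reinterpret_cast<Flags*>(&this[1])[index];
  }
};

int main() {
  unsigned char file[sizeof(Header) + 4 * sizeof(Flags)] = {};
  Header* hdr = reinterpret_cast<Header*>(file);
  assert(reinterpret_cast<unsigned char*>(hdr->LookupFlags(2)) ==
         file + sizeof(Header) + 2 * sizeof(Flags));
  return 0;
}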

@@ -16,6 +16,7 @@

#include "third_party/fmt/include/fmt/format.h"
#include "xenia/base/byte_stream.h"
#include "xenia/base/cvar.h"
#include "xenia/base/logging.h"
#include "xenia/base/math.h"
#include "xenia/base/profiling.h"

@@ -28,6 +29,10 @@
#include "xenia/kernel/kernel_state.h"
#include "xenia/kernel/user_module.h"

DEFINE_bool(log_unknown_register_writes, false,
            "Log writes to unknown registers from "
            "CommandProcessor::WriteRegister. Has significant performance hit.",
            "GPU");
namespace xe {
namespace gpu {

@@ -329,19 +334,9 @@ void CommandProcessor::UpdateWritePointer(uint32_t value) {
  write_ptr_index_ = value;
  write_ptr_index_event_->Set();
}
void CommandProcessor::HandleSpecialRegisterWrite(uint32_t index,
                                                  uint32_t value) {
  RegisterFile& regs = *register_file_;

  // Scratch register writeback.
  if (index >= XE_GPU_REG_SCRATCH_REG0 && index <= XE_GPU_REG_SCRATCH_REG7) {
    uint32_t scratch_reg = index - XE_GPU_REG_SCRATCH_REG0;

@@ -469,6 +464,43 @@ void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
    }
  }
}
void CommandProcessor::WriteRegister(uint32_t index, uint32_t value) {
  if (XE_UNLIKELY(cvars::log_unknown_register_writes)) {
    // chrispy: rearrange check order, place set after checks
    if (XE_UNLIKELY(!register_file_->IsValidRegister(index))) {
      XELOGW("GPU: Write to unknown register ({:04X} = {:08X})", index, value);
    check_reg_out_of_bounds:
      if (XE_UNLIKELY(index >= RegisterFile::kRegisterCount)) {
        XELOGW("CommandProcessor::WriteRegister index out of bounds: {}",
               index);
        return;
      }
    }
  } else {
    goto check_reg_out_of_bounds;
  }
  register_file_->values[index].u32 = value;

  // regs with extra logic on write: XE_GPU_REG_COHER_STATUS_HOST
  // XE_GPU_REG_DC_LUT_RW_INDEX
  // XE_GPU_REG_DC_LUT_SEQ_COLOR XE_GPU_REG_DC_LUT_PWL_DATA
  // XE_GPU_REG_DC_LUT_30_COLOR

  // quick pre-test
  // todo: figure out just how unlikely this is. if very (it ought to be,
  // there's a ton of registers other than these), make this predicate
  // branchless and mark it with unlikely, then make HandleSpecialRegisterWrite
  // noinline. yep, it's very unlikely. these ORs here are meant to be bitwise
  // ORs, so that we do not do branching evaluation of the conditions (we will
  // almost always take all of the branches)
  if (XE_UNLIKELY(
          (index - XE_GPU_REG_SCRATCH_REG0 < 8) |
          (index == XE_GPU_REG_COHER_STATUS_HOST) |
          ((index - XE_GPU_REG_DC_LUT_RW_INDEX) <=
           (XE_GPU_REG_DC_LUT_30_COLOR - XE_GPU_REG_DC_LUT_RW_INDEX)))) {
    HandleSpecialRegisterWrite(index, value);
  }
}
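The subtract-and-compare predicates above are the standard unsigned range-check
trick: (index - LO) <= (HI - LO) matches LO <= index <= HI in one comparison,
because anything below LO wraps around to a huge unsigned value. A standalone
check over a hypothetical register range:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t kLo = 0x0F00, kHi = 0x0F1F;  // made-up bounds
  for (uint32_t index = 0; index <= 0xFFFF; ++index) {
    bool two_compares = index >= kLo && index <= kHi;
    bool one_compare = (index - kLo) <= (kHi - kLo);
    assert(two_compares == one_compare);
  }
  return 0;
}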

void CommandProcessor::MakeCoherent() {
  SCOPE_profile_cpu_f("gpu");

@@ -570,7 +602,7 @@ void CommandProcessor::ExecuteIndirectBuffer(uint32_t ptr, uint32_t count) {
      // Return up a level if we encounter a bad packet.
      XELOGE("**** INDIRECT RINGBUFFER: Failed to execute packet.");
      assert_always();
      // break;
    }
  } while (reader.read_count());

@@ -150,7 +150,9 @@ class CommandProcessor {
  void WorkerThreadMain();
  virtual bool SetupContext() = 0;
  virtual void ShutdownContext() = 0;
  // rarely needed, most register writes have no special logic here
  XE_NOINLINE
  void HandleSpecialRegisterWrite(uint32_t index, uint32_t value);
  virtual void WriteRegister(uint32_t index, uint32_t value);

  const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table() const {

@@ -712,7 +712,7 @@ void D3D12CommandProcessor::SetViewport(const D3D12_VIEWPORT& viewport) {
  ff_viewport_update_needed_ |= ff_viewport_.Height != viewport.Height;
  ff_viewport_update_needed_ |= ff_viewport_.MinDepth != viewport.MinDepth;
  ff_viewport_update_needed_ |= ff_viewport_.MaxDepth != viewport.MaxDepth;
  if (XE_UNLIKELY(ff_viewport_update_needed_)) {
    ff_viewport_ = viewport;
    deferred_command_list_.RSSetViewport(ff_viewport_);
    ff_viewport_update_needed_ = false;

@@ -4799,18 +4799,16 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
  if (!current_transfers.empty()) {
    are_current_command_list_render_targets_valid_ = false;
    if (dest_rt_key.is_depth) {
      auto handle = dest_d3d12_rt.descriptor_draw().GetHandle();
      command_list.D3DOMSetRenderTargets(0, nullptr, FALSE, &handle);
      if (!use_stencil_reference_output_) {
        command_processor_.SetStencilReference(UINT8_MAX);
      }
    } else {
      auto handle = dest_d3d12_rt.descriptor_load_separate().IsValid()
                        ? dest_d3d12_rt.descriptor_load_separate().GetHandle()
                        : dest_d3d12_rt.descriptor_draw().GetHandle();
      command_list.D3DOMSetRenderTargets(1, &handle, FALSE, nullptr);
    }

    uint32_t dest_pitch_tiles = dest_rt_key.GetPitchTiles();

@@ -5425,12 +5423,12 @@ void D3D12RenderTargetCache::PerformTransfersAndResolveClears(
        dest_d3d12_rt.SetResourceState(D3D12_RESOURCE_STATE_RENDER_TARGET),
        D3D12_RESOURCE_STATE_RENDER_TARGET);
    if (clear_via_drawing) {
      auto handle =
          (dest_d3d12_rt.descriptor_load_separate().IsValid()
               ? dest_d3d12_rt.descriptor_load_separate().GetHandle()
               : dest_d3d12_rt.descriptor_draw().GetHandle());

      command_list.D3DOMSetRenderTargets(1, &handle, FALSE, nullptr);
      are_current_command_list_render_targets_valid_ = true;
      D3D12_VIEWPORT clear_viewport;
      clear_viewport.TopLeftX = float(clear_rect.left);

@@ -78,314 +78,24 @@ namespace shaders {
#include "xenia/gpu/shaders/bytecode/d3d12_5_1/texture_load_r5g6b5_b5g6r5_scaled_cs.h"
} // namespace shaders

/*
  chrispy: we're getting cache misses in GetHostFormatSwizzle, use a
  denser array. todo: not all 65536 possible swizzles are used, this could
  probably be one cache line
*/
using SwizzleArray = std::array<unsigned short, 64>;

static constexpr SwizzleArray build_xenos_swizzle_for_format() {
  SwizzleArray result{0};

  for (int i = 0; i < 64; ++i) {
    result[i] =
        static_cast<uint16_t>(D3D12TextureCache::host_formats_[i].swizzle);
  }
  return result;
}
alignas(64) constexpr SwizzleArray xenos_swizzle_for_format =
    build_xenos_swizzle_for_format();

const D3D12TextureCache::HostFormat D3D12TextureCache::host_formats_[64] = {
    // k_1_REVERSE
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_1
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8
    {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
     DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_1_5_5_5
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM,
     kLoadShaderIndexR5G5B5A1ToB5G5R5A1, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_5_6_5
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
     kLoadShaderIndexR5G6B5ToB5G6R5, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_6_5_5
    // On the host, green bits in blue, blue bits in green.
    {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
     kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)},
    // k_8_8_8_8
    {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_2_10_10_10
    {DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_8_A
    {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
     DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8_B
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_8_8
    {DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R8G8_SNORM, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_Cr_Y1_Cb_Y0_REP
    // Red and blue swapped in the load shader for simplicity.
    // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
    // the signed version, separate unsigned and signed load shaders completely
    // (as one doesn't need decompression for this format, while another does).
    {DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM,
     kLoadShaderIndexGBGR8ToGRGB8, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexGBGR8ToRGB8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_Y1_Cr_Y0_Cb_REP
    // Red and blue swapped in the load shader for simplicity.
    // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable for
    // the signed version, separate unsigned and signed load shaders completely
    // (as one doesn't need decompression for this format, while another does).
    {DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM,
     kLoadShaderIndexBGRG8ToRGBG8, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     true, DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexBGRG8ToRGB8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_16_16_EDRAM
    // Not usable as a texture, also has -32...32 range.
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_8_8_8_A
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_4_4_4_4
    // Red and blue swapped in the load shader for simplicity.
    {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
     kLoadShaderIndexRGBA4ToBGRA4, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_10_11_11
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_11_11_10
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_DXT1
    {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT2_3
    {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT4_5
    {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_16_16_16_EDRAM
    // Not usable as a texture, also has -32...32 range.
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // R32_FLOAT for depth because shaders would require an additional SRV to
    // sample stencil, which we don't provide.
    // k_24_8
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthUnorm,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_24_8_FLOAT
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthFloat,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16
    {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_SNORM, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16
    {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_SNORM, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_EXPAND
    {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_EXPAND
    {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16_EXPAND
    {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_16_FLOAT
    {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
     DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_FLOAT
    {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_16_16_16_FLOAT
    {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_32_32_32_32
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32_FLOAT
    {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndex32bpb,
     DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_32_FLOAT
    {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndex64bpb,
     DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_32_32_32_32_FLOAT
    {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,
     kLoadShaderIndex128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT,
     kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_32_AS_8
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_8
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_MPEG
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_MPEG
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_32_AS_8_8_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_16_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_MPEG_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
    // k_16_16_MPEG_INTERLACED
    {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_DXN
    {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8_UNORM,
     kLoadShaderIndexDXNToRG8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
    // k_8_8_8_8_AS_16_16_16_16
    {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM, kLoadShaderIndexUnknown,
     false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT1_AS_16_16_16_16
    {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT2_3_AS_16_16_16_16
    {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_DXT4_5_AS_16_16_16_16
    {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
     DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_2_10_10_10_AS_16_16_16_16
    {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM,
     kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false,
     DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
     xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
    // k_10_11_11_AS_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
    // k_11_11_10_AS_16_16_16_16
    {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
     kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
     kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
     kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
|
|
||||||
// k_32_32_32_FLOAT
|
|
||||||
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
|
|
||||||
// k_DXT3A
|
|
||||||
// R8_UNORM has the same size as BC2, but doesn't have the 4x4 size
|
|
||||||
// alignment requirement.
|
|
||||||
{DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
|
|
||||||
// k_DXT5A
|
|
||||||
{DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM,
|
|
||||||
kLoadShaderIndexDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
|
|
||||||
// k_CTX1
|
|
||||||
{DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
|
|
||||||
// k_DXT3A_AS_1_1_1_1
|
|
||||||
{DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
|
|
||||||
kLoadShaderIndexDXT3AAs1111ToBGRA4, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
|
|
||||||
// k_8_8_8_8_GAMMA_EDRAM
|
|
||||||
// Not usable as a texture.
|
|
||||||
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
|
|
||||||
// k_2_10_10_10_FLOAT_EDRAM
|
|
||||||
// Not usable as a texture.
|
|
||||||
{DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
|
|
||||||
DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
|
|
||||||
kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
|
|
||||||
};
|
|
||||||
|
|
||||||
D3D12TextureCache::D3D12TextureCache(const RegisterFile& register_file,
                                     D3D12SharedMemory& shared_memory,
@@ -1544,7 +1254,8 @@ bool D3D12TextureCache::IsScaledResolveSupportedForFormat(
}

uint32_t D3D12TextureCache::GetHostFormatSwizzle(TextureKey key) const {
  return host_formats_[uint32_t(key.format)].swizzle;
  // return host_formats_[uint32_t(key.format)].swizzle;
  return xenos_swizzle_for_format[uint32_t(key.format)];
}

uint32_t D3D12TextureCache::GetMaxHostTextureWidthHeight(
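The swizzle lookup above sits on a hot texture-binding path, and the change pulls the per-format swizzle out of the HostFormat struct into a flat parallel array. A minimal sketch of that array-of-structs to struct-of-arrays split, with illustrative field names (only swizzle is confirmed by the diff):

    #include <cstdint>

    // Array-of-structs: reading the one hot field drags a whole HostFormat
    // entry through the cache.
    struct HostFormatAoS {
      uint32_t dxgi_format_resource;  // illustrative cold fields
      uint32_t load_shader;
      uint32_t swizzle;  // the hot field
    };
    HostFormatAoS host_formats[64];

    // Struct-of-arrays for the hot field: 64 x 4 bytes = 256 bytes, so the
    // whole swizzle table fits in a handful of cache lines.
    uint32_t xenos_swizzle_for_format[64];

    uint32_t GetSwizzleAoS(uint32_t format) {
      return host_formats[format].swizzle;
    }
    uint32_t GetSwizzleSoA(uint32_t format) {
      return xenos_swizzle_for_format[format];
    }
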
@@ -160,29 +160,6 @@ class D3D12TextureCache final : public TextureCache {
  ID3D12Resource* RequestSwapTexture(
      D3D12_SHADER_RESOURCE_VIEW_DESC& srv_desc_out,
      xenos::TextureFormat& format_out);

 protected:
  bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
  bool IsScaledResolveSupportedForFormat(TextureKey key) const override;
  uint32_t GetHostFormatSwizzle(TextureKey key) const override;

  uint32_t GetMaxHostTextureWidthHeight(
      xenos::DataDimension dimension) const override;
  uint32_t GetMaxHostTextureDepthOrArraySize(
      xenos::DataDimension dimension) const override;

  std::unique_ptr<Texture> CreateTexture(TextureKey key) override;

  // This binds pipelines, allocates descriptors, and copies!
  bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
                                             bool load_mips) override;

  void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;

 private:
  static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
  static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;

  struct HostFormat {
    // Format info for the regular case.
    // DXGI format (typeless when different signedness or number representation

@@ -223,6 +200,352 @@ class D3D12TextureCache final : public TextureCache {
    // Mapping of Xenos swizzle components to DXGI format components.
    uint32_t swizzle;
  };
  static constexpr HostFormat host_formats_[64]{
      // k_1_REVERSE
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_1
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_8
      {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
       DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_1_5_5_5
      // Red and blue swapped in the load shader for simplicity.
      {DXGI_FORMAT_B5G5R5A1_UNORM, DXGI_FORMAT_B5G5R5A1_UNORM,
       kLoadShaderIndexR5G5B5A1ToB5G5R5A1, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_5_6_5
      // Red and blue swapped in the load shader for simplicity.
      {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
       kLoadShaderIndexR5G6B5ToB5G6R5, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_6_5_5
      // On the host, green bits in blue, blue bits in green.
      {DXGI_FORMAT_B5G6R5_UNORM, DXGI_FORMAT_B5G6R5_UNORM,
       kLoadShaderIndexR5G5B6ToB5G6R5WithRBGASwizzle, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, XE_GPU_MAKE_TEXTURE_SWIZZLE(R, B, G, G)},
      // k_8_8_8_8
      {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
       kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_2_10_10_10
      {DXGI_FORMAT_R10G10B10A2_TYPELESS, DXGI_FORMAT_R10G10B10A2_UNORM,
       kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_8_A
      {DXGI_FORMAT_R8_TYPELESS, DXGI_FORMAT_R8_UNORM, kLoadShaderIndex8bpb,
       DXGI_FORMAT_R8_SNORM, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_8_B
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_8_8
      {DXGI_FORMAT_R8G8_TYPELESS, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndex16bpb,
       DXGI_FORMAT_R8G8_SNORM, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_Cr_Y1_Cb_Y0_REP
      // Red and blue swapped in the load shader for simplicity.
      // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable
      // for the signed version; separate unsigned and signed load shaders
      // completely (as one doesn't need decompression for this format, while
      // another does).
      {DXGI_FORMAT_G8R8_G8B8_UNORM, DXGI_FORMAT_G8R8_G8B8_UNORM,
       kLoadShaderIndexGBGR8ToGRGB8, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
       kLoadShaderIndexGBGR8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_Y1_Cr_Y0_Cb_REP
      // Red and blue swapped in the load shader for simplicity.
      // TODO(Triang3l): The DXGI_FORMAT_R8G8B8A8_U/SNORM conversion is usable
      // for the signed version; separate unsigned and signed load shaders
      // completely (as one doesn't need decompression for this format, while
      // another does).
      {DXGI_FORMAT_R8G8_B8G8_UNORM, DXGI_FORMAT_R8G8_B8G8_UNORM,
       kLoadShaderIndexBGRG8ToRGBG8, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8G8B8A8_UNORM,
       kLoadShaderIndexBGRG8ToRGB8, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_16_16_EDRAM
      // Not usable as a texture, also has -32...32 range.
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_8_8_8_8_A
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_4_4_4_4
      // Red and blue swapped in the load shader for simplicity.
      {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
       kLoadShaderIndexRGBA4ToBGRA4, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_10_11_11
      {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
       kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
       kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_11_11_10
      {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
       kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
       kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_DXT1
      {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_DXT2_3
      {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_DXT4_5
      {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_16_16_16_16_EDRAM
      // Not usable as a texture, also has -32...32 range.
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // R32_FLOAT for depth because shaders would require an additional SRV to
      // sample stencil, which we don't provide.
      // k_24_8
      {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthUnorm,
       DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_24_8_FLOAT
      {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexDepthFloat,
       DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16
      {DXGI_FORMAT_R16_TYPELESS, DXGI_FORMAT_R16_UNORM, kLoadShaderIndex16bpb,
       DXGI_FORMAT_R16_SNORM, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_16
      {DXGI_FORMAT_R16G16_TYPELESS, DXGI_FORMAT_R16G16_UNORM,
       kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_SNORM, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_16_16_16_16
      {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
       kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_SNORM,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_16_EXPAND
      {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
       DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_16_EXPAND
      {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT,
       kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_16_16_16_16_EXPAND
      {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
       kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_16_FLOAT
      {DXGI_FORMAT_R16_FLOAT, DXGI_FORMAT_R16_FLOAT, kLoadShaderIndex16bpb,
       DXGI_FORMAT_R16_FLOAT, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_16_FLOAT
      {DXGI_FORMAT_R16G16_FLOAT, DXGI_FORMAT_R16G16_FLOAT,
       kLoadShaderIndex32bpb, DXGI_FORMAT_R16G16_FLOAT, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_16_16_16_16_FLOAT
      {DXGI_FORMAT_R16G16B16A16_FLOAT, DXGI_FORMAT_R16G16B16A16_FLOAT,
       kLoadShaderIndex64bpb, DXGI_FORMAT_R16G16B16A16_FLOAT,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_32
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_32_32
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_32_32_32_32
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_32_FLOAT
      {DXGI_FORMAT_R32_FLOAT, DXGI_FORMAT_R32_FLOAT, kLoadShaderIndex32bpb,
       DXGI_FORMAT_R32_FLOAT, kLoadShaderIndexUnknown, false,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_32_32_FLOAT
      {DXGI_FORMAT_R32G32_FLOAT, DXGI_FORMAT_R32G32_FLOAT,
       kLoadShaderIndex64bpb, DXGI_FORMAT_R32G32_FLOAT, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_32_32_32_32_FLOAT
      {DXGI_FORMAT_R32G32B32A32_FLOAT, DXGI_FORMAT_R32G32B32A32_FLOAT,
       kLoadShaderIndex128bpb, DXGI_FORMAT_R32G32B32A32_FLOAT,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_32_AS_8
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_32_AS_8_8
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_16_MPEG
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_16_MPEG
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_8_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_32_AS_8_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_32_AS_8_8_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_16_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_MPEG_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_16_16_MPEG_INTERLACED
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_DXN
      {DXGI_FORMAT_BC5_UNORM, DXGI_FORMAT_BC5_UNORM, kLoadShaderIndex128bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexDXNToRG8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_8_8_8_8_AS_16_16_16_16
      {DXGI_FORMAT_R8G8B8A8_TYPELESS, DXGI_FORMAT_R8G8B8A8_UNORM,
       kLoadShaderIndex32bpb, DXGI_FORMAT_R8G8B8A8_SNORM,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_DXT1_AS_16_16_16_16
      {DXGI_FORMAT_BC1_UNORM, DXGI_FORMAT_BC1_UNORM, kLoadShaderIndex64bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT1ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_DXT2_3_AS_16_16_16_16
      {DXGI_FORMAT_BC2_UNORM, DXGI_FORMAT_BC2_UNORM, kLoadShaderIndex128bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT3ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_DXT4_5_AS_16_16_16_16
      {DXGI_FORMAT_BC3_UNORM, DXGI_FORMAT_BC3_UNORM, kLoadShaderIndex128bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true,
       DXGI_FORMAT_R8G8B8A8_UNORM, kLoadShaderIndexDXT5ToRGBA8,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_2_10_10_10_AS_16_16_16_16
      {DXGI_FORMAT_R10G10B10A2_UNORM, DXGI_FORMAT_R10G10B10A2_UNORM,
       kLoadShaderIndex32bpb, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       false, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_10_11_11_AS_16_16_16_16
      {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
       kLoadShaderIndexR11G11B10ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
       kLoadShaderIndexR11G11B10ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_11_11_10_AS_16_16_16_16
      {DXGI_FORMAT_R16G16B16A16_TYPELESS, DXGI_FORMAT_R16G16B16A16_UNORM,
       kLoadShaderIndexR10G11B11ToRGBA16, DXGI_FORMAT_R16G16B16A16_SNORM,
       kLoadShaderIndexR10G11B11ToRGBA16SNorm, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_32_32_32_FLOAT
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBB},
      // k_DXT3A
      // R8_UNORM has the same size as BC2, but doesn't have the 4x4 size
      // alignment requirement.
      {DXGI_FORMAT_R8_UNORM, DXGI_FORMAT_R8_UNORM, kLoadShaderIndexDXT3A,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_DXT5A
      {DXGI_FORMAT_BC4_UNORM, DXGI_FORMAT_BC4_UNORM, kLoadShaderIndex64bpb,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, true, DXGI_FORMAT_R8_UNORM,
       kLoadShaderIndexDXT5AToR8, xenos::XE_GPU_TEXTURE_SWIZZLE_RRRR},
      // k_CTX1
      {DXGI_FORMAT_R8G8_UNORM, DXGI_FORMAT_R8G8_UNORM, kLoadShaderIndexCTX1,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGGG},
      // k_DXT3A_AS_1_1_1_1
      {DXGI_FORMAT_B4G4R4A4_UNORM, DXGI_FORMAT_B4G4R4A4_UNORM,
       kLoadShaderIndexDXT3AAs1111ToBGRA4, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_8_8_8_8_GAMMA_EDRAM
      // Not usable as a texture.
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
      // k_2_10_10_10_FLOAT_EDRAM
      // Not usable as a texture.
      {DXGI_FORMAT_UNKNOWN, DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown,
       DXGI_FORMAT_UNKNOWN, kLoadShaderIndexUnknown, false, DXGI_FORMAT_UNKNOWN,
       kLoadShaderIndexUnknown, xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA},
  };
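Each initializer above packs nine fields. Reading one entry against the truncated HostFormat struct makes the columns easier to scan; the member names below are inferred from the visible comments and the true/decompress pairings, so treat them as assumptions rather than the actual declarations:

    // Annotated copy of the k_DXT1 entry (field names inferred, not verbatim):
    // {
    //   DXGI_FORMAT_BC1_UNORM,       // resource format (typeless if views differ)
    //   DXGI_FORMAT_BC1_UNORM,       // unsigned (UNORM) view format
    //   kLoadShaderIndex64bpb,       // load shader for the unsigned path
    //   DXGI_FORMAT_UNKNOWN,         // signed (SNORM) view format, if separate
    //   kLoadShaderIndexUnknown,     // separate signed load shader, if any
    //   true,                        // host may need a decompressed fallback
    //   DXGI_FORMAT_R8G8B8A8_UNORM,  // decompressed fallback format
    //   kLoadShaderIndexDXT1ToRGBA8, // load shader that decompresses on upload
    //   xenos::XE_GPU_TEXTURE_SWIZZLE_RGBA,  // component swizzle
    // },
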
 protected:
  bool IsSignedVersionSeparateForFormat(TextureKey key) const override;
  bool IsScaledResolveSupportedForFormat(TextureKey key) const override;
  uint32_t GetHostFormatSwizzle(TextureKey key) const override;

  uint32_t GetMaxHostTextureWidthHeight(
      xenos::DataDimension dimension) const override;
  uint32_t GetMaxHostTextureDepthOrArraySize(
      xenos::DataDimension dimension) const override;

  std::unique_ptr<Texture> CreateTexture(TextureKey key) override;

  // This binds pipelines, allocates descriptors, and copies!
  bool LoadTextureDataFromResidentMemoryImpl(Texture& texture, bool load_base,
                                             bool load_mips) override;

  void UpdateTextureBindingsImpl(uint32_t fetch_constant_mask) override;

 private:
  static constexpr uint32_t kLoadGuestXThreadsPerGroupLog2 = 2;
  static constexpr uint32_t kLoadGuestYBlocksPerGroupLog2 = 5;
class D3D12Texture final : public Texture {
 public:
@@ -467,8 +790,6 @@ class D3D12TextureCache final : public TextureCache {
  xenos::ClampMode NormalizeClampMode(xenos::ClampMode clamp_mode) const;

  static const HostFormat host_formats_[64];

  D3D12CommandProcessor& command_processor_;
  bool bindless_resources_used_;

@@ -198,7 +198,7 @@ uint32_t GraphicsSystem::ReadRegister(uint32_t addr) {
      // maximum [width(0x0FFF), height(0x0FFF)]
      return 0x050002D0;
    default:
      if (!register_file_.GetRegisterInfo(r)) {
      if (!register_file_.IsValidRegister(r)) {
        XELOGE("GPU: Read from unknown register ({:04X})", r);
      }
  }
@@ -8,7 +8,7 @@
 */

#include "xenia/gpu/register_file.h"

#include <array>
#include <cstring>

#include "xenia/base/math.h"

@@ -17,6 +17,52 @@ namespace xe {
namespace gpu {

RegisterFile::RegisterFile() { std::memset(values, 0, sizeof(values)); }

constexpr unsigned int GetHighestRegisterNumber() {
  uint32_t highest = 0;
#define XE_GPU_REGISTER(index, type, name) \
  highest = std::max<uint32_t>(highest, index);
#include "xenia/gpu/register_table.inc"
#undef XE_GPU_REGISTER
  return highest;
}

constexpr unsigned int GetLowestRegisterNumber() {
  uint32_t lowest = UINT_MAX;
#define XE_GPU_REGISTER(index, type, name) \
  lowest = std::min<uint32_t>(lowest, index);
#include "xenia/gpu/register_table.inc"
#undef XE_GPU_REGISTER
  return lowest;
}

static constexpr uint32_t lowest_register = GetLowestRegisterNumber();
static constexpr uint32_t highest_register = GetHighestRegisterNumber();

static constexpr uint32_t total_num_registers =
    highest_register - lowest_register;

static constexpr uint32_t num_required_words_for_registers =
    ((total_num_registers + 63) & ~63) / 64;

// Can't use std::bitset, it's not constexpr in C++17.
using ValidRegisterBitset = std::array<
    uint64_t,
    num_required_words_for_registers>;  // std::bitset<highest_register -
                                        // lowest_register>;

static constexpr ValidRegisterBitset BuildValidRegisterBitset() {
  ValidRegisterBitset result{};
#define XE_GPU_REGISTER(index, type, name) \
  result[(index - lowest_register) / 64] |= \
      1ULL << ((index - lowest_register) % 64);
#include "xenia/gpu/register_table.inc"
#undef XE_GPU_REGISTER
  return result;
}
static constexpr ValidRegisterBitset valid_register_bitset =
    BuildValidRegisterBitset();

const RegisterInfo* RegisterFile::GetRegisterInfo(uint32_t index) {
  switch (index) {

@@ -34,6 +80,18 @@ const RegisterInfo* RegisterFile::GetRegisterInfo(uint32_t index) {
      return nullptr;
  }
}
/*
  todo: this still uses a lot of CPU! Our bitset is too large.
*/
bool RegisterFile::IsValidRegister(uint32_t index) {
  if (XE_UNLIKELY(index < lowest_register) ||
      XE_UNLIKELY(index > highest_register)) {
    return false;
  }
  uint32_t register_linear_index = index - lowest_register;

  return (valid_register_bitset[register_linear_index / 64] &
          (1ULL << (register_linear_index % 64))) != 0;
}
}  // namespace gpu
}  // namespace xe

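The validity check above compiles down to a range test plus one 64-bit load and mask, which is why ReadRegister switched to it from the heavier GetRegisterInfo lookup. A self-contained sketch of the same constexpr-bitset pattern, with made-up register numbers standing in for register_table.inc:

    #include <array>
    #include <cstdint>
    #include <initializer_list>

    constexpr uint32_t kLowest = 0x0040;
    constexpr uint32_t kHighest = 0x5002;
    // One bit per register in [kLowest, kHighest], rounded up to whole words.
    constexpr uint32_t kWords = ((kHighest - kLowest + 1) + 63) / 64;

    using Bitset = std::array<uint64_t, kWords>;

    constexpr Bitset Build(std::initializer_list<uint32_t> valid) {
      Bitset bits{};
      for (uint32_t index : valid) {
        bits[(index - kLowest) / 64] |= 1ull << ((index - kLowest) % 64);
      }
      return bits;
    }

    // Stand-in for the list generated by including register_table.inc.
    constexpr Bitset kValidBits = Build({0x0040, 0x0e00, 0x2184, 0x5002});

    constexpr bool IsValidRegister(uint32_t index) {
      if (index < kLowest || index > kHighest) {
        return false;  // Range test first, so the subtraction can't wrap.
      }
      uint32_t i = index - kLowest;
      return (kValidBits[i / 64] >> (i % 64)) & 1;
    }

    static_assert(IsValidRegister(0x2184) && !IsValidRegister(0x2185));
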
@@ -32,7 +32,7 @@ class RegisterFile {
  RegisterFile();

  static const RegisterInfo* GetRegisterInfo(uint32_t index);
  static bool IsValidRegister(uint32_t index);
  static constexpr size_t kRegisterCount = 0x5003;
  union RegisterValue {
    uint32_t u32;
@@ -41,9 +41,6 @@
#include "xenia/ui/windowed_app_context.h"
#include "xenia/xbox.h"

DEFINE_string(target_trace_file, "", "Specifies the trace file to load.",
              "GPU");

namespace xe {
namespace gpu {

@@ -66,7 +63,7 @@ TraceViewer::TraceViewer(xe::ui::WindowedAppContext& app_context,
TraceViewer::~TraceViewer() = default;

bool TraceViewer::OnInitialize() {
  std::string path = cvars::target_trace_file;
  std::string path = cvars::target_trace_file.u8string();

  // If no path passed, ask the user.
  // On Android, however, there's no synchronous file picker, and the trace file
@@ -12,6 +12,7 @@
#include <string_view>

#include "xenia/base/cvar.h"
#include "xenia/emulator.h"
#include "xenia/gpu/shader.h"
#include "xenia/gpu/trace_player.h"

@@ -24,7 +25,7 @@
#include "xenia/ui/window.h"
#include "xenia/ui/window_listener.h"
#include "xenia/ui/windowed_app.h"

DECLARE_path(target_trace_file);

namespace xe {
namespace gpu {

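The string cvar definition disappears from the viewer source while a DECLARE_path shows up in its header, and OnInitialize now reads the value through u8string(); together these imply the cvar's type changed from std::string to std::filesystem::path. A rough sketch of that relationship (the matching DEFINE_path is not visible in this diff, so its location and arguments are an assumption):

    #include <filesystem>
    #include <string>

    namespace cvars {
    // Roughly what a DEFINE_path(target_trace_file, ...) would provide; the
    // real macro also registers the variable with the cvar system.
    std::filesystem::path target_trace_file;
    }  // namespace cvars

    std::string GetTracePath() {
      // Under C++17, path::u8string() returns a UTF-8 std::string (in C++20
      // it becomes std::u8string), matching what the viewer code expects.
      return cvars::target_trace_file.u8string();
    }
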
@@ -25,7 +25,7 @@

namespace xe {
namespace gpu {

#if XE_ENABLE_TRACE_WRITER_INSTRUMENTATION == 1
TraceWriter::TraceWriter(uint8_t* membase)
    : membase_(membase), file_(nullptr) {}

@@ -362,6 +362,6 @@ void TraceWriter::WriteGammaRamp(
  fwrite(gamma_ramp_pwl_rgb, 1, kPWLUncompressedLength, file_);
}
}
#endif
}  // namespace gpu
}  // namespace xe

@@ -17,11 +17,22 @@
#include "xenia/gpu/registers.h"
#include "xenia/gpu/trace_protocol.h"

// Only enable the trace writer in debug builds: the measured hit from the
// trace function calls (even if they just immediately return) is 0.40-0.60%
// of total CPU time, and with inlining they just bloat the caller and
// negatively impact its register allocation.
#ifdef NDEBUG
#define XE_ENABLE_TRACE_WRITER_INSTRUMENTATION 0
#else
#define XE_ENABLE_TRACE_WRITER_INSTRUMENTATION 1
#endif

namespace xe {
namespace gpu {

class TraceWriter {
 public:
#if XE_ENABLE_TRACE_WRITER_INSTRUMENTATION == 1
  explicit TraceWriter(uint8_t* membase);
  ~TraceWriter();
@@ -61,6 +72,49 @@ class TraceWriter {
  bool compress_output_ = true;
  size_t compression_threshold_ = 1024;  // Min. number of bytes to compress.

#else
  // This could be annoying to maintain if new methods are added or the
  // signatures change.
  constexpr explicit TraceWriter(uint8_t* membase) {}

  static constexpr bool is_open() { return false; }

  static constexpr bool Open(const std::filesystem::path& path,
                             uint32_t title_id) {
    return false;
  }
  static constexpr void Flush() {}
  static constexpr void Close() {}

  static constexpr void WritePrimaryBufferStart(uint32_t base_ptr,
                                                uint32_t count) {}
  static constexpr void WritePrimaryBufferEnd() {}
  static constexpr void WriteIndirectBufferStart(uint32_t base_ptr,
                                                 uint32_t count) {}
  static constexpr void WriteIndirectBufferEnd() {}
  static constexpr void WritePacketStart(uint32_t base_ptr, uint32_t count) {}
  static constexpr void WritePacketEnd() {}
  static constexpr void WriteMemoryRead(uint32_t base_ptr, size_t length,
                                        const void* host_ptr = nullptr) {}
  static constexpr void WriteMemoryReadCached(uint32_t base_ptr,
                                              size_t length) {}
  static constexpr void WriteMemoryReadCachedNop(uint32_t base_ptr,
                                                 size_t length) {}
  static constexpr void WriteMemoryWrite(uint32_t base_ptr, size_t length,
                                         const void* host_ptr = nullptr) {}
  static constexpr void WriteEdramSnapshot(const void* snapshot) {}
  static constexpr void WriteEvent(EventCommand::Type event_type) {}
  static constexpr void WriteRegisters(uint32_t first_register,
                                       const uint32_t* register_values,
                                       uint32_t register_count,
                                       bool execute_callbacks_on_play) {}
  static constexpr void WriteGammaRamp(
      const reg::DC_LUT_30_COLOR* gamma_ramp_256_entry_table,
      const reg::DC_LUT_PWL_DATA* gamma_ramp_pwl_rgb,
      uint32_t gamma_ramp_rw_component) {}

#endif
};

}  // namespace gpu

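Because the release-mode class above replaces every method with a same-named constexpr no-op, call sites compile unchanged and the optimizer deletes the calls outright; no #ifdefs are needed anywhere the tracer is used. A reduced sketch of the pattern:

    #include <cstdint>
    #include <cstdio>

    #ifndef NDEBUG
    // Debug: a real recorder.
    class Recorder {
     public:
      void WriteEvent(uint32_t type) { std::printf("event %u\n", type); }
    };
    #else
    // Release: identical call syntax, but everything is a constexpr no-op,
    // so no code is generated for the calls.
    class Recorder {
     public:
      static constexpr void WriteEvent(uint32_t) {}
    };
    #endif

    int main() {
      Recorder recorder;
      recorder.WriteEvent(1);  // Real write in debug, compiled away in release.
      return 0;
    }
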
@@ -225,6 +225,7 @@ X_STATUS UserModule::LoadContinue() {
  ldr_data->xex_header_base = guest_xex_header_;
  ldr_data->full_image_size = security_header->image_size;
  ldr_data->image_base = this->xex_module()->base_address();

  ldr_data->entry_point = entry_point_;

  OnLoad();
@@ -198,7 +198,8 @@ bool Memory::Initialize() {
  // Add handlers for MMIO.
  mmio_handler_ = cpu::MMIOHandler::Install(
      virtual_membase_, physical_membase_, physical_membase_ + 0x1FFFFFFF,
      HostToGuestVirtualThunk, this, AccessViolationCallbackThunk, this);
      HostToGuestVirtualThunk, this, AccessViolationCallbackThunk, this,
      nullptr, nullptr);
  if (!mmio_handler_) {
    XELOGE("Unable to install MMIO handlers");
    assert_always();

@@ -213,6 +214,11 @@ bool Memory::Initialize() {
  return true;
}
void Memory::SetMMIOExceptionRecordingCallback(
    cpu::MmioAccessRecordCallback callback, void* context) {
  mmio_handler_->SetMMIOExceptionRecordingCallback(callback, context);
}

static const struct {
  uint64_t virtual_address_start;
  uint64_t virtual_address_end;
@@ -1530,7 +1536,8 @@ bool PhysicalHeap::AllocRange(uint32_t low_address, uint32_t high_address,
bool PhysicalHeap::AllocSystemHeap(uint32_t size, uint32_t alignment,
                                   uint32_t allocation_type, uint32_t protect,
                                   bool top_down, uint32_t* out_address) {
  return Alloc(size, alignment, allocation_type, protect, top_down, out_address);
  return Alloc(size, alignment, allocation_type, protect, top_down,
               out_address);
}

bool PhysicalHeap::Decommit(uint32_t address, uint32_t size) {
@@ -498,6 +498,9 @@ class Memory {
  bool Save(ByteStream* stream);
  bool Restore(ByteStream* stream);

  void SetMMIOExceptionRecordingCallback(cpu::MmioAccessRecordCallback callback,
                                         void* context);

 private:
  int MapViews(uint8_t* mapping_base);
  void UnmapViews();
@@ -181,7 +181,6 @@ bool Win32Window::OpenImpl() {
      SetWindowPlacement(hwnd_, &initial_dpi_placement);
    }
  }

  // Disable rounded corners starting with Windows 11 (or silently receive and
  // ignore E_INVALIDARG on Windows versions before 10.0.22000.0), primarily to
  // preserve all pixels of the guest output.

@@ -189,7 +188,6 @@ bool Win32Window::OpenImpl() {
  DwmSetWindowAttribute(hwnd_, DWMWA_WINDOW_CORNER_PREFERENCE,
                        &window_corner_preference,
                        sizeof(window_corner_preference));

  // Disable flicks.
  ATOM atom = GlobalAddAtomW(L"MicrosoftTabletPenServiceProperty");
  const DWORD_PTR dwHwndTabletProperty =
@@ -1047,7 +1045,9 @@ LRESULT Win32Window::WndProc(HWND hWnd, UINT message, WPARAM wParam,
    } break;

    case WM_MOVE: {
      OnMonitorUpdate(MonitorUpdateEvent(this, false));
      // chrispy: fix clang use of temporary error
      MonitorUpdateEvent update_event{this, false};
      OnMonitorUpdate(update_event);
    } break;

    case WM_SIZE: {

@@ -1084,7 +1084,9 @@ LRESULT Win32Window::WndProc(HWND hWnd, UINT message, WPARAM wParam,
    } break;

    case WM_DISPLAYCHANGE: {
      OnMonitorUpdate(MonitorUpdateEvent(this, true));
      // chrispy: fix clang use of temporary error
      MonitorUpdateEvent update_event{this, true};
      OnMonitorUpdate(update_event);
    } break;

    case WM_DPICHANGED: {
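Both WndProc fixes above work around the same issue: the handler presumably takes the event by non-const lvalue reference, and while MSVC accepts binding a temporary to such a reference as a legacy extension, clang (and MSVC under /permissive-) rejects it. A minimal reproduction under that assumed signature:

    // The real OnMonitorUpdate signature isn't shown in this diff; a
    // non-const lvalue reference parameter is assumed, since that is what
    // triggers the clang error being fixed.
    struct MonitorUpdateEvent {
      void* window;
      bool display_changed;
    };

    inline void OnMonitorUpdate(MonitorUpdateEvent&) {}

    void Notify(void* self) {
      // Rejected by clang/GCC, accepted by MSVC only as an extension:
      // OnMonitorUpdate(MonitorUpdateEvent{self, true});

      // Portable form: name the event so an lvalue exists to bind to.
      MonitorUpdateEvent update_event{self, true};
      OnMonitorUpdate(update_event);
    }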