/**
 ******************************************************************************
 * Xenia : Xbox 360 Emulator Research Project                                 *
 ******************************************************************************
 * Copyright 2019 Ben Vanik. All rights reserved.                             *
 * Released under the BSD license - see LICENSE in the root for more details. *
 ******************************************************************************
 */

// A note about vectors:
// Xenia represents vectors as xyzw pairs, with indices 0123.
// XMM registers are xyzw pairs with indices 3210, making them more like wzyx.
// This makes things somewhat confusing. It'd be nice to just shuffle the
// registers around on load/store, however certain operations require that
// data be in the right offset.
// Basically, this identity must hold:
//   shuffle(vec, b00011011) -> {x,y,z,w} => {x,y,z,w}
// All indices and operations must respect that.
//
// Memory (big endian):
// [00 01 02 03] [04 05 06 07] [08 09 0A 0B] [0C 0D 0E 0F] (x, y, z, w)
// load into xmm register:
// [0F 0E 0D 0C] [0B 0A 09 08] [07 06 05 04] [03 02 01 00] (w, z, y, x)

#include "xenia/cpu/backend/x64/x64_sequences.h"
|
|
|
|
#include <algorithm>
|
|
#include <cstring>
|
|
#include <unordered_map>
|
|
|
|
#include "xenia/base/assert.h"
|
|
#include "xenia/base/clock.h"
|
|
#include "xenia/base/logging.h"
|
|
#include "xenia/base/threading.h"
|
|
#include "xenia/cpu/backend/x64/x64_emitter.h"
|
|
#include "xenia/cpu/backend/x64/x64_op.h"
|
|
#include "xenia/cpu/backend/x64/x64_tracers.h"
|
|
#include "xenia/cpu/hir/hir_builder.h"
|
|
#include "xenia/cpu/processor.h"
|
|
|
|
namespace xe {
namespace cpu {
namespace backend {
namespace x64 {

using namespace Xbyak;

// TODO(benvanik): direct usings.
using namespace xe::cpu;
using namespace xe::cpu::hir;

using xe::cpu::hir::Instr;

typedef bool (*SequenceSelectFn)(X64Emitter&, const Instr*);
std::unordered_map<uint32_t, SequenceSelectFn> sequence_table;

// ============================================================================
|
|
// OPCODE_COMMENT
|
|
// ============================================================================
|
|
struct COMMENT : Sequence<COMMENT, I<OPCODE_COMMENT, VoidOp, OffsetOp>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (IsTracingInstr()) {
|
|
auto str = reinterpret_cast<const char*>(i.src1.value);
|
|
// TODO(benvanik): pass through.
|
|
// TODO(benvanik): don't just leak this memory.
|
|
auto str_copy = strdup(str);
|
|
e.mov(e.rdx, reinterpret_cast<uint64_t>(str_copy));
|
|
e.CallNative(reinterpret_cast<void*>(TraceString));
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_COMMENT, COMMENT);
|
|
|
|
// ============================================================================
|
|
// OPCODE_NOP
|
|
// ============================================================================
|
|
struct NOP : Sequence<NOP, I<OPCODE_NOP, VoidOp>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) { e.nop(); }
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_NOP, NOP);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SOURCE_OFFSET
|
|
// ============================================================================
|
|
struct SOURCE_OFFSET
|
|
: Sequence<SOURCE_OFFSET, I<OPCODE_SOURCE_OFFSET, VoidOp, OffsetOp>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.MarkSourceOffset(i.instr);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SOURCE_OFFSET, SOURCE_OFFSET);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ASSIGN
|
|
// ============================================================================
|
|
struct ASSIGN_I8 : Sequence<ASSIGN_I8, I<OPCODE_ASSIGN, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_I16 : Sequence<ASSIGN_I16, I<OPCODE_ASSIGN, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_I32 : Sequence<ASSIGN_I32, I<OPCODE_ASSIGN, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_I64 : Sequence<ASSIGN_I64, I<OPCODE_ASSIGN, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_F32 : Sequence<ASSIGN_F32, I<OPCODE_ASSIGN, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovaps(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_F64 : Sequence<ASSIGN_F64, I<OPCODE_ASSIGN, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovaps(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ASSIGN_V128 : Sequence<ASSIGN_V128, I<OPCODE_ASSIGN, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovaps(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ASSIGN, ASSIGN_I8, ASSIGN_I16, ASSIGN_I32,
|
|
ASSIGN_I64, ASSIGN_F32, ASSIGN_F64, ASSIGN_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_CAST
|
|
// ============================================================================
|
|
struct CAST_I32_F32 : Sequence<CAST_I32_F32, I<OPCODE_CAST, I32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovd(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CAST_I64_F64 : Sequence<CAST_I64_F64, I<OPCODE_CAST, I64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovq(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CAST_F32_I32 : Sequence<CAST_F32_I32, I<OPCODE_CAST, F32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovd(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CAST_F64_I64 : Sequence<CAST_F64_I64, I<OPCODE_CAST, F64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vmovq(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_CAST, CAST_I32_F32, CAST_I64_F64, CAST_F32_I32,
|
|
CAST_F64_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ZERO_EXTEND
|
|
// ============================================================================
|
|
struct ZERO_EXTEND_I16_I8
|
|
: Sequence<ZERO_EXTEND_I16_I8, I<OPCODE_ZERO_EXTEND, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ZERO_EXTEND_I32_I8
|
|
: Sequence<ZERO_EXTEND_I32_I8, I<OPCODE_ZERO_EXTEND, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ZERO_EXTEND_I64_I8
|
|
: Sequence<ZERO_EXTEND_I64_I8, I<OPCODE_ZERO_EXTEND, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ZERO_EXTEND_I32_I16
|
|
: Sequence<ZERO_EXTEND_I32_I16, I<OPCODE_ZERO_EXTEND, I32Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ZERO_EXTEND_I64_I16
|
|
: Sequence<ZERO_EXTEND_I64_I16, I<OPCODE_ZERO_EXTEND, I64Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct ZERO_EXTEND_I64_I32
|
|
: Sequence<ZERO_EXTEND_I64_I32, I<OPCODE_ZERO_EXTEND, I64Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest.reg().cvt32(), i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ZERO_EXTEND, ZERO_EXTEND_I16_I8, ZERO_EXTEND_I32_I8,
|
|
ZERO_EXTEND_I64_I8, ZERO_EXTEND_I32_I16,
|
|
ZERO_EXTEND_I64_I16, ZERO_EXTEND_I64_I32);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SIGN_EXTEND
|
|
// ============================================================================
|
|
struct SIGN_EXTEND_I16_I8
|
|
: Sequence<SIGN_EXTEND_I16_I8, I<OPCODE_SIGN_EXTEND, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SIGN_EXTEND_I32_I8
|
|
: Sequence<SIGN_EXTEND_I32_I8, I<OPCODE_SIGN_EXTEND, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SIGN_EXTEND_I64_I8
|
|
: Sequence<SIGN_EXTEND_I64_I8, I<OPCODE_SIGN_EXTEND, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SIGN_EXTEND_I32_I16
|
|
: Sequence<SIGN_EXTEND_I32_I16, I<OPCODE_SIGN_EXTEND, I32Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SIGN_EXTEND_I64_I16
|
|
: Sequence<SIGN_EXTEND_I64_I16, I<OPCODE_SIGN_EXTEND, I64Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsx(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SIGN_EXTEND_I64_I32
|
|
: Sequence<SIGN_EXTEND_I64_I32, I<OPCODE_SIGN_EXTEND, I64Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movsxd(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SIGN_EXTEND, SIGN_EXTEND_I16_I8, SIGN_EXTEND_I32_I8,
|
|
SIGN_EXTEND_I64_I8, SIGN_EXTEND_I32_I16,
|
|
SIGN_EXTEND_I64_I16, SIGN_EXTEND_I64_I32);
|
|
|
|
// ============================================================================
|
|
// OPCODE_TRUNCATE
|
|
// ============================================================================
|
|
struct TRUNCATE_I8_I16
|
|
: Sequence<TRUNCATE_I8_I16, I<OPCODE_TRUNCATE, I8Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt8());
|
|
}
|
|
};
|
|
struct TRUNCATE_I8_I32
|
|
: Sequence<TRUNCATE_I8_I32, I<OPCODE_TRUNCATE, I8Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt8());
|
|
}
|
|
};
|
|
struct TRUNCATE_I8_I64
|
|
: Sequence<TRUNCATE_I8_I64, I<OPCODE_TRUNCATE, I8Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt8());
|
|
}
|
|
};
|
|
struct TRUNCATE_I16_I32
|
|
: Sequence<TRUNCATE_I16_I32, I<OPCODE_TRUNCATE, I16Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt16());
|
|
}
|
|
};
|
|
struct TRUNCATE_I16_I64
|
|
: Sequence<TRUNCATE_I16_I64, I<OPCODE_TRUNCATE, I16Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.movzx(i.dest.reg().cvt32(), i.src1.reg().cvt16());
|
|
}
|
|
};
|
|
struct TRUNCATE_I32_I64
|
|
: Sequence<TRUNCATE_I32_I64, I<OPCODE_TRUNCATE, I32Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.mov(i.dest, i.src1.reg().cvt32());
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_TRUNCATE, TRUNCATE_I8_I16, TRUNCATE_I8_I32,
|
|
TRUNCATE_I8_I64, TRUNCATE_I16_I32, TRUNCATE_I16_I64,
|
|
TRUNCATE_I32_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_CONVERT
|
|
// ============================================================================
|
|
struct CONVERT_I32_F32
|
|
: Sequence<CONVERT_I32_F32, I<OPCODE_CONVERT, I32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): saturation check? cvtt* (trunc?)
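// (cvttss2si always truncates toward zero; cvtss2si rounds using the current
// MXCSR rounding mode, hence the flag check below.)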
|
|
if (i.instr->flags == ROUND_TO_ZERO) {
|
|
e.vcvttss2si(i.dest, i.src1);
|
|
} else {
|
|
e.vcvtss2si(i.dest, i.src1);
|
|
}
|
|
}
|
|
};
|
|
struct CONVERT_I32_F64
|
|
: Sequence<CONVERT_I32_F64, I<OPCODE_CONVERT, I32Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// Intel returns 0x80000000 if the double value does not fit within an int32;
// PPC saturates the value instead.
// So, we can clamp the double value to (double)0x7FFFFFFF.
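// (Negative overflow needs no clamp here: cvtsd2si already returns
// 0x80000000 for out-of-range inputs, which presumably matches the PPC
// saturated result for values below INT32_MIN.)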
|
|
e.vminsd(e.xmm0, i.src1, e.GetXmmConstPtr(XMMIntMaxPD));
|
|
if (i.instr->flags == ROUND_TO_ZERO) {
|
|
e.vcvttsd2si(i.dest, e.xmm0);
|
|
} else {
|
|
e.vcvtsd2si(i.dest, e.xmm0);
|
|
}
|
|
}
|
|
};
|
|
struct CONVERT_I64_F64
|
|
: Sequence<CONVERT_I64_F64, I<OPCODE_CONVERT, I64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// Copy src1.
|
|
e.movq(e.rcx, i.src1);
|
|
|
|
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
|
if (i.instr->flags == ROUND_TO_ZERO) {
|
|
e.vcvttsd2si(i.dest, i.src1);
|
|
} else {
|
|
e.vcvtsd2si(i.dest, i.src1);
|
|
}
|
|
|
|
// 0x8000000000000000
|
|
e.mov(e.rax, 0x1);
|
|
e.shl(e.rax, 63);
|
|
|
|
// Saturate positive overflow
|
|
// TODO(DrChat): Find a shorter equivalent sequence.
|
|
// if (result is the integer-indefinite value && src1 >= 0)
//   result = 0x7FFFFFFFFFFFFFFF;
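// A sketch of what the branchless sequence below computes (derived from the
// emitted instructions): cvttsd2si/cvtsd2si return the integer-indefinite
// value 0x8000000000000000 on overflow, so "cmp rax, dest; sete" detects it;
// rcx holds the inverse of src1's sign bit, so the and() isolates the
// positive-overflow case, and subtracting that 0/1 flag turns
// 0x8000000000000000 into 0x7FFFFFFFFFFFFFFF.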
|
|
e.cmp(e.rax, i.dest);
|
|
e.sete(e.al);
|
|
e.movzx(e.rax, e.al);
|
|
e.shr(e.rcx, 63);
|
|
e.xor_(e.rcx, 0x01);
|
|
e.and_(e.rax, e.rcx);
|
|
|
|
e.sub(i.dest, e.rax);
|
|
}
|
|
};
|
|
struct CONVERT_F32_I32
|
|
: Sequence<CONVERT_F32_I32, I<OPCODE_CONVERT, F32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
|
e.vcvtsi2ss(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CONVERT_F32_F64
|
|
: Sequence<CONVERT_F32_F64, I<OPCODE_CONVERT, F32Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
|
e.vcvtsd2ss(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CONVERT_F64_I64
|
|
: Sequence<CONVERT_F64_I64, I<OPCODE_CONVERT, F64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): saturation check? cvtt* (trunc?)
|
|
e.vcvtsi2sd(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct CONVERT_F64_F32
|
|
: Sequence<CONVERT_F64_F32, I<OPCODE_CONVERT, F64Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vcvtss2sd(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_CONVERT, CONVERT_I32_F32, CONVERT_I32_F64,
|
|
CONVERT_I64_F64, CONVERT_F32_I32, CONVERT_F32_F64,
|
|
CONVERT_F64_I64, CONVERT_F64_F32);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ROUND
|
|
// ============================================================================
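// The vround* immediate's low two bits select the rounding mode:
// 00 = nearest-even, 01 = toward -infinity, 10 = toward +infinity,
// 11 = truncate toward zero.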
|
|
struct ROUND_F32 : Sequence<ROUND_F32, I<OPCODE_ROUND, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
switch (i.instr->flags) {
|
|
case ROUND_TO_ZERO:
|
|
e.vroundss(i.dest, i.src1, 0b00000011);
|
|
break;
|
|
case ROUND_TO_NEAREST:
|
|
e.vroundss(i.dest, i.src1, 0b00000000);
|
|
break;
|
|
case ROUND_TO_MINUS_INFINITY:
|
|
e.vroundss(i.dest, i.src1, 0b00000001);
|
|
break;
|
|
case ROUND_TO_POSITIVE_INFINITY:
|
|
e.vroundss(i.dest, i.src1, 0b00000010);
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
struct ROUND_F64 : Sequence<ROUND_F64, I<OPCODE_ROUND, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
switch (i.instr->flags) {
|
|
case ROUND_TO_ZERO:
|
|
e.vroundsd(i.dest, i.src1, 0b00000011);
|
|
break;
|
|
case ROUND_TO_NEAREST:
|
|
e.vroundsd(i.dest, i.src1, 0b00000000);
|
|
break;
|
|
case ROUND_TO_MINUS_INFINITY:
|
|
e.vroundsd(i.dest, i.src1, 0b00000001);
|
|
break;
|
|
case ROUND_TO_POSITIVE_INFINITY:
|
|
e.vroundsd(i.dest, i.src1, 0b00000010);
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
struct ROUND_V128 : Sequence<ROUND_V128, I<OPCODE_ROUND, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
switch (i.instr->flags) {
|
|
case ROUND_TO_ZERO:
|
|
e.vroundps(i.dest, i.src1, 0b00000011);
|
|
break;
|
|
case ROUND_TO_NEAREST:
|
|
e.vroundps(i.dest, i.src1, 0b00000000);
|
|
break;
|
|
case ROUND_TO_MINUS_INFINITY:
|
|
e.vroundps(i.dest, i.src1, 0b00000001);
|
|
break;
|
|
case ROUND_TO_POSITIVE_INFINITY:
|
|
e.vroundps(i.dest, i.src1, 0b00000010);
|
|
break;
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ROUND, ROUND_F32, ROUND_F64, ROUND_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_LOAD_CLOCK
|
|
// ============================================================================
|
|
struct LOAD_CLOCK : Sequence<LOAD_CLOCK, I<OPCODE_LOAD_CLOCK, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// When scaling is disabled and the raw clock source is selected, the code
// in the Clock class just forwards tick counts after one simple multiply
// and division. In that case we'd rather bake the scaling in here to avoid
// the extra function call, with its cache misses and stack frame overhead.
|
|
if (cvars::clock_no_scaling && cvars::clock_source_raw) {
|
|
auto ratio = Clock::guest_tick_ratio();
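// The math below computes, with a 128-bit intermediate in rdx:rax:
//   guest_ticks = (rdtsc_value * ratio.first) / ratio.second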
|
|
// The 360 CPU is an in-order CPU, AMD64 usually isn't. Without
// mfence/lfence magic the rdtsc instruction can be executed sooner or
// later in the cache window. Since its resolution is much higher than
// that of the 360's mftb instruction, this can safely be ignored.
|
|
|
|
// Read time stamp in edx (high part) and eax (low part).
|
|
e.rdtsc();
|
|
// Make it a 64 bit number in rax.
|
|
e.shl(e.rdx, 32);
|
|
e.or_(e.rax, e.rdx);
|
|
// Apply tick frequency scaling.
|
|
e.mov(e.rcx, ratio.first);
|
|
e.mul(e.rcx);
|
|
// We actually now have a 128 bit number in rdx:rax.
|
|
e.mov(e.rcx, ratio.second);
|
|
e.div(e.rcx);
|
|
e.mov(i.dest, e.rax);
|
|
} else {
|
|
e.CallNative(LoadClock);
|
|
e.mov(i.dest, e.rax);
|
|
}
|
|
}
|
|
static uint64_t LoadClock(void* raw_context) {
|
|
return Clock::QueryGuestTickCount();
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_LOAD_CLOCK, LOAD_CLOCK);
|
|
|
|
// ============================================================================
|
|
// OPCODE_CONTEXT_BARRIER
|
|
// ============================================================================
|
|
struct CONTEXT_BARRIER
|
|
: Sequence<CONTEXT_BARRIER, I<OPCODE_CONTEXT_BARRIER, VoidOp>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_CONTEXT_BARRIER, CONTEXT_BARRIER);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MAX
|
|
// ============================================================================
|
|
struct MAX_F32 : Sequence<MAX_F32, I<OPCODE_MAX, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmaxss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MAX_F64 : Sequence<MAX_F64, I<OPCODE_MAX, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmaxsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MAX_V128 : Sequence<MAX_V128, I<OPCODE_MAX, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmaxps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MAX, MAX_F32, MAX_F64, MAX_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MIN
|
|
// ============================================================================
|
|
struct MIN_I8 : Sequence<MIN_I8, I<OPCODE_MIN, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const Reg8& dest_src, const Reg8& src) {
|
|
e.cmp(dest_src, src);
|
|
e.cmovg(dest_src.cvt32(), src.cvt32());
|
|
},
|
|
[](X64Emitter& e, const Reg8& dest_src, int32_t constant) {
|
|
e.mov(e.al, constant);
|
|
e.cmp(dest_src, e.al);
|
|
e.cmovg(dest_src.cvt32(), e.eax);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_I16 : Sequence<MIN_I16, I<OPCODE_MIN, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const Reg16& dest_src, const Reg16& src) {
|
|
e.cmp(dest_src, src);
|
|
e.cmovg(dest_src.cvt32(), src.cvt32());
|
|
},
|
|
[](X64Emitter& e, const Reg16& dest_src, int32_t constant) {
|
|
e.mov(e.ax, constant);
|
|
e.cmp(dest_src, e.ax);
|
|
e.cmovg(dest_src.cvt32(), e.eax);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_I32 : Sequence<MIN_I32, I<OPCODE_MIN, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const Reg32& dest_src, const Reg32& src) {
|
|
e.cmp(dest_src, src);
|
|
e.cmovg(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const Reg32& dest_src, int32_t constant) {
|
|
e.mov(e.eax, constant);
|
|
e.cmp(dest_src, e.eax);
|
|
e.cmovg(dest_src, e.eax);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_I64 : Sequence<MIN_I64, I<OPCODE_MIN, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const Reg64& dest_src, const Reg64& src) {
|
|
e.cmp(dest_src, src);
|
|
e.cmovg(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const Reg64& dest_src, int64_t constant) {
|
|
e.mov(e.rax, constant);
|
|
e.cmp(dest_src, e.rax);
|
|
e.cmovg(dest_src, e.rax);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_F32 : Sequence<MIN_F32, I<OPCODE_MIN, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vminss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_F64 : Sequence<MIN_F64, I<OPCODE_MIN, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vminsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MIN_V128 : Sequence<MIN_V128, I<OPCODE_MIN, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vminps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MIN, MIN_I8, MIN_I16, MIN_I32, MIN_I64, MIN_F32,
|
|
MIN_F64, MIN_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SELECT
|
|
// ============================================================================
|
|
// dest = src1 ? src2 : src3
|
|
// TODO(benvanik): match compare + select sequences, as often it's something
|
|
// like SELECT(VECTOR_COMPARE_SGE(a, b), a, b)
|
|
struct SELECT_I8
|
|
: Sequence<SELECT_I8, I<OPCODE_SELECT, I8Op, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Reg8 src2;
|
|
if (i.src2.is_constant) {
|
|
src2 = e.al;
|
|
e.mov(src2, i.src2.constant());
|
|
} else {
|
|
src2 = i.src2;
|
|
}
|
|
e.test(i.src1, i.src1);
|
|
e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
|
|
e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
|
|
}
|
|
};
|
|
struct SELECT_I16
|
|
: Sequence<SELECT_I16, I<OPCODE_SELECT, I16Op, I8Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Reg16 src2;
|
|
if (i.src2.is_constant) {
|
|
src2 = e.ax;
|
|
e.mov(src2, i.src2.constant());
|
|
} else {
|
|
src2 = i.src2;
|
|
}
|
|
e.test(i.src1, i.src1);
|
|
e.cmovnz(i.dest.reg().cvt32(), src2.cvt32());
|
|
e.cmovz(i.dest.reg().cvt32(), i.src3.reg().cvt32());
|
|
}
|
|
};
|
|
struct SELECT_I32
|
|
: Sequence<SELECT_I32, I<OPCODE_SELECT, I32Op, I8Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Reg32 src2;
|
|
if (i.src2.is_constant) {
|
|
src2 = e.eax;
|
|
e.mov(src2, i.src2.constant());
|
|
} else {
|
|
src2 = i.src2;
|
|
}
|
|
e.test(i.src1, i.src1);
|
|
e.cmovnz(i.dest, src2);
|
|
e.cmovz(i.dest, i.src3);
|
|
}
|
|
};
|
|
struct SELECT_I64
|
|
: Sequence<SELECT_I64, I<OPCODE_SELECT, I64Op, I8Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Reg64 src2;
|
|
if (i.src2.is_constant) {
|
|
src2 = e.rax;
|
|
e.mov(src2, i.src2.constant());
|
|
} else {
|
|
src2 = i.src2;
|
|
}
|
|
e.test(i.src1, i.src1);
|
|
e.cmovnz(i.dest, src2);
|
|
e.cmovz(i.dest, i.src3);
|
|
}
|
|
};
|
|
struct SELECT_F32
|
|
: Sequence<SELECT_F32, I<OPCODE_SELECT, F32Op, I8Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): find a shorter sequence.
|
|
// dest = src1 != 0 ? src2 : src3
|
|
e.movzx(e.eax, i.src1);
|
|
e.vmovd(e.xmm1, e.eax);
|
|
e.vxorps(e.xmm0, e.xmm0);
|
|
e.vpcmpeqd(e.xmm0, e.xmm1);
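// The low dword of xmm0 is now all-ones when src1 == 0 and all-zeros
// otherwise; the andn/and/or below use it as a branchless select mask.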
|
|
|
|
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
|
if (i.src2.is_constant) {
|
|
e.LoadConstantXmm(src2, i.src2.constant());
|
|
}
|
|
e.vpandn(e.xmm1, e.xmm0, src2);
|
|
|
|
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
e.vpand(i.dest, e.xmm0, src3);
|
|
e.vpor(i.dest, e.xmm1);
|
|
}
|
|
};
|
|
struct SELECT_F64
|
|
: Sequence<SELECT_F64, I<OPCODE_SELECT, F64Op, I8Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// dest = src1 != 0 ? src2 : src3
|
|
e.movzx(e.eax, i.src1);
|
|
e.vmovd(e.xmm1, e.eax);
|
|
e.vpxor(e.xmm0, e.xmm0);
|
|
e.vpcmpeqq(e.xmm0, e.xmm1);
|
|
|
|
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
|
if (i.src2.is_constant) {
|
|
e.LoadConstantXmm(src2, i.src2.constant());
|
|
}
|
|
e.vpandn(e.xmm1, e.xmm0, src2);
|
|
|
|
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
e.vpand(i.dest, e.xmm0, src3);
|
|
e.vpor(i.dest, e.xmm1);
|
|
}
|
|
};
|
|
struct SELECT_V128_I8
|
|
: Sequence<SELECT_V128_I8, I<OPCODE_SELECT, V128Op, I8Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): find a shorter sequence.
|
|
// dest = src1 != 0 ? src2 : src3
|
|
e.movzx(e.eax, i.src1);
|
|
e.vmovd(e.xmm1, e.eax);
|
|
e.vpbroadcastd(e.xmm1, e.xmm1);
|
|
e.vxorps(e.xmm0, e.xmm0);
|
|
e.vpcmpeqd(e.xmm0, e.xmm1);
|
|
|
|
Xmm src2 = i.src2.is_constant ? e.xmm2 : i.src2;
|
|
if (i.src2.is_constant) {
|
|
e.LoadConstantXmm(src2, i.src2.constant());
|
|
}
|
|
e.vpandn(e.xmm1, e.xmm0, src2);
|
|
|
|
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
e.vpand(i.dest, e.xmm0, src3);
|
|
e.vpor(i.dest, e.xmm1);
|
|
}
|
|
};
|
|
struct SELECT_V128_V128
|
|
: Sequence<SELECT_V128_V128,
|
|
I<OPCODE_SELECT, V128Op, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Xmm src1 = i.src1.is_constant ? e.xmm0 : i.src1;
|
|
if (i.src1.is_constant) {
|
|
e.LoadConstantXmm(src1, i.src1.constant());
|
|
}
|
|
|
|
Xmm src2 = i.src2.is_constant ? e.xmm1 : i.src2;
|
|
if (i.src2.is_constant) {
|
|
e.LoadConstantXmm(src2, i.src2.constant());
|
|
}
|
|
|
|
Xmm src3 = i.src3.is_constant ? e.xmm2 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
|
|
// src1 ? src2 : src3;
|
|
e.vpandn(e.xmm3, src1, src2);
|
|
e.vpand(i.dest, src1, src3);
|
|
e.vpor(i.dest, i.dest, e.xmm3);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SELECT, SELECT_I8, SELECT_I16, SELECT_I32,
|
|
SELECT_I64, SELECT_F32, SELECT_F64, SELECT_V128_I8,
|
|
SELECT_V128_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_IS_TRUE
|
|
// ============================================================================
|
|
struct IS_TRUE_I8 : Sequence<IS_TRUE_I8, I<OPCODE_IS_TRUE, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_I16 : Sequence<IS_TRUE_I16, I<OPCODE_IS_TRUE, I8Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_I32 : Sequence<IS_TRUE_I32, I<OPCODE_IS_TRUE, I8Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_I64 : Sequence<IS_TRUE_I64, I<OPCODE_IS_TRUE, I8Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_F32 : Sequence<IS_TRUE_F32, I<OPCODE_IS_TRUE, I8Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_F64 : Sequence<IS_TRUE_F64, I<OPCODE_IS_TRUE, I8Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
struct IS_TRUE_V128 : Sequence<IS_TRUE_V128, I<OPCODE_IS_TRUE, I8Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setnz(i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_IS_TRUE, IS_TRUE_I8, IS_TRUE_I16, IS_TRUE_I32,
|
|
IS_TRUE_I64, IS_TRUE_F32, IS_TRUE_F64, IS_TRUE_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_IS_FALSE
|
|
// ============================================================================
|
|
struct IS_FALSE_I8 : Sequence<IS_FALSE_I8, I<OPCODE_IS_FALSE, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_I16 : Sequence<IS_FALSE_I16, I<OPCODE_IS_FALSE, I8Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_I32 : Sequence<IS_FALSE_I32, I<OPCODE_IS_FALSE, I8Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_I64 : Sequence<IS_FALSE_I64, I<OPCODE_IS_FALSE, I8Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.test(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_F32 : Sequence<IS_FALSE_F32, I<OPCODE_IS_FALSE, I8Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_F64 : Sequence<IS_FALSE_F64, I<OPCODE_IS_FALSE, I8Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
struct IS_FALSE_V128
|
|
: Sequence<IS_FALSE_V128, I<OPCODE_IS_FALSE, I8Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vptest(i.src1, i.src1);
|
|
e.setz(i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_IS_FALSE, IS_FALSE_I8, IS_FALSE_I16, IS_FALSE_I32,
|
|
IS_FALSE_I64, IS_FALSE_F32, IS_FALSE_F64, IS_FALSE_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_IS_NAN
|
|
// ============================================================================
|
|
struct IS_NAN_F32 : Sequence<IS_NAN_F32, I<OPCODE_IS_NAN, I8Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vucomiss(i.src1, i.src1);
|
|
e.setp(i.dest);
|
|
}
|
|
};
|
|
|
|
struct IS_NAN_F64 : Sequence<IS_NAN_F64, I<OPCODE_IS_NAN, I8Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vucomisd(i.src1, i.src1);
|
|
e.setp(i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_IS_NAN, IS_NAN_F32, IS_NAN_F64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_COMPARE_EQ
|
|
// ============================================================================
|
|
struct COMPARE_EQ_I8
|
|
: Sequence<COMPARE_EQ_I8, I<OPCODE_COMPARE_EQ, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg8& src1,
|
|
const Reg8& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg8& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_EQ_I16
|
|
: Sequence<COMPARE_EQ_I16, I<OPCODE_COMPARE_EQ, I8Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg16& src1,
|
|
const Reg16& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg16& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_EQ_I32
|
|
: Sequence<COMPARE_EQ_I32, I<OPCODE_COMPARE_EQ, I8Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg32& src1,
|
|
const Reg32& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg32& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_EQ_I64
|
|
: Sequence<COMPARE_EQ_I64, I<OPCODE_COMPARE_EQ, I8Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg64& src1,
|
|
const Reg64& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg64& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_EQ_F32
|
|
: Sequence<COMPARE_EQ_F32, I<OPCODE_COMPARE_EQ, I8Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, I8Op dest, const Xmm& src1, const Xmm& src2) {
|
|
e.vcomiss(src1, src2);
|
|
});
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_EQ_F64
|
|
: Sequence<COMPARE_EQ_F64, I<OPCODE_COMPARE_EQ, I8Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, I8Op dest, const Xmm& src1, const Xmm& src2) {
|
|
e.vcomisd(src1, src2);
|
|
});
|
|
e.sete(i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_COMPARE_EQ, COMPARE_EQ_I8, COMPARE_EQ_I16,
|
|
COMPARE_EQ_I32, COMPARE_EQ_I64, COMPARE_EQ_F32,
|
|
COMPARE_EQ_F64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_COMPARE_NE
|
|
// ============================================================================
|
|
struct COMPARE_NE_I8
|
|
: Sequence<COMPARE_NE_I8, I<OPCODE_COMPARE_NE, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg8& src1,
|
|
const Reg8& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg8& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_NE_I16
|
|
: Sequence<COMPARE_NE_I16, I<OPCODE_COMPARE_NE, I8Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg16& src1,
|
|
const Reg16& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg16& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_NE_I32
|
|
: Sequence<COMPARE_NE_I32, I<OPCODE_COMPARE_NE, I8Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg32& src1,
|
|
const Reg32& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg32& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_NE_I64
|
|
: Sequence<COMPARE_NE_I64, I<OPCODE_COMPARE_NE, I8Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeCompareOp(e, i,
|
|
[](X64Emitter& e, const Reg64& src1,
|
|
const Reg64& src2) { e.cmp(src1, src2); },
|
|
[](X64Emitter& e, const Reg64& src1,
|
|
int32_t constant) { e.cmp(src1, constant); });
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_NE_F32
|
|
: Sequence<COMPARE_NE_F32, I<OPCODE_COMPARE_NE, I8Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vcomiss(i.src1, i.src2);
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
struct COMPARE_NE_F64
|
|
: Sequence<COMPARE_NE_F64, I<OPCODE_COMPARE_NE, I8Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vcomisd(i.src1, i.src2);
|
|
e.setne(i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_COMPARE_NE, COMPARE_NE_I8, COMPARE_NE_I16,
|
|
COMPARE_NE_I32, COMPARE_NE_I64, COMPARE_NE_F32,
|
|
COMPARE_NE_F64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_COMPARE_*
|
|
// ============================================================================
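// Note: EmitAssociativeCompareOp (x64_op.h) presumably swaps the operand
// order in some constant cases and reports that via `inverse`, which is why
// each comparison also supplies the opposite setcc instruction.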
|
|
#define EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, type, \
|
|
reg_type) \
|
|
struct COMPARE_##op##_##type \
|
|
: Sequence<COMPARE_##op##_##type, \
|
|
I<OPCODE_COMPARE_##op, I8Op, type, type>> { \
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) { \
|
|
EmitAssociativeCompareOp( \
|
|
e, i, \
|
|
[](X64Emitter& e, const Reg8& dest, const reg_type& src1, \
|
|
const reg_type& src2, bool inverse) { \
|
|
e.cmp(src1, src2); \
|
|
if (!inverse) { \
|
|
e.instr(dest); \
|
|
} else { \
|
|
e.inverse_instr(dest); \
|
|
} \
|
|
}, \
|
|
[](X64Emitter& e, const Reg8& dest, const reg_type& src1, \
|
|
int32_t constant, bool inverse) { \
|
|
e.cmp(src1, constant); \
|
|
if (!inverse) { \
|
|
e.instr(dest); \
|
|
} else { \
|
|
e.inverse_instr(dest); \
|
|
} \
|
|
}); \
|
|
} \
|
|
};
|
|
#define EMITTER_ASSOCIATIVE_COMPARE_XX(op, instr, inverse_instr) \
|
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I8Op, Reg8); \
|
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I16Op, Reg16); \
|
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I32Op, Reg32); \
|
|
EMITTER_ASSOCIATIVE_COMPARE_INT(op, instr, inverse_instr, I64Op, Reg64); \
|
|
EMITTER_OPCODE_TABLE(OPCODE_COMPARE_##op, COMPARE_##op##_I8Op, \
|
|
COMPARE_##op##_I16Op, COMPARE_##op##_I32Op, \
|
|
COMPARE_##op##_I64Op);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SLT, setl, setg);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SLE, setle, setge);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SGT, setg, setl);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(SGE, setge, setle);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(ULT, setb, seta);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(ULE, setbe, setae);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(UGT, seta, setb);
|
|
EMITTER_ASSOCIATIVE_COMPARE_XX(UGE, setae, setbe);
|
|
|
|
// https://web.archive.org/web/20171129015931/https://x86.renejeschke.de/html/file_module_x86_id_288.html
|
|
// Original link: https://x86.renejeschke.de/html/file_module_x86_id_288.html
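// comiss/comisd set ZF/PF/CF the way an unsigned integer compare would (and
// clear OF/SF/AF), so the unsigned setcc forms (setb, setbe, seta, setae)
// are used for both the signed and unsigned HIR float comparisons below.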
|
|
#define EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(op, instr) \
|
|
struct COMPARE_##op##_F32 \
|
|
: Sequence<COMPARE_##op##_F32, \
|
|
I<OPCODE_COMPARE_##op, I8Op, F32Op, F32Op>> { \
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) { \
|
|
e.vcomiss(i.src1, i.src2); \
|
|
e.instr(i.dest); \
|
|
} \
|
|
}; \
|
|
struct COMPARE_##op##_F64 \
|
|
: Sequence<COMPARE_##op##_F64, \
|
|
I<OPCODE_COMPARE_##op, I8Op, F64Op, F64Op>> { \
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) { \
|
|
if (i.src1.is_constant) { \
|
|
e.LoadConstantXmm(e.xmm0, i.src1.constant()); \
|
|
e.vcomisd(e.xmm0, i.src2); \
|
|
} else if (i.src2.is_constant) { \
|
|
e.LoadConstantXmm(e.xmm0, i.src2.constant()); \
|
|
e.vcomisd(i.src1, e.xmm0); \
|
|
} else { \
|
|
e.vcomisd(i.src1, i.src2); \
|
|
} \
|
|
e.instr(i.dest); \
|
|
} \
|
|
}; \
|
|
EMITTER_OPCODE_TABLE(OPCODE_COMPARE_##op##_FLT, COMPARE_##op##_F32, \
|
|
COMPARE_##op##_F64);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLT, setb);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SLE, setbe);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGT, seta);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(SGE, setae);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULT, setb);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(ULE, setbe);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGT, seta);
|
|
EMITTER_ASSOCIATIVE_COMPARE_FLT_XX(UGE, setae);
|
|
|
|
// ============================================================================
|
|
// OPCODE_DID_SATURATE
|
|
// ============================================================================
|
|
struct DID_SATURATE
|
|
: Sequence<DID_SATURATE, I<OPCODE_DID_SATURATE, I8Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): implement saturation check (VECTOR_ADD, etc).
|
|
e.xor_(i.dest, i.dest);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_DID_SATURATE, DID_SATURATE);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ADD
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitAddXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.add(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.add(dest_src, constant);
|
|
});
|
|
}
|
|
struct ADD_I8 : Sequence<ADD_I8, I<OPCODE_ADD, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddXX<ADD_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct ADD_I16 : Sequence<ADD_I16, I<OPCODE_ADD, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddXX<ADD_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct ADD_I32 : Sequence<ADD_I32, I<OPCODE_ADD, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddXX<ADD_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct ADD_I64 : Sequence<ADD_I64, I<OPCODE_ADD, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddXX<ADD_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct ADD_F32 : Sequence<ADD_F32, I<OPCODE_ADD, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vaddss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct ADD_F64 : Sequence<ADD_F64, I<OPCODE_ADD, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vaddsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct ADD_V128 : Sequence<ADD_V128, I<OPCODE_ADD, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vaddps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ADD, ADD_I8, ADD_I16, ADD_I32, ADD_I64, ADD_F32,
|
|
ADD_F64, ADD_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ADD_CARRY
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitAddCarryXX(X64Emitter& e, const ARGS& i) {
|
|
// TODO(benvanik): faster setting? we could probably do some fun math tricks
|
|
// here to get the carry flag set.
|
|
if (i.src3.is_constant) {
|
|
if (i.src3.constant()) {
|
|
e.stc();
|
|
} else {
|
|
e.clc();
|
|
}
|
|
} else {
|
|
if (i.src3.reg().getIdx() <= 4) {
|
|
// Can move from A/B/C/DX to AH.
|
|
e.mov(e.ah, i.src3.reg().cvt8());
|
|
} else {
|
|
e.mov(e.al, i.src3);
|
|
e.mov(e.ah, e.al);
|
|
}
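// sahf loads AH into the low byte of EFLAGS; bit 0 of AH becomes CF, so
// src3's low bit seeds the carry for the adc below.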
|
|
e.sahf();
|
|
}
|
|
SEQ::EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.adc(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.adc(dest_src, constant);
|
|
});
|
|
}
|
|
struct ADD_CARRY_I8
|
|
: Sequence<ADD_CARRY_I8, I<OPCODE_ADD_CARRY, I8Op, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddCarryXX<ADD_CARRY_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct ADD_CARRY_I16
|
|
: Sequence<ADD_CARRY_I16, I<OPCODE_ADD_CARRY, I16Op, I16Op, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddCarryXX<ADD_CARRY_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct ADD_CARRY_I32
|
|
: Sequence<ADD_CARRY_I32, I<OPCODE_ADD_CARRY, I32Op, I32Op, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddCarryXX<ADD_CARRY_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct ADD_CARRY_I64
|
|
: Sequence<ADD_CARRY_I64, I<OPCODE_ADD_CARRY, I64Op, I64Op, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAddCarryXX<ADD_CARRY_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ADD_CARRY, ADD_CARRY_I8, ADD_CARRY_I16,
|
|
ADD_CARRY_I32, ADD_CARRY_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SUB
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitSubXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitAssociativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.sub(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.sub(dest_src, constant);
|
|
});
|
|
}
|
|
struct SUB_I8 : Sequence<SUB_I8, I<OPCODE_SUB, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSubXX<SUB_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct SUB_I16 : Sequence<SUB_I16, I<OPCODE_SUB, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSubXX<SUB_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct SUB_I32 : Sequence<SUB_I32, I<OPCODE_SUB, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSubXX<SUB_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct SUB_I64 : Sequence<SUB_I64, I<OPCODE_SUB, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSubXX<SUB_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct SUB_F32 : Sequence<SUB_F32, I<OPCODE_SUB, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vsubss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct SUB_F64 : Sequence<SUB_F64, I<OPCODE_SUB, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vsubsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct SUB_V128 : Sequence<SUB_V128, I<OPCODE_SUB, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vsubps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SUB, SUB_I8, SUB_I16, SUB_I32, SUB_I64, SUB_F32,
|
|
SUB_F64, SUB_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MUL
|
|
// ============================================================================
|
|
// Sign doesn't matter here, as we don't use the high bits.
|
|
// We exploit mulx here to avoid creating too much register pressure.
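// (mulx, from BMI2, multiplies an explicit source by EDX/RDX and writes the
// high and low halves of the product to two destinations without touching
// the flags.)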
|
|
struct MUL_I8 : Sequence<MUL_I8, I<OPCODE_MUL, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// mulx: $1:$2 = EDX * $3
|
|
|
|
// TODO(benvanik): place src2 in edx?
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.movzx(e.edx, i.src2);
|
|
e.mov(e.eax, static_cast<uint8_t>(i.src1.constant()));
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), e.eax);
|
|
} else if (i.src2.is_constant) {
|
|
e.movzx(e.edx, i.src1);
|
|
e.mov(e.eax, static_cast<uint8_t>(i.src2.constant()));
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), e.eax);
|
|
} else {
|
|
e.movzx(e.edx, i.src2);
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), i.src1.reg().cvt32());
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// AH:AL = AL * $1;
|
|
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.mov(e.al, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.al);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.al, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.al);
|
|
} else {
|
|
e.movzx(e.al, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.al);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
struct MUL_I16 : Sequence<MUL_I16, I<OPCODE_MUL, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// mulx: $1:$2 = EDX * $3
|
|
|
|
// TODO(benvanik): place src2 in edx?
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.movzx(e.edx, i.src2);
|
|
e.mov(e.ax, static_cast<uint16_t>(i.src1.constant()));
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), e.eax);
|
|
} else if (i.src2.is_constant) {
|
|
e.movzx(e.edx, i.src1);
|
|
e.mov(e.ax, static_cast<uint16_t>(i.src2.constant()));
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), e.eax);
|
|
} else {
|
|
e.movzx(e.edx, i.src2);
|
|
e.mulx(e.edx, i.dest.reg().cvt32(), i.src1.reg().cvt32());
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// DX:AX = AX * $1;
|
|
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.mov(e.ax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.movzx(i.dest, e.ax);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.ax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.movzx(i.dest, e.ax);
|
|
} else {
|
|
e.movzx(e.ax, i.src1);
|
|
e.mul(i.src2);
|
|
e.movzx(i.dest, e.ax);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
struct MUL_I32 : Sequence<MUL_I32, I<OPCODE_MUL, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// mulx: $1:$2 = EDX * $3
|
|
|
|
// TODO(benvanik): place src2 in edx?
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.mov(e.edx, i.src2);
|
|
e.mov(e.eax, i.src1.constant());
|
|
e.mulx(e.edx, i.dest, e.eax);
|
|
} else if (i.src2.is_constant) {
|
|
e.mov(e.edx, i.src1);
|
|
e.mov(e.eax, i.src2.constant());
|
|
e.mulx(e.edx, i.dest, e.eax);
|
|
} else {
|
|
e.mov(e.edx, i.src2);
|
|
e.mulx(e.edx, i.dest, i.src1);
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// EDX:EAX = EAX * $1;
|
|
|
|
// is_constant AKA not a register
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.eax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.eax);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.eax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.eax);
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.eax);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
struct MUL_I64 : Sequence<MUL_I64, I<OPCODE_MUL, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// mulx: $1:$2 = RDX * $3
|
|
|
|
// TODO(benvanik): place src2 in edx?
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant);
|
|
e.mov(e.rdx, i.src2);
|
|
e.mov(e.rax, i.src1.constant());
|
|
e.mulx(e.rdx, i.dest, e.rax);
|
|
} else if (i.src2.is_constant) {
|
|
e.mov(e.rdx, i.src1);
|
|
e.mov(e.rax, i.src2.constant());
|
|
e.mulx(e.rdx, i.dest, e.rax);
|
|
} else {
|
|
e.mov(e.rdx, i.src2);
|
|
e.mulx(e.rdx, i.dest, i.src1);
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// RDX:RAX = RAX * $1;
|
|
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.rax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.rax);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.rax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.rax);
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.rax);
|
|
}
|
|
}
|
|
}
|
|
};
|
|
struct MUL_F32 : Sequence<MUL_F32, I<OPCODE_MUL, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MUL_F64 : Sequence<MUL_F64, I<OPCODE_MUL, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct MUL_V128 : Sequence<MUL_V128, I<OPCODE_MUL, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MUL, MUL_I8, MUL_I16, MUL_I32, MUL_I64, MUL_F32,
|
|
MUL_F64, MUL_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MUL_HI
|
|
// ============================================================================
|
|
struct MUL_HI_I8 : Sequence<MUL_HI_I8, I<OPCODE_MUL_HI, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
// mulx: $1:$2 = EDX * $3
|
|
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// TODO(benvanik): place src1 in eax? still need to sign extend
|
|
e.movzx(e.edx, i.src1);
|
|
e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
|
|
} else {
|
|
// x86 mul instruction
|
|
// AH:AL = AL * $1;
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.al, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.ah);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.al, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.ah);
|
|
} else {
|
|
e.mov(e.al, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.ah);
|
|
}
|
|
}
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.al, i.src1.constant());
|
|
} else {
|
|
e.mov(e.al, i.src1);
|
|
}
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.al, i.src2.constant());
|
|
e.imul(e.al);
|
|
} else {
|
|
e.imul(i.src2);
|
|
}
|
|
e.mov(i.dest, e.ah);
|
|
}
|
|
}
|
|
};
|
|
struct MUL_HI_I16
|
|
: Sequence<MUL_HI_I16, I<OPCODE_MUL_HI, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// TODO(benvanik): place src1 in eax? still need to sign extend
|
|
e.movzx(e.edx, i.src1);
|
|
e.mulx(i.dest.reg().cvt32(), e.eax, i.src2.reg().cvt32());
|
|
} else {
|
|
// x86 mul instruction
|
|
// DX:AX = AX * $1;
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.ax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.dx);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.ax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.dx);
|
|
} else {
|
|
e.mov(e.ax, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.dx);
|
|
}
|
|
}
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.ax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.ax, i.src1);
|
|
}
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.dx, i.src2.constant());
|
|
e.imul(e.dx);
|
|
} else {
|
|
e.imul(i.src2);
|
|
}
|
|
e.mov(i.dest, e.dx);
|
|
}
|
|
}
|
|
};
|
|
struct MUL_HI_I32
|
|
: Sequence<MUL_HI_I32, I<OPCODE_MUL_HI, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// TODO(benvanik): place src1 in eax? still need to sign extend
|
|
e.mov(e.edx, i.src1);
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.eax, i.src2.constant());
|
|
e.mulx(i.dest, e.edx, e.eax);
|
|
} else {
|
|
e.mulx(i.dest, e.edx, i.src2);
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// EDX:EAX = EAX * $1;
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.eax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.edx);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.eax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.edx);
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.edx);
|
|
}
|
|
}
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.eax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
}
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.edx, i.src2.constant());
|
|
e.imul(e.edx);
|
|
} else {
|
|
e.imul(i.src2);
|
|
}
|
|
e.mov(i.dest, e.edx);
|
|
}
|
|
}
|
|
};
|
|
struct MUL_HI_I64
|
|
: Sequence<MUL_HI_I64, I<OPCODE_MUL_HI, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
// TODO(benvanik): place src1 in eax? still need to sign extend
|
|
e.mov(e.rdx, i.src1);
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.rax, i.src2.constant());
|
|
e.mulx(i.dest, e.rdx, e.rax);
|
|
} else {
|
|
e.mulx(i.dest, e.rax, i.src2);
|
|
}
|
|
} else {
|
|
// x86 mul instruction
|
|
// RDX:RAX = RAX * $1;
|
|
if (i.src1.is_constant) {
|
|
assert_true(!i.src2.is_constant); // can't multiply 2 constants
|
|
e.mov(e.rax, i.src1.constant());
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.rdx);
|
|
} else if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant); // can't multiply 2 constants
|
|
e.mov(e.rax, i.src2.constant());
|
|
e.mul(i.src1);
|
|
e.mov(i.dest, e.rdx);
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
e.mul(i.src2);
|
|
e.mov(i.dest, e.rdx);
|
|
}
|
|
}
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.rax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
}
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.rdx, i.src2.constant());
|
|
e.imul(e.rdx);
|
|
} else {
|
|
e.imul(i.src2);
|
|
}
|
|
e.mov(i.dest, e.rdx);
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MUL_HI, MUL_HI_I8, MUL_HI_I16, MUL_HI_I32,
|
|
MUL_HI_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_DIV
|
|
// ============================================================================
|
|
// TODO(benvanik): optimize common constant cases.
|
|
// TODO(benvanik): simplify code!
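// Note: div/idiv raise #DE on a zero divisor, so for non-constant divisors
// the sequences below test src2 and branch over the divide when it is zero.
// In that case dest simply receives whatever is left in the accumulator;
// the PPC divw/divwu family leaves the result undefined for division by
// zero, so no particular value is required here.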
struct DIV_I8 : Sequence<DIV_I8, I<OPCODE_DIV, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Xbyak::Label skip;
|
|
e.inLocalLabel();
|
|
|
|
if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.cl, i.src2.constant());
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
e.movzx(e.ax, i.src1);
|
|
e.div(e.cl);
|
|
} else {
|
|
e.movsx(e.ax, i.src1);
|
|
e.idiv(e.cl);
|
|
}
|
|
} else {
|
|
// Skip if src2 is zero.
|
|
e.test(i.src2, i.src2);
|
|
e.jz(skip, CodeGenerator::T_SHORT);
|
|
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.ax, static_cast<int16_t>(i.src1.constant()));
|
|
} else {
|
|
e.movzx(e.ax, i.src1);
|
|
}
|
|
e.div(i.src2);
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.ax, static_cast<int16_t>(i.src1.constant()));
|
|
} else {
|
|
e.movsx(e.ax, i.src1);
|
|
}
|
|
e.idiv(i.src2);
|
|
}
|
|
}
|
|
|
|
e.L(skip);
|
|
e.outLocalLabel();
|
|
e.mov(i.dest, e.al);
|
|
}
|
|
};
|
|
struct DIV_I16 : Sequence<DIV_I16, I<OPCODE_DIV, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Xbyak::Label skip;
|
|
e.inLocalLabel();
|
|
|
|
if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.cx, i.src2.constant());
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
e.mov(e.ax, i.src1);
|
|
// Zero upper bits.
|
|
e.xor_(e.dx, e.dx);
|
|
e.div(e.cx);
|
|
} else {
|
|
e.mov(e.ax, i.src1);
|
|
e.cwd(); // dx:ax = sign-extend ax
|
|
e.idiv(e.cx);
|
|
}
|
|
} else {
|
|
// Skip if src2 is zero.
|
|
e.test(i.src2, i.src2);
|
|
e.jz(skip, CodeGenerator::T_SHORT);
|
|
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.ax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.ax, i.src1);
|
|
}
|
|
// Zero upper bits.
|
|
e.xor_(e.dx, e.dx);
|
|
e.div(i.src2);
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.ax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.ax, i.src1);
|
|
}
|
|
e.cwd(); // dx:ax = sign-extend ax
|
|
e.idiv(i.src2);
|
|
}
|
|
}
|
|
|
|
e.L(skip);
|
|
e.outLocalLabel();
|
|
e.mov(i.dest, e.ax);
|
|
}
|
|
};
|
|
struct DIV_I32 : Sequence<DIV_I32, I<OPCODE_DIV, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Xbyak::Label skip;
|
|
e.inLocalLabel();
|
|
|
|
if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.ecx, i.src2.constant());
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
e.mov(e.eax, i.src1);
|
|
// Zero upper bits.
|
|
e.xor_(e.edx, e.edx);
|
|
e.div(e.ecx);
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
e.cdq(); // edx:eax = sign-extend eax
|
|
e.idiv(e.ecx);
|
|
}
|
|
} else {
|
|
// Skip if src2 is zero.
|
|
e.test(i.src2, i.src2);
|
|
e.jz(skip, CodeGenerator::T_SHORT);
|
|
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.eax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
}
|
|
// Zero upper bits.
|
|
e.xor_(e.edx, e.edx);
|
|
e.div(i.src2);
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.eax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.eax, i.src1);
|
|
}
|
|
e.cdq(); // edx:eax = sign-extend eax
|
|
e.idiv(i.src2);
|
|
}
|
|
}
|
|
|
|
e.L(skip);
|
|
e.outLocalLabel();
|
|
e.mov(i.dest, e.eax);
|
|
}
|
|
};
|
|
struct DIV_I64 : Sequence<DIV_I64, I<OPCODE_DIV, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
Xbyak::Label skip;
|
|
e.inLocalLabel();
|
|
|
|
if (i.src2.is_constant) {
|
|
assert_true(!i.src1.is_constant);
|
|
e.mov(e.rcx, i.src2.constant());
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
e.mov(e.rax, i.src1);
|
|
// Zero upper bits.
|
|
e.xor_(e.rdx, e.rdx);
|
|
e.div(e.rcx);
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
e.cqo(); // rdx:rax = sign-extend rax
|
|
e.idiv(e.rcx);
|
|
}
|
|
} else {
|
|
// Skip if src2 is zero.
|
|
e.test(i.src2, i.src2);
|
|
e.jz(skip, CodeGenerator::T_SHORT);
|
|
|
|
if (i.instr->flags & ARITHMETIC_UNSIGNED) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.rax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
}
|
|
// Zero upper bits.
|
|
e.xor_(e.rdx, e.rdx);
|
|
e.div(i.src2);
|
|
} else {
|
|
if (i.src1.is_constant) {
|
|
e.mov(e.rax, i.src1.constant());
|
|
} else {
|
|
e.mov(e.rax, i.src1);
|
|
}
|
|
e.cqo(); // rdx:rax = sign-extend rax
|
|
e.idiv(i.src2);
|
|
}
|
|
}
|
|
|
|
e.L(skip);
|
|
e.outLocalLabel();
|
|
e.mov(i.dest, e.rax);
|
|
}
|
|
};
|
|
struct DIV_F32 : Sequence<DIV_F32, I<OPCODE_DIV, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vdivss(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct DIV_F64 : Sequence<DIV_F64, I<OPCODE_DIV, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vdivsd(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
struct DIV_V128 : Sequence<DIV_V128, I<OPCODE_DIV, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
EmitAssociativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vdivps(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_DIV, DIV_I8, DIV_I16, DIV_I32, DIV_I64, DIV_F32,
|
|
DIV_F64, DIV_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MUL_ADD
|
|
// ============================================================================
|
|
// d = src1 * src2 + src3
// $0 = $1 * $2 + $3
|
|
// Forms of vfmadd/vfmsub:
|
|
// - 132 -> $1 = $1 * $3 + $2
|
|
// - 213 -> $1 = $2 * $1 + $3
|
|
// - 231 -> $1 = $2 * $3 + $1
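// The FMA path below picks whichever vfmadd form lets the operand already
// living in dest act as the in-place operand, avoiding an extra register
// copy; if dest aliases none of the sources, src1 is copied into dest first
// and the 213 form is used.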
struct MUL_ADD_F32
|
|
: Sequence<MUL_ADD_F32, I<OPCODE_MUL_ADD, F32Op, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// FMA extension
|
|
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmadd213ss(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmadd213ss(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmadd231ss(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovss(i.dest, src1);
|
|
e.vfmadd213ss(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovss(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulss(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vaddss(i.dest, i.dest, src3); // $0 = $1 + $2
|
|
}
|
|
}
|
|
};
|
|
struct MUL_ADD_F64
|
|
: Sequence<MUL_ADD_F64, I<OPCODE_MUL_ADD, F64Op, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// FMA extension
|
|
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmadd213sd(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmadd213sd(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmadd231sd(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovsd(i.dest, src1);
|
|
e.vfmadd213sd(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovsd(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulsd(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vaddsd(i.dest, i.dest, src3); // $0 = $1 + $2
|
|
}
|
|
}
|
|
};
|
|
struct MUL_ADD_V128
|
|
: Sequence<MUL_ADD_V128,
|
|
I<OPCODE_MUL_ADD, V128Op, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): the vfmadd sequence produces slightly different results
|
|
// than vmul+vadd and it'd be nice to know why. Until we know, it's
|
|
// disabled so tests pass.
|
|
if (false && e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmadd213ps(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmadd213ps(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmadd231ps(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovdqa(i.dest, src1);
|
|
e.vfmadd213ps(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovdqa(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulps(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vaddps(i.dest, i.dest, src3); // $0 = $1 + $2
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MUL_ADD, MUL_ADD_F32, MUL_ADD_F64, MUL_ADD_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_MUL_SUB
|
|
// ============================================================================
|
|
// d = src1 * src2 - src3
// $0 = $1 * $2 - $3
|
|
// TODO(benvanik): use other forms (132/213/etc) to avoid register shuffling.
|
|
// dest may alias src2 or src3 - check for that before overwriting dest.
|
|
// perhaps use other 132/213/etc
|
|
// Forms:
|
|
// - 132 -> $1 = $1 * $3 - $2
|
|
// - 213 -> $1 = $2 * $1 - $3
|
|
// - 231 -> $1 = $2 * $3 - $1
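// Note that the non-FMA fallback below (vmul followed by vsub) rounds the
// intermediate product, so its results can differ slightly from the fused
// vfmsub path.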
struct MUL_SUB_F32
|
|
: Sequence<MUL_SUB_F32, I<OPCODE_MUL_SUB, F32Op, F32Op, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// FMA extension
|
|
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmsub213ss(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmsub213ss(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmsub231ss(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovss(i.dest, src1);
|
|
e.vfmsub213ss(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovss(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulss(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vsubss(i.dest, i.dest, src3); // $0 = $1 - $2
|
|
}
|
|
}
|
|
};
|
|
struct MUL_SUB_F64
|
|
: Sequence<MUL_SUB_F64, I<OPCODE_MUL_SUB, F64Op, F64Op, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// FMA extension
|
|
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmsub213sd(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmsub213sd(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmsub231sd(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovsd(i.dest, src1);
|
|
e.vfmsub213sd(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovsd(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulsd(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vsubsd(i.dest, i.dest, src3); // $0 = $1 - $2
|
|
}
|
|
}
|
|
};
|
|
struct MUL_SUB_V128
|
|
: Sequence<MUL_SUB_V128,
|
|
I<OPCODE_MUL_SUB, V128Op, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// FMA extension
|
|
if (e.IsFeatureEnabled(kX64EmitFMA)) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[&i](X64Emitter& e, const Xmm& dest,
|
|
const Xmm& src1, const Xmm& src2) {
|
|
Xmm src3 =
|
|
i.src3.is_constant ? e.xmm1 : i.src3;
|
|
if (i.src3.is_constant) {
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
}
|
|
if (i.dest == src1) {
|
|
e.vfmsub213ps(i.dest, src2, src3);
|
|
} else if (i.dest == src2) {
|
|
e.vfmsub213ps(i.dest, src1, src3);
|
|
} else if (i.dest == i.src3) {
|
|
e.vfmsub231ps(i.dest, src1, src2);
|
|
} else {
|
|
// Dest not equal to anything
|
|
e.vmovdqa(i.dest, src1);
|
|
e.vfmsub213ps(i.dest, src2, src3);
|
|
}
|
|
});
|
|
} else {
|
|
Xmm src3;
|
|
if (i.src3.is_constant) {
|
|
src3 = e.xmm1;
|
|
e.LoadConstantXmm(src3, i.src3.constant());
|
|
} else {
|
|
// If i.dest == i.src3, back up i.src3 so we don't overwrite it.
|
|
src3 = i.src3;
|
|
if (i.dest == i.src3) {
|
|
e.vmovdqa(e.xmm1, i.src3);
|
|
src3 = e.xmm1;
|
|
}
|
|
}
|
|
|
|
// Multiply operation is commutative.
|
|
EmitCommutativeBinaryXmmOp(
|
|
e, i, [&i](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vmulps(dest, src1, src2); // $0 = $1 * $2
|
|
});
|
|
|
|
e.vsubps(i.dest, i.dest, src3); // $0 = $1 - $2
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_MUL_SUB, MUL_SUB_F32, MUL_SUB_F64, MUL_SUB_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_NEG
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1 together.
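// Integer negate maps directly onto the x86 neg instruction. The float and
// vector forms instead XOR the value with a sign-bit mask (XMMSignMaskPS/PD),
// which flips only the sign bit and leaves NaN payloads and infinities alone.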
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitNegXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitUnaryOp(e, i,
|
|
[](X64Emitter& e, const REG& dest_src) { e.neg(dest_src); });
|
|
}
|
|
struct NEG_I8 : Sequence<NEG_I8, I<OPCODE_NEG, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNegXX<NEG_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct NEG_I16 : Sequence<NEG_I16, I<OPCODE_NEG, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNegXX<NEG_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct NEG_I32 : Sequence<NEG_I32, I<OPCODE_NEG, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNegXX<NEG_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct NEG_I64 : Sequence<NEG_I64, I<OPCODE_NEG, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNegXX<NEG_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct NEG_F32 : Sequence<NEG_F32, I<OPCODE_NEG, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
|
}
|
|
};
|
|
struct NEG_F64 : Sequence<NEG_F64, I<OPCODE_NEG, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vxorpd(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPD));
|
|
}
|
|
};
|
|
struct NEG_V128 : Sequence<NEG_V128, I<OPCODE_NEG, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_true(!i.instr->flags);
|
|
e.vxorps(i.dest, i.src1, e.GetXmmConstPtr(XMMSignMaskPS));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_NEG, NEG_I8, NEG_I16, NEG_I32, NEG_I64, NEG_F32,
|
|
NEG_F64, NEG_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ABS
|
|
// ============================================================================
|
|
struct ABS_F32 : Sequence<ABS_F32, I<OPCODE_ABS, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
|
|
}
|
|
};
|
|
struct ABS_F64 : Sequence<ABS_F64, I<OPCODE_ABS, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPD));
|
|
}
|
|
};
|
|
struct ABS_V128 : Sequence<ABS_V128, I<OPCODE_ABS, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vpand(i.dest, i.src1, e.GetXmmConstPtr(XMMAbsMaskPS));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ABS, ABS_F32, ABS_F64, ABS_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SQRT
|
|
// ============================================================================
|
|
struct SQRT_F32 : Sequence<SQRT_F32, I<OPCODE_SQRT, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vsqrtss(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SQRT_F64 : Sequence<SQRT_F64, I<OPCODE_SQRT, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vsqrtsd(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct SQRT_V128 : Sequence<SQRT_V128, I<OPCODE_SQRT, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vsqrtps(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SQRT, SQRT_F32, SQRT_F64, SQRT_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_RSQRT
|
|
// ============================================================================
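// vrsqrtss/vrsqrtps are fast approximations (relative error on the order of
// 2^-12), not exact reciprocal square roots. The F64 form below narrows to
// single precision, approximates, then widens back to double.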
struct RSQRT_F32 : Sequence<RSQRT_F32, I<OPCODE_RSQRT, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vrsqrtss(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct RSQRT_F64 : Sequence<RSQRT_F64, I<OPCODE_RSQRT, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vcvtsd2ss(i.dest, i.src1);
|
|
e.vrsqrtss(i.dest, i.dest);
|
|
e.vcvtss2sd(i.dest, i.dest);
|
|
}
|
|
};
|
|
struct RSQRT_V128 : Sequence<RSQRT_V128, I<OPCODE_RSQRT, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vrsqrtps(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_RSQRT, RSQRT_F32, RSQRT_F64, RSQRT_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_RECIP
|
|
// ============================================================================
|
|
struct RECIP_F32 : Sequence<RECIP_F32, I<OPCODE_RECIP, F32Op, F32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vrcpss(i.dest, i.src1);
|
|
}
|
|
};
|
|
struct RECIP_F64 : Sequence<RECIP_F64, I<OPCODE_RECIP, F64Op, F64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vcvtsd2ss(i.dest, i.src1);
|
|
e.vrcpss(i.dest, i.dest);
|
|
e.vcvtss2sd(i.dest, i.dest);
|
|
}
|
|
};
|
|
struct RECIP_V128 : Sequence<RECIP_V128, I<OPCODE_RECIP, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.vrcpps(i.dest, i.src1);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_RECIP, RECIP_F32, RECIP_F64, RECIP_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_POW2
|
|
// ============================================================================
|
|
// TODO(benvanik): use approx here:
|
|
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
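// For now exp2 is emulated on the host: the operand is stashed to memory,
// a native helper computes std::exp2 per element, and the result is copied
// back from xmm0. The scalar F32/F64 forms assert_always(), so only the
// vector form is expected to be reached in practice.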
struct POW2_F32 : Sequence<POW2_F32, I<OPCODE_POW2, F32Op, F32Op>> {
|
|
static __m128 EmulatePow2(void*, __m128 src) {
|
|
float src_value;
|
|
_mm_store_ss(&src_value, src);
|
|
float result = std::exp2(src_value);
|
|
return _mm_load_ss(&result);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_always();
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
struct POW2_F64 : Sequence<POW2_F64, I<OPCODE_POW2, F64Op, F64Op>> {
|
|
static __m128d EmulatePow2(void*, __m128d src) {
|
|
double src_value;
|
|
_mm_store_sd(&src_value, src);
|
|
double result = std::exp2(src_value);
|
|
return _mm_load_sd(&result);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_always();
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
struct POW2_V128 : Sequence<POW2_V128, I<OPCODE_POW2, V128Op, V128Op>> {
|
|
static __m128 EmulatePow2(void*, __m128 src) {
|
|
alignas(16) float values[4];
|
|
_mm_store_ps(values, src);
|
|
for (size_t i = 0; i < 4; ++i) {
|
|
values[i] = std::exp2(values[i]);
|
|
}
|
|
return _mm_load_ps(values);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulatePow2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_POW2, POW2_F32, POW2_F64, POW2_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_LOG2
|
|
// ============================================================================
|
|
// TODO(benvanik): use approx here:
|
|
// https://jrfonseca.blogspot.com/2008/09/fast-sse2-pow-tables-or-polynomials.html
|
|
// TODO(benvanik): this emulated fn destroys all xmm registers! don't do it!
|
|
struct LOG2_F32 : Sequence<LOG2_F32, I<OPCODE_LOG2, F32Op, F32Op>> {
|
|
static __m128 EmulateLog2(void*, __m128 src) {
|
|
float src_value;
|
|
_mm_store_ss(&src_value, src);
|
|
float result = std::log2(src_value);
|
|
return _mm_load_ss(&result);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_always();
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
struct LOG2_F64 : Sequence<LOG2_F64, I<OPCODE_LOG2, F64Op, F64Op>> {
|
|
static __m128d EmulateLog2(void*, __m128d src) {
|
|
double src_value;
|
|
_mm_store_sd(&src_value, src);
|
|
double result = std::log2(src_value);
|
|
return _mm_load_sd(&result);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
assert_always();
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
struct LOG2_V128 : Sequence<LOG2_V128, I<OPCODE_LOG2, V128Op, V128Op>> {
|
|
static __m128 EmulateLog2(void*, __m128 src) {
|
|
alignas(16) float values[4];
|
|
_mm_store_ps(values, src);
|
|
for (size_t i = 0; i < 4; ++i) {
|
|
values[i] = std::log2(values[i]);
|
|
}
|
|
return _mm_load_ps(values);
|
|
}
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateLog2));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_LOG2, LOG2_F32, LOG2_F64, LOG2_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_DOT_PRODUCT_3
|
|
// ============================================================================
|
|
struct DOT_PRODUCT_3_V128
|
|
: Sequence<DOT_PRODUCT_3_V128,
|
|
I<OPCODE_DOT_PRODUCT_3, F32Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// https://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
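    // vdpps immediate 0b01110001: the high nibble (0111) selects x, y and z
    // as the lanes that are multiplied and summed, and the low nibble (0001)
    // broadcasts the sum into lane 0 only (the remaining lanes are zeroed).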
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
// TODO(benvanik): apparently this is very slow
|
|
// - find alternative?
|
|
e.vdpps(dest, src1, src2, 0b01110001);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_3, DOT_PRODUCT_3_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_DOT_PRODUCT_4
|
|
// ============================================================================
|
|
struct DOT_PRODUCT_4_V128
|
|
: Sequence<DOT_PRODUCT_4_V128,
|
|
I<OPCODE_DOT_PRODUCT_4, F32Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// https://msdn.microsoft.com/en-us/library/bb514054(v=vs.90).aspx
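    // Same idea as DOT_PRODUCT_3 above, but immediate 0b11110001 includes all
    // four lanes (x, y, z, w) in the product sum; the result again lands in
    // lane 0.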
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
// TODO(benvanik): apparently this is very slow
|
|
// - find alternative?
|
|
e.vdpps(dest, src1, src2, 0b11110001);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_DOT_PRODUCT_4, DOT_PRODUCT_4_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_AND
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitAndXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.and_(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.and_(dest_src, constant);
|
|
});
|
|
}
|
|
struct AND_I8 : Sequence<AND_I8, I<OPCODE_AND, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAndXX<AND_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct AND_I16 : Sequence<AND_I16, I<OPCODE_AND, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAndXX<AND_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct AND_I32 : Sequence<AND_I32, I<OPCODE_AND, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAndXX<AND_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct AND_I64 : Sequence<AND_I64, I<OPCODE_AND, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitAndXX<AND_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct AND_V128 : Sequence<AND_V128, I<OPCODE_AND, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vpand(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_AND, AND_I8, AND_I16, AND_I32, AND_I64, AND_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_OR
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitOrXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.or_(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.or_(dest_src, constant);
|
|
});
|
|
}
|
|
struct OR_I8 : Sequence<OR_I8, I<OPCODE_OR, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitOrXX<OR_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct OR_I16 : Sequence<OR_I16, I<OPCODE_OR, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitOrXX<OR_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct OR_I32 : Sequence<OR_I32, I<OPCODE_OR, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitOrXX<OR_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct OR_I64 : Sequence<OR_I64, I<OPCODE_OR, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitOrXX<OR_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct OR_V128 : Sequence<OR_V128, I<OPCODE_OR, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vpor(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_OR, OR_I8, OR_I16, OR_I32, OR_I64, OR_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_XOR
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1|2 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitXorXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitCommutativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const REG& src) {
|
|
e.xor_(dest_src, src);
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int32_t constant) {
|
|
e.xor_(dest_src, constant);
|
|
});
|
|
}
|
|
struct XOR_I8 : Sequence<XOR_I8, I<OPCODE_XOR, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitXorXX<XOR_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct XOR_I16 : Sequence<XOR_I16, I<OPCODE_XOR, I16Op, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitXorXX<XOR_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct XOR_I32 : Sequence<XOR_I32, I<OPCODE_XOR, I32Op, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitXorXX<XOR_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct XOR_I64 : Sequence<XOR_I64, I<OPCODE_XOR, I64Op, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitXorXX<XOR_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct XOR_V128 : Sequence<XOR_V128, I<OPCODE_XOR, V128Op, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitCommutativeBinaryXmmOp(e, i,
|
|
[](X64Emitter& e, Xmm dest, Xmm src1, Xmm src2) {
|
|
e.vpxor(dest, src1, src2);
|
|
});
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_XOR, XOR_I8, XOR_I16, XOR_I32, XOR_I64, XOR_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_NOT
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1 together.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitNotXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitUnaryOp(
|
|
e, i, [](X64Emitter& e, const REG& dest_src) { e.not_(dest_src); });
|
|
}
|
|
struct NOT_I8 : Sequence<NOT_I8, I<OPCODE_NOT, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNotXX<NOT_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct NOT_I16 : Sequence<NOT_I16, I<OPCODE_NOT, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNotXX<NOT_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct NOT_I32 : Sequence<NOT_I32, I<OPCODE_NOT, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNotXX<NOT_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct NOT_I64 : Sequence<NOT_I64, I<OPCODE_NOT, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitNotXX<NOT_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct NOT_V128 : Sequence<NOT_V128, I<OPCODE_NOT, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// dest = src ^ 0xFFFF...
|
|
e.vpxor(i.dest, i.src1, e.GetXmmConstPtr(XMMFFFF /* FF... */));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_NOT, NOT_I8, NOT_I16, NOT_I32, NOT_I64, NOT_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SHL
|
|
// ============================================================================
|
|
// TODO(benvanik): optimize common shifts.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitShlXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitAssociativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const Reg8& src) {
|
|
// shlx: $1 = $2 << $3
|
|
// shl: $1 = $1 << $2
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
if (dest_src.getBit() == 64) {
|
|
e.shlx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64());
|
|
} else {
|
|
e.shlx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32());
|
|
}
|
|
} else {
|
|
e.mov(e.cl, src);
|
|
e.shl(dest_src, e.cl);
|
|
}
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
|
e.shl(dest_src, constant);
|
|
});
|
|
}
|
|
struct SHL_I8 : Sequence<SHL_I8, I<OPCODE_SHL, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShlXX<SHL_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct SHL_I16 : Sequence<SHL_I16, I<OPCODE_SHL, I16Op, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShlXX<SHL_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct SHL_I32 : Sequence<SHL_I32, I<OPCODE_SHL, I32Op, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShlXX<SHL_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct SHL_I64 : Sequence<SHL_I64, I<OPCODE_SHL, I64Op, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShlXX<SHL_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct SHL_V128 : Sequence<SHL_V128, I<OPCODE_SHL, V128Op, V128Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): native version (with shift magic).
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.GetNativeParam(1), i.src2.constant());
|
|
} else {
|
|
e.mov(e.GetNativeParam(1), i.src2);
|
|
}
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShlV128));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
static __m128i EmulateShlV128(void*, __m128i src1, uint8_t src2) {
|
|
// Almost all instances are shamt = 1, but non-constant.
|
|
// shamt is [0,7]
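    // Note: the (i ^ 0x3) indexing appears to compensate for the byte order
    // in which each 32-bit word of vec128_t is stored, so the loop effectively
    // walks the vector in guest byte order while carrying shifted-out bits
    // into the next guest byte.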
uint8_t shamt = src2 & 0x7;
|
|
alignas(16) vec128_t value;
|
|
_mm_store_si128(reinterpret_cast<__m128i*>(&value), src1);
|
|
for (int i = 0; i < 15; ++i) {
|
|
value.u8[i ^ 0x3] = (value.u8[i ^ 0x3] << shamt) |
|
|
(value.u8[(i + 1) ^ 0x3] >> (8 - shamt));
|
|
}
|
|
value.u8[15 ^ 0x3] = value.u8[15 ^ 0x3] << shamt;
|
|
return _mm_load_si128(reinterpret_cast<__m128i*>(&value));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SHL, SHL_I8, SHL_I16, SHL_I32, SHL_I64, SHL_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SHR
|
|
// ============================================================================
|
|
// TODO(benvanik): optimize common shifts.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitShrXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitAssociativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const Reg8& src) {
|
|
// shrx: op1 dest, op2 src, op3 count
|
|
// shr: op1 src/dest, op2 count
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
if (dest_src.getBit() == 64) {
|
|
e.shrx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64());
|
|
} else if (dest_src.getBit() == 32) {
|
|
e.shrx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32());
|
|
} else {
|
|
e.movzx(dest_src.cvt32(), dest_src);
|
|
e.shrx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32());
|
|
}
|
|
} else {
|
|
e.mov(e.cl, src);
|
|
e.shr(dest_src, e.cl);
|
|
}
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
|
e.shr(dest_src, constant);
|
|
});
|
|
}
|
|
struct SHR_I8 : Sequence<SHR_I8, I<OPCODE_SHR, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShrXX<SHR_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct SHR_I16 : Sequence<SHR_I16, I<OPCODE_SHR, I16Op, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShrXX<SHR_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct SHR_I32 : Sequence<SHR_I32, I<OPCODE_SHR, I32Op, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShrXX<SHR_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct SHR_I64 : Sequence<SHR_I64, I<OPCODE_SHR, I64Op, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitShrXX<SHR_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
struct SHR_V128 : Sequence<SHR_V128, I<OPCODE_SHR, V128Op, V128Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): native version (with shift magic).
|
|
if (i.src2.is_constant) {
|
|
e.mov(e.GetNativeParam(1), i.src2.constant());
|
|
} else {
|
|
e.mov(e.GetNativeParam(1), i.src2);
|
|
}
|
|
e.lea(e.GetNativeParam(0), e.StashXmm(0, i.src1));
|
|
e.CallNativeSafe(reinterpret_cast<void*>(EmulateShrV128));
|
|
e.vmovaps(i.dest, e.xmm0);
|
|
}
|
|
static __m128i EmulateShrV128(void*, __m128i src1, uint8_t src2) {
|
|
// Almost all instances are shamt = 1, but non-constant.
|
|
// shamt is [0,7]
|
|
uint8_t shamt = src2 & 0x7;
|
|
alignas(16) vec128_t value;
|
|
_mm_store_si128(reinterpret_cast<__m128i*>(&value), src1);
|
|
for (int i = 15; i > 0; --i) {
|
|
value.u8[i ^ 0x3] = (value.u8[i ^ 0x3] >> shamt) |
|
|
(value.u8[(i - 1) ^ 0x3] << (8 - shamt));
|
|
}
|
|
value.u8[0 ^ 0x3] = value.u8[0 ^ 0x3] >> shamt;
|
|
return _mm_load_si128(reinterpret_cast<__m128i*>(&value));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SHR, SHR_I8, SHR_I16, SHR_I32, SHR_I64, SHR_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_SHA
|
|
// ============================================================================
|
|
// TODO(benvanik): optimize common shifts.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitSarXX(X64Emitter& e, const ARGS& i) {
|
|
SEQ::EmitAssociativeBinaryOp(
|
|
e, i,
|
|
[](X64Emitter& e, const REG& dest_src, const Reg8& src) {
|
|
if (e.IsFeatureEnabled(kX64EmitBMI2)) {
|
|
if (dest_src.getBit() == 64) {
|
|
e.sarx(dest_src.cvt64(), dest_src.cvt64(), src.cvt64());
|
|
} else if (dest_src.getBit() == 32) {
|
|
e.sarx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32());
|
|
} else {
|
|
e.movsx(dest_src.cvt32(), dest_src);
|
|
e.sarx(dest_src.cvt32(), dest_src.cvt32(), src.cvt32());
|
|
}
|
|
} else {
|
|
e.mov(e.cl, src);
|
|
e.sar(dest_src, e.cl);
|
|
}
|
|
},
|
|
[](X64Emitter& e, const REG& dest_src, int8_t constant) {
|
|
e.sar(dest_src, constant);
|
|
});
|
|
}
|
|
struct SHA_I8 : Sequence<SHA_I8, I<OPCODE_SHA, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSarXX<SHA_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct SHA_I16 : Sequence<SHA_I16, I<OPCODE_SHA, I16Op, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSarXX<SHA_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct SHA_I32 : Sequence<SHA_I32, I<OPCODE_SHA, I32Op, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSarXX<SHA_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct SHA_I64 : Sequence<SHA_I64, I<OPCODE_SHA, I64Op, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitSarXX<SHA_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_SHA, SHA_I8, SHA_I16, SHA_I32, SHA_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_ROTATE_LEFT
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1 together, src2 in cl.
|
|
template <typename SEQ, typename REG, typename ARGS>
|
|
void EmitRotateLeftXX(X64Emitter& e, const ARGS& i) {
|
|
if (i.src2.is_constant) {
|
|
// Constant rotate.
|
|
if (i.dest != i.src1) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(i.dest, i.src1.constant());
|
|
} else {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
}
|
|
e.rol(i.dest, i.src2.constant());
|
|
} else {
|
|
// Variable rotate.
|
|
if (i.src2.reg().getIdx() != e.cl.getIdx()) {
|
|
e.mov(e.cl, i.src2);
|
|
}
|
|
if (i.dest != i.src1) {
|
|
if (i.src1.is_constant) {
|
|
e.mov(i.dest, i.src1.constant());
|
|
} else {
|
|
e.mov(i.dest, i.src1);
|
|
}
|
|
}
|
|
e.rol(i.dest, e.cl);
|
|
}
|
|
}
|
|
struct ROTATE_LEFT_I8
|
|
: Sequence<ROTATE_LEFT_I8, I<OPCODE_ROTATE_LEFT, I8Op, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitRotateLeftXX<ROTATE_LEFT_I8, Reg8>(e, i);
|
|
}
|
|
};
|
|
struct ROTATE_LEFT_I16
|
|
: Sequence<ROTATE_LEFT_I16, I<OPCODE_ROTATE_LEFT, I16Op, I16Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitRotateLeftXX<ROTATE_LEFT_I16, Reg16>(e, i);
|
|
}
|
|
};
|
|
struct ROTATE_LEFT_I32
|
|
: Sequence<ROTATE_LEFT_I32, I<OPCODE_ROTATE_LEFT, I32Op, I32Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitRotateLeftXX<ROTATE_LEFT_I32, Reg32>(e, i);
|
|
}
|
|
};
|
|
struct ROTATE_LEFT_I64
|
|
: Sequence<ROTATE_LEFT_I64, I<OPCODE_ROTATE_LEFT, I64Op, I64Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitRotateLeftXX<ROTATE_LEFT_I64, Reg64>(e, i);
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_ROTATE_LEFT, ROTATE_LEFT_I8, ROTATE_LEFT_I16,
|
|
ROTATE_LEFT_I32, ROTATE_LEFT_I64);
|
|
|
|
// ============================================================================
|
|
// OPCODE_BYTE_SWAP
|
|
// ============================================================================
|
|
// TODO(benvanik): put dest/src1 together.
|
|
struct BYTE_SWAP_I16
|
|
: Sequence<BYTE_SWAP_I16, I<OPCODE_BYTE_SWAP, I16Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitUnaryOp(
|
|
e, i, [](X64Emitter& e, const Reg16& dest_src) { e.ror(dest_src, 8); });
|
|
}
|
|
};
|
|
struct BYTE_SWAP_I32
|
|
: Sequence<BYTE_SWAP_I32, I<OPCODE_BYTE_SWAP, I32Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitUnaryOp(
|
|
e, i, [](X64Emitter& e, const Reg32& dest_src) { e.bswap(dest_src); });
|
|
}
|
|
};
|
|
struct BYTE_SWAP_I64
|
|
: Sequence<BYTE_SWAP_I64, I<OPCODE_BYTE_SWAP, I64Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
EmitUnaryOp(
|
|
e, i, [](X64Emitter& e, const Reg64& dest_src) { e.bswap(dest_src); });
|
|
}
|
|
};
|
|
struct BYTE_SWAP_V128
|
|
: Sequence<BYTE_SWAP_V128, I<OPCODE_BYTE_SWAP, V128Op, V128Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
// TODO(benvanik): find a way to do this without the memory load.
|
|
e.vpshufb(i.dest, i.src1, e.GetXmmConstPtr(XMMByteSwapMask));
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_BYTE_SWAP, BYTE_SWAP_I16, BYTE_SWAP_I32,
|
|
BYTE_SWAP_I64, BYTE_SWAP_V128);
|
|
|
|
// ============================================================================
|
|
// OPCODE_CNTLZ
|
|
// Count leading zeroes
|
|
// ============================================================================
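// Where LZCNT is unavailable, the fallback uses bsr, which returns the index
// of the highest set bit; XORing that index with (bit width - 1) converts it
// to a leading-zero count. bsr leaves ZF set when the source is zero, so the
// destination is preloaded with the full bit width and the conversion is
// skipped in that case.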
struct CNTLZ_I8 : Sequence<CNTLZ_I8, I<OPCODE_CNTLZ, I8Op, I8Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitLZCNT)) {
|
|
// No 8bit lzcnt, so do 16 and sub 8.
|
|
e.movzx(i.dest.reg().cvt16(), i.src1);
|
|
e.lzcnt(i.dest.reg().cvt16(), i.dest.reg().cvt16());
|
|
e.sub(i.dest, 8);
|
|
} else {
|
|
Xbyak::Label end;
|
|
e.inLocalLabel();
|
|
|
|
e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0
|
|
e.mov(i.dest, 0x8);
|
|
e.jz(end);
|
|
|
|
e.xor_(e.rax, 0x7);
|
|
e.mov(i.dest, e.rax);
|
|
|
|
e.L(end);
|
|
e.outLocalLabel();
|
|
}
|
|
}
|
|
};
|
|
struct CNTLZ_I16 : Sequence<CNTLZ_I16, I<OPCODE_CNTLZ, I8Op, I16Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitLZCNT)) {
|
|
// LZCNT: searches $2 until MSB 1 found, stores idx (from last bit) in $1
|
|
e.lzcnt(i.dest.reg().cvt32(), i.src1);
|
|
} else {
|
|
Xbyak::Label end;
|
|
e.inLocalLabel();
|
|
|
|
e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0
|
|
e.mov(i.dest, 0x10);
|
|
e.jz(end);
|
|
|
|
e.xor_(e.rax, 0x0F);
|
|
e.mov(i.dest, e.rax);
|
|
|
|
e.L(end);
|
|
e.outLocalLabel();
|
|
}
|
|
}
|
|
};
|
|
struct CNTLZ_I32 : Sequence<CNTLZ_I32, I<OPCODE_CNTLZ, I8Op, I32Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitLZCNT)) {
|
|
e.lzcnt(i.dest.reg().cvt32(), i.src1);
|
|
} else {
|
|
Xbyak::Label end;
|
|
e.inLocalLabel();
|
|
|
|
e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0
|
|
e.mov(i.dest, 0x20);
|
|
e.jz(end);
|
|
|
|
e.xor_(e.rax, 0x1F);
|
|
e.mov(i.dest, e.rax);
|
|
|
|
e.L(end);
|
|
e.outLocalLabel();
|
|
}
|
|
}
|
|
};
|
|
struct CNTLZ_I64 : Sequence<CNTLZ_I64, I<OPCODE_CNTLZ, I8Op, I64Op>> {
|
|
static void Emit(X64Emitter& e, const EmitArgType& i) {
|
|
if (e.IsFeatureEnabled(kX64EmitLZCNT)) {
|
|
e.lzcnt(i.dest.reg().cvt64(), i.src1);
|
|
} else {
|
|
Xbyak::Label end;
|
|
e.inLocalLabel();
|
|
|
|
e.bsr(e.rax, i.src1); // ZF set if i.src1 is 0
|
|
e.mov(i.dest, 0x40);
|
|
e.jz(end);
|
|
|
|
e.xor_(e.rax, 0x3F);
|
|
e.mov(i.dest, e.rax);
|
|
|
|
e.L(end);
|
|
e.outLocalLabel();
|
|
}
|
|
}
|
|
};
|
|
EMITTER_OPCODE_TABLE(OPCODE_CNTLZ, CNTLZ_I8, CNTLZ_I16, CNTLZ_I32, CNTLZ_I64);
|
|
|
|
// ============================================================================
// OPCODE_SET_ROUNDING_MODE
// ============================================================================
// Input: FPSCR (PPC format)
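// Each entry is an MXCSR image with all exceptions masked. The low three bits
// of the FPSCR select the entry: bits 0-1 (RN) map the PPC rounding modes
// {nearest, toward zero, +infinity, -infinity} onto the MXCSR RC field, and
// bit 2 (the NI / non-IEEE bit) selects the variants with flush-to-zero set.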
static const uint32_t mxcsr_table[] = {
    0x1F80, 0x7F80, 0x5F80, 0x3F80, 0x9F80, 0xFF80, 0xDF80, 0xBF80,
};
struct SET_ROUNDING_MODE_I32
    : Sequence<SET_ROUNDING_MODE_I32,
               I<OPCODE_SET_ROUNDING_MODE, VoidOp, I32Op>> {
  static void Emit(X64Emitter& e, const EmitArgType& i) {
    e.mov(e.rcx, i.src1);
    e.and_(e.rcx, 0x7);
    e.mov(e.rax, uintptr_t(mxcsr_table));
    e.vldmxcsr(e.ptr[e.rax + e.rcx * 4]);
  }
};
EMITTER_OPCODE_TABLE(OPCODE_SET_ROUNDING_MODE, SET_ROUNDING_MODE_I32);

// Include anchors to other sequence sources so they get included in the build.
extern volatile int anchor_control;
static int anchor_control_dest = anchor_control;

extern volatile int anchor_memory;
static int anchor_memory_dest = anchor_memory;

extern volatile int anchor_vector;
static int anchor_vector_dest = anchor_vector;

bool SelectSequence(X64Emitter* e, const Instr* i, const Instr** new_tail) {
  const InstrKey key(i);
  auto it = sequence_table.find(key);
  if (it != sequence_table.end()) {
    if (it->second(*e, i)) {
      *new_tail = i->next;
      return true;
    }
  }
  XELOGE("No sequence match for variant %s", i->opcode->name);
  return false;
}

}  // namespace x64
}  // namespace backend
}  // namespace cpu
}  // namespace xe