rpcsx/rpcs3/Emu/RSX/Program/GLSLInterpreter/VertexInterpreter.glsl
2022-05-31 23:34:14 +03:00

606 lines
16 KiB
GLSL

R"(
// Program outputs
// Sixteen vec4 output registers written by the interpreted program. main() resets every
// entry to (0, 0, 0, 1) before executing instructions and derives gl_Position from dest[0].
layout(location=0) out vec4 dest[16];
// Scalar opcodes. main() compares these against the 5-bit D1.sca_opcode field.
// In main(), LIT, PSH and POP have no active case (they are commented out) and CLI is a no-op.
#define RSX_SCA_OPCODE_NOP 0x00 // No-Operation
#define RSX_SCA_OPCODE_MOV 0x01 // Move (copy)
#define RSX_SCA_OPCODE_RCP 0x02 // Reciprocal
#define RSX_SCA_OPCODE_RCC 0x03 // Reciprocal clamped
#define RSX_SCA_OPCODE_RSQ 0x04 // Reciprocal square root
#define RSX_SCA_OPCODE_EXP 0x05 // Exponential base 2 (low-precision)
#define RSX_SCA_OPCODE_LOG 0x06 // Logarithm base 2 (low-precision)
#define RSX_SCA_OPCODE_LIT 0x07 // Lighting calculation
#define RSX_SCA_OPCODE_BRA 0x08 // Branch
#define RSX_SCA_OPCODE_BRI 0x09 // Branch by CC register
#define RSX_SCA_OPCODE_CAL 0x0a // Subroutine call
#define RSX_SCA_OPCODE_CLI 0x0b // Subroutine call by CC register
#define RSX_SCA_OPCODE_RET 0x0c // Return from subroutine
#define RSX_SCA_OPCODE_LG2 0x0d // Logarithm base 2
#define RSX_SCA_OPCODE_EX2 0x0e // Exponential base 2
#define RSX_SCA_OPCODE_SIN 0x0f // Sine function
#define RSX_SCA_OPCODE_COS 0x10 // Cosine function
#define RSX_SCA_OPCODE_BRB 0x11 // Branch by Boolean constant
#define RSX_SCA_OPCODE_CLB 0x12 // Subroutine call by Boolean constant
#define RSX_SCA_OPCODE_PSH 0x13 // Push onto stack
#define RSX_SCA_OPCODE_POP 0x14 // Pop from stack
// Vector opcodes. main() compares these against the 5-bit D1.vec_opcode field.
// TXL has no case in main()'s vector switch, so it currently behaves like MOV there.
#define RSX_VEC_OPCODE_NOP 0x00 // No-Operation
#define RSX_VEC_OPCODE_MOV 0x01 // Move
#define RSX_VEC_OPCODE_MUL 0x02 // Multiply
#define RSX_VEC_OPCODE_ADD 0x03 // Addition
#define RSX_VEC_OPCODE_MAD 0x04 // Multiply-Add
#define RSX_VEC_OPCODE_DP3 0x05 // 3-component Dot Product
#define RSX_VEC_OPCODE_DPH 0x06 // Homogeneous Dot Product
#define RSX_VEC_OPCODE_DP4 0x07 // 4-component Dot Product
#define RSX_VEC_OPCODE_DST 0x08 // Calculate distance vector
#define RSX_VEC_OPCODE_MIN 0x09 // Minimum
#define RSX_VEC_OPCODE_MAX 0x0a // Maximum
#define RSX_VEC_OPCODE_SLT 0x0b // Set-If-LessThan
#define RSX_VEC_OPCODE_SGE 0x0c // Set-If-GreaterEqual
#define RSX_VEC_OPCODE_ARL 0x0d // Load to address register (round down)
#define RSX_VEC_OPCODE_FRC 0x0e // Extract fractional part (fraction)
#define RSX_VEC_OPCODE_FLR 0x0f // Round down (floor)
#define RSX_VEC_OPCODE_SEQ 0x10 // Set-If-Equal
#define RSX_VEC_OPCODE_SFL 0x11 // Set-If-False
#define RSX_VEC_OPCODE_SGT 0x12 // Set-If-GreaterThan
#define RSX_VEC_OPCODE_SLE 0x13 // Set-If-LessEqual
#define RSX_VEC_OPCODE_SNE 0x14 // Set-If-NotEqual
#define RSX_VEC_OPCODE_STR 0x15 // Set-If-True
#define RSX_VEC_OPCODE_SSG 0x16 // Convert positive values to 1 and negative values to -1
#define RSX_VEC_OPCODE_TXL 0x19 // Texture fetch
// Source register file selectors. read_src() compares these against the low two bits
// of a packed source operand.
#define RSX_VP_REGISTER_TYPE_TEMP 1
#define RSX_VP_REGISTER_TYPE_INPUT 2
#define RSX_VP_REGISTER_TYPE_CONSTANT 3
// Condition-test flags. They are OR-ed together to form the 3-bit mode consumed by test_cond().
#define EXEC_LT 1
#define EXEC_EQ 2
#define EXEC_GT 4
// GET_BITS(word, offset, count): extract `count` bits of `word` starting at bit `offset`.
#define GET_BITS bitfieldExtract
// TEST_BIT(word, bit): true when the single bit at position `bit` of `word` is set.
#define TEST_BIT(word, bit) (GET_BITS(word, bit, 1) != 0)
// reg_mov(d, s, m): masked move. Per component, d takes s where m is true and keeps its value otherwise.
#define reg_mov(d, s, m) d = mix(d, s, m)
// Decoded fields of instruction word 0 (instr.x). Filled in by unpack_D0().
struct D0
{
uint addr_swz; // bits 0-1: component of the address register used for indexed constant reads
uvec4 swizzle; // bits 2-9 (w lowest, x highest): swizzle applied to the condition register in get_cond()
uint cond; // bits 10-12: EXEC_* mode passed to test_cond()
bool cond_test_enable; // bit 13: when set, write_vec() masks its write with the condition test
bool cond_update_enable_0; // bit 14: decoded but not read by the code visible in this file section
uint dst_tmp; // bits 15-20: vector destination temp index; 0x3f means "no temp destination"
uint addr_reg_sel_1; // bit 24: selects a[0] or a[1]
uint cond_reg_sel_1; // bit 25: selects cc[0] or cc[1]
bool saturate; // bit 26: clamp results to [0, 1] before they are written
bool index_input; // bit 27: decoded but not read by the code visible in this file section
bool cond_update_enable_1; // bit 29: decoded but not read by the code visible in this file section
bool vec_result; // bit 30: steers the destination choice in write_vec() and write_sca()
};
// Decoded fields of instruction word 1 (instr.y). Filled in by unpack_D1().
struct D1
{
uint input_src; // bits 8-11: input index handed to read_location() by read_src()
uint const_src; // bits 12-21: base index into the constant array vc
uint vec_opcode; // bits 22-26: one of RSX_VEC_OPCODE_*
uint sca_opcode; // bits 27-31: one of RSX_SCA_OPCODE_*
};
// Decoded fields of instruction word 2 (instr.z). Filled in by unpack_D2().
struct D2
{
uint tex_num; // bits 8-9: decoded but not read by the code visible in this file section
};
// Decoded fields of instruction word 3 (instr.w). Filled in by unpack_D3().
struct D3
{
bool end; // bit 0: main() stops the interpreter loop after an instruction with this bit set
bool index_const; // bit 1: constant reads add an address-register component to const_src
uint dst; // bits 2-6: output register index (dest has only 16 entries)
uint sca_dst_tmp; // bits 7-12: scalar destination temp index; 0x3f means "no temp destination"
bvec4 vec_mask; // bits 13-16 (w lowest, x highest): component write mask for the vector result
bvec4 sca_mask; // bits 17-20 (w lowest, x highest): component write mask for the scalar result
};
// Description of a source operand.
// NOTE(review): no code visible in this file section instantiates this struct;
// read_src() decodes the packed operand bits inline instead.
struct SRC
{
uint reg_type;
uint tmp_src;
uvec4 swizzle;
bool neg;
bool abs;
};
// Decodes instruction word 0 into its D0 fields.
// Bits 21-23 (per-source abs flags, read directly by read_src()) and bits 28 and 31 are not stored here.
D0 unpack_D0(const in uint packed_value)
{
	D0 fields;

	// Address-register component selector and condition swizzle (stored w-first)
	fields.addr_swz = GET_BITS(packed_value, 0, 2);
	fields.swizzle = uvec4(
		GET_BITS(packed_value, 8, 2),
		GET_BITS(packed_value, 6, 2),
		GET_BITS(packed_value, 4, 2),
		GET_BITS(packed_value, 2, 2));

	// Condition test setup
	fields.cond = GET_BITS(packed_value, 10, 3);
	fields.cond_test_enable = TEST_BIT(packed_value, 13);
	fields.cond_update_enable_0 = TEST_BIT(packed_value, 14);

	// Vector destination temp
	fields.dst_tmp = GET_BITS(packed_value, 15, 6);

	// Register selectors and modifier flags
	fields.addr_reg_sel_1 = GET_BITS(packed_value, 24, 1);
	fields.cond_reg_sel_1 = GET_BITS(packed_value, 25, 1);
	fields.saturate = TEST_BIT(packed_value, 26);
	fields.index_input = TEST_BIT(packed_value, 27);
	fields.cond_update_enable_1 = TEST_BIT(packed_value, 29);
	fields.vec_result = TEST_BIT(packed_value, 30);

	return fields;
}
// Decodes instruction word 1 into its D1 fields.
// The low 8 bits (upper part of source operand 0) are read directly by read_src().
D1 unpack_D1(const in uint packed_value)
{
	D1 fields;

	// Opcodes occupy the top ten bits
	fields.sca_opcode = GET_BITS(packed_value, 27, 5);
	fields.vec_opcode = GET_BITS(packed_value, 22, 5);

	// Constant and input register indices
	fields.const_src = GET_BITS(packed_value, 12, 10);
	fields.input_src = GET_BITS(packed_value, 8, 4);

	return fields;
}
// Decodes instruction word 2 into its D2 fields (only the 2-bit tex_num at bits 8-9).
D2 unpack_D2(const in uint packed_value)
{
	D2 fields;
	fields.tex_num = GET_BITS(packed_value, 8, 2);
	return fields;
}
// Decodes instruction word 3 into its D3 fields.
// Bits 21-31 are not stored here; read_src(), branch_addr() and static_branch() read them from instr.w.
D3 unpack_D3(const in uint packed_value)
{
	D3 fields;

	// Control bits and destination indices
	fields.end = TEST_BIT(packed_value, 0);
	fields.index_const = TEST_BIT(packed_value, 1);
	fields.dst = GET_BITS(packed_value, 2, 5);
	fields.sca_dst_tmp = GET_BITS(packed_value, 7, 6);

	// Write masks are stored w-first, so x sits in the highest bit of each group
	fields.vec_mask = bvec4(
		TEST_BIT(packed_value, 16),
		TEST_BIT(packed_value, 15),
		TEST_BIT(packed_value, 14),
		TEST_BIT(packed_value, 13));
	fields.sca_mask = bvec4(
		TEST_BIT(packed_value, 20),
		TEST_BIT(packed_value, 19),
		TEST_BIT(packed_value, 18),
		TEST_BIT(packed_value, 17));

	return fields;
}
// Returns true when at least one bit selected by `mask` is set in output_mask.
bool attribute_enabled(const in uint mask)
{
	bool any_bit_set = (output_mask & mask) != 0;
	return any_bit_set;
}
// Applies a dynamic swizzle: output component i is the component of `value` selected by swz[i].
vec4 shuffle(const in vec4 value, const in uvec4 swz)
{
	return vec4(
		ref(value, swz.x),
		ref(value, swz.y),
		ref(value, swz.z),
		ref(value, swz.w));
}
// Distance-vector helper for the DST opcode: returns (1, a.y * b.y, a.z, b.w).
vec4 _distance(const in vec4 a, const in vec4 b)
{
	vec4 result;
	result.x = 1.;
	result.y = a.y * b.y;
	result.z = a.z;
	result.w = b.w;
	return result;
}
// Compares every component of `cond` against zero using the relation encoded in `mode`
// (a combination of EXEC_LT / EXEC_EQ / EXEC_GT). A mode with no recognised combination
// (i.e. 0) yields all-false; all three flags together yield all-true without looking at `cond`.
bvec4 test_cond(const in vec4 cond, const in uint mode)
{
	vec4 zero = vec4(0.);

	// Three flags set: always passes
	if (mode == (EXEC_GT | EXEC_EQ | EXEC_LT)) return bvec4(true);

	// Two flags set
	if (mode == (EXEC_GT | EXEC_EQ)) return greaterThanEqual(cond, zero);
	if (mode == (EXEC_LT | EXEC_EQ)) return lessThanEqual(cond, zero);
	if (mode == (EXEC_LT | EXEC_GT)) return notEqual(cond, zero);

	// Single flag set
	if (mode == EXEC_GT) return greaterThan(cond, zero);
	if (mode == EXEC_LT) return lessThan(cond, zero);
	if (mode == EXEC_EQ) return equal(cond, zero);

	// No flag set: never passes
	return bvec4(false);
}
// Local registers
// Instruction currently being executed: four 32-bit words fetched from vp_instructions by main().
uvec4 instr;
// Temporary register file, indexed by the temp-index fields of the instruction.
vec4 temp[32];
// Address registers: written by the ARL opcode, read for indexed constant access and BRA.
ivec4 a[2] = { ivec4(0), ivec4(0) };
// Condition-code registers: read through get_cond(), written by write_vec() / write_sca().
vec4 cc[2] = { vec4(0), vec4(0) };
// Decoded views of instr.x / instr.y / instr.z / instr.w, refreshed by main() for each instruction.
D0 d0;
D1 d1;
D2 d2;
D3 d3;
// Returns the condition register selected by the current instruction, swizzled by d0.swizzle.
vec4 get_cond()
{
	vec4 selected_cc = cc[d0.cond_reg_sel_1];
	return shuffle(selected_cc, d0.swizzle);
}
// Writes the scalar unit's result, replicated to all four components and masked by d3.sca_mask.
//
// Destination selection:
//  - d3.sca_dst_tmp != 0x3f : temp[d3.sca_dst_tmp]
//  - otherwise, d0.vec_result set   : the selected condition register
//  - otherwise, d0.vec_result clear : dest[d3.dst]
//
// d3.dst is a 5-bit field but dest has only 16 entries, so output writes with an index of
// 16 or more are dropped (the same guard write_vec() applies) rather than indexing out of bounds.
void write_sca(in float value)
{
	if (d0.saturate)
	{
		value = clamp(value, 0., 1.);
	}

	vec4 splat = vec4(value);

	if (d3.sca_dst_tmp != 0x3f)
	{
		reg_mov(temp[d3.sca_dst_tmp], splat, d3.sca_mask);
		return;
	}

	if (d0.vec_result)
	{
		reg_mov(cc[d0.cond_reg_sel_1], splat, d3.sca_mask);
	}
	else if (d3.dst < 16)
	{
		reg_mov(dest[d3.dst], splat, d3.sca_mask);
	}
}
// Writes the vector unit's result using d3.vec_mask, optionally narrowed by the condition test.
//
// Destination selection:
//  - no temp destination (0x3f) and d0.vec_result clear : the selected condition register
//  - otherwise : dest[d3.dst] when d0.vec_result is set and the index is below 16,
//                and additionally temp[d0.dst_tmp] when a temp destination is given
void write_vec(in vec4 value)
{
	if (d0.saturate)
	{
		value = clamp(value, 0., 1.);
	}

	// Start from the static write mask and drop components that fail the condition test
	bvec4 enabled = d3.vec_mask;
	if (d0.cond_test_enable)
	{
		bvec4 passed = test_cond(get_cond(), d0.cond);
		enabled = bvec4(
			enabled.x && passed.x,
			enabled.y && passed.y,
			enabled.z && passed.z,
			enabled.w && passed.w);
	}

	bool has_temp_dst = (d0.dst_tmp != 0x3f);

	if (!has_temp_dst && !d0.vec_result)
	{
		reg_mov(cc[d0.cond_reg_sel_1], value, enabled);
		return;
	}

	if (d0.vec_result && d3.dst < 16)
	{
		reg_mov(dest[d3.dst], value, enabled);
	}

	if (has_temp_dst)
	{
		reg_mov(temp[d0.dst_tmp], value, enabled);
	}
}
// Resets dest[oid] to (0, 0, 0, 1) when bit `mask_bit` of the output mask is not enabled.
// Enabled outputs keep whatever the interpreted program wrote.
void write_output(const in int oid, const in int mask_bit)
{
	if (attribute_enabled(1 << mask_bit))
	{
		return;
	}

	dest[oid] = vec4(0., 0., 0., 1.);
}
// Cannot dynamically index into the gl_ClipDistance array without causing problems due to its unknown size,
// so this is a macro that gets expanded with a literal plane index at every call site.
// Writes `value` to gl_ClipDistance[plane] when `test` holds and output-mask bit `mask_bit` is
// enabled; otherwise writes 0.5.
// The final line of the macro deliberately has no line continuation: a trailing continuation
// would splice the declaration that follows into the macro body.
#define write_clip_distance(plane, mask_bit, test, value)\
	if ((test) && attribute_enabled(1 << (mask_bit)))\
	{\
		gl_ClipDistance[plane] = (value);\
	}\
	else\
	{\
		gl_ClipDistance[plane] = 0.5f;\
	}
// Returns the address register (a[0] or a[1]) selected by the current instruction.
ivec4 read_addr_reg()
{
	uint which = d0.addr_reg_sel_1;
	return a[which];
}
// Assembles the 9-bit immediate branch target of the current instruction:
// the upper 6 bits come from instr.z[0..5], the lower 3 bits from instr.w[29..31].
int branch_addr()
{
	uint upper = GET_BITS(instr.z, 0, 6);
	uint lower = GET_BITS(instr.w, 29, 3);
	uint target = (upper << 3) + lower;
	return int(target);
}
// Evaluates a branch on a boolean constant: the bit of transform_branch_bits selected by
// instr.w[23..27] is compared with the expected state stored in instr.w bit 28.
// Returns true when the two agree.
bool static_branch()
{
	bool expected = TEST_BIT(instr.w, 28);
	uint bit_mask = (1 << GET_BITS(instr.w, 23, 5));
	bool is_set = (transform_branch_bits & bit_mask) != 0;
	return (expected == is_set);
}
// Decides whether a condition-code controlled branch/call/return is taken.
// Mode 0 never branches, all three EXEC flags always branch; any other mode branches
// when at least one component of the swizzled condition register passes the test.
bool dynamic_branch()
{
	uint mode = d0.cond;

	if (mode == 0)
	{
		return false;
	}

	if (mode == (EXEC_LT | EXEC_GT | EXEC_EQ))
	{
		return true;
	}

	return any(test_cond(get_cond(), mode));
}
// Fetches source operand `index` (0, 1 or 2) of the current instruction.
//
// The 17-bit packed operand is gathered from the instruction words, then decoded as:
//   bits 0-1   register type (RSX_VP_REGISTER_TYPE_*)
//   bits 2-7   temp register index
//   bits 8-15  swizzle, two bits per component, w lowest and x highest
//   bit  16    negate
// The per-operand abs flag lives in instr.x (bits 21, 22 and 23 for operands 0, 1 and 2).
//
// Returns the register value after swizzle, then abs, then negate.
// An unknown operand index or an unrecognised register type yields a zero-based value
// instead of reading uninitialized variables.
vec4 read_src(const in int index)
{
	uint src = 0u;
	vec4 value = vec4(0.);
	bool do_abs = false;

	switch (index)
	{
	case 0:
		// Upper 8 bits in instr.y, lower 9 bits at the top of instr.z
		src = (GET_BITS(instr.y, 0, 8) << 9) | GET_BITS(instr.z, 23, 9);
		do_abs = TEST_BIT(instr.x, 21);
		break;
	case 1:
		// Entirely contained in instr.z
		src = GET_BITS(instr.z, 6, 17);
		do_abs = TEST_BIT(instr.x, 22);
		break;
	case 2:
		// Upper 6 bits in instr.z, lower 11 bits at the top of instr.w
		src = (GET_BITS(instr.z, 0, 6) << 11) | GET_BITS(instr.w, 21, 11);
		do_abs = TEST_BIT(instr.x, 23);
		break;
	}

	uint reg_type = GET_BITS(src, 0, 2);
	uint tmp_src = GET_BITS(src, 2, 6);

	switch (reg_type)
	{
	case RSX_VP_REGISTER_TYPE_TEMP:
		value = temp[tmp_src];
		break;
	case RSX_VP_REGISTER_TYPE_INPUT:
		value = read_location(int(d1.input_src));
		break;
	case RSX_VP_REGISTER_TYPE_CONSTANT:
		if (d3.index_const)
		{
			// Relative addressing: offset the constant index by one address-register component
			value = vc[d1.const_src + ref(a[d0.addr_reg_sel_1], d0.addr_swz)];
		}
		else
		{
			value = vc[d1.const_src];
		}
		break;
	}

	// 0x1B encodes the identity swizzle (x=0, y=1, z=2, w=3); skip the shuffle in that case
	if (GET_BITS(src, 8, 8) != 0x1B)
	{
		uvec4 swz = uvec4(
			GET_BITS(src, 14, 2),
			GET_BITS(src, 12, 2),
			GET_BITS(src, 10, 2),
			GET_BITS(src, 8, 2)
		);
		value = shuffle(value, swz);
	}

	if (do_abs)
	{
		value = abs(value);
	}

	if (TEST_BIT(src, 16))
	{
		value = -value;
	}

	return value;
}
// Interpreter entry point.
//
// 1. Resets all outputs to (0, 0, 0, 1).
// 2. Fetches, decodes and executes instructions from vp_instructions until an instruction
//    with the end bit has run, the program counter reaches 512, or a RET executes with an
//    empty call stack.
// 3. Runs the output epilogue: masks disabled outputs, handles two-sided colour, point size
//    and user clip distances, and transforms dest[0] into gl_Position.
//
// Each instruction may carry a vector operation and a scalar operation; the vector one
// executes first. Flow-control opcodes live in the scalar slot and skip the scalar write.
void main()
{
	// Initialize output registers
	for (int i = 0; i < 16; ++i)
	{
		dest[i] = vec4(0., 0., 0., 1.);
	}

	// Return addresses for CAL/CLB. stack_ptr always points at the next free slot.
	// NOTE(review): nesting deeper than 8 calls is not guarded against.
	int callstack[8];
	int stack_ptr = 0;
	int current_instruction = 0;

	d3.end = false;

	while (current_instruction < 512)
	{
		// The end flag belongs to the previously executed instruction
		if (d3.end)
		{
			break;
		}

		instr = vp_instructions[current_instruction];
		current_instruction++;

		d0 = unpack_D0(instr.x);
		d1 = unpack_D1(instr.y);
		d2 = unpack_D2(instr.z);
		d3 = unpack_D3(instr.w);

		uint vec_opcode = d1.vec_opcode;
		uint sca_opcode = d1.sca_opcode;

		// A condition test that can never pass turns the whole instruction into a NOP
		if (d0.cond_test_enable && d0.cond == 0)
		{
			vec_opcode = RSX_VEC_OPCODE_NOP;
			sca_opcode = RSX_SCA_OPCODE_NOP;
		}

		if (vec_opcode == RSX_VEC_OPCODE_ARL)
		{
			// NOTE(review): d0.dst_tmp is a 6-bit field while a[] has 2 entries — confirm
			// that ARL instructions only ever encode 0 or 1 here.
			a[d0.dst_tmp] = ivec4(read_src(0));
		}
		else if (vec_opcode != RSX_VEC_OPCODE_NOP)
		{
			vec4 value = read_src(0);
			switch (vec_opcode)
			{
			case RSX_VEC_OPCODE_MOV: break;
			case RSX_VEC_OPCODE_MUL: value *= read_src(1); break;
			case RSX_VEC_OPCODE_ADD: value += read_src(2); break;
			case RSX_VEC_OPCODE_MAD: value = fma(value, read_src(1), read_src(2)); break;
			case RSX_VEC_OPCODE_DP3: value = vec4(dot(value.xyz, read_src(1).xyz)); break;
			case RSX_VEC_OPCODE_DPH: value = vec4(dot(vec4(value.xyz, 1.0), read_src(1))); break;
			case RSX_VEC_OPCODE_DP4: value = vec4(dot(value, read_src(1))); break;
			case RSX_VEC_OPCODE_DST: value = _distance(value, read_src(1)); break;
			case RSX_VEC_OPCODE_MIN: value = min(value, read_src(1)); break;
			case RSX_VEC_OPCODE_MAX: value = max(value, read_src(1)); break;
			case RSX_VEC_OPCODE_SLT: value = vec4(lessThan(value, read_src(1))); break;
			case RSX_VEC_OPCODE_SGE: value = vec4(greaterThanEqual(value, read_src(1))); break;
			case RSX_VEC_OPCODE_FRC: value = fract(value); break;
			case RSX_VEC_OPCODE_FLR: value = floor(value); break;
			case RSX_VEC_OPCODE_SEQ: value = vec4(equal(value, read_src(1))); break;
			case RSX_VEC_OPCODE_SFL: value = vec4(0); break;
			case RSX_VEC_OPCODE_SGT: value = vec4(greaterThan(value, read_src(1))); break;
			case RSX_VEC_OPCODE_SLE: value = vec4(lessThanEqual(value, read_src(1))); break;
			case RSX_VEC_OPCODE_SNE: value = vec4(notEqual(value, read_src(1))); break;
			case RSX_VEC_OPCODE_STR: value = vec4(1); break;
			case RSX_VEC_OPCODE_SSG: value = sign(value); break;
			}

			write_vec(value);
		}

		if (sca_opcode != RSX_SCA_OPCODE_NOP)
		{
			// The scalar unit always operates on the x component of source operand 2
			float value = read_src(2).x;
			switch (sca_opcode)
			{
			case RSX_SCA_OPCODE_MOV: break;
			case RSX_SCA_OPCODE_RCP: value = 1.0 / value; break;
			case RSX_SCA_OPCODE_RCC: value = clamp(1.0 / value, 5.42101e-20, 1.884467e19); break;
			case RSX_SCA_OPCODE_RSQ: value = 1.0 / sqrt(value); break;
			case RSX_SCA_OPCODE_EXP: value = exp(value); break;
			case RSX_SCA_OPCODE_LOG: value = log(value); break;
			//case RSX_SCA_OPCODE_LIT: value = lit_legacy(value); break;
			case RSX_SCA_OPCODE_LG2: value = log2(value); break;
			case RSX_SCA_OPCODE_EX2: value = exp2(value); break;
			case RSX_SCA_OPCODE_SIN: value = sin(value); break;
			case RSX_SCA_OPCODE_COS: value = cos(value); break;
			case RSX_SCA_OPCODE_BRA:
				// Jump by address register
				if (dynamic_branch()) current_instruction = int(read_addr_reg().x);
				continue;
			case RSX_SCA_OPCODE_BRI:
				// Jump immediate
				if (dynamic_branch()) current_instruction = branch_addr();
				continue;
			case RSX_SCA_OPCODE_CAL:
				// Call immediate
				if (dynamic_branch())
				{
					callstack[stack_ptr] = current_instruction;
					stack_ptr++;
					current_instruction = branch_addr();
				}
				continue;
			case RSX_SCA_OPCODE_CLI:
				// Unknown
				continue;
			case RSX_SCA_OPCODE_RET:
				// Return
				if (dynamic_branch())
				{
					if (stack_ptr == 0)
					{
						// Returning from the top level ends the program. Leave the loop
						// instead of returning from main() so that the output epilogue
						// below still runs and gl_Position gets written.
						d3.end = true;
						continue;
					}

					// Calls post-increment stack_ptr, so the saved address sits one slot below it
					stack_ptr--;
					current_instruction = callstack[stack_ptr];
				}
				continue;
			case RSX_SCA_OPCODE_BRB:
				// Branch by boolean mask
				if (static_branch())
				{
					current_instruction = branch_addr();
				}
				continue;
			case RSX_SCA_OPCODE_CLB:
				// Call by boolean mask
				if (static_branch())
				{
					callstack[stack_ptr] = current_instruction;
					stack_ptr++;
					current_instruction = branch_addr();
				}
				continue;
			//case RSX_SCA_OPCODE_PSH:
			//case RSX_SCA_OPCODE_POP:
			}

			write_sca(value);
		}
	}

	// Unconditionally update COLOR0 and SPECULAR0
	write_output(1, 0);
	write_output(2, 1);

	// Conditionally update COLOR1 and SPECULAR1 depending on 2-sided mask
	if (control == 0)
	{
		dest[3] = dest[1];
		dest[4] = dest[2];
	}
	else
	{
		// 2-sided lighting
		write_output(3, 2);
		write_output(4, 3);
	}

	if (!attribute_enabled(1 << 4))
	{
		dest[5].x = 0;
	}

	// Point size comes from the program when output-mask bit 5 is enabled, else from the uniform
	if (attribute_enabled(1 << 5))
	{
		gl_PointSize = dest[6].x;
	}
	else
	{
		gl_PointSize = point_size;
	}

	// User clip planes are packed into the remaining components of dest[5] and dest[6]
	write_clip_distance(0, 6, user_clip_enabled[0].x > 0, dest[5].y * user_clip_factor[0].x);
	write_clip_distance(1, 7, user_clip_enabled[0].y > 0, dest[5].z * user_clip_factor[0].y);
	write_clip_distance(2, 8, user_clip_enabled[0].z > 0, dest[5].w * user_clip_factor[0].z);
	write_clip_distance(3, 9, user_clip_enabled[0].w > 0, dest[6].y * user_clip_factor[0].w);
	write_clip_distance(4, 10, user_clip_enabled[1].x > 0, dest[6].z * user_clip_factor[1].x);
	write_clip_distance(5, 11, user_clip_enabled[1].y > 0, dest[6].w * user_clip_factor[1].y);

	// Remaining outputs are reset when their output-mask bit is disabled
	write_output(15, 12);
	write_output(6, 13);
	write_output(7, 14);
	write_output(8, 15);
	write_output(9, 16);
	write_output(10, 17);
	write_output(11, 18);
	write_output(12, 19);
	write_output(13, 20);
	write_output(14, 21);

	vec4 pos = dest[0] * scale_offset_mat;

#ifdef Z_NEGATIVE_ONE_TO_ONE
	pos.z = (pos.z + pos.z) - pos.w;
#endif

	gl_Position = pos;
}
)"