mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-20 22:05:12 +00:00
SPU LLVM: Add VNNI optimized variant of sumb
- Uses vpdpbusd to horizontally add values; for some reason this is much faster than the normal horizontal add instructions.
This commit is contained in:
parent
a86b278115
commit
d304b52391
2 changed files with 24 additions and 0 deletions
|
|
@ -7032,6 +7032,18 @@ public:
|
|||
|
||||
void SUMB(spu_opcode_t op)
|
||||
{
|
||||
// TODO: Some future CPUS will support VNNI but not avx512
|
||||
if (m_use_avx512_icl)
|
||||
{
|
||||
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
|
||||
const auto zeroes = splat<u32[4]>(0);
|
||||
const auto ones = splat<u32[4]>(0x01010101);
|
||||
const auto ax = bitcast<u16[8]>(vpdpbusd(zeroes, a, ones));
|
||||
const auto bx = bitcast<u16[8]>(vpdpbusd(zeroes, b, ones));
|
||||
set_vr(op.rt, shuffle2(ax, bx, 0, 8, 2, 10, 4, 12, 6, 14));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
|
||||
const auto ahs = eval((a >> 8) + (a & 0xff));
|
||||
const auto bhs = eval((b >> 8) + (b & 0xff));
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue