mirror of
https://github.com/RPCSX/rpcsx.git
synced 2026-04-04 14:08:37 +00:00
PPU/SPU LLVM: Allow Zen4 CPUs to use VPERMI2B/VPERMT2B instead of the vperm2b256to128 path
- Zen4-based CPUs can process VPERMI2B/VPERMT2B in a single uop, unlike Intel CPUs, where it takes 3 uops.
This commit is contained in:
parent
7d32dc312f
commit
d8897c585d
5 changed files with 27 additions and 6 deletions
|
|
@ -8313,13 +8313,13 @@ public:
|
|||
{
|
||||
if (perm_only)
|
||||
{
|
||||
set_vr(op.rt4, vperm2b256to128(as, bs, c));
|
||||
set_vr(op.rt4, vperm2b(as, bs, c));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
|
||||
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
|
||||
const auto ab = vperm2b256to128(as, bs, c);
|
||||
const auto ab = vperm2b(as, bs, c);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
|
||||
return;
|
||||
}
|
||||
|
|
@ -8371,18 +8371,18 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
if (m_use_avx512_icl && (op.ra != op.rb))
|
||||
if (m_use_avx512_icl && (op.ra != op.rb || m_interp_magn))
|
||||
{
|
||||
if (perm_only)
|
||||
{
|
||||
set_vr(op.rt4, vperm2b256to128(a, b, eval(c ^ 0xf)));
|
||||
set_vr(op.rt4, vperm2b(a, b, eval(c ^ 0xf)));
|
||||
return;
|
||||
}
|
||||
|
||||
const auto m = gf2p8affineqb(c, build<u8[16]>(0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x40, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20), 0x7f);
|
||||
const auto mm = select(noncast<s8[16]>(m) >= 0, splat<u8[16]>(0), m);
|
||||
const auto cr = eval(c ^ 0xf);
|
||||
const auto ab = vperm2b256to128(a, b, cr);
|
||||
const auto ab = vperm2b(a, b, cr);
|
||||
set_vr(op.rt4, select(noncast<s8[16]>(c) >= 0, ab, mm));
|
||||
return;
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue