From b9571cfda0263d5e491cd1d9fda4c3205df312c6 Mon Sep 17 00:00:00 2001 From: Wunkolo Date: Fri, 3 May 2024 15:49:14 -0700 Subject: [PATCH] [a64] Implement `OPCODE_EXTRACT` --- src/xenia/cpu/backend/a64/a64_seq_vector.cc | 69 ++++++++++++++++++++- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/src/xenia/cpu/backend/a64/a64_seq_vector.cc b/src/xenia/cpu/backend/a64/a64_seq_vector.cc index ff313aefb..a1c69ccfe 100644 --- a/src/xenia/cpu/backend/a64/a64_seq_vector.cc +++ b/src/xenia/cpu/backend/a64/a64_seq_vector.cc @@ -575,15 +575,78 @@ EMITTER_OPCODE_TABLE(OPCODE_INSERT, INSERT_I8, INSERT_I16, INSERT_I32); // This can be a single broadcast. struct EXTRACT_I8 : Sequence> { - static void Emit(A64Emitter& e, const EmitArgType& i) {} + static void Emit(A64Emitter& e, const EmitArgType& i) { + if (i.src2.is_constant) { + e.UMOV(i.dest, i.src1.reg().Belem()[VEC128_B(i.src2.constant())]); + } else { + // Fixup index + e.EOR(W0, i.src2, 0b11); + e.AND(W0, W0, 0x1F); + e.DUP(Q0.B16(), W0); + // Byte-table lookup + e.TBL(Q0.B16(), List{i.src1.reg().B16()}, Q0.B16()); + // Get lowest element + e.UMOV(i.dest, Q0.Belem()[0]); + } + } }; struct EXTRACT_I16 : Sequence> { - static void Emit(A64Emitter& e, const EmitArgType& i) {} + static void Emit(A64Emitter& e, const EmitArgType& i) { + if (i.src2.is_constant) { + e.UMOV(i.dest, i.src1.reg().Helem()[VEC128_W(i.src2.constant())]); + } else { + // Fixup index + e.EOR(W0, i.src2, 0b01); + e.LSL(W0, W0, 1); + + // Replicate index as byte + e.MOV(W1, 0x01'01); + e.MUL(W0, W0, W1); + + // Byte indices + e.ADD(W0, W0, 0x01'00); + e.UXTH(W0, W0); + + // Replicate byte indices + e.DUP(Q0.H8(), W0); + // Byte-table lookup + e.TBL(Q0.B16(), List{i.src1.reg().B16()}, Q0.B16()); + // Get lowest element + e.UMOV(i.dest, Q0.Helem()[0]); + } + } }; struct EXTRACT_I32 : Sequence> { - static void Emit(A64Emitter& e, const EmitArgType& i) {} + static void Emit(A64Emitter& e, const EmitArgType& i) { + static const vec128_t extract_table_32[4] = { + vec128b(3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b(7, 6, 5, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b(11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + vec128b(15, 14, 13, 12, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), + }; + if (i.src2.is_constant) { + e.UMOV(i.dest, i.src1.reg().Selem()[VEC128_D(i.src2.constant())]); + } else { + QReg src1 = i.src1.reg(); + if (i.src1.is_constant) { + src1 = Q1; + e.LoadConstantV(src1, i.src1.constant()); + } + + e.AND(X0, i.src2.reg().toX(), 0b11); + e.LSL(X0, X0, 4); + + e.MOVP2R(X1, extract_table_32); + e.LDR(Q0, X1, X0); + + // Byte-table lookup + e.TBL(Q0.B16(), List{src1.B16()}, Q0.B16()); + // Get lowest element + e.UMOV(i.dest, Q0.Selem()[0]); + } + } }; EMITTER_OPCODE_TABLE(OPCODE_EXTRACT, EXTRACT_I8, EXTRACT_I16, EXTRACT_I32);