From 71aea7e571cac07c4ca84e788b0cfde421fb8b1a Mon Sep 17 00:00:00 2001 From: Wunk Date: Sat, 1 Jul 2023 19:35:07 -0700 Subject: [PATCH] shader/jit: Use `xbyak::util::Cpu` for Host capabilities (#6643) Xbyak has a complete utility class for determining the host processor's ISA features, such as SSE4.1, AVX, AVX2, AVX512{F,VL,DQ,VBMI,etc}, and so on, which can be used for further potential optimizations. --- src/video_core/shader/shader_jit_x64_compiler.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/src/video_core/shader/shader_jit_x64_compiler.cpp b/src/video_core/shader/shader_jit_x64_compiler.cpp index 00cfde0c2..85681ab83 100644 --- a/src/video_core/shader/shader_jit_x64_compiler.cpp +++ b/src/video_core/shader/shader_jit_x64_compiler.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "common/assert.h" #include "common/logging/log.h" @@ -32,6 +33,8 @@ using Xbyak::Xmm; using nihstro::DestRegister; using nihstro::RegisterType; +static const Xbyak::util::Cpu host_caps; + namespace Pica::Shader { typedef void (JitShader::*JitFunction)(Instruction instr); @@ -306,7 +309,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) { // register... 
movaps(SCRATCH, xword[STATE + dest_offset_disp]); - if (Common::GetCPUCaps().sse4_1) { + if (host_caps.has(Cpu::tSSE41)) { u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); blendps(SCRATCH, src, mask); @@ -437,7 +440,7 @@ void JitShader::Compile_DPH(Instruction instr) { Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); } - if (Common::GetCPUCaps().sse4_1) { + if (host_caps.has(Cpu::tSSE41)) { // Set 4th component to 1.0 blendps(SRC1, ONE, 0b1000); } else { @@ -507,7 +510,7 @@ void JitShader::Compile_SLT(Instruction instr) { void JitShader::Compile_FLR(Instruction instr) { Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); - if (Common::GetCPUCaps().sse4_1) { + if (host_caps.has(Cpu::tSSE41)) { roundps(SRC1, SRC1, _MM_FROUND_FLOOR); } else { cvttps2dq(SRC1, SRC1);