2
1
Fork 0
mirror of https://github.com/yuzu-emu/yuzu.git synced 2024-07-04 23:31:19 +01:00

shader: Implement HADD2

This commit is contained in:
ReinUsesLisp 2021-03-03 03:07:19 -03:00 committed by ameerj
parent 980cafdc27
commit 4006929c98
12 changed files with 400 additions and 42 deletions

View file

@ -71,6 +71,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp
frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp
frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
frontend/maxwell/translate/impl/half_floating_point_add.cpp
frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.cpp
frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/impl.h
frontend/maxwell/translate/impl/integer_add.cpp frontend/maxwell/translate/impl/integer_add.cpp

View file

@ -90,24 +90,36 @@ Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index);
void EmitCompositeConstructF16x2(EmitContext& ctx); Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF16x3(EmitContext& ctx); Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF16x4(EmitContext& ctx); Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index);
void EmitCompositeConstructF32x2(EmitContext& ctx); Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF32x3(EmitContext& ctx); Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF32x4(EmitContext& ctx); Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2);
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x3(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index); Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index);
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index);
void EmitCompositeConstructF64x2(EmitContext& ctx); void EmitCompositeConstructF64x2(EmitContext& ctx);
void EmitCompositeConstructF64x3(EmitContext& ctx); void EmitCompositeConstructF64x3(EmitContext& ctx);
void EmitCompositeConstructF64x4(EmitContext& ctx); void EmitCompositeConstructF64x4(EmitContext& ctx);
void EmitCompositeExtractF64x2(EmitContext& ctx); void EmitCompositeExtractF64x2(EmitContext& ctx);
void EmitCompositeExtractF64x3(EmitContext& ctx); void EmitCompositeExtractF64x3(EmitContext& ctx);
void EmitCompositeExtractF64x4(EmitContext& ctx); void EmitCompositeExtractF64x4(EmitContext& ctx);
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index);
Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU8(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU16(EmitContext& ctx, Id cond, Id true_value, Id false_value);
Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value); Id EmitSelectU32(EmitContext& ctx, Id cond, Id true_value, Id false_value);
@ -270,5 +282,9 @@ Id EmitConvertU64F32(EmitContext& ctx, Id value);
Id EmitConvertU64F64(EmitContext& ctx, Id value); Id EmitConvertU64F64(EmitContext& ctx, Id value);
Id EmitConvertU64U32(EmitContext& ctx, Id value); Id EmitConvertU64U32(EmitContext& ctx, Id value);
Id EmitConvertU32U64(EmitContext& ctx, Id value); Id EmitConvertU32U64(EmitContext& ctx, Id value);
Id EmitConvertF16F32(EmitContext& ctx, Id value);
Id EmitConvertF32F16(EmitContext& ctx, Id value);
Id EmitConvertF32F64(EmitContext& ctx, Id value);
Id EmitConvertF64F32(EmitContext& ctx, Id value);
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View file

@ -30,16 +30,28 @@ Id EmitCompositeExtractU32x4(EmitContext& ctx, Id composite, u32 index) {
return ctx.OpCompositeExtract(ctx.U32[1], composite, index); return ctx.OpCompositeExtract(ctx.U32[1], composite, index);
} }
void EmitCompositeConstructF16x2(EmitContext&) { Id EmitCompositeInsertU32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.U32[2], object, composite, index);
} }
void EmitCompositeConstructF16x3(EmitContext&) { Id EmitCompositeInsertU32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.U32[3], object, composite, index);
} }
void EmitCompositeConstructF16x4(EmitContext&) { Id EmitCompositeInsertU32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.U32[4], object, composite, index);
}
// Emits OpCompositeConstruct with result type ctx.F16[2] from the two given elements.
Id EmitCompositeConstructF16x2(EmitContext& ctx, Id e1, Id e2) {
    const Id result_type = ctx.F16[2];
    return ctx.OpCompositeConstruct(result_type, e1, e2);
}
// Emits OpCompositeConstruct with result type ctx.F16[3] from the three given elements.
Id EmitCompositeConstructF16x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
    const Id result_type = ctx.F16[3];
    return ctx.OpCompositeConstruct(result_type, e1, e2, e3);
}
Id EmitCompositeConstructF16x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
return ctx.OpCompositeConstruct(ctx.F16[4], e1, e2, e3, e4);
} }
Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) { Id EmitCompositeExtractF16x2(EmitContext& ctx, Id composite, u32 index) {
@ -54,16 +66,28 @@ Id EmitCompositeExtractF16x4(EmitContext& ctx, Id composite, u32 index) {
return ctx.OpCompositeExtract(ctx.F16[1], composite, index); return ctx.OpCompositeExtract(ctx.F16[1], composite, index);
} }
void EmitCompositeConstructF32x2(EmitContext&) { Id EmitCompositeInsertF16x2(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.F16[2], object, composite, index);
} }
void EmitCompositeConstructF32x3(EmitContext&) { Id EmitCompositeInsertF16x3(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.F16[3], object, composite, index);
} }
void EmitCompositeConstructF32x4(EmitContext&) { Id EmitCompositeInsertF16x4(EmitContext& ctx, Id composite, Id object, u32 index) {
throw NotImplementedException("SPIR-V Instruction"); return ctx.OpCompositeInsert(ctx.F16[4], object, composite, index);
}
// Emits OpCompositeConstruct with result type ctx.F32[2] from the two given elements.
Id EmitCompositeConstructF32x2(EmitContext& ctx, Id e1, Id e2) {
    const Id result_type = ctx.F32[2];
    return ctx.OpCompositeConstruct(result_type, e1, e2);
}
// Emits OpCompositeConstruct with result type ctx.F32[3] from the three given elements.
Id EmitCompositeConstructF32x3(EmitContext& ctx, Id e1, Id e2, Id e3) {
    const Id result_type = ctx.F32[3];
    return ctx.OpCompositeConstruct(result_type, e1, e2, e3);
}
Id EmitCompositeConstructF32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4) {
return ctx.OpCompositeConstruct(ctx.F32[4], e1, e2, e3, e4);
} }
Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) { Id EmitCompositeExtractF32x2(EmitContext& ctx, Id composite, u32 index) {
@ -78,6 +102,18 @@ Id EmitCompositeExtractF32x4(EmitContext& ctx, Id composite, u32 index) {
return ctx.OpCompositeExtract(ctx.F32[1], composite, index); return ctx.OpCompositeExtract(ctx.F32[1], composite, index);
} }
// Emits OpCompositeInsert with result type ctx.F32[2].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF32x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F32[2];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
// Emits OpCompositeInsert with result type ctx.F32[3].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF32x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F32[3];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
// Emits OpCompositeInsert with result type ctx.F32[4].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF32x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F32[4];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
void EmitCompositeConstructF64x2(EmitContext&) { void EmitCompositeConstructF64x2(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction"); throw NotImplementedException("SPIR-V Instruction");
} }
@ -102,4 +138,16 @@ void EmitCompositeExtractF64x4(EmitContext&) {
throw NotImplementedException("SPIR-V Instruction"); throw NotImplementedException("SPIR-V Instruction");
} }
// Emits OpCompositeInsert with result type ctx.F64[2].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF64x2(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F64[2];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
// Emits OpCompositeInsert with result type ctx.F64[3].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF64x3(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F64[3];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
// Emits OpCompositeInsert with result type ctx.F64[4].
// Note the argument swap: the emitter call takes the inserted object before the composite.
Id EmitCompositeInsertF64x4(EmitContext& ctx, Id composite, Id object, u32 index) {
    const Id result_type = ctx.F64[4];
    return ctx.OpCompositeInsert(result_type, object, composite, index);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View file

@ -86,4 +86,20 @@ Id EmitConvertU32U64(EmitContext& ctx, Id value) {
return ctx.OpUConvert(ctx.U32[1], value); return ctx.OpUConvert(ctx.U32[1], value);
} }
// Emits OpFConvert on `value` with result type ctx.F16[1].
Id EmitConvertF16F32(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F16[1];
    return ctx.OpFConvert(result_type, value);
}
// Emits OpFConvert on `value` with result type ctx.F32[1].
Id EmitConvertF32F16(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F32[1];
    return ctx.OpFConvert(result_type, value);
}
// Emits OpFConvert on `value` with result type ctx.F32[1].
Id EmitConvertF32F64(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F32[1];
    return ctx.OpFConvert(result_type, value);
}
// Emits OpFConvert on `value` with result type ctx.F64[1].
Id EmitConvertF64F32(EmitContext& ctx, Id value) {
    const Id result_type = ctx.F64[1];
    return ctx.OpFConvert(result_type, value);
}
} // namespace Shader::Backend::SPIRV } // namespace Shader::Backend::SPIRV

View file

@ -334,12 +334,12 @@ Value IREmitter::CompositeConstruct(const Value& e1, const Value& e2, const Valu
} }
Value IREmitter::CompositeExtract(const Value& vector, size_t element) { Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
const auto read = [&](Opcode opcode, size_t limit) -> Value { const auto read{[&](Opcode opcode, size_t limit) -> Value {
if (element >= limit) { if (element >= limit) {
throw InvalidArgument("Out of bounds element {}", element); throw InvalidArgument("Out of bounds element {}", element);
} }
return Inst(opcode, vector, Value{static_cast<u32>(element)}); return Inst(opcode, vector, Value{static_cast<u32>(element)});
}; }};
switch (vector.Type()) { switch (vector.Type()) {
case Type::U32x2: case Type::U32x2:
return read(Opcode::CompositeExtractU32x2, 2); return read(Opcode::CompositeExtractU32x2, 2);
@ -370,6 +370,43 @@ Value IREmitter::CompositeExtract(const Value& vector, size_t element) {
} }
} }
/// Emits the CompositeInsert opcode matching the type of `vector`, writing `object` at position
/// `element`.
/// @throws InvalidArgument when `element` is not below the vector's component count.
/// Types that are not one of the listed vector types are handed to ThrowInvalidType.
Value IREmitter::CompositeInsert(const Value& vector, const Value& object, size_t element) {
    // Bounds-checks `element` against the component count before emitting the instruction.
    const auto emit{[&](Opcode opcode, size_t num_components) {
        if (element < num_components) {
            return Inst(opcode, vector, object, Value{static_cast<u32>(element)});
        }
        throw InvalidArgument("Out of bounds element {}", element);
    }};
    switch (vector.Type()) {
    // 32-bit unsigned vectors
    case Type::U32x2:
        return emit(Opcode::CompositeInsertU32x2, 2);
    case Type::U32x3:
        return emit(Opcode::CompositeInsertU32x3, 3);
    case Type::U32x4:
        return emit(Opcode::CompositeInsertU32x4, 4);
    // Half-precision vectors
    case Type::F16x2:
        return emit(Opcode::CompositeInsertF16x2, 2);
    case Type::F16x3:
        return emit(Opcode::CompositeInsertF16x3, 3);
    case Type::F16x4:
        return emit(Opcode::CompositeInsertF16x4, 4);
    // Single-precision vectors
    case Type::F32x2:
        return emit(Opcode::CompositeInsertF32x2, 2);
    case Type::F32x3:
        return emit(Opcode::CompositeInsertF32x3, 3);
    case Type::F32x4:
        return emit(Opcode::CompositeInsertF32x4, 4);
    // Double-precision vectors
    case Type::F64x2:
        return emit(Opcode::CompositeInsertF64x2, 2);
    case Type::F64x3:
        return emit(Opcode::CompositeInsertF64x3, 3);
    case Type::F64x4:
        return emit(Opcode::CompositeInsertF64x4, 4);
    default:
        ThrowInvalidType(vector.Type());
    }
}
Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) { Value IREmitter::Select(const U1& condition, const Value& true_value, const Value& false_value) {
if (true_value.Type() != false_value.Type()) { if (true_value.Type() != false_value.Type()) {
throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type()); throw InvalidArgument("Mismatching types {} and {}", true_value.Type(), false_value.Type());
@ -433,7 +470,7 @@ U32 IREmitter::PackFloat2x16(const Value& vector) {
} }
Value IREmitter::UnpackFloat2x16(const U32& value) { Value IREmitter::UnpackFloat2x16(const U32& value) {
return Inst<Value>(Opcode::UnpackFloat2x16, value); return Inst(Opcode::UnpackFloat2x16, value);
} }
F64 IREmitter::PackDouble2x32(const Value& vector) { F64 IREmitter::PackDouble2x32(const Value& vector) {
@ -968,7 +1005,7 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
} }
} }
U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) { U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
switch (result_bitsize) { switch (result_bitsize) {
case 32: case 32:
switch (value.Type()) { switch (value.Type()) {
@ -995,4 +1032,49 @@ U32U64 IREmitter::ConvertU(size_t result_bitsize, const U32U64& value) {
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize); throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
} }
/// Converts a floating-point value to a floating-point type of `result_bitsize` bits.
/// @param result_bitsize Width of the result in bits: 16, 32 or 64.
/// @param value Value to convert; returned unchanged when it already has the requested width.
/// @returns `value` itself, or the result of the matching Convert opcode.
/// @throws LogicError for the F64 -> F16 and F16 -> F64 pairs, which have no opcode.
/// @throws NotImplementedException for any other unsupported width/type combination.
F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
    // Opcode names read Convert<Result><Source>: ConvertF32F64 yields F32 from an F64 operand.
    switch (result_bitsize) {
    case 16:
        switch (value.Type()) {
        case Type::F16:
            // Nothing to do
            return value;
        case Type::F32:
            return Inst<F16>(Opcode::ConvertF16F32, value);
        case Type::F64:
            throw LogicError("Illegal conversion from F64 to F16");
        default:
            break;
        }
        break;
    case 32:
        switch (value.Type()) {
        case Type::F16:
            return Inst<F32>(Opcode::ConvertF32F16, value);
        case Type::F32:
            // Nothing to do
            return value;
        case Type::F64:
            // ConvertF32F64 produces an F32, so the instruction is wrapped as F32
            return Inst<F32>(Opcode::ConvertF32F64, value);
        default:
            break;
        }
        break;
    case 64:
        switch (value.Type()) {
        case Type::F16:
            throw LogicError("Illegal conversion from F16 to F64");
        case Type::F32:
            // Widening needs the F64 <- F32 opcode; returning the F32 unchanged would be wrong
            return Inst<F64>(Opcode::ConvertF64F32, value);
        case Type::F64:
            // Nothing to do
            return value;
        default:
            break;
        }
        break;
    }
    throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
}
} // namespace Shader::IR } // namespace Shader::IR

View file

@ -97,6 +97,7 @@ public:
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3, [[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3,
const Value& e4); const Value& e4);
[[nodiscard]] Value CompositeExtract(const Value& vector, size_t element); [[nodiscard]] Value CompositeExtract(const Value& vector, size_t element);
[[nodiscard]] Value CompositeInsert(const Value& vector, const Value& object, size_t element);
[[nodiscard]] Value Select(const U1& condition, const Value& true_value, [[nodiscard]] Value Select(const U1& condition, const Value& true_value,
const Value& false_value); const Value& false_value);
@ -186,7 +187,8 @@ public:
[[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value); [[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
[[nodiscard]] U32U64 ConvertU(size_t result_bitsize, const U32U64& value); [[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
private: private:
IR::Block::iterator insertion_point; IR::Block::iterator insertion_point;

View file

@ -83,24 +83,36 @@ OPCODE(CompositeConstructU32x4, U32x4, U32,
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, ) OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, ) OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, ) OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, ) OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, ) OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, ) OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, ) OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, ) OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, ) OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, ) OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, ) OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, ) OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, ) OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, ) OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, ) OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, ) OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, ) OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, ) OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, ) OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, ) OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, ) OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
// Select operations // Select operations
OPCODE(SelectU8, U8, U1, U8, U8, ) OPCODE(SelectU8, U8, U1, U8, U8, )
@ -277,6 +289,9 @@ OPCODE(ConvertU32F64, U32, F64,
OPCODE(ConvertU64F16, U64, F16, ) OPCODE(ConvertU64F16, U64, F16, )
OPCODE(ConvertU64F32, U64, F32, ) OPCODE(ConvertU64F32, U64, F32, )
OPCODE(ConvertU64F64, U64, F64, ) OPCODE(ConvertU64F64, U64, F64, )
OPCODE(ConvertU64U32, U64, U32, ) OPCODE(ConvertU64U32, U64, U32, )
OPCODE(ConvertU32U64, U32, U64, ) OPCODE(ConvertU32U64, U32, U64, )
OPCODE(ConvertF16F32, F16, F32, )
OPCODE(ConvertF32F16, F32, F16, )
OPCODE(ConvertF32F64, F32, F64, )
OPCODE(ConvertF64F32, F64, F32, )

View file

@ -0,0 +1,184 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/common_types.h"
#include "shader_recompiler/exception.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_encoding.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// How the two half-precision results are written to the destination register.
// The values are the raw 2-bit encodings read through BitField<..., 2, Merge>.
enum class Merge : u64 {
    H1_H0 = 0,  // Pack both results into the destination
    F32 = 1,    // Convert the first result to F32 and store its bits
    MRG_H0 = 2, // Replace element 0 of the destination with the first result
    MRG_H1 = 3, // Replace element 1 of the destination with the second result
};
// How a 32-bit source operand is split into the two values fed to the addition.
// The values are the raw 2-bit encodings read through BitField<..., 2, Swizzle>.
enum class Swizzle : u64 {
    H1_H0 = 0, // Unpack as two halves: element 0 and element 1
    F32 = 1,   // Reinterpret the whole register as one F32, used for both values
    H0_H0 = 2, // Element 0 of the unpacked pair, used for both values
    H1_H1 = 3, // Element 1 of the unpacked pair, used for both values
};
// Splits a 32-bit operand into the pair of floating-point values selected by `swizzle`.
// The half swizzles yield F16 values; Swizzle::F32 yields the same F32 value twice.
// Throws InvalidArgument when `swizzle` is not a known enumerator.
std::pair<IR::F16F32F64, IR::F16F32F64> Extract(IR::IREmitter& ir, IR::U32 value, Swizzle swizzle) {
    // Unpacks the operand and duplicates a single half into both result slots.
    const auto broadcast{[&](u32 element) -> std::pair<IR::F16F32F64, IR::F16F32F64> {
        const IR::F16 half{ir.CompositeExtract(ir.UnpackFloat2x16(value), element)};
        return {half, half};
    }};
    switch (swizzle) {
    case Swizzle::H1_H0: {
        const IR::Value halves{ir.UnpackFloat2x16(value)};
        const IR::F16 first{ir.CompositeExtract(halves, 0)};
        const IR::F16 second{ir.CompositeExtract(halves, 1)};
        return {first, second};
    }
    case Swizzle::H0_H0:
        return broadcast(0);
    case Swizzle::H1_H1:
        return broadcast(1);
    case Swizzle::F32: {
        // No unpacking: the register bits are reinterpreted as a single F32
        const IR::F32 full{ir.BitCast<IR::F32>(value)};
        return {full, full};
    }
    }
    throw InvalidArgument("Invalid swizzle {}", swizzle);
}
// Builds the 32-bit value to store in `dest` from the two half results, according to `merge`.
// The MRG modes read the current contents of `dest` and overwrite only one of its halves.
// Throws InvalidArgument when `merge` is not a known enumerator.
IR::U32 MergeResult(IR::IREmitter& ir, IR::Reg dest, const IR::F16& lhs, const IR::F16& rhs,
                    Merge merge) {
    // Unpacks the destination register, replaces one element and packs it again.
    const auto replace_half{[&](const IR::F16& half, u32 element) -> IR::U32 {
        const IR::Value current{ir.UnpackFloat2x16(ir.GetReg(dest))};
        return ir.PackFloat2x16(ir.CompositeInsert(current, half, element));
    }};
    switch (merge) {
    case Merge::H1_H0:
        return ir.PackFloat2x16(ir.CompositeConstruct(lhs, rhs));
    case Merge::F32:
        // Only the first result is used; it is widened and stored as raw F32 bits
        return ir.BitCast<IR::U32, IR::F32>(ir.FPConvert(32, lhs));
    case Merge::MRG_H0:
        return replace_half(lhs, 0);
    case Merge::MRG_H1:
        return replace_half(rhs, 1);
    }
    throw InvalidArgument("Invalid merge {}", merge);
}
// Shared HADD2 translation used by every encoding of the instruction.
// Reads operand A from the register in bits 8..15 of `insn`, takes operand B from `src_b`,
// adds the two value pairs and writes the merged result to the register in bits 0..7.
void HADD2(TranslatorVisitor& v, u64 insn, Merge merge, bool ftz, bool sat, bool abs_a, bool neg_a,
           Swizzle swizzle_a, bool abs_b, bool neg_b, Swizzle swizzle_b, const IR::U32& src_b) {
    union {
        u64 raw;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a;
    } const hadd2{insn};

    auto [lhs_a, rhs_a]{Extract(v.ir, v.X(hadd2.src_a), swizzle_a)};
    auto [lhs_b, rhs_b]{Extract(v.ir, src_b, swizzle_b)};

    // When the operands differ in type, the F16 side is widened so both sides are F32.
    // NOTE(review): if both operands use Swizzle::F32, `promotion` is false and the F32 sums
    // are passed to MergeResult, which takes IR::F16 - confirm that case is handled.
    const bool promotion{lhs_a.Type() != lhs_b.Type()};
    if (promotion && lhs_a.Type() == IR::Type::F16) {
        lhs_a = v.ir.FPConvert(32, lhs_a);
        rhs_a = v.ir.FPConvert(32, rhs_a);
    }
    if (promotion && lhs_b.Type() == IR::Type::F16) {
        lhs_b = v.ir.FPConvert(32, lhs_b);
        rhs_b = v.ir.FPConvert(32, rhs_b);
    }

    // Operand modifiers are applied after any widening
    lhs_a = v.ir.FPAbsNeg(lhs_a, abs_a, neg_a);
    rhs_a = v.ir.FPAbsNeg(rhs_a, abs_a, neg_a);
    lhs_b = v.ir.FPAbsNeg(lhs_b, abs_b, neg_b);
    rhs_b = v.ir.FPAbsNeg(rhs_b, abs_b, neg_b);

    const IR::FpControl fp_control{
        .no_contraction{true},
        .rounding{IR::FpRounding::DontCare},
        .fmz_mode{ftz ? IR::FmzMode::FTZ : IR::FmzMode::None},
    };
    IR::F16F32F64 lhs{v.ir.FPAdd(lhs_a, lhs_b, fp_control)};
    IR::F16F32F64 rhs{v.ir.FPAdd(rhs_a, rhs_b, fp_control)};
    if (sat) {
        lhs = v.ir.FPSaturate(lhs);
        rhs = v.ir.FPSaturate(rhs);
    }
    // Undo the widening so the results can be merged as halves
    if (promotion) {
        lhs = v.ir.FPConvert(16, lhs);
        rhs = v.ir.FPConvert(16, rhs);
    }

    const IR::U32 merged{MergeResult(v.ir, hadd2.dest_reg, lhs, rhs, merge)};
    v.X(hadd2.dest_reg, merged);
}
} // Anonymous namespace
// HADD2 with operand B read through GetReg20: decodes the modifier bits and forwards to HADD2.
void TranslatorVisitor::HADD2_reg(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<32, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<31, 1, u64> neg_b;
        BitField<30, 1, u64> abs_b;
        BitField<28, 2, Swizzle> swizzle_b;
    } const hadd2{insn};

    // Single-bit flags are turned into booleans up front
    const bool ftz{hadd2.ftz != 0};
    const bool sat{hadd2.sat != 0};
    const bool abs_a{hadd2.abs_a != 0};
    const bool neg_a{hadd2.neg_a != 0};
    const bool abs_b{hadd2.abs_b != 0};
    const bool neg_b{hadd2.neg_b != 0};
    HADD2(*this, insn, hadd2.merge, ftz, sat, abs_a, neg_a, hadd2.swizzle_a, abs_b, neg_b,
          hadd2.swizzle_b, GetReg20(insn));
}
// HADD2 with operand B read through GetCbuf: decodes the modifier bits and forwards to HADD2.
// Operand B has no swizzle field in this encoding and is always passed as Swizzle::F32.
void TranslatorVisitor::HADD2_cbuf(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<56, 1, u64> neg_b;
        BitField<54, 1, u64> abs_b;
    } const hadd2{insn};

    // Single-bit flags are turned into booleans up front
    const bool ftz{hadd2.ftz != 0};
    const bool sat{hadd2.sat != 0};
    const bool abs_a{hadd2.abs_a != 0};
    const bool neg_a{hadd2.neg_a != 0};
    const bool abs_b{hadd2.abs_b != 0};
    const bool neg_b{hadd2.neg_b != 0};
    HADD2(*this, insn, hadd2.merge, ftz, sat, abs_a, neg_a, hadd2.swizzle_a, abs_b, neg_b,
          Swizzle::F32, GetCbuf(insn));
}
// HADD2 with a packed immediate as operand B.
// The immediate is rebuilt from two 9-bit fields and two separate sign bits, then passed to
// HADD2 as a Swizzle::H1_H0 operand with no abs/neg modifiers.
void TranslatorVisitor::HADD2_imm(u64 insn) {
    union {
        u64 raw;
        BitField<49, 2, Merge> merge;
        BitField<39, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<43, 1, u64> neg_a;
        BitField<44, 1, u64> abs_a;
        BitField<47, 2, Swizzle> swizzle_a;
        BitField<56, 1, u64> neg_high;
        BitField<30, 9, u64> high;
        BitField<29, 1, u64> neg_low;
        BitField<20, 9, u64> low;
    } const hadd2{insn};

    // Lower 16 bits: 9-bit field at bits 6..14, sign at bit 15
    const u32 low_bits{static_cast<u32>(hadd2.low << 6)};
    const u32 low_sign{(hadd2.neg_low != 0 ? 1U : 0U) << 15};
    // Upper 16 bits: 9-bit field at bits 22..30, sign at bit 31
    const u32 high_bits{static_cast<u32>(hadd2.high << 22)};
    const u32 high_sign{(hadd2.neg_high != 0 ? 1U : 0U) << 31};
    const u32 imm{low_bits | low_sign | high_bits | high_sign};

    const bool ftz{hadd2.ftz != 0};
    const bool sat{hadd2.sat != 0};
    const bool abs_a{hadd2.abs_a != 0};
    const bool neg_a{hadd2.neg_a != 0};
    HADD2(*this, insn, hadd2.merge, ftz, sat, abs_a, neg_a, hadd2.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(imm));
}
// HADD2 with a full 32-bit immediate (bits 20..51) as operand B.
// This encoding has no merge or abs fields: the result is always merged as Merge::H1_H0 and
// only operand A can be negated.
void TranslatorVisitor::HADD2_32I(u64 insn) {
    union {
        u64 raw;
        BitField<55, 1, u64> ftz;
        BitField<52, 1, u64> sat;
        BitField<56, 1, u64> neg_a;
        BitField<53, 2, Swizzle> swizzle_a;
        BitField<20, 32, u64> imm32;
    } const hadd2{insn};

    const bool ftz{hadd2.ftz != 0};
    const bool sat{hadd2.sat != 0};
    const bool neg_a{hadd2.neg_a != 0};
    const u32 imm{static_cast<u32>(hadd2.imm32)};
    HADD2(*this, insn, Merge::H1_H0, ftz, sat, false, neg_a, hadd2.swizzle_a, false, false,
          Swizzle::H1_H0, ir.Imm32(imm));
}
} // namespace Shader::Maxwell

View file

@ -59,7 +59,7 @@ IR::U64 Address(TranslatorVisitor& v, u64 insn) {
const IR::U64 address{[&]() -> IR::U64 { const IR::U64 address{[&]() -> IR::U64 {
if (mem.e == 0) { if (mem.e == 0) {
// LDG/STG without .E uses a 32-bit pointer, zero-extend it // LDG/STG without .E uses a 32-bit pointer, zero-extend it
return v.ir.ConvertU(64, v.X(mem.addr_reg)); return v.ir.UConvert(64, v.X(mem.addr_reg));
} }
if (!IR::IsAligned(mem.addr_reg, 2)) { if (!IR::IsAligned(mem.addr_reg, 2)) {
throw NotImplementedException("Unaligned address register"); throw NotImplementedException("Unaligned address register");

View file

@ -265,22 +265,6 @@ void TranslatorVisitor::GETLMEMBASE(u64) {
ThrowNotImplemented(Opcode::GETLMEMBASE); ThrowNotImplemented(Opcode::GETLMEMBASE);
} }
void TranslatorVisitor::HADD2_reg(u64) {
ThrowNotImplemented(Opcode::HADD2_reg);
}
void TranslatorVisitor::HADD2_cbuf(u64) {
ThrowNotImplemented(Opcode::HADD2_cbuf);
}
void TranslatorVisitor::HADD2_imm(u64) {
ThrowNotImplemented(Opcode::HADD2_imm);
}
void TranslatorVisitor::HADD2_32I(u64) {
ThrowNotImplemented(Opcode::HADD2_32I);
}
void TranslatorVisitor::HFMA2_reg(u64) { void TranslatorVisitor::HFMA2_reg(u64) {
ThrowNotImplemented(Opcode::HFMA2_reg); ThrowNotImplemented(Opcode::HFMA2_reg);
} }

View file

@ -298,7 +298,7 @@ IR::U32 StorageOffset(IR::Block& block, IR::Inst& inst, StorageBufferAddr buffer
offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset)); offset = ir.IAdd(offset, ir.Imm32(low_addr->imm_offset));
} }
} else { } else {
offset = ir.ConvertU(32, IR::U64{inst.Arg(0)}); offset = ir.UConvert(32, IR::U64{inst.Arg(0)});
} }
// Subtract the least significant 32 bits from the guest offset. The result is the storage // Subtract the least significant 32 bits from the guest offset. The result is the storage
// buffer offset in bytes. // buffer offset in bytes.

View file

@ -44,6 +44,12 @@ IR::Opcode Replace(IR::Opcode op) {
return IR::Opcode::CompositeExtractF32x3; return IR::Opcode::CompositeExtractF32x3;
case IR::Opcode::CompositeExtractF16x4: case IR::Opcode::CompositeExtractF16x4:
return IR::Opcode::CompositeExtractF32x4; return IR::Opcode::CompositeExtractF32x4;
case IR::Opcode::CompositeInsertF16x2:
return IR::Opcode::CompositeInsertF32x2;
case IR::Opcode::CompositeInsertF16x3:
return IR::Opcode::CompositeInsertF32x3;
case IR::Opcode::CompositeInsertF16x4:
return IR::Opcode::CompositeInsertF32x4;
case IR::Opcode::ConvertS16F16: case IR::Opcode::ConvertS16F16:
return IR::Opcode::ConvertS16F32; return IR::Opcode::ConvertS16F32;
case IR::Opcode::ConvertS32F16: case IR::Opcode::ConvertS32F16:
@ -60,6 +66,10 @@ IR::Opcode Replace(IR::Opcode op) {
return IR::Opcode::PackHalf2x16; return IR::Opcode::PackHalf2x16;
case IR::Opcode::UnpackFloat2x16: case IR::Opcode::UnpackFloat2x16:
return IR::Opcode::UnpackHalf2x16; return IR::Opcode::UnpackHalf2x16;
case IR::Opcode::ConvertF32F16:
return IR::Opcode::Identity;
case IR::Opcode::ConvertF16F32:
return IR::Opcode::Identity;
default: default:
return op; return op;
} }