New shader translator implementation (#654)
* Start implementing a new shader translator
* Fix shift instructions and a typo
* Small refactoring on StructuredProgram, move RemovePhis method to a separate class
* Initial geometry shader support
* Implement TLD4
* Fix -- There's no negation on FMUL32I
* Add constant folding and algebraic simplification optimizations, nits
* Some leftovers from constant folding
* Avoid cast for constant assignments
* Add a branch elimination pass, and misc small fixes
* Remove redundant branches, add expression propagation and other improvements on the code
* Small leftovers -- add missing break and continue, remove unused properties, other improvements
* Add null check to handle empty block cases on block visitor
* Add HADD2 and HMUL2 half float shader instructions
* Optimize pack/unpack sequences, some fixes related to half float instructions
* Add TXQ, TLD, TLDS and TLD4S shader texture instructions, and some support for bindless textures, some refactoring on codegen
* Fix copy paste mistake that caused RZ to be ignored on the AST instruction
* Add workaround for conditional exit, and fix half float instruction with constant buffer
* Add missing 0.0 source for TLDS.LZ variants
* Simplify the switch for TLDS.LZ
* Texture instructions related fixes
* Implement the HFMA instruction, and some misc. fixes
* Enable constant folding on UnpackHalf2x16 instructions
* Refactor HFMA to use OpCode* for opcode decoding rather than on the helper methods
* Remove the old shader translator
* Remove ShaderDeclInfo and other unused things
* Add dual vertex shader support
* Add ShaderConfig, used to pass shader type and maximum cbuffer size
* Move and rename some instruction enums
* Move texture instructions into a separate file
* Move operand GetExpression and locals management to OperandManager
* Optimize opcode decoding using a simple list and binary search
* Add missing condition for do-while on goto elimination
* Misc. fixes on texture instructions
* Simplify TLDS switch
* Address PR feedback, and a nit
2019-04-18 00:57:08 +01:00
|
|
|
using Ryujinx.Graphics.Shader.IntermediateRepresentation;
|
|
|
|
using System.Collections.Generic;
|
|
|
|
using System.Linq;
|
|
|
|
|
|
|
|
namespace Ryujinx.Graphics.Shader.Translation.Optimizations
|
|
|
|
{
|
|
|
|
static class Optimizer
|
|
|
|
{
|
|
|
|
/// <summary>
/// Runs the optimization passes over every block, sweeping the whole
/// block array again and again until one full sweep changes nothing.
/// </summary>
/// <param name="blocks">Basic blocks whose operation lists are optimized in place.</param>
public static void Optimize(BasicBlock[] blocks)
{
    bool modified;

    do
    {
        modified = false;

        foreach (BasicBlock block in blocks)
        {
            // Non-short-circuit OR: the block is always processed.
            modified |= OptimizeBlock(block);
        }
    }
    while (modified);
}

// Performs one sweep over a single block: unused node removal, constant
// folding, simplification, copy/pack propagation and, last, branch
// elimination. Returns true when anything in the block was changed.
private static bool OptimizeBlock(BasicBlock block)
{
    bool changed = false;

    LinkedListNode<INode> upcoming = block.Operations.First;

    while (upcoming != null)
    {
        LinkedListNode<INode> current = upcoming;

        // Fetch the successor first, the current node may be unlinked below.
        upcoming = current.Next;

        INode value = current.Value;

        if (IsUnused(value))
        {
            RemoveNode(block, current);

            changed = true;

            continue;
        }

        if (!(value is Operation operation))
        {
            // The remaining passes only apply to Operation nodes.
            continue;
        }

        ConstantFolding.Fold(operation);

        Simplification.Simplify(operation);

        if (!DestIsLocalVar(operation))
        {
            continue;
        }

        switch (operation.Inst)
        {
            case Instruction.Copy:
                // Uses of the copy now read its source, so the copy can go.
                PropagateCopy(operation);

                RemoveNode(block, current);

                changed = true;

                break;

            case Instruction.PackHalf2x16:
                if (PropagatePack(operation))
                {
                    // Only drop the pack when no use of its result remains.
                    if (operation.Dest.UseOps.Count == 0)
                    {
                        RemoveNode(block, current);
                    }

                    changed = true;
                }

                break;
        }
    }

    if (BranchElimination.Eliminate(block))
    {
        // On success the last operation of the block is removed.
        RemoveNode(block, block.Operations.Last);

        changed = true;
    }

    return changed;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Propagates the copy source operand to all uses of the destination operand.
/// </summary>
/// <param name="copyOp">Copy operation; source 0 replaces its destination in every using node.</param>
private static void PropagateCopy(Operation copyOp)
{
    Operand copied = copyOp.Dest;

    Operand replacement = copyOp.GetSource(0);

    // Walk a snapshot of the use list rather than the live collection.
    foreach (INode user in copied.UseOps.ToArray())
    {
        for (int slot = 0; slot < user.SourcesCount; slot++)
        {
            if (user.GetSource(slot) != copied)
            {
                continue;
            }

            user.SetSource(slot, replacement);
        }
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// Propagates pack source operands to uses by unpack instructions.
/// The source that is forwarded depends on the unpack instruction.
/// </summary>
/// <param name="packOp">Pack operation whose two sources are forwarded.</param>
/// <returns>True when at least one unpack use was turned into a copy.</returns>
private static bool PropagatePack(Operation packOp)
{
    Operand packed = packOp.Dest;

    Operand first  = packOp.GetSource(0);
    Operand second = packOp.GetSource(1);

    bool anyRewritten = false;

    // Walk a snapshot of the use list rather than the live collection.
    foreach (INode user in packed.UseOps.ToArray())
    {
        bool isUnpackOfPacked =
            user is Operation candidate &&
            candidate.Inst == Instruction.UnpackHalf2x16 &&
            candidate.GetSource(0) == packed;

        if (!isUnpackOfPacked)
        {
            continue;
        }

        Operation unpack = (Operation)user;

        // Component index 1 selects the second pack source, anything else the first.
        Operand forwarded;

        if (unpack.ComponentIndex == 1)
        {
            forwarded = second;
        }
        else
        {
            forwarded = first;
        }

        unpack.TurnIntoCopy(forwarded);

        anyRewritten = true;
    }

    return anyRewritten;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Removes a node from the nodes list, and also removes it from all the
/// use lists on the operands that this node uses. When that leaves a local
/// variable without uses, the node assigning that variable is processed the
/// same way (its own sources are released), but it is not unlinked from the
/// block here.
/// </summary>
/// <param name="block">Block whose operation list contains <paramref name="llNode"/>.</param>
/// <param name="llNode">Linked list node to unlink from the block.</param>
private static void RemoveNode(BasicBlock block, LinkedListNode<INode> llNode)
{
    block.Operations.Remove(llNode);

    Queue<INode> nodes = new Queue<INode>();

    nodes.Enqueue(llNode.Value);

    while (nodes.TryDequeue(out INode node))
    {
        for (int index = 0; index < node.SourcesCount; index++)
        {
            Operand src = node.GetSource(index);

            if (src.Type != OperandType.LocalVariable)
            {
                continue;
            }

            if (src.UseOps.Remove(node) && src.UseOps.Count == 0)
            {
                INode assignment = src.AsgOp;

                // NOTE(review): guards against a local variable that has no
                // assigning node recorded. Enqueuing null would make the next
                // dequeue fail on node.SourcesCount. Confirm whether AsgOp can
                // really be null for a local variable at this point.
                if (assignment != null)
                {
                    nodes.Enqueue(assignment);
                }
            }
        }
    }
}
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether a node writes to a local variable that has no uses.
/// </summary>
/// <param name="node">Node to inspect.</param>
/// <returns>True when the destination is a local variable with an empty use list.</returns>
private static bool IsUnused(INode node)
{
    if (!DestIsLocalVar(node))
    {
        return false;
    }

    return node.Dest.UseOps.Count == 0;
}
|
|
|
|
|
|
|
|
/// <summary>
/// Tells whether a node has a destination operand of the local variable type.
/// </summary>
/// <param name="node">Node to inspect.</param>
/// <returns>True when the destination exists and its type is LocalVariable.</returns>
private static bool DestIsLocalVar(INode node)
{
    Operand dest = node.Dest;

    if (dest == null)
    {
        // No destination at all.
        return false;
    }

    return dest.Type == OperandType.LocalVariable;
}
|
|
|
|
}
|
|
|
|
}
|