Revert "Extracted the attribute setup and draw commands into their own functions"

This reverts commit b3b34a1e76. That
commit caused a performance regression without providing enough benefit to justify it.
This commit is contained in:
James Rowe 2017-11-16 11:46:17 -07:00
parent 947067de81
commit 9d9693c13d

View file

@ -119,16 +119,64 @@ static void WriteUniformFloatReg(ShaderRegs& config, Shader::ShaderSetup& setup,
} }
} }
static void LoadDefaultVertexAttributes(u32 register_value) { static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs; auto& regs = g_state.regs;
if (id >= Regs::NUM_REGS) {
LOG_ERROR(HW_GPU,
"Commandlist tried to write to invalid register 0x%03X (value: %08X, mask: %X)",
id, value, mask);
return;
}
// TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
u32 old_value = regs.reg_array[id];
const u32 write_mask = expand_bits_to_bytes[mask];
regs.reg_array[id] = (old_value & ~write_mask) | (value & write_mask);
// Double check for is_pica_tracing to avoid call overhead
if (DebugUtils::IsPicaTracing()) {
DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs.reg_array[id]});
}
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded,
reinterpret_cast<void*>(&id));
switch (id) {
// Trigger IRQ
case PICA_REG_INDEX(trigger_irq):
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D);
break;
case PICA_REG_INDEX(pipeline.triangle_topology):
g_state.primitive_assembler.Reconfigure(regs.pipeline.triangle_topology);
break;
case PICA_REG_INDEX(pipeline.restart_primitive):
g_state.primitive_assembler.Reset();
break;
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
g_state.immediate.current_attribute = 0;
g_state.immediate.reset_geometry_pipeline = true;
default_attr_counter = 0;
break;
// Load default vertex input attributes
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235): {
// TODO: Does actual hardware indeed keep an intermediate buffer or does // TODO: Does actual hardware indeed keep an intermediate buffer or does
// it directly write the values? // it directly write the values?
default_attr_write_buffer[default_attr_counter++] = register_value; default_attr_write_buffer[default_attr_counter++] = value;
// Default attributes are written in a packed format such that four float24 values are encoded // Default attributes are written in a packed format such that four float24 values are
// in three 32-bit numbers. // encoded in
// We write to internal memory once a full such vector is written. // three 32-bit numbers. We write to internal memory once a full such vector is
// written.
if (default_attr_counter >= 3) { if (default_attr_counter >= 3) {
default_attr_counter = 0; default_attr_counter = 0;
@ -136,7 +184,7 @@ static void LoadDefaultVertexAttributes(u32 register_value) {
if (setup.index >= 16) { if (setup.index >= 16) {
LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index); LOG_ERROR(HW_GPU, "Invalid VS default attribute index %d", (int)setup.index);
return; break;
} }
Math::Vec4<float24> attribute; Math::Vec4<float24> attribute;
@ -201,16 +249,34 @@ static void LoadDefaultVertexAttributes(u32 register_value) {
// See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550 // See: https://github.com/citra-emu/citra/pull/2866#issuecomment-327011550
VideoCore::g_renderer->Rasterizer()->DrawTriangles(); VideoCore::g_renderer->Rasterizer()->DrawTriangles();
if (g_debug_context) { if (g_debug_context) {
g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch,
nullptr);
} }
} }
} }
} }
break;
} }
static void Draw(u32 command_id) { case PICA_REG_INDEX(pipeline.gpu_mode):
// This register likely just enables vertex processing and doesn't need any special handling
break;
case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c):
case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[1], 0x23d): {
unsigned index =
static_cast<unsigned>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
u32* head_ptr = (u32*)Memory::GetPhysicalPointer(
regs.pipeline.command_buffer.GetPhysicalAddress(index));
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
break;
}
// It seems like these trigger vertex rendering
case PICA_REG_INDEX(pipeline.trigger_draw):
case PICA_REG_INDEX(pipeline.trigger_draw_indexed): {
MICROPROFILE_SCOPE(GPU_Drawing); MICROPROFILE_SCOPE(GPU_Drawing);
auto& regs = g_state.regs;
#if PICA_LOG_TEV #if PICA_LOG_TEV
DebugUtils::DumpTevStageConfig(regs.GetTevStages()); DebugUtils::DumpTevStageConfig(regs.GetTevStages());
@ -225,7 +291,7 @@ static void Draw(u32 command_id) {
VertexLoader loader(regs.pipeline); VertexLoader loader(regs.pipeline);
// Load vertices // Load vertices
bool is_indexed = (command_id == PICA_REG_INDEX(pipeline.trigger_draw_indexed)); bool is_indexed = (id == PICA_REG_INDEX(pipeline.trigger_draw_indexed));
const auto& index_info = regs.pipeline.index_array; const auto& index_info = regs.pipeline.index_array;
const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset); const u8* index_address_8 = Memory::GetPhysicalPointer(base_address + index_info.offset);
@ -272,8 +338,8 @@ static void Draw(u32 command_id) {
for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) { for (unsigned int index = 0; index < regs.pipeline.num_vertices; ++index) {
// Indexed rendering doesn't use the start offset // Indexed rendering doesn't use the start offset
unsigned int vertex = is_indexed unsigned int vertex =
? (index_u16 ? index_address_16[index] : index_address_8[index]) is_indexed ? (index_u16 ? index_address_16[index] : index_address_8[index])
: (index + regs.pipeline.vertex_offset); : (index + regs.pipeline.vertex_offset);
// -1 is a common special value used for primitive restart. Since it's unknown if // -1 is a common special value used for primitive restart. Since it's unknown if
@ -290,7 +356,8 @@ static void Draw(u32 command_id) {
if (g_debug_context && Pica::g_debug_context->recorder) { if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1; int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index, size); memory_accesses.AddAccess(base_address + index_info.offset + size * index,
size);
} }
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) { for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
@ -335,82 +402,10 @@ static void Draw(u32 command_id) {
if (g_debug_context) { if (g_debug_context) {
g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr); g_debug_context->OnEvent(DebugContext::Event::FinishedPrimitiveBatch, nullptr);
} }
}
static void WritePicaReg(u32 id, u32 value, u32 mask) {
auto& regs = g_state.regs;
if (id >= Regs::NUM_REGS) {
LOG_ERROR(HW_GPU,
"Commandlist tried to write to invalid register 0x%03X (value: %08X, mask: %X)",
id, value, mask);
return;
}
// TODO: Figure out how register masking acts on e.g. vs.uniform_setup.set_value
u32 old_value = regs.reg_array[id];
const u32 write_mask = expand_bits_to_bytes[mask];
regs.reg_array[id] = (old_value & ~write_mask) | (value & write_mask);
// Double check for is_pica_tracing to avoid call overhead
if (DebugUtils::IsPicaTracing()) {
DebugUtils::OnPicaRegWrite({(u16)id, (u16)mask, regs.reg_array[id]});
}
if (g_debug_context)
g_debug_context->OnEvent(DebugContext::Event::PicaCommandLoaded,
reinterpret_cast<void*>(&id));
switch (id) {
// Trigger IRQ
case PICA_REG_INDEX(trigger_irq):
Service::GSP::SignalInterrupt(Service::GSP::InterruptId::P3D);
break;
case PICA_REG_INDEX(pipeline.triangle_topology):
g_state.primitive_assembler.Reconfigure(regs.pipeline.triangle_topology);
break;
case PICA_REG_INDEX(pipeline.restart_primitive):
g_state.primitive_assembler.Reset();
break;
case PICA_REG_INDEX(pipeline.vs_default_attributes_setup.index):
g_state.immediate.current_attribute = 0;
g_state.immediate.reset_geometry_pipeline = true;
default_attr_counter = 0;
break;
// Load default vertex input attributes
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[0], 0x233):
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[1], 0x234):
case PICA_REG_INDEX_WORKAROUND(pipeline.vs_default_attributes_setup.set_value[2], 0x235):
LoadDefaultVertexAttributes(value);
break;
case PICA_REG_INDEX(pipeline.gpu_mode):
// This register likely just enables vertex processing and doesn't need any special handling
break;
case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[0], 0x23c):
case PICA_REG_INDEX_WORKAROUND(pipeline.command_buffer.trigger[1], 0x23d): {
unsigned index =
static_cast<unsigned>(id - PICA_REG_INDEX(pipeline.command_buffer.trigger[0]));
u32* head_ptr = (u32*)Memory::GetPhysicalPointer(
regs.pipeline.command_buffer.GetPhysicalAddress(index));
g_state.cmd_list.head_ptr = g_state.cmd_list.current_ptr = head_ptr;
g_state.cmd_list.length = regs.pipeline.command_buffer.GetSize(index) / sizeof(u32);
break; break;
} }
// It seems like these trigger vertex rendering
case PICA_REG_INDEX(pipeline.trigger_draw):
case PICA_REG_INDEX(pipeline.trigger_draw_indexed):
Draw(id);
break;
case PICA_REG_INDEX(gs.bool_uniforms): case PICA_REG_INDEX(gs.bool_uniforms):
WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value()); WriteUniformBoolReg(g_state.gs, g_state.regs.gs.bool_uniforms.Value());
break; break;