2
1
Fork 0
mirror of https://github.com/yuzu-emu/yuzu.git synced 2024-07-04 23:31:19 +01:00

Correct Kepler Memory on Linear Pushes.

This commit is contained in:
Fernando Sahmkow 2019-04-15 12:43:37 -04:00 committed by FernandoS27
parent 1f4dfb3998
commit 8a099ac99f
2 changed files with 48 additions and 16 deletions

View file

@ -10,6 +10,8 @@
#include "video_core/memory_manager.h"
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/textures/convert.h"
#include "video_core/textures/decoders.h"
namespace Tegra::Engines {
@ -27,30 +29,40 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
// Dispatches a single GPU method write to the Kepler inline-memory engine.
// NOTE(review): this span is from a rendered diff; consecutive near-duplicate
// lines below appear to be the removed/added pair of the same statement.
switch (method_call.method) {
case KEPLERMEMORY_REG_INDEX(exec): {
// NOTE(review): the inline reset of write_offset looks like the removed (old)
// line; ProcessExec() (the added line) performs the reset itself — confirm
// against the upstream commit.
state.write_offset = 0;
ProcessExec();
break;
}
case KEPLERMEMORY_REG_INDEX(data): {
// NOTE(review): two-argument call is the new form; the one-argument call
// above it is presumably the removed old line — confirm upstream.
ProcessData(method_call.argument);
ProcessData(method_call.argument, method_call.IsLastCall());
break;
}
}
}
void KeplerMemory::ProcessData(u32 data) {
ASSERT_MSG(regs.exec.linear, "Non-linear uploads are not supported");
ASSERT(regs.dest.x == 0 && regs.dest.y == 0 && regs.dest.z == 0);
void KeplerMemory::ProcessExec() {
state.write_offset = 0;
state.copy_size = regs.line_length_in * regs.line_count;
state.inner_buffer.resize(state.copy_size);
}
// We have to invalidate the destination region to evict any outdated surfaces from the cache.
// We do this before actually writing the new data because the destination address might
// contain a dirty surface that will have to be written back to memory.
const GPUVAddr address{regs.dest.Address() + state.write_offset * sizeof(u32)};
rasterizer.InvalidateRegion(ToCacheAddr(memory_manager.GetPointer(address)), sizeof(u32));
memory_manager.Write<u32>(address, data);
// Buffers one 32-bit data word of an inline upload; on the last word, flushes
// the accumulated staging buffer to guest GPU memory.
// NOTE(review): this span is from a rendered diff; some consecutive lines are
// the removed/added pair of the same statement (see notes below).
void KeplerMemory::ProcessData(u32 data, bool is_last_call) {
// Clamp to the remaining bytes so a final partial word does not overrun the buffer.
const u32 sub_copy_size = std::min(4U, state.copy_size - state.write_offset);
std::memcpy(&state.inner_buffer[state.write_offset], &data, sub_copy_size);
state.write_offset += sub_copy_size;
if (is_last_call) {
// NOTE(review): the UNIMPLEMENTED_IF_MSG line and the `if (linear != 0)` guard
// look like the old/new pair of the same check — confirm which survives upstream.
UNIMPLEMENTED_IF_MSG(regs.exec.linear == 0, "Block Linear Copy is not implemented");
if (regs.exec.linear != 0) {
const GPUVAddr address{regs.dest.Address()};
const auto host_ptr = memory_manager.GetPointer(address);
// We have to invalidate the destination region to evict any outdated surfaces from the
// cache. We do this before actually writing the new data because the destination
// address might contain a dirty surface that will have to be written back to memory.
// NOTE(review): the next two lines (early OnMemoryWrite and write_offset++)
// duplicate logic below and appear to be removed old lines — confirm upstream.
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
state.write_offset++;
rasterizer.InvalidateRegion(ToCacheAddr(host_ptr), state.copy_size);
std::memcpy(host_ptr, state.inner_buffer.data(), state.copy_size);
// Mark Maxwell3D state dirty so cached surfaces re-read the written memory.
system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
}
}
}
} // namespace Tegra::Engines

View file

@ -6,6 +6,7 @@
#include <array>
#include <cstddef>
#include <vector>
#include "common/bit_field.h"
#include "common/common_funcs.h"
#include "common/common_types.h"
@ -51,7 +52,11 @@ public:
// Destination surface registers for the inline upload.
u32 address_high; // High 32 bits of the destination GPU virtual address.
u32 address_low;  // Low 32 bits of the destination GPU virtual address.
u32 pitch;
// NOTE(review): `block_dimensions` appears to be the removed old field and the
// union below its replacement (same register, bitfield view) — confirm upstream.
u32 block_dimensions;
union {
// Each 4-bit field stores a log2 value; see BlockWidth/Height/Depth accessors.
BitField<0, 4, u32> block_width;
BitField<4, 4, u32> block_height;
BitField<8, 4, u32> block_depth;
};
u32 width;
u32 height;
u32 depth;
@ -63,6 +68,18 @@ public:
return static_cast<GPUVAddr>((static_cast<GPUVAddr>(address_high) << 32) |
address_low);
}
// Decodes the block width: the register stores log2, so return 2^block_width.
u32 BlockWidth() const {
    const u32 log2_width = block_width.Value();
    return 1U << log2_width;
}
// Decodes the block height: the register stores log2, so return 2^block_height.
u32 BlockHeight() const {
    const u32 log2_height = block_height.Value();
    return 1U << log2_height;
}
// Decodes the block depth: the register stores log2, so return 2^block_depth.
u32 BlockDepth() const {
    const u32 log2_depth = block_depth.Value();
    return 1U << log2_depth;
}
} dest;
struct {
@ -81,6 +98,8 @@ public:
// Mutable transfer state for the in-flight inline upload.
struct {
u32 write_offset = 0; // Bytes written into inner_buffer so far.
u32 copy_size = 0;    // Total bytes expected for the current transfer.
std::vector<u8> inner_buffer; // Staging buffer flushed on the last data word.
} state{};
private:
@ -88,7 +107,8 @@ private:
VideoCore::RasterizerInterface& rasterizer; // Used to invalidate cached destination regions.
MemoryManager& memory_manager;              // Resolves GPU virtual addresses to host pointers.
// NOTE(review): the one-argument ProcessData declaration appears to be the
// removed old line; the two-argument form below replaces it — confirm upstream.
void ProcessData(u32 data);
void ProcessExec();
void ProcessData(u32 data, bool is_last_call);
};
#define ASSERT_REG_POSITION(field_name, position) \