2
1
Fork 0
mirror of https://github.com/yuzu-emu/yuzu.git synced 2024-07-04 23:31:19 +01:00

Decoders: Improve overall speed.

This commit is contained in:
Fernando Sahmkow 2022-04-03 02:18:03 +02:00
parent f5fd6b5c86
commit 98317f2b77

View file

@@ -35,7 +35,7 @@ void incrpdep(u32& value) {
template <bool TO_LINEAR, u32 BYTES_PER_PIXEL> template <bool TO_LINEAR, u32 BYTES_PER_PIXEL>
void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth, void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32 height, u32 depth,
u32 block_height, u32 block_depth, u32 stride_alignment) { u32 block_height, u32 block_depth, u32 stride) {
// The origin of the transformation can be configured here, leave it as zero as the current API // The origin of the transformation can be configured here, leave it as zero as the current API
// doesn't expose it. // doesn't expose it.
static constexpr u32 origin_x = 0; static constexpr u32 origin_x = 0;
@@ -45,7 +45,6 @@ void SwizzleImpl(std::span<u8> output, std::span<const u8> input, u32 width, u32
// We can configure here a custom pitch // We can configure here a custom pitch
// As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch. // As it's not exposed 'width * BYTES_PER_PIXEL' will be the expected pitch.
const u32 pitch = width * BYTES_PER_PIXEL; const u32 pitch = width * BYTES_PER_PIXEL;
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * BYTES_PER_PIXEL;
const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT); const u32 gobs_in_x = Common::DivCeilLog2(stride, GOB_SIZE_X_SHIFT);
const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth); const u32 block_size = gobs_in_x << (GOB_SIZE_SHIFT + block_height + block_depth);
@@ -179,15 +178,23 @@ void Swizzle(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixe
// Side-by-side diff rendering: a row that appears twice is the old text followed by the new
// text; the rows that appear only once inside this function were added by the commit.
//
// UnswizzleTexture (new side): forwards to Swizzle<false> after (1) turning stride_alignment
// into a concrete stride and (2) re-describing each row as fewer, wider texels. The old side
// forwarded its arguments to Swizzle<false> unchanged.
void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, void UnswizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel,
u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth, u32 width, u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment) { u32 stride_alignment) {
// Computed from the caller's original width and bytes_per_pixel, so it must stay ahead of the
// two reassignments below.
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
// log2 of the texel size to use: the largest power of two that divides the row's byte count
// (width * bytes_per_pixel), capped at 4, i.e. at most 16 bytes per texel.
const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
// Row byte count re-expressed in the wider texels. The shift loses nothing because new_bpp
// never exceeds the number of trailing zero bits of that byte count.
width = (width * bytes_per_pixel) >> new_bpp;
bytes_per_pixel = 1U << new_bpp;
// Last argument: the old side passed stride_alignment through, the new side passes the stride
// computed above (aligned original width times original bytes_per_pixel).
Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, Swizzle<false>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
stride_alignment); stride);
} }
// Side-by-side diff rendering: a row that appears twice is the old text followed by the new
// text; the rows that appear only once inside this function were added by the commit.
//
// SwizzleTexture (new side): forwards to Swizzle<true> after (1) turning stride_alignment into
// a concrete stride and (2) re-describing each row as fewer, wider texels. The old side
// forwarded its arguments to Swizzle<true> unchanged.
void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, void SwizzleTexture(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,
u32 height, u32 depth, u32 block_height, u32 block_depth, u32 height, u32 depth, u32 block_height, u32 block_depth,
u32 stride_alignment) { u32 stride_alignment) {
// Computed from the caller's original width and bytes_per_pixel, so it must stay ahead of the
// two reassignments below.
const u32 stride = Common::AlignUpLog2(width, stride_alignment) * bytes_per_pixel;
// log2 of the texel size to use: the largest power of two that divides the row's byte count
// (width * bytes_per_pixel), capped at 4, i.e. at most 16 bytes per texel.
const u32 new_bpp = std::min(4U, static_cast<u32>(std::countr_zero(width * bytes_per_pixel)));
// Row byte count re-expressed in the wider texels. The shift loses nothing because new_bpp
// never exceeds the number of trailing zero bits of that byte count.
width = (width * bytes_per_pixel) >> new_bpp;
bytes_per_pixel = 1U << new_bpp;
// Last argument: the old side passed stride_alignment through, the new side passes the stride
// computed above (aligned original width times original bytes_per_pixel).
Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth, Swizzle<true>(output, input, bytes_per_pixel, width, height, depth, block_height, block_depth,
stride_alignment); stride);
} }
void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width, void SwizzleSubrect(std::span<u8> output, std::span<const u8> input, u32 bytes_per_pixel, u32 width,