Sign Up
Log In
Log In
or
Sign Up
Places
All Projects
Status Monitor
Collapse sidebar
home:X0F:branches:Emulators
melonds
melonds_PR2065.patch
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File melonds_PR2065.patch of Package melonds
From 57d20751fcd1ff52fe9f8890d565c65c5b6abc13 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Sun, 9 Jun 2024 18:47:43 +0200 Subject: [PATCH 1/9] Fix glMemoryBarrier flags --- src/GPU3D_Compute.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index da2559507..027d68066 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -932,23 +932,23 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glBindImageTexture(0, YSpanIndicesTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_RGBA16UI); glUseProgram(ShaderInterpXSpans[wbuffer]); glDispatchCompute((numSetupIndices + 31) / 32, 1, 1); - glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // bin polygons glUseProgram(ShaderBinCombined); glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); - glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // calculate list offsets glUseProgram(ShaderCalculateWorkListOffset); glDispatchCompute((numVariants + 31) / 32, 1, 1); - glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // sort shader work glUseProgram(ShaderSortWork); glBindBuffer(GL_DISPATCH_INDIRECT_BUFFER, BinResultMemory); glDispatchComputeIndirect(offsetof(BinResultHeader, SortWorkWorkCount)); - glMemoryBarrier(GL_SHADER_STORAGE_BUFFER); + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); glActiveTexture(GL_TEXTURE0); From 528501149e21e380898dd77a4c527087f0d8651e Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Sun, 9 Jun 2024 20:07:26 +0200 Subject: [PATCH 2/9] Scale TileSize with internal resolution --- src/GPU3D_Compute.cpp | 7 +++++++ src/GPU3D_Compute.h | 6 +++--- src/GPU3D_Compute_shaders.h | 1 - 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 027d68066..6d6115fe1 
100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -19,6 +19,7 @@ #include "GPU3D_Compute.h" #include <assert.h> +#include <algorithm> #include "OpenGLSupport.h" @@ -50,6 +51,8 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c shaderSource += std::to_string(ScreenHeight); shaderSource += "\n#define MaxWorkTiles "; shaderSource += std::to_string(MaxWorkTiles); + shaderSource += "\n#define TileSize "; + shaderSource += std::to_string(TileSize); shaderSource += ComputeRendererShaders::Common; shaderSource += source; @@ -310,6 +313,10 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate ScreenWidth = 256 * ScaleFactor; ScreenHeight = 192 * ScaleFactor; + TileSize = std::min(8 * (1 << (ScaleFactor / 5)), 32); + CoarseTileW = CoarseTileCountX * TileSize; + CoarseTileH = CoarseTileCountY * TileSize; + TilesPerLine = ScreenWidth/TileSize; TileLines = ScreenHeight/TileSize; diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 7544c09e0..1e225948b 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -163,11 +163,11 @@ class ComputeRenderer : public Renderer3D float TextureLayer; }; - static constexpr int TileSize = 8; + int TileSize; static constexpr int CoarseTileCountX = 8; static constexpr int CoarseTileCountY = 4; - static constexpr int CoarseTileW = CoarseTileCountX * TileSize; - static constexpr int CoarseTileH = CoarseTileCountY * TileSize; + int CoarseTileW; + int CoarseTileH; static constexpr int BinStride = 2048/32; static constexpr int CoarseBinStride = BinStride/32; diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 572f9ad66..0ad57dbd1 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -339,7 +339,6 @@ const uint ResultAttrStart = ResultDepthStart+ScreenWidth*ScreenHeight*2; const char* Common = R"( -#define TileSize 8 const int CoarseTileCountX = 8; const int CoarseTileCountY = 4; const int CoarseTileW = 
(CoarseTileCountX * TileSize); From 078b4c43f4af165279e0fd235abb3cb241b2b20b Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Sun, 9 Jun 2024 21:46:44 +0200 Subject: [PATCH 3/9] Clean up tile size calc --- src/GPU3D_Compute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 6d6115fe1..8e0ea4f36 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -313,7 +313,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate ScreenWidth = 256 * ScaleFactor; ScreenHeight = 192 * ScaleFactor; - TileSize = std::min(8 * (1 << (ScaleFactor / 5)), 32); + TileSize = std::min(8 << (ScaleFactor / 5), 32); CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; From 7433388beb6662673b021a1c4b0ceba9513bd156 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Mon, 10 Jun 2024 20:15:05 +0200 Subject: [PATCH 4/9] Use a different CoarseTileCountY with size 32 tiles --- src/GPU3D_Compute.cpp | 3 +++ src/GPU3D_Compute.h | 2 +- src/GPU3D_Compute_shaders.h | 1 - 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 8e0ea4f36..294cd5180 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -53,6 +53,8 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c shaderSource += std::to_string(MaxWorkTiles); shaderSource += "\n#define TileSize "; shaderSource += std::to_string(TileSize); + shaderSource += "\nconst int CoarseTileCountY = "; + shaderSource += std::to_string(CoarseTileCountY) + ";"; shaderSource += ComputeRendererShaders::Common; shaderSource += source; @@ -314,6 +316,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate ScreenHeight = 192 * ScaleFactor; TileSize = std::min(8 << (ScaleFactor / 5), 32); + CoarseTileCountY = TileSize < 32 ? 
4 : 6; CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 1e225948b..4f944e42f 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -165,7 +165,7 @@ class ComputeRenderer : public Renderer3D int TileSize; static constexpr int CoarseTileCountX = 8; - static constexpr int CoarseTileCountY = 4; + int CoarseTileCountY; int CoarseTileW; int CoarseTileH; diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 0ad57dbd1..9fb4aae76 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -340,7 +340,6 @@ const uint ResultAttrStart = ResultDepthStart+ScreenWidth*ScreenHeight*2; const char* Common = R"( const int CoarseTileCountX = 8; -const int CoarseTileCountY = 4; const int CoarseTileW = (CoarseTileCountX * TileSize); const int CoarseTileH = (CoarseTileCountY * TileSize); From d24079e693befa45e0ea1d89d70f48adf25614a8 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Tue, 11 Jun 2024 15:56:24 +0200 Subject: [PATCH 5/9] Better tile scaling calc --- src/GPU3D_Compute.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 294cd5180..9d5a23785 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -302,6 +302,8 @@ void ComputeRenderer::Reset(GPU& gpu) void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates) { + unsigned char TileScale; + CurGLCompositor.SetScaleFactor(scale); if (ScaleFactor != -1) @@ -315,7 +317,13 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate ScreenWidth = 256 * ScaleFactor; ScreenHeight = 192 * ScaleFactor; - TileSize = std::min(8 << (ScaleFactor / 5), 32); + //Starting at 4.5x we want to double TileSize every time scale doubles + TileScale = 2 * ScaleFactor / 9; + TileScale &= ~(TileScale >> 1); + TileScale <<= 1; + TileScale += TileScale == 0; + 
+ TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 4 : 6; CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; From 57b5e16e6dfbeb0e3d53e32f2a32e654e2fb3203 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Tue, 11 Jun 2024 17:22:36 +0200 Subject: [PATCH 6/9] Actually good way to get the MS bit --- src/GPU3D_Compute.cpp | 9 +++++++-- src/Utils.h | 14 ++++++++++++++ 2 files changed, 21 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 9d5a23785..c22a66b96 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -21,6 +21,8 @@ #include <assert.h> #include <algorithm> +#include "Utils.h" + #include "OpenGLSupport.h" #include "GPU3D_Compute_shaders.h" @@ -302,7 +304,7 @@ void ComputeRenderer::Reset(GPU& gpu) void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinates) { - unsigned char TileScale; + u8 TileScale; CurGLCompositor.SetScaleFactor(scale); @@ -319,9 +321,12 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate //Starting at 4.5x we want to double TileSize every time scale doubles TileScale = 2 * ScaleFactor / 9; - TileScale &= ~(TileScale >> 1); + TileScale = GetMSBit(TileScale); TileScale <<= 1; TileScale += TileScale == 0; + + std::printf("Scale: %d\n", ScaleFactor); + std::printf("TileScale: %d\n", TileScale); TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 
4 : 6; diff --git a/src/Utils.h b/src/Utils.h index 63be217b8..e6444c51f 100644 --- a/src/Utils.h +++ b/src/Utils.h @@ -38,6 +38,20 @@ std::pair<std::unique_ptr<u8[]>, u32> PadToPowerOf2(const u8* data, u32 len) noe std::unique_ptr<u8[]> CopyToUnique(const u8* data, u32 len) noexcept; +template <typename T> +T GetMSBit(T val) +{ + val |= (val >> 1); + val |= (val >> 2); + val |= (val >> 4); + + if constexpr(sizeof(val) > 1) val |= (val >> 8); + if constexpr(sizeof(val) > 2) val |= (val >> 16); + if constexpr(sizeof(val) > 4) val |= (val >> 32); + + return val - (val >> 1); +} + } #endif // MELONDS_UTILS_H From f097cc3b4e93bcd9b5db73d6be68d21dcfb85955 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Thu, 13 Jun 2024 00:30:33 +0200 Subject: [PATCH 7/9] Tie BinCombined's local_size_x to the coarse tiles' area --- src/GPU3D_Compute.cpp | 5 ++++- src/GPU3D_Compute.h | 1 + src/GPU3D_Compute_shaders.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index c22a66b96..ecf485440 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -57,6 +57,8 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c shaderSource += std::to_string(TileSize); shaderSource += "\nconst int CoarseTileCountY = "; shaderSource += std::to_string(CoarseTileCountY) + ";"; + shaderSource += "\n#define CoarseTileArea "; + shaderSource += std::to_string(CoarseTileArea); shaderSource += ComputeRendererShaders::Common; shaderSource += source; @@ -330,6 +332,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 
4 : 6; + CoarseTileArea = CoarseTileCountX * CoarseTileCountY; CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; @@ -959,7 +962,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) // bin polygons glUseProgram(ShaderBinCombined); - glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); + glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + CoarseTileArea - 1) / CoarseTileArea), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // calculate list offsets diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 4f944e42f..6a5fd499c 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -166,6 +166,7 @@ class ComputeRenderer : public Renderer3D int TileSize; static constexpr int CoarseTileCountX = 8; int CoarseTileCountY; + int CoarseTileArea; int CoarseTileW; int CoarseTileH; diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 9fb4aae76..9b3190295 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -862,7 +862,7 @@ const std::string BinCombined = XSpanSetupBuffer + WorkDescBuffer + R"( -layout (local_size_x = 32) in; +layout (local_size_x = CoarseTileArea) in; bool BinPolygon(Polygon polygon, ivec2 topLeft, ivec2 botRight) { From 9c87d9998f1ad793b7ea442aed8407851d65b750 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Thu, 13 Jun 2024 00:46:05 +0200 Subject: [PATCH 8/9] Work count X is unrelated to the local size here --- src/GPU3D_Compute.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index ecf485440..93aac5ce3 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -962,7 +962,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) // bin polygons glUseProgram(ShaderBinCombined); - glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + CoarseTileArea - 1) / CoarseTileArea), 
ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); + glDispatchCompute(((gpu.GPU3D.RenderNumPolygons + 31) / 32), ScreenWidth/CoarseTileW, ScreenHeight/CoarseTileH); glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); // calculate list offsets From 6f8ce9fe897fab0277754209122df4181fb4a177 Mon Sep 17 00:00:00 2001 From: FireNX70 <firenx70@gmail.com> Date: Thu, 13 Jun 2024 07:57:49 +0200 Subject: [PATCH 9/9] Adjust ClearCoarseBinMask's local size according to TileSize --- src/GPU3D_Compute.cpp | 5 ++++- src/GPU3D_Compute.h | 1 + src/GPU3D_Compute_shaders.h | 2 +- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/src/GPU3D_Compute.cpp b/src/GPU3D_Compute.cpp index 93aac5ce3..16a3d80a8 100644 --- a/src/GPU3D_Compute.cpp +++ b/src/GPU3D_Compute.cpp @@ -59,6 +59,8 @@ bool ComputeRenderer::CompileShader(GLuint& shader, const std::string& source, c shaderSource += std::to_string(CoarseTileCountY) + ";"; shaderSource += "\n#define CoarseTileArea "; shaderSource += std::to_string(CoarseTileArea); + shaderSource += "\n#define ClearCoarseBinMaskLocalSize "; + shaderSource += std::to_string(ClearCoarseBinMaskLocalSize); shaderSource += ComputeRendererShaders::Common; shaderSource += source; @@ -332,6 +334,7 @@ void ComputeRenderer::SetRenderSettings(int scale, bool highResolutionCoordinate TileSize = std::min(8 * TileScale, 32); CoarseTileCountY = TileSize < 32 ? 4 : 6; + ClearCoarseBinMaskLocalSize = TileSize < 32 ? 
64 : 48; CoarseTileArea = CoarseTileCountX * CoarseTileCountY; CoarseTileW = CoarseTileCountX * TileSize; CoarseTileH = CoarseTileCountY * TileSize; @@ -944,7 +947,7 @@ void ComputeRenderer::RenderFrame(GPU& gpu) glBindBufferBase(GL_UNIFORM_BUFFER, 0, MetaUniformMemory); glUseProgram(ShaderClearCoarseBinMask); - glDispatchCompute(TilesPerLine*TileLines/32, 1, 1); + glDispatchCompute(TilesPerLine*TileLines/ClearCoarseBinMaskLocalSize, 1, 1); bool wbuffer = false; if (numYSpans > 0) diff --git a/src/GPU3D_Compute.h b/src/GPU3D_Compute.h index 6a5fd499c..30766ec7a 100644 --- a/src/GPU3D_Compute.h +++ b/src/GPU3D_Compute.h @@ -169,6 +169,7 @@ class ComputeRenderer : public Renderer3D int CoarseTileArea; int CoarseTileW; int CoarseTileH; + int ClearCoarseBinMaskLocalSize; static constexpr int BinStride = 2048/32; static constexpr int CoarseBinStride = BinStride/32; diff --git a/src/GPU3D_Compute_shaders.h b/src/GPU3D_Compute_shaders.h index 9b3190295..556346109 100644 --- a/src/GPU3D_Compute_shaders.h +++ b/src/GPU3D_Compute_shaders.h @@ -846,7 +846,7 @@ void main() const std::string ClearCoarseBinMask = BinningBuffer + R"( -layout (local_size_x = 32) in; +layout (local_size_x = ClearCoarseBinMaskLocalSize) in; void main() {
Locations
Projects
Search
Status Monitor
Help
OpenBuildService.org
Documentation
API Documentation
Code of Conduct
Contact
Support
@OBShq
Terms
openSUSE Build Service is sponsored by
The Open Build Service is an openSUSE project.
Sign Up
Log In
Places
Places
All Projects
Status Monitor