Reduce amount of size calculations.

This commit is contained in:
Fernando Sahmkow 2019-05-08 18:27:29 -04:00 committed by ReinUsesLisp
parent 4e2071b6d9
commit b347543e83
8 changed files with 97 additions and 88 deletions

View File

@ -4,6 +4,7 @@
#pragma once #pragma once
#include <algorithm>
#include <string> #include <string>
#if !defined(ARCHITECTURE_x86_64) #if !defined(ARCHITECTURE_x86_64)
@ -60,4 +61,14 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) {
return a | b << 8 | c << 16 | d << 24; return a | b << 8 | c << 16 | d << 24;
} }
/// Binary search over the range [first, last), which must be partitioned with respect to
/// `comp` in the same way std::lower_bound requires.
///
/// Unlike std::lower_bound alone, `comp` is invoked in both argument orders
/// (comp(element, value) and comp(value, element)), so it must accept both.
///
/// @returns An iterator to the first element that is neither ordered before `value` nor has
///          `value` ordered before it; `last` when no such element exists.
template <class ForwardIt, class T, class Compare = std::less<>>
ForwardIt BinaryFind(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
    const ForwardIt candidate = std::lower_bound(first, last, value, comp);
    if (candidate == last) {
        // Every element is ordered before value (or the range is empty).
        return last;
    }
    if (comp(value, *candidate)) {
        // The first element not ordered before value is strictly after it: no match.
        return last;
    }
    return candidate;
}
} // namespace Common } // namespace Common

View File

@ -240,7 +240,6 @@ CachedSurface::~CachedSurface() {
} }
void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) { void CachedSurface::DownloadTexture(std::vector<u8>& staging_buffer) {
LOG_CRITICAL(Render_OpenGL, "Flushing");
MICROPROFILE_SCOPE(OpenGL_Texture_Download); MICROPROFILE_SCOPE(OpenGL_Texture_Download);
// TODO(Rodrigo): Optimize alignment // TODO(Rodrigo): Optimize alignment

View File

@ -133,7 +133,7 @@ protected:
const VideoCommon::CopyParams& copy_params) override; const VideoCommon::CopyParams& copy_params) override;
void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle<u32>& src_rect, void ImageBlit(Surface src_surface, Surface dst_surface, const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) override; const Common::Rectangle<u32>& dst_rect) override;
private: private:
OGLFramebuffer src_framebuffer; OGLFramebuffer src_framebuffer;

View File

@ -19,19 +19,27 @@ using Tegra::Texture::ConvertFromGuestToHost;
using VideoCore::MortonSwizzleMode; using VideoCore::MortonSwizzleMode;
SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params) SurfaceBaseImpl::SurfaceBaseImpl(GPUVAddr gpu_addr, const SurfaceParams& params)
: params{params}, gpu_addr{gpu_addr}, layer_size{params.GetGuestLayerSize()}, : params{params}, mipmap_sizes(params.num_levels),
guest_memory_size{params.GetGuestSizeInBytes()}, host_memory_size{ mipmap_offsets(params.num_levels), gpu_addr{gpu_addr}, host_memory_size{
params.GetHostSizeInBytes()} { params.GetHostSizeInBytes()} {
mipmap_offsets.reserve(params.num_levels);
mipmap_sizes.reserve(params.num_levels);
std::size_t offset = 0; std::size_t offset = 0;
for (u32 level = 0; level < params.num_levels; ++level) { for (u32 level = 0; level < params.num_levels; ++level) {
const std::size_t mipmap_size{params.GetGuestMipmapSize(level)}; const std::size_t mipmap_size{params.GetGuestMipmapSize(level)};
mipmap_sizes.push_back(mipmap_size); mipmap_sizes[level] = mipmap_size;
mipmap_offsets.push_back(offset); mipmap_offsets[level] = offset;
offset += mipmap_size; offset += mipmap_size;
} }
layer_size = offset;
if (params.is_layered) {
if (params.is_tiled) {
layer_size =
SurfaceParams::AlignLayered(layer_size, params.block_height, params.block_depth);
}
guest_memory_size = layer_size * params.depth;
} else {
guest_memory_size = layer_size;
}
} }
void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params, void SurfaceBaseImpl::SwizzleFunc(MortonSwizzleMode mode, u8* memory, const SurfaceParams& params,

View File

@ -9,6 +9,7 @@
#include <vector> #include <vector>
#include "common/assert.h" #include "common/assert.h"
#include "common/common_funcs.h"
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/gpu.h" #include "video_core/gpu.h"
#include "video_core/morton.h" #include "video_core/morton.h"
@ -16,16 +17,6 @@
#include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_params.h"
#include "video_core/texture_cache/surface_view.h" #include "video_core/texture_cache/surface_view.h"
/// Looks up `value` in the range [first, last) using std::lower_bound.
///
/// The range must be partitioned with respect to `comp` as std::lower_bound requires, and
/// `comp` must be callable both as comp(element, value) and comp(value, element).
///
/// @returns An iterator to the first element equivalent to `value` under `comp`, or `last`
///          when there is none.
template <class ForwardIt, class T, class Compare = std::less<>>
ForwardIt binary_find(ForwardIt first, ForwardIt last, const T& value, Compare comp = {}) {
    const ForwardIt lower = std::lower_bound(first, last, value, comp);
    // lower is the first element not ordered before value; it only matches when value is
    // not ordered before it either.
    const bool found = lower != last && !comp(value, *lower);
    return found ? lower : last;
}
namespace Tegra { namespace Tegra {
class MemoryManager; class MemoryManager;
} }
@ -153,7 +144,7 @@ public:
const auto layer{static_cast<u32>(relative_address / layer_size)}; const auto layer{static_cast<u32>(relative_address / layer_size)};
const GPUVAddr mipmap_address = relative_address - layer_size * layer; const GPUVAddr mipmap_address = relative_address - layer_size * layer;
const auto mipmap_it = const auto mipmap_it =
binary_find(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address); Common::BinaryFind(mipmap_offsets.begin(), mipmap_offsets.end(), mipmap_address);
if (mipmap_it == mipmap_offsets.end()) { if (mipmap_it == mipmap_offsets.end()) {
return {}; return {};
} }
@ -172,8 +163,8 @@ protected:
virtual void DecorateSurfaceName() = 0; virtual void DecorateSurfaceName() = 0;
const SurfaceParams params; const SurfaceParams params;
const std::size_t layer_size; std::size_t layer_size;
const std::size_t guest_memory_size; std::size_t guest_memory_size;
const std::size_t host_memory_size; const std::size_t host_memory_size;
GPUVAddr gpu_addr{}; GPUVAddr gpu_addr{};
CacheAddr cache_addr{}; CacheAddr cache_addr{};
@ -268,9 +259,11 @@ public:
return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels)); return GetView(ViewParams(overview_params.target, 0, num_layers, 0, params.num_levels));
} }
std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr) { std::optional<TView> EmplaceView(const SurfaceParams& view_params, const GPUVAddr view_addr,
if (view_addr < gpu_addr || params.target == SurfaceTarget::Texture3D || const std::size_t candidate_size) {
params.num_levels == 1 || view_params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D ||
(params.num_levels == 1 && !params.is_layered) ||
view_params.target == SurfaceTarget::Texture3D) {
return {}; return {};
} }
const auto layer_mipmap{GetLayerMipmap(view_addr)}; const auto layer_mipmap{GetLayerMipmap(view_addr)};
@ -279,8 +272,7 @@ public:
} }
const u32 layer{layer_mipmap->first}; const u32 layer{layer_mipmap->first};
const u32 mipmap{layer_mipmap->second}; const u32 mipmap{layer_mipmap->second};
const std::size_t size{view_params.GetGuestSizeInBytes()}; if (GetMipmapSize(mipmap) != candidate_size) {
if (GetMipmapSize(mipmap) != size) {
// TODO: The view may cover many mimaps, this case can still go on. // TODO: The view may cover many mimaps, this case can still go on.
// This edge-case can be safely be ignored since it will just result in worse // This edge-case can be safely be ignored since it will just result in worse
// performance. // performance.

View File

@ -4,13 +4,12 @@
#include <map> #include <map>
#include "common/cityhash.h"
#include "common/alignment.h" #include "common/alignment.h"
#include "common/cityhash.h"
#include "core/core.h" #include "core/core.h"
#include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_bytecode.h"
#include "video_core/surface.h" #include "video_core/surface.h"
#include "video_core/texture_cache/surface_params.h" #include "video_core/texture_cache/surface_params.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon { namespace VideoCommon {
@ -169,18 +168,6 @@ SurfaceParams SurfaceParams::CreateForFermiCopySurface(
return params; return params;
} }
/// Returns the width of the given mipmap level: `width` shifted right by `level`,
/// clamped so the result is never below 1.
u32 SurfaceParams::GetMipWidth(u32 level) const {
    const u32 shifted_width = width >> level;
    return std::max(1U, shifted_width);
}
/// Returns the height of the given mipmap level: `height` shifted right by `level`,
/// clamped so the result is never below 1.
u32 SurfaceParams::GetMipHeight(u32 level) const {
    const u32 shifted_height = height >> level;
    return std::max(1U, shifted_height);
}
/// Returns the depth of the given mipmap level.
/// When `is_layered` is set, `depth` is returned unchanged for every level; otherwise it is
/// shifted right by `level` and clamped so the result is never below 1.
u32 SurfaceParams::GetMipDepth(u32 level) const {
    if (is_layered) {
        return depth;
    }
    const u32 shifted_depth = depth >> level;
    return std::max(1U, shifted_depth);
}
bool SurfaceParams::IsLayered() const { bool SurfaceParams::IsLayered() const {
switch (target) { switch (target) {
case SurfaceTarget::Texture1DArray: case SurfaceTarget::Texture1DArray:
@ -275,22 +262,6 @@ std::size_t SurfaceParams::GetHostLayerSize(u32 level) const {
return GetInnerMipmapMemorySize(level, true, false); return GetInnerMipmapMemorySize(level, true, false);
} }
/// Returns the default block width, as reported by
/// VideoCore::Surface::GetDefaultBlockWidth for this surface's pixel format.
u32 SurfaceParams::GetDefaultBlockWidth() const {
    const u32 block_width = VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
    return block_width;
}
/// Returns the default block height, as reported by
/// VideoCore::Surface::GetDefaultBlockHeight for this surface's pixel format.
u32 SurfaceParams::GetDefaultBlockHeight() const {
    const u32 block_height = VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
    return block_height;
}
/// Returns the bits per pixel, as reported by VideoCore::Surface::GetFormatBpp for this
/// surface's pixel format.
u32 SurfaceParams::GetBitsPerPixel() const {
    const u32 bits_per_pixel = VideoCore::Surface::GetFormatBpp(pixel_format);
    return bits_per_pixel;
}
/// Returns the bytes per pixel, as reported by VideoCore::Surface::GetBytesPerPixel for this
/// surface's pixel format.
u32 SurfaceParams::GetBytesPerPixel() const {
    const u32 bytes_per_pixel = VideoCore::Surface::GetBytesPerPixel(pixel_format);
    return bytes_per_pixel;
}
bool SurfaceParams::IsPixelFormatZeta() const { bool SurfaceParams::IsPixelFormatZeta() const {
return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat && return pixel_format >= VideoCore::Surface::PixelFormat::MaxColorFormat &&
pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat; pixel_format < VideoCore::Surface::PixelFormat::MaxDepthStencilFormat;

View File

@ -10,8 +10,9 @@
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/fermi_2d.h" #include "video_core/engines/fermi_2d.h"
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/surface.h"
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
#include "video_core/surface.h"
#include "video_core/textures/decoders.h"
namespace VideoCommon { namespace VideoCommon {
@ -50,10 +51,17 @@ public:
std::size_t GetHostSizeInBytes() const { std::size_t GetHostSizeInBytes() const {
std::size_t host_size_in_bytes; std::size_t host_size_in_bytes;
if (IsPixelFormatASTC(pixel_format)) { if (IsPixelFormatASTC(pixel_format)) {
constexpr std::size_t rgb8_bpp = 4ULL;
// ASTC is uncompressed in software, in emulated as RGBA8 // ASTC is uncompressed in software, in emulated as RGBA8
host_size_in_bytes = static_cast<std::size_t>(Common::AlignUp(width, GetDefaultBlockWidth())) * host_size_in_bytes = 0;
static_cast<std::size_t>(Common::AlignUp(height, GetDefaultBlockHeight())) * for (std::size_t level = 0; level < num_levels; level++) {
static_cast<std::size_t>(depth) * 4ULL; const std::size_t width =
Common::AlignUp(GetMipWidth(level), GetDefaultBlockWidth());
const std::size_t height =
Common::AlignUp(GetMipHeight(level), GetDefaultBlockHeight());
const std::size_t depth = is_layered ? depth : GetMipDepth(level);
host_size_in_bytes += width * height * depth * rgb8_bpp;
}
} else { } else {
host_size_in_bytes = GetInnerMemorySize(true, false, false); host_size_in_bytes = GetInnerMemorySize(true, false, false);
} }
@ -65,13 +73,19 @@ public:
} }
/// Returns the width of a given mipmap level. /// Returns the width of a given mipmap level.
u32 GetMipWidth(u32 level) const; u32 GetMipWidth(u32 level) const {
return std::max(1U, width >> level);
}
/// Returns the height of a given mipmap level. /// Returns the height of a given mipmap level.
u32 GetMipHeight(u32 level) const; u32 GetMipHeight(u32 level) const {
return std::max(1U, height >> level);
}
/// Returns the depth of a given mipmap level. /// Returns the depth of a given mipmap level.
u32 GetMipDepth(u32 level) const; u32 GetMipDepth(u32 level) const {
return is_layered ? depth : std::max(1U, depth >> level);
}
/// Returns the block height of a given mipmap level. /// Returns the block height of a given mipmap level.
u32 GetMipBlockHeight(u32 level) const; u32 GetMipBlockHeight(u32 level) const;
@ -79,6 +93,12 @@ public:
/// Returns the block depth of a given mipmap level. /// Returns the block depth of a given mipmap level.
u32 GetMipBlockDepth(u32 level) const; u32 GetMipBlockDepth(u32 level) const;
/// Helper for size calculations performed outside of this class.
/// Passes `out_size` to Common::AlignUp with an alignment of
/// Tegra::Texture::GetGOBSize() * block_height * block_depth and returns the result.
static std::size_t AlignLayered(const std::size_t out_size, const u32 block_height,
                                const u32 block_depth) {
    const auto layer_alignment = Tegra::Texture::GetGOBSize() * block_height * block_depth;
    return Common::AlignUp(out_size, layer_alignment);
}
/// Returns the offset in bytes in guest memory of a given mipmap level. /// Returns the offset in bytes in guest memory of a given mipmap level.
std::size_t GetGuestMipmapLevelOffset(u32 level) const; std::size_t GetGuestMipmapLevelOffset(u32 level) const;
@ -98,16 +118,24 @@ public:
std::size_t GetHostLayerSize(u32 level) const; std::size_t GetHostLayerSize(u32 level) const;
/// Returns the default block width. /// Returns the default block width.
u32 GetDefaultBlockWidth() const; u32 GetDefaultBlockWidth() const {
return VideoCore::Surface::GetDefaultBlockWidth(pixel_format);
}
/// Returns the default block height. /// Returns the default block height.
u32 GetDefaultBlockHeight() const; u32 GetDefaultBlockHeight() const {
return VideoCore::Surface::GetDefaultBlockHeight(pixel_format);
}
/// Returns the bits per pixel. /// Returns the bits per pixel.
u32 GetBitsPerPixel() const; u32 GetBitsPerPixel() const {
return VideoCore::Surface::GetFormatBpp(pixel_format);
}
/// Returns the bytes per pixel. /// Returns the bytes per pixel.
u32 GetBytesPerPixel() const; u32 GetBytesPerPixel() const {
return VideoCore::Surface::GetBytesPerPixel(pixel_format);
}
/// Returns true if the pixel format is a depth and/or stencil format. /// Returns true if the pixel format is a depth and/or stencil format.
bool IsPixelFormatZeta() const; bool IsPixelFormatZeta() const;

View File

@ -120,10 +120,6 @@ public:
return {}; return {};
} }
if (regs.color_mask[index].raw == 0) {
return {};
}
auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index), auto surface_view = GetSurface(gpu_addr, SurfaceParams::CreateForFramebuffer(system, index),
preserve_contents); preserve_contents);
if (render_targets[index].target) if (render_targets[index].target)
@ -165,7 +161,9 @@ public:
const Tegra::Engines::Fermi2D::Regs::Surface& dst_config, const Tegra::Engines::Fermi2D::Regs::Surface& dst_config,
const Common::Rectangle<u32>& src_rect, const Common::Rectangle<u32>& src_rect,
const Common::Rectangle<u32>& dst_rect) { const Common::Rectangle<u32>& dst_rect) {
ImageBlit(GetFermiSurface(src_config), GetFermiSurface(dst_config), src_rect, dst_rect); TSurface dst_surface = GetFermiSurface(dst_config);
ImageBlit(GetFermiSurface(src_config), dst_surface, src_rect, dst_rect);
dst_surface->MarkAsModified(true, Tick());
} }
TSurface TryFindFramebufferSurface(const u8* host_ptr) { TSurface TryFindFramebufferSurface(const u8* host_ptr) {
@ -270,10 +268,6 @@ private:
RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params, RecycleStrategy PickStrategy(std::vector<TSurface>& overlaps, const SurfaceParams& params,
const GPUVAddr gpu_addr, const bool untopological) { const GPUVAddr gpu_addr, const bool untopological) {
// Untopological decision
if (untopological) {
return RecycleStrategy::Ignore;
}
// 3D Textures decision // 3D Textures decision
if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) { if (params.block_depth > 1 || params.target == SurfaceTarget::Texture3D) {
return RecycleStrategy::Flush; return RecycleStrategy::Flush;
@ -284,12 +278,16 @@ private:
return RecycleStrategy::Flush; return RecycleStrategy::Flush;
} }
} }
// Untopological decision
if (untopological) {
return RecycleStrategy::Ignore;
}
return RecycleStrategy::Ignore; return RecycleStrategy::Ignore;
} }
std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps, std::pair<TSurface, TView> RecycleSurface(std::vector<TSurface>& overlaps,
const SurfaceParams& params, const GPUVAddr gpu_addr, const SurfaceParams& params, const GPUVAddr gpu_addr,
const u8* host_ptr, const bool preserve_contents, const bool preserve_contents,
const bool untopological) { const bool untopological) {
for (auto surface : overlaps) { for (auto surface : overlaps) {
Unregister(surface); Unregister(surface);
@ -328,6 +326,7 @@ private:
} }
Unregister(current_surface); Unregister(current_surface);
Register(new_surface); Register(new_surface);
new_surface->MarkAsModified(current_surface->IsModified(), Tick());
return {new_surface, new_surface->GetMainView()}; return {new_surface, new_surface->GetMainView()};
} }
@ -351,6 +350,7 @@ private:
if (params.target == SurfaceTarget::Texture3D) { if (params.target == SurfaceTarget::Texture3D) {
return {}; return {};
} }
bool modified = false;
TSurface new_surface = GetUncachedSurface(gpu_addr, params); TSurface new_surface = GetUncachedSurface(gpu_addr, params);
for (auto surface : overlaps) { for (auto surface : overlaps) {
const SurfaceParams& src_params = surface->GetSurfaceParams(); const SurfaceParams& src_params = surface->GetSurfaceParams();
@ -358,7 +358,7 @@ private:
// We send this cases to recycle as they are more complex to handle // We send this cases to recycle as they are more complex to handle
return {}; return {};
} }
const std::size_t candidate_size = src_params.GetGuestSizeInBytes(); const std::size_t candidate_size = surface->GetSizeInBytes();
auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())}; auto mipmap_layer{new_surface->GetLayerMipmap(surface->GetGpuAddr())};
if (!mipmap_layer) { if (!mipmap_layer) {
return {}; return {};
@ -368,6 +368,7 @@ private:
if (new_surface->GetMipmapSize(mipmap) != candidate_size) { if (new_surface->GetMipmapSize(mipmap) != candidate_size) {
return {}; return {};
} }
modified |= surface->IsModified();
// Now we got all the data set up // Now we got all the data set up
const u32 dst_width{params.GetMipWidth(mipmap)}; const u32 dst_width{params.GetMipWidth(mipmap)};
const u32 dst_height{params.GetMipHeight(mipmap)}; const u32 dst_height{params.GetMipHeight(mipmap)};
@ -381,6 +382,7 @@ private:
force_reconfiguration |= surface->IsProtected(); force_reconfiguration |= surface->IsProtected();
Unregister(surface, true); Unregister(surface, true);
} }
new_surface->MarkAsModified(modified, Tick());
Register(new_surface); Register(new_surface);
return {{new_surface, new_surface->GetMainView()}}; return {{new_surface, new_surface->GetMainView()}};
} }
@ -399,8 +401,7 @@ private:
for (auto surface : overlaps) { for (auto surface : overlaps) {
if (!surface->MatchesTopology(params)) { if (!surface->MatchesTopology(params)) {
return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, true);
true);
} }
} }
@ -418,27 +419,26 @@ private:
} }
} }
if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) { if (!current_surface->IsInside(gpu_addr, gpu_addr + candidate_size)) {
return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
false);
} }
std::optional<TView> view = current_surface->EmplaceView(params, gpu_addr); std::optional<TView> view =
current_surface->EmplaceView(params, gpu_addr, candidate_size);
if (view.has_value()) { if (view.has_value()) {
const bool is_mirage = !current_surface->MatchFormat(params.pixel_format); const bool is_mirage = !current_surface->MatchFormat(params.pixel_format);
if (is_mirage) { if (is_mirage) {
LOG_CRITICAL(HW_GPU, "Mirage View Unsupported"); LOG_CRITICAL(HW_GPU, "Mirage View Unsupported");
return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
false);
} }
return {current_surface, *view}; return {current_surface, *view};
} }
return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
} else { } else {
std::optional<std::pair<TSurface, TView>> view = std::optional<std::pair<TSurface, TView>> view =
ReconstructSurface(overlaps, params, gpu_addr, host_ptr); ReconstructSurface(overlaps, params, gpu_addr, host_ptr);
if (view.has_value()) { if (view.has_value()) {
return *view; return *view;
} }
return RecycleSurface(overlaps, params, gpu_addr, host_ptr, preserve_contents, false); return RecycleSurface(overlaps, params, gpu_addr, preserve_contents, false);
} }
} }