Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions src/xenia/gpu/spirv_shader_translator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1931,8 +1931,7 @@ void SpirvShaderTranslator::StartFragmentShaderBeforeMain() {
}

void SpirvShaderTranslator::StartFragmentShaderInMain() {
// TODO(Triang3l): Allow memory export with resolution scaling only for the
// center host pixel, with sample shading (for depth format conversion) only
// TODO(Triang3l): Allow memory export with sample shading (for depth format
// conversion) only
// for the bottom-right sample (unlike in Direct3D, the sample mask input
// doesn't include covered samples of the primitive that correspond to other
// invocations, so use the sample that's the most friendly to the half-pixel
Expand Down Expand Up @@ -2088,7 +2087,6 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
// see the actual hardware instructions in both OpBitwiseXor and OpFNegate
// cases.
spv::Id const_sign_bit = builder_->makeUintConstant(UINT32_C(1) << 31);
// TODO(Triang3l): Resolution scale inversion.
// X - pixel X .0 in the magnitude, is back-facing in the sign bit.
assert_true(input_fragment_coordinates_ != spv::NoResult);
id_vector_temp_.clear();
Expand All @@ -2102,6 +2100,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
// Apply resolution scale inversion after truncating.
if (draw_resolution_scale_x_ > 1) {
param_gen_x = builder_->createBinOp(
spv::OpFMul, type_float_, param_gen_x,
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_x_)));
}
if (!modification.pixel.param_gen_point) {
assert_true(input_front_facing_ != spv::NoResult);
param_gen_x = builder_->createTriOp(
Expand Down Expand Up @@ -2137,6 +2141,12 @@ void SpirvShaderTranslator::StartFragmentShaderInMain() {
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision)));
// Apply resolution scale inversion after truncating.
if (draw_resolution_scale_y_ > 1) {
param_gen_y = builder_->createBinOp(
spv::OpFMul, type_float_, param_gen_y,
builder_->makeFloatConstant(1.0f / float(draw_resolution_scale_y_)));
}
if (modification.pixel.param_gen_point) {
param_gen_y = builder_->createUnaryOp(
spv::OpBitcast, type_float_,
Expand Down
10 changes: 8 additions & 2 deletions src/xenia/gpu/spirv_shader_translator.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,11 +349,15 @@ class SpirvShaderTranslator : public ShaderTranslator {
SpirvShaderTranslator(const Features& features,
bool native_2x_msaa_with_attachments,
bool native_2x_msaa_no_attachments,
bool edram_fragment_shader_interlock)
bool edram_fragment_shader_interlock,
uint32_t draw_resolution_scale_x = 1,
uint32_t draw_resolution_scale_y = 1)
: features_(features),
native_2x_msaa_with_attachments_(native_2x_msaa_with_attachments),
native_2x_msaa_no_attachments_(native_2x_msaa_no_attachments),
edram_fragment_shader_interlock_(edram_fragment_shader_interlock) {}
edram_fragment_shader_interlock_(edram_fragment_shader_interlock),
draw_resolution_scale_x_(draw_resolution_scale_x),
draw_resolution_scale_y_(draw_resolution_scale_y) {}

uint64_t GetDefaultVertexShaderModification(
uint32_t dynamic_addressable_register_count,
Expand Down Expand Up @@ -711,6 +715,8 @@ class SpirvShaderTranslator : public ShaderTranslator {
Features features_;
bool native_2x_msaa_with_attachments_;
bool native_2x_msaa_no_attachments_;
uint32_t draw_resolution_scale_x_;
uint32_t draw_resolution_scale_y_;

// For safety with different drivers (even though fragment shader interlock in
// SPIR-V only has one control flow requirement - that both begin and end must
Expand Down
4 changes: 3 additions & 1 deletion src/xenia/gpu/spirv_shader_translator_fetch.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,9 @@ void SpirvShaderTranslator::ProcessTextureFetchInstruction(
coordinates[coordinate_component_index] = coordinates_operand;
}

// TODO(Triang3l): Reverting the resolution scale.
// The resolution scale doesn't need to be reverted for texture weights: the
// weights are calculated from the fractional parts of the coordinates, which
// are scale-independent.

if (instr.opcode == ucode::FetchOpcode::kGetTextureWeights) {
// FIXME(Triang3l): Filtering modes should possibly be taken into account,
Expand Down
68 changes: 64 additions & 4 deletions src/xenia/gpu/spirv_shader_translator_memexport.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,70 @@ void SpirvShaderTranslator::ExportToMemory(uint8_t export_eM) {

// Check if memory export is allowed in this guest shader invocation.
std::optional<SpirvBuilder::IfBuilder> if_memexport_allowed;
if (main_memexport_allowed_ != spv::NoResult) {
if_memexport_allowed.emplace(main_memexport_allowed_,
spv::SelectionControlDontFlattenMask,
*builder_);
spv::Id memexport_allowed = main_memexport_allowed_;

// For pixel shaders with resolution scaling, only allow memory export from
// the center host pixel to avoid duplicate exports.
if (is_pixel_shader() &&
(draw_resolution_scale_x_ > 1 || draw_resolution_scale_y_ > 1)) {
assert_true(input_fragment_coordinates_ != spv::NoResult);

// Check if we're at the center host pixel: along each scaled axis, the pixel
// coordinate modulo the scale must be equal to scale / 2.
spv::Id is_center_pixel = builder_->makeBoolConstant(true);

// Check X coordinate.
if (draw_resolution_scale_x_ > 1) {
id_vector_temp_.clear();
id_vector_temp_.push_back(const_int_0_);
spv::Id pixel_x = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision));
spv::Id pixel_x_remainder = builder_->createBinOp(
spv::OpUMod, type_uint_, pixel_x,
builder_->makeUintConstant(draw_resolution_scale_x_));
is_center_pixel = builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, is_center_pixel,
builder_->createBinOp(
spv::OpIEqual, type_bool_, pixel_x_remainder,
builder_->makeUintConstant(draw_resolution_scale_x_ >> 1)));
}

// Check Y coordinate.
if (draw_resolution_scale_y_ > 1) {
id_vector_temp_.clear();
id_vector_temp_.push_back(builder_->makeIntConstant(1));
spv::Id pixel_y = builder_->createUnaryOp(
spv::OpConvertFToU, type_uint_,
builder_->createLoad(
builder_->createAccessChain(spv::StorageClassInput,
input_fragment_coordinates_,
id_vector_temp_),
spv::NoPrecision));
spv::Id pixel_y_remainder = builder_->createBinOp(
spv::OpUMod, type_uint_, pixel_y,
builder_->makeUintConstant(draw_resolution_scale_y_));
is_center_pixel = builder_->createBinOp(
spv::OpLogicalAnd, type_bool_, is_center_pixel,
builder_->createBinOp(
spv::OpIEqual, type_bool_, pixel_y_remainder,
builder_->makeUintConstant(draw_resolution_scale_y_ >> 1)));
}

// Combine with existing memexport_allowed condition.
memexport_allowed =
memexport_allowed != spv::NoResult
? builder_->createBinOp(spv::OpLogicalAnd, type_bool_,
memexport_allowed, is_center_pixel)
: is_center_pixel;
}

if (memexport_allowed != spv::NoResult) {
if_memexport_allowed.emplace(
memexport_allowed, spv::SelectionControlDontFlattenMask, *builder_);
}

// If the pixel was killed (but the actual killing on the SPIR-V side has not
Expand Down
18 changes: 11 additions & 7 deletions src/xenia/gpu/spirv_shader_translator_rb.cc
Original file line number Diff line number Diff line change
Expand Up @@ -765,9 +765,10 @@ void SpirvShaderTranslator::CompleteFragmentShaderInMain() {
fsi_color_targets_written =
builder_->createLoad(var_main_fsi_color_written_, spv::NoPrecision);
fsi_const_int_1 = builder_->makeIntConstant(1);
// TODO(Triang3l): Resolution scaling.
// Apply resolution scaling to EDRAM size.
fsi_const_edram_size_dwords = builder_->makeUintConstant(
xenos::kEdramTileWidthSamples * xenos::kEdramTileHeightSamples *
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_ *
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_ *
xenos::kEdramTileCount);
for (uint32_t i = 0; i < 4; ++i) {
fsi_samples_covered[i] = builder_->createBinOp(
Expand Down Expand Up @@ -1449,10 +1450,12 @@ void SpirvShaderTranslator::FSI_LoadEdramOffsets(spv::Id msaa_samples) {
// Get 40 x 16 x resolution scale 32bpp half-tile or 40x16 64bpp tile index.
// Working with 40x16-sample portions for 64bpp and for swapping for depth -
// dividing by 40, not by 80.
// TODO(Triang3l): Resolution scaling.
uint32_t tile_width = xenos::kEdramTileWidthSamples;
// Apply resolution scaling to tile dimensions.
uint32_t tile_width =
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
spv::Id const_tile_half_width = builder_->makeUintConstant(tile_width >> 1);
uint32_t tile_height = xenos::kEdramTileHeightSamples;
uint32_t tile_height =
xenos::kEdramTileHeightSamples * draw_resolution_scale_y_;
spv::Id const_tile_height = builder_->makeUintConstant(tile_height);
spv::Id tile_half_index[2], tile_half_sample_coordinates[2];
for (uint32_t i = 0; i < 2; ++i) {
Expand Down Expand Up @@ -1565,8 +1568,9 @@ spv::Id SpirvShaderTranslator::FSI_AddSampleOffset(spv::Id sample_0_address,
return sample_0_address;
}
spv::Id sample_offset;
// TODO(Triang3l): Resolution scaling.
uint32_t tile_width = xenos::kEdramTileWidthSamples;
// Apply resolution scaling to tile width.
uint32_t tile_width =
xenos::kEdramTileWidthSamples * draw_resolution_scale_x_;
if (sample_index == 1) {
sample_offset = builder_->makeIntConstant(tile_width);
} else {
Expand Down
13 changes: 9 additions & 4 deletions src/xenia/gpu/texture_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -183,10 +183,15 @@ TextureCache::~TextureCache() {

bool TextureCache::GetConfigDrawResolutionScale(uint32_t& x_out,
uint32_t& y_out) {
uint32_t config_x =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_x));
uint32_t config_y =
uint32_t(std::max(INT32_C(1), cvars::draw_resolution_scale_y));
// Clamp to the valid range [1, kMaxDrawResolutionScaleAlongAxis] so that the
// conversion to uint32_t is safe.
int32_t config_x_signed =
std::clamp(cvars::draw_resolution_scale_x, INT32_C(1),
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
int32_t config_y_signed =
std::clamp(cvars::draw_resolution_scale_y, INT32_C(1),
static_cast<int32_t>(kMaxDrawResolutionScaleAlongAxis));
uint32_t config_x = static_cast<uint32_t>(config_x_signed);
uint32_t config_y = static_cast<uint32_t>(config_y_signed);
uint32_t clamped_x = std::min(kMaxDrawResolutionScaleAlongAxis, config_x);
uint32_t clamped_y = std::min(kMaxDrawResolutionScaleAlongAxis, config_y);
x_out = clamped_x;
Expand Down
33 changes: 23 additions & 10 deletions src/xenia/gpu/vulkan/vulkan_command_processor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -273,10 +273,13 @@ bool VulkanCommandProcessor::SetupContext() {
<< shared_memory_binding_count_log2;

// Requires the transient descriptor set layouts.
// TODO(Triang3l): Get the actual draw resolution scale when the texture cache
// supports resolution scaling.
// Get the draw resolution scale using the same method as D3D12.
uint32_t draw_resolution_scale_x, draw_resolution_scale_y;
TextureCache::GetConfigDrawResolutionScale(draw_resolution_scale_x,
draw_resolution_scale_y);
render_target_cache_ = std::make_unique<VulkanRenderTargetCache>(
*register_file_, *memory_, trace_writer_, 1, 1, *this);
*register_file_, *memory_, trace_writer_, draw_resolution_scale_x,
draw_resolution_scale_y, *this);
if (!render_target_cache_->Initialize(shared_memory_binding_count)) {
XELOGE("Failed to initialize the render target cache");
return false;
Expand Down Expand Up @@ -339,10 +342,10 @@ bool VulkanCommandProcessor::SetupContext() {
}

// Requires the transient descriptor set layouts.
// TODO(Triang3l): Actual draw resolution scale.
texture_cache_ =
VulkanTextureCache::Create(*register_file_, *shared_memory_, 1, 1, *this,
guest_shader_pipeline_stages_);
// Use the same draw resolution scale as the render target cache.
texture_cache_ = VulkanTextureCache::Create(
*register_file_, *shared_memory_, draw_resolution_scale_x,
draw_resolution_scale_y, *this, guest_shader_pipeline_stages_);
if (!texture_cache_) {
XELOGE("Failed to initialize the texture cache");
return false;
Expand Down Expand Up @@ -2442,15 +2445,19 @@ bool VulkanCommandProcessor::IssueDraw(xenos::PrimitiveType prim_type,
// life. Or even disregard the viewport bounds range in the fragment shader
// interlocks case completely - apply the viewport and the scissor offset
// directly to pixel address and to things like ps_param_gen.
uint32_t draw_resolution_scale_x = texture_cache_->draw_resolution_scale_x();
uint32_t draw_resolution_scale_y = texture_cache_->draw_resolution_scale_y();
draw_util::GetHostViewportInfo(
regs, 1, 1, false, device_properties.maxViewportDimensions[0],
regs, draw_resolution_scale_x, draw_resolution_scale_y, false,
device_properties.maxViewportDimensions[0],
device_properties.maxViewportDimensions[1], true,
normalized_depth_control, false, host_render_targets_used,
pixel_shader && pixel_shader->writes_depth(), viewport_info);

// Update dynamic graphics pipeline state.
UpdateDynamicState(viewport_info, primitive_polygonal,
normalized_depth_control);
normalized_depth_control, draw_resolution_scale_x,
draw_resolution_scale_y);

auto vgt_draw_initiator = regs.Get<reg::VGT_DRAW_INITIATOR>();

Expand Down Expand Up @@ -3243,7 +3250,8 @@ void VulkanCommandProcessor::DestroyScratchBuffer() {

void VulkanCommandProcessor::UpdateDynamicState(
const draw_util::ViewportInfo& viewport_info, bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control) {
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t draw_resolution_scale_x, uint32_t draw_resolution_scale_y) {
#if XE_GPU_FINE_GRAINED_DRAW_SCOPES
SCOPE_profile_cpu_f("gpu");
#endif // XE_GPU_FINE_GRAINED_DRAW_SCOPES
Expand Down Expand Up @@ -3279,6 +3287,11 @@ void VulkanCommandProcessor::UpdateDynamicState(
// Scissor.
draw_util::Scissor scissor;
draw_util::GetScissor(regs, scissor);
// Scale the scissor to match the render target resolution scale.
scissor.offset[0] *= draw_resolution_scale_x;
scissor.offset[1] *= draw_resolution_scale_y;
scissor.extent[0] *= draw_resolution_scale_x;
scissor.extent[1] *= draw_resolution_scale_y;
VkRect2D scissor_rect;
scissor_rect.offset.x = int32_t(scissor.offset[0]);
scissor_rect.offset.y = int32_t(scissor.offset[1]);
Expand Down
4 changes: 3 additions & 1 deletion src/xenia/gpu/vulkan/vulkan_command_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,9 @@ class VulkanCommandProcessor : public CommandProcessor {

void UpdateDynamicState(const draw_util::ViewportInfo& viewport_info,
bool primitive_polygonal,
reg::RB_DEPTHCONTROL normalized_depth_control);
reg::RB_DEPTHCONTROL normalized_depth_control,
uint32_t draw_resolution_scale_x,
uint32_t draw_resolution_scale_y);
void UpdateSystemConstantValues(
bool primitive_polygonal,
const PrimitiveProcessor::ProcessingResult& primitive_processing_result,
Expand Down
4 changes: 3 additions & 1 deletion src/xenia/gpu/vulkan/vulkan_pipeline_cache.cc
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,9 @@ bool VulkanPipelineCache::Initialize() {
SpirvShaderTranslator::Features(vulkan_device),
render_target_cache_.msaa_2x_attachments_supported(),
render_target_cache_.msaa_2x_no_attachments_supported(),
edram_fragment_shader_interlock);
edram_fragment_shader_interlock,
render_target_cache_.draw_resolution_scale_x(),
render_target_cache_.draw_resolution_scale_y());

if (edram_fragment_shader_interlock) {
std::vector<uint8_t> depth_only_fragment_shader_code =
Expand Down
Loading