Improve rasterization performance
Test / build_and_test (push) Successful in 33s
Build / build (push) Successful in 1m2s

This commit is contained in:
2026-05-14 21:44:53 +02:00
parent 1eb367ac17
commit d460f22a45
10 changed files with 227 additions and 112 deletions
+11 -7
View File
@@ -360,7 +360,7 @@ pub fn getTexelMemoryOffset(self: *const Self, offset: vk.Offset3D, subresource:
return try self.getSubresourceOffset(subresource.aspect_mask, subresource.mip_level, subresource.array_layer) + self.getTexelMemoryOffsetInSubresource(offset, subresource);
}
fn getSubresourceOffset(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, layer: u32) VkError!usize {
pub fn getSubresourceOffset(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, layer: u32) VkError!usize {
var offset = try self.getAspectOffset(aspect_mask);
for (0..mip_level) |mip| {
offset += self.getMultiSampledLevelSize(aspect_mask, @intCast(mip));
@@ -464,18 +464,22 @@ pub fn getMipLevelExtent(self: *const Self, mip_level: u32) vk.Extent3D {
pub fn getSliceMemSizeForMipLevel(interface: *const Interface, aspect_mask: vk.ImageAspectFlags, mip_level: u32) usize {
const self: *const Self = @alignCast(@fieldParentPtr("interface", interface));
const mip_extent = self.getMipLevelExtent(mip_level);
const format = self.interface.formatFromAspect(aspect_mask);
return base.format.sliceMemSize(format, mip_extent.width, mip_extent.height);
return self.getSliceMemSizeForMipLevelWithFormat(aspect_mask, mip_level, interface.format);
}
/// Row pitch of `mip_level` for the given aspect, using the image's own format.
/// Thin wrapper: resolves the implementation struct that embeds `interface`
/// and forwards to `getRowPitchMemSizeForMipLevelWithFormat`.
pub fn getRowPitchMemSizeForMipLevel(interface: *const Interface, aspect_mask: vk.ImageAspectFlags, mip_level: u32) usize {
    const image: *const Self = @alignCast(@fieldParentPtr("interface", interface));
    const image_format = interface.format;
    return image.getRowPitchMemSizeForMipLevelWithFormat(aspect_mask, mip_level, image_format);
}
pub fn getSliceMemSizeForMipLevelWithFormat(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, format: vk.Format) usize {
const mip_extent = self.getMipLevelExtent(mip_level);
const format = self.interface.formatFromAspect(aspect_mask);
return base.format.pitchMemSize(format, mip_extent.width);
return base.format.sliceMemSize(base.format.fromAspect(format, aspect_mask), mip_extent.width, mip_extent.height);
}
/// Row pitch of `mip_level` when the image is interpreted with `format`
/// instead of its own format. The format is first narrowed to the requested
/// aspect, then the pitch is derived from the mip level's width.
pub fn getRowPitchMemSizeForMipLevelWithFormat(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, format: vk.Format) usize {
    const level_width = self.getMipLevelExtent(mip_level).width;
    const aspect_format = base.format.fromAspect(format, aspect_mask);
    return base.format.pitchMemSize(aspect_format, level_width);
}
pub inline fn mapAs(self: *const Self, comptime T: type) VkError!*T {
+16 -9
View File
@@ -19,9 +19,14 @@ const SoftShaderModule = @import("SoftShaderModule.zig");
const Self = @This();
pub const Interface = base.Pipeline;
/// A `spv.Runtime` paired with a mutex.
/// One instance exists per entry of a shader stage's `runtimes` slice; both
/// fields are set individually when the pipeline is created.
const Runtime = struct {
/// Lock associated with `rt`. The fragment stage's `shaderInvocation` takes it
/// before using `rt`; the compute and vertex paths shown in this change use
/// `rt` without locking.
mutex: std.Io.Mutex,
/// The wrapped `spv.Runtime` instance.
rt: spv.Runtime,
};
const Shader = struct {
module: *SoftShaderModule,
runtimes: []spv.Runtime,
runtimes: []Runtime,
entry: []const u8,
};
@@ -77,10 +82,11 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
soft_module.ref();
shader.module = soft_module;
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
const runtimes = runtimes_allocator.alloc(Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
for (runtimes) |*runtime| {
runtime.* = spv.Runtime.init(
runtime.mutex = .init;
runtime.rt = spv.Runtime.init(
runtimes_allocator,
&soft_module.module,
.{
@@ -97,7 +103,7 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
if (specialization.p_map_entries) |map| {
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
runtime.addSpecializationInfo(
runtime.rt.addSpecializationInfo(
runtimes_allocator,
.{
.id = @intCast(entry.constant_id),
@@ -160,10 +166,11 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
soft_module.ref();
shader.module = soft_module;
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfHostMemory;
const runtimes = runtimes_allocator.alloc(Runtime, runtimes_count) catch return VkError.OutOfHostMemory;
for (runtimes) |*runtime| {
runtime.* = spv.Runtime.init(
runtime.mutex = .init;
runtime.rt = spv.Runtime.init(
runtimes_allocator,
&soft_module.module,
.{
@@ -180,7 +187,7 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
if (specialization.p_map_entries) |map| {
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
runtime.addSpecializationInfo(runtimes_allocator, .{
runtime.rt.addSpecializationInfo(runtimes_allocator, .{
.id = @intCast(entry.constant_id),
.offset = @intCast(entry.offset),
.size = @intCast(entry.size),
@@ -230,8 +237,8 @@ pub fn destroy(interface: *Interface, allocator: std.mem.Allocator) void {
var it = self.stages.iterator();
while (it.next()) |entry| {
entry.value.module.unref(allocator);
for (entry.value.runtimes) |*rt| {
rt.function_stack.clearAndFree(device_allocator); // Hacky to avoid leaks
for (entry.value.runtimes) |*runtime| {
runtime.rt.function_stack.clearAndFree(device_allocator); // Hacky to avoid leaks
}
}
self.runtimes_allocator.deinit();
-10
View File
@@ -6,7 +6,6 @@ const Self = @This();
const Allocator = std.mem.Allocator;
const Alignment = std.mem.Alignment;
mutex: base.SpinMutex,
child_allocator: std.mem.Allocator,
bound: usize,
total_bytes_allocated: std.atomic.Value(usize),
@@ -15,7 +14,6 @@ current_bytes_allocated: std.atomic.Value(usize),
pub fn init(child_allocator: Allocator, bound: usize) Self {
return .{
.mutex = .{},
.child_allocator = child_allocator,
.bound = bound,
.total_bytes_allocated = std.atomic.Value(usize).init(0),
@@ -46,8 +44,6 @@ pub inline fn queryPeakFootprint(self: *Self) usize {
fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
const self: *Self = @ptrCast(@alignCast(context));
self.mutex.lock();
defer self.mutex.unlock();
if (self.current_bytes_allocated.fetchAdd(len, .monotonic) >= self.bound)
return null;
_ = self.total_bytes_allocated.fetchAdd(len, .monotonic);
@@ -58,8 +54,6 @@ fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize)
fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
const self: *Self = @ptrCast(@alignCast(context));
self.mutex.lock();
defer self.mutex.unlock();
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
return false;
@@ -69,8 +63,6 @@ fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize,
fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
const self: *Self = @ptrCast(@alignCast(context));
self.mutex.lock();
defer self.mutex.unlock();
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
return null;
@@ -80,8 +72,6 @@ fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, r
fn free(context: *anyopaque, ptr: []u8, alignment: Alignment, ret_addr: usize) void {
const self: *Self = @ptrCast(@alignCast(context));
self.mutex.lock();
defer self.mutex.unlock();
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
return self.child_allocator.rawFree(ptr, alignment, ret_addr);
}
+1 -1
View File
@@ -92,7 +92,7 @@ inline fn run(data: RunData) !void {
const io = data.self.device.interface.io();
const shader = data.pipeline.stages.getPtrAssertContains(.compute);
const rt = &shader.runtimes[data.batch_id];
const rt = &shader.runtimes[data.batch_id].rt;
const entry = try rt.getEntryPointByName(shader.entry);
+3
View File
@@ -65,6 +65,8 @@ pub const DrawCall = struct {
render_pass: *SoftRenderPass,
framebuffer: *SoftFramebuffer,
rasterizer_wait_group: std.Io.Group,
stats: struct {
polygons_drawn: usize,
},
@@ -82,6 +84,7 @@ pub const DrawCall = struct {
.depth_attachment = if (render_pass.interface.subpasses[0].depth_stencil_attachments) |desc| framebuffer.interface.attachments[desc.attachment] else null,
.render_pass = render_pass,
.framebuffer = framebuffer,
.rasterizer_wait_group = .init,
.stats = .{
.polygons_drawn = 0,
},
+8 -1
View File
@@ -13,11 +13,18 @@ const VkError = base.VkError;
const SpvRuntimeError = spv.Runtime.RuntimeError;
pub fn shaderInvocation(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, batch_id: usize, position: zm.F32x4, inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]const u8) SpvRuntimeError!zm.F32x4 {
const io = draw_call.renderer.device.interface.io();
_ = position;
const pipeline = draw_call.renderer.state.pipeline orelse return zm.f32x4s(0.0);
const shader = pipeline.stages.getPtrAssertContains(.fragment);
const rt = &shader.runtimes[batch_id];
const runtime = &shader.runtimes[batch_id];
const mutex = &runtime.mutex;
const rt = &runtime.rt;
mutex.lock(io) catch return SpvRuntimeError.Unknown;
defer mutex.unlock(io);
const entry = try rt.getEntryPointByName(shader.entry);
const output_result = try rt.getResultByLocation(0, .output);
+131 -8
View File
@@ -5,17 +5,67 @@ const clip = @import("clip.zig");
const bresenham = @import("rasterizer/bresenham.zig");
const edge_function = @import("rasterizer/edge_function.zig");
const common = @import("rasterizer/common.zig");
const Renderer = @import("Renderer.zig");
const Vertex = Renderer.Vertex;
const DrawCall = Renderer.DrawCall;
const SoftImage = @import("../SoftImage.zig");
const VkError = base.VkError;
pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void {
const io = draw_call.renderer.device.interface.io();
const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
const topology = pipeline_data.input_assembly.topology;
const color_attachment = if (draw_call.render_pass.interface.subpasses[0].color_attachments) |attachments| attachments[0].attachment else return VkError.InvalidAttachmentDrv;
const render_target_view: *base.ImageView = draw_call.color_attachments[color_attachment];
const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
const color_range = render_target_view.subresource_range;
const color_format = render_target_view.format;
const color_attachment_subresource_offset = try render_target.getSubresourceOffset(
color_range.aspect_mask,
color_range.base_mip_level,
color_range.base_array_layer,
);
const color_attachment_subresource_size = render_target.getLayerSize(color_range.aspect_mask);
const color_attachment_access: common.RenderTargetAccess = .{
.mutex = undefined,
.base = try render_target.mapAsSliceWithAddedOffset(u8, color_attachment_subresource_offset, color_attachment_subresource_size),
.row_pitch = render_target.getRowPitchMemSizeForMipLevelWithFormat(color_range.aspect_mask, color_range.base_mip_level, color_format),
.texel_size = base.format.texelSize(color_format),
.format = color_format,
};
const depth_attachment_view: ?*base.ImageView = if (draw_call.depth_attachment) |view| view else null;
const depth_attachment: ?*SoftImage = if (depth_attachment_view) |view| @alignCast(@fieldParentPtr("interface", view.image)) else null;
// Raw-memory view of the depth attachment, or null when the draw call has none.
// Declared `var` because the rasterizer receives a mutable pointer to it and
// locks the embedded mutex through that pointer.
var depth_attachment_access: ?common.RenderTargetAccess = blk: {
    const depth_view = depth_attachment_view orelse break :blk null;
    const depth_image = depth_attachment orelse break :blk null;

    const depth_range = depth_view.subresource_range;
    const depth_format = depth_view.format;

    const attachment_subresource_offset = try depth_image.getSubresourceOffset(
        depth_range.aspect_mask,
        depth_range.base_mip_level,
        depth_range.base_array_layer,
    );
    const attachment_subresource_size = depth_image.getLayerSize(depth_range.aspect_mask);

    break :blk .{
        .mutex = .init,
        .base = try depth_image.mapAsSliceWithAddedOffset(u8, attachment_subresource_offset, attachment_subresource_size),
        // The pitch must come from the depth image itself: it is derived from
        // that image's mip extent, which need not match the color target's.
        .row_pitch = depth_image.getRowPitchMemSizeForMipLevelWithFormat(depth_range.aspect_mask, depth_range.base_mip_level, depth_format),
        .texel_size = base.format.texelSize(depth_format),
        .format = depth_format,
    };
};
switch (topology) {
.triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| {
const first_vertex = triangle_index * 3;
@@ -23,7 +73,16 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
const v1 = &draw_call.vertices[first_vertex + 1];
const v2 = &draw_call.vertices[first_vertex + 2];
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
try clipTransformAndRasterizeTriangle(
renderer,
allocator,
draw_call,
v0,
v1,
v2,
&color_attachment_access,
if (depth_attachment_access) |*access| access else null,
);
},
.triangle_fan => if (draw_call.vertices.len >= 3) {
const v0 = &draw_call.vertices[0];
@@ -31,7 +90,16 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
const v1 = &draw_call.vertices[vertex_index];
const v2 = &draw_call.vertices[vertex_index + 1];
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
try clipTransformAndRasterizeTriangle(
renderer,
allocator,
draw_call,
v0,
v1,
v2,
&color_attachment_access,
if (depth_attachment_access) |*access| access else null,
);
}
},
.triangle_strip => if (draw_call.vertices.len >= 3) {
@@ -41,17 +109,46 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
const v2 = &draw_call.vertices[vertex_index + 2];
if ((vertex_index & 1) == 0) {
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
try clipTransformAndRasterizeTriangle(
renderer,
allocator,
draw_call,
v0,
v1,
v2,
&color_attachment_access,
if (depth_attachment_access) |*access| access else null,
);
} else {
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v1, v0, v2);
try clipTransformAndRasterizeTriangle(
renderer,
allocator,
draw_call,
v1,
v0,
v2,
&color_attachment_access,
if (depth_attachment_access) |*access| access else null,
);
}
}
},
else => base.unsupported("primitive topology {any}", .{topology}),
}
draw_call.rasterizer_wait_group.await(io) catch return VkError.DeviceLost;
}
fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
fn clipTransformAndRasterizeTriangle(
renderer: *Renderer,
allocator: std.mem.Allocator,
draw_call: *DrawCall,
v0: *Vertex,
v1: *Vertex,
v2: *Vertex,
color_attachment_access: *const common.RenderTargetAccess,
depth_attachment_access: ?*common.RenderTargetAccess,
) VkError!void {
const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2);
if (clipped_polygon.len < 3)
@@ -66,11 +163,29 @@ fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.All
clip.viewportTransformVertex(draw_call.viewport, &tv1);
clip.viewportTransformVertex(draw_call.viewport, &tv2);
try rasterizeTriangle(renderer, allocator, draw_call, &tv0, &tv1, &tv2);
try rasterizeTriangle(
renderer,
allocator,
draw_call,
&tv0,
&tv1,
&tv2,
color_attachment_access,
depth_attachment_access,
);
}
}
fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
fn rasterizeTriangle(
renderer: *Renderer,
allocator: std.mem.Allocator,
draw_call: *DrawCall,
v0: *Vertex,
v1: *Vertex,
v2: *Vertex,
color_attachment_access: *const common.RenderTargetAccess,
depth_attachment_access: ?*common.RenderTargetAccess,
) VkError!void {
if (try triangleIsCulled(renderer, v0, v1, v2))
return;
@@ -78,7 +193,15 @@ fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_cal
const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
switch (pipeline_data.rasterization.polygon_mode) {
.fill => try edge_function.drawTriangle(allocator, draw_call, v0, v1, v2),
.fill => try edge_function.drawTriangle(
allocator,
draw_call,
v0,
v1,
v2,
color_attachment_access,
depth_attachment_access,
),
.line => {
try bresenham.drawLine(allocator, draw_call, v0, v1);
try bresenham.drawLine(allocator, draw_call, v1, v2);
+8
View File
@@ -9,6 +9,14 @@ const Renderer = @import("../Renderer.zig");
const VkError = base.VkError;
const F32x4 = zm.F32x4;
/// Precomputed description of an attachment's mapped memory, letting the
/// rasterizer read and write texels by byte offset. The edge-function
/// rasterizer addresses a texel as `x * texel_size + y * row_pitch` into `base`.
pub const RenderTargetAccess = struct {
/// Lock taken by the rasterizer around the late depth test and the writes that
/// follow it. Only initialised for the depth attachment; the colour
/// attachment's instance leaves it `undefined`.
mutex: std.Io.Mutex,
/// Mapped bytes of the attachment, starting at the selected subresource.
base: []u8,
/// Value multiplied by `y` when computing a texel's offset into `base`.
row_pitch: usize,
/// Value multiplied by `x` when computing a texel's offset into `base`.
texel_size: usize,
/// Format passed to the blitter when reading or writing a texel.
format: vk.Format,
};
pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool {
const min_x: i64 = @as(i64, scissor.offset.x);
const min_y: i64 = @as(i64, scissor.offset.y);
+48 -62
View File
@@ -6,9 +6,9 @@ const zm = base.zm;
const common = @import("common.zig");
const fragment = @import("../fragment.zig");
const blitter = @import("../blitter.zig");
const Renderer = @import("../Renderer.zig");
const SoftImage = @import("../../SoftImage.zig");
const VkError = base.VkError;
const SpvRuntimeError = spv.Runtime.RuntimeError;
@@ -23,12 +23,22 @@ const RunData = struct {
min_y: i32,
max_y: i32,
area: f32,
v0: Renderer.Vertex,
v1: Renderer.Vertex,
v2: Renderer.Vertex,
color_attachment_access: *const common.RenderTargetAccess,
depth_attachment_access: ?*common.RenderTargetAccess,
};
pub fn drawTriangle(
allocator: std.mem.Allocator,
draw_call: *Renderer.DrawCall,
v0: *Renderer.Vertex,
v1: *Renderer.Vertex,
v2: *Renderer.Vertex,
};
pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void {
color_attachment_access: *const common.RenderTargetAccess,
depth_attachment_access: ?*common.RenderTargetAccess,
) VkError!void {
const io = draw_call.renderer.device.interface.io();
const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0])));
@@ -43,7 +53,7 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
const pipeline = draw_call.renderer.state.pipeline orelse return;
const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
const grid_size: usize = @intFromFloat(@ceil(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
const width: usize = @intCast(max_x - min_x + 1);
const height: usize = @intCast(max_y - min_y + 1);
@@ -53,7 +63,6 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
var batch_id: usize = 0;
var wg: std.Io.Group = .init;
for (0..grid_size) |gy| {
for (0..grid_size) |gx| {
defer batch_id = @mod(batch_id + 1, runtimes_count);
@@ -78,20 +87,25 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
.allocator = allocator,
.draw_call = draw_call,
.batch_id = batch_id,
.v0 = v0,
.v1 = v1,
.v2 = v2,
.v0 = v0.*,
.v1 = v1.*,
.v2 = v2.*,
.area = area,
.min_x = run_min_x,
.max_x = run_max_x,
.min_y = run_min_y,
.max_y = run_max_y,
.color_attachment_access = color_attachment_access,
.depth_attachment_access = depth_attachment_access,
};
wg.async(io, runWrapper, .{run_data});
draw_call.rasterizer_wait_group.async(io, runWrapper, .{run_data});
}
}
wg.await(io) catch return VkError.DeviceLost;
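// Without a depth buffer there is nothing to order overlapping fragments, so wait for
// this triangle's tasks to finish before the next one is rasterized (keeps pixel write order)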
if (depth_attachment_access == null)
draw_call.rasterizer_wait_group.await(io) catch return VkError.DeviceLost;
}
inline fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 {
@@ -108,12 +122,7 @@ fn runWrapper(data: RunData) void {
}
inline fn run(data: RunData) !void {
const color_attachment = if (data.draw_call.render_pass.interface.subpasses[0].color_attachments) |attachments| attachments[0].attachment else return VkError.InvalidAttachmentDrv;
const render_target_view: *base.ImageView = data.draw_call.color_attachments[color_attachment];
const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
const depth_attachment_view: ?*base.ImageView = if (data.draw_call.depth_attachment) |view| view else null;
const depth_attachment: ?*SoftImage = if (depth_attachment_view) |view| @alignCast(@fieldParentPtr("interface", view.image)) else null;
const io = data.draw_call.renderer.device.interface.io();
var y = data.min_y;
while (y <= data.max_y) : (y += 1) {
@@ -142,38 +151,12 @@ inline fn run(data: RunData) !void {
const b2 = w2 / data.area;
const z = (b0 * data.v0.position[2]) + (b1 * data.v1.position[2]) + (b2 * data.v2.position[2]);
if (depth_attachment) |depth| {
const depth_value = try depth.readFloat4(
.{
.x = x,
.y = y,
.z = 0,
},
.{
.aspect_mask = depth_attachment_view.?.subresource_range.aspect_mask,
.mip_level = depth_attachment_view.?.subresource_range.base_mip_level,
.array_layer = depth_attachment_view.?.subresource_range.base_array_layer,
},
depth_attachment_view.?.format,
);
// Early depth test to avoid unnecessary computations
if (data.depth_attachment_access) |depth| {
const offset = @as(usize, @intCast(x)) * depth.texel_size + @as(usize, @intCast(y)) * depth.row_pitch;
const depth_value = blitter.readFloat4(depth.base[offset..], depth.format);
if (z >= depth_value[0])
continue;
try depth.writeFloat4(
.{
.x = x,
.y = y,
.z = 0,
},
.{
.aspect_mask = depth_attachment_view.?.subresource_range.aspect_mask,
.mip_level = depth_attachment_view.?.subresource_range.base_mip_level,
.array_layer = depth_attachment_view.?.subresource_range.base_array_layer,
},
depth_attachment_view.?.format,
zm.f32x4s(z),
);
}
const pixel = fragment.shaderInvocation(
@@ -181,7 +164,7 @@ inline fn run(data: RunData) !void {
data.draw_call,
data.batch_id,
zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0),
try common.interpolateVertexOutputs(data.allocator, data.v0, data.v1, data.v2, b0, b1, b2),
try common.interpolateVertexOutputs(data.allocator, &data.v0, &data.v1, &data.v2, b0, b1, b2),
) catch |err| {
std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
if (@errorReturnTrace()) |trace| {
@@ -190,20 +173,23 @@ inline fn run(data: RunData) !void {
return;
};
try render_target.writeFloat4(
.{
.x = x,
.y = y,
.z = 0,
},
.{
.aspect_mask = render_target_view.subresource_range.aspect_mask,
.mip_level = render_target_view.subresource_range.base_mip_level,
.array_layer = render_target_view.subresource_range.base_array_layer,
},
render_target_view.format,
pixel,
);
const color_offset = @as(usize, @intCast(x)) * data.color_attachment_access.texel_size + @as(usize, @intCast(y)) * data.color_attachment_access.row_pitch;
// Late depth test: re-check under the lock, since another task may have written this depth texel while the fragment shader was running
if (data.depth_attachment_access) |depth| {
const depth_offset = @as(usize, @intCast(x)) * depth.texel_size + @as(usize, @intCast(y)) * depth.row_pitch;
depth.mutex.lock(io) catch return VkError.DeviceLost;
defer depth.mutex.unlock(io);
const depth_value = blitter.readFloat4(depth.base[depth_offset..], depth.format);
if (z >= depth_value[0])
continue;
blitter.writeFloat4(zm.f32x4s(z), depth.base[depth_offset..], depth.format);
blitter.writeFloat4(pixel, data.color_attachment_access.base[color_offset..], data.color_attachment_access.format);
} else {
blitter.writeFloat4(pixel, data.color_attachment_access.base[color_offset..], data.color_attachment_access.format);
}
}
}
}
+1 -14
View File
@@ -35,7 +35,7 @@ pub fn runWrapper(data: RunData) void {
inline fn run(data: RunData) !void {
const shader = data.pipeline.stages.getPtrAssertContains(.vertex);
const rt = &shader.runtimes[data.batch_id];
const rt = &shader.runtimes[data.batch_id].rt;
try rt.populatePushConstants(data.draw_call.renderer.state.push_constant_blob[0..]);
const entry = try rt.getEntryPointByName(shader.entry);
@@ -79,19 +79,6 @@ inline fn run(data: RunData) !void {
const output: *Renderer.Vertex = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index];
try rt.readBuiltIn(std.mem.asBytes(&output.position), .Position);
if (invocation_index == 0) {
const io = data.draw_call.renderer.device.interface.io();
const file = try std.Io.Dir.cwd().createFile(
io,
"vertex_result_table_dump.txt",
.{ .truncate = true },
);
defer file.close(io);
var buffer = [_]u8{0} ** 1024;
var writer = file.writer(io, buffer[0..]);
try rt.dumpResultsTable(data.allocator, &writer.interface);
}
for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) {
SpvRuntimeError.NotFound => continue,