Improve rasterization performance
This commit is contained in:
+11
-7
@@ -360,7 +360,7 @@ pub fn getTexelMemoryOffset(self: *const Self, offset: vk.Offset3D, subresource:
|
||||
return try self.getSubresourceOffset(subresource.aspect_mask, subresource.mip_level, subresource.array_layer) + self.getTexelMemoryOffsetInSubresource(offset, subresource);
|
||||
}
|
||||
|
||||
fn getSubresourceOffset(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, layer: u32) VkError!usize {
|
||||
pub fn getSubresourceOffset(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, layer: u32) VkError!usize {
|
||||
var offset = try self.getAspectOffset(aspect_mask);
|
||||
for (0..mip_level) |mip| {
|
||||
offset += self.getMultiSampledLevelSize(aspect_mask, @intCast(mip));
|
||||
@@ -464,18 +464,22 @@ pub fn getMipLevelExtent(self: *const Self, mip_level: u32) vk.Extent3D {
|
||||
|
||||
pub fn getSliceMemSizeForMipLevel(interface: *const Interface, aspect_mask: vk.ImageAspectFlags, mip_level: u32) usize {
|
||||
const self: *const Self = @alignCast(@fieldParentPtr("interface", interface));
|
||||
|
||||
const mip_extent = self.getMipLevelExtent(mip_level);
|
||||
const format = self.interface.formatFromAspect(aspect_mask);
|
||||
return base.format.sliceMemSize(format, mip_extent.width, mip_extent.height);
|
||||
return self.getSliceMemSizeForMipLevelWithFormat(aspect_mask, mip_level, interface.format);
|
||||
}
|
||||
|
||||
/// Interface-level entry point for the row pitch of `mip_level`.
/// Recovers the `Self` that embeds `interface` and forwards to
/// `getRowPitchMemSizeForMipLevelWithFormat` with the image's own format.
pub fn getRowPitchMemSizeForMipLevel(interface: *const Interface, aspect_mask: vk.ImageAspectFlags, mip_level: u32) usize {
    const image: *const Self = @alignCast(@fieldParentPtr("interface", interface));
    const image_format = interface.format;
    return image.getRowPitchMemSizeForMipLevelWithFormat(aspect_mask, mip_level, image_format);
}
|
||||
|
||||
pub fn getSliceMemSizeForMipLevelWithFormat(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, format: vk.Format) usize {
|
||||
const mip_extent = self.getMipLevelExtent(mip_level);
|
||||
const format = self.interface.formatFromAspect(aspect_mask);
|
||||
return base.format.pitchMemSize(format, mip_extent.width);
|
||||
return base.format.sliceMemSize(base.format.fromAspect(format, aspect_mask), mip_extent.width, mip_extent.height);
|
||||
}
|
||||
|
||||
/// Row pitch of `mip_level` when the image is viewed with `format`.
/// The format is first narrowed to the one selected by `aspect_mask`, then
/// `base.format.pitchMemSize` is applied to the width of the mip level.
pub fn getRowPitchMemSizeForMipLevelWithFormat(self: *const Self, aspect_mask: vk.ImageAspectFlags, mip_level: u32, format: vk.Format) usize {
    const level_width = self.getMipLevelExtent(mip_level).width;
    const aspect_format = base.format.fromAspect(format, aspect_mask);
    return base.format.pitchMemSize(aspect_format, level_width);
}
|
||||
|
||||
pub inline fn mapAs(self: *const Self, comptime T: type) VkError!*T {
|
||||
|
||||
@@ -19,9 +19,14 @@ const SoftShaderModule = @import("SoftShaderModule.zig");
|
||||
const Self = @This();
|
||||
pub const Interface = base.Pipeline;
|
||||
|
||||
/// A SPIR-V runtime paired with a mutex.
/// Each shader stage owns a slice of these, indexed by batch id.
const Runtime = struct {
|
||||
/// Locked by the fragment stage around every use of `rt`; the compute and
/// vertex code visible in this change accesses `rt` without taking it.
mutex: std.Io.Mutex,
|
||||
/// The interpreter instance itself.
rt: spv.Runtime,
|
||||
};
|
||||
|
||||
const Shader = struct {
|
||||
module: *SoftShaderModule,
|
||||
runtimes: []spv.Runtime,
|
||||
runtimes: []Runtime,
|
||||
entry: []const u8,
|
||||
};
|
||||
|
||||
@@ -77,10 +82,11 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
|
||||
soft_module.ref();
|
||||
shader.module = soft_module;
|
||||
|
||||
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
|
||||
const runtimes = runtimes_allocator.alloc(Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
|
||||
|
||||
for (runtimes) |*runtime| {
|
||||
runtime.* = spv.Runtime.init(
|
||||
runtime.mutex = .init;
|
||||
runtime.rt = spv.Runtime.init(
|
||||
runtimes_allocator,
|
||||
&soft_module.module,
|
||||
.{
|
||||
@@ -97,7 +103,7 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
|
||||
if (specialization.p_map_entries) |map| {
|
||||
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
|
||||
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
|
||||
runtime.addSpecializationInfo(
|
||||
runtime.rt.addSpecializationInfo(
|
||||
runtimes_allocator,
|
||||
.{
|
||||
.id = @intCast(entry.constant_id),
|
||||
@@ -160,10 +166,11 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
|
||||
soft_module.ref();
|
||||
shader.module = soft_module;
|
||||
|
||||
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfHostMemory;
|
||||
const runtimes = runtimes_allocator.alloc(Runtime, runtimes_count) catch return VkError.OutOfHostMemory;
|
||||
|
||||
for (runtimes) |*runtime| {
|
||||
runtime.* = spv.Runtime.init(
|
||||
runtime.mutex = .init;
|
||||
runtime.rt = spv.Runtime.init(
|
||||
runtimes_allocator,
|
||||
&soft_module.module,
|
||||
.{
|
||||
@@ -180,7 +187,7 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
|
||||
if (specialization.p_map_entries) |map| {
|
||||
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
|
||||
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
|
||||
runtime.addSpecializationInfo(runtimes_allocator, .{
|
||||
runtime.rt.addSpecializationInfo(runtimes_allocator, .{
|
||||
.id = @intCast(entry.constant_id),
|
||||
.offset = @intCast(entry.offset),
|
||||
.size = @intCast(entry.size),
|
||||
@@ -230,8 +237,8 @@ pub fn destroy(interface: *Interface, allocator: std.mem.Allocator) void {
|
||||
var it = self.stages.iterator();
|
||||
while (it.next()) |entry| {
|
||||
entry.value.module.unref(allocator);
|
||||
for (entry.value.runtimes) |*rt| {
|
||||
rt.function_stack.clearAndFree(device_allocator); // Hacky to avoid leaks
|
||||
for (entry.value.runtimes) |*runtime| {
|
||||
runtime.rt.function_stack.clearAndFree(device_allocator); // Hacky to avoid leaks
|
||||
}
|
||||
}
|
||||
self.runtimes_allocator.deinit();
|
||||
|
||||
@@ -6,7 +6,6 @@ const Self = @This();
|
||||
const Allocator = std.mem.Allocator;
|
||||
const Alignment = std.mem.Alignment;
|
||||
|
||||
mutex: base.SpinMutex,
|
||||
child_allocator: std.mem.Allocator,
|
||||
bound: usize,
|
||||
total_bytes_allocated: std.atomic.Value(usize),
|
||||
@@ -15,7 +14,6 @@ current_bytes_allocated: std.atomic.Value(usize),
|
||||
|
||||
pub fn init(child_allocator: Allocator, bound: usize) Self {
|
||||
return .{
|
||||
.mutex = .{},
|
||||
.child_allocator = child_allocator,
|
||||
.bound = bound,
|
||||
.total_bytes_allocated = std.atomic.Value(usize).init(0),
|
||||
@@ -46,8 +44,6 @@ pub inline fn queryPeakFootprint(self: *Self) usize {
|
||||
|
||||
fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
|
||||
const self: *Self = @ptrCast(@alignCast(context));
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
if (self.current_bytes_allocated.fetchAdd(len, .monotonic) >= self.bound)
|
||||
return null;
|
||||
_ = self.total_bytes_allocated.fetchAdd(len, .monotonic);
|
||||
@@ -58,8 +54,6 @@ fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize)
|
||||
|
||||
fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
|
||||
const self: *Self = @ptrCast(@alignCast(context));
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
|
||||
if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
|
||||
return false;
|
||||
@@ -69,8 +63,6 @@ fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize,
|
||||
|
||||
fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
|
||||
const self: *Self = @ptrCast(@alignCast(context));
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
|
||||
if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
|
||||
return null;
|
||||
@@ -80,8 +72,6 @@ fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, r
|
||||
|
||||
fn free(context: *anyopaque, ptr: []u8, alignment: Alignment, ret_addr: usize) void {
|
||||
const self: *Self = @ptrCast(@alignCast(context));
|
||||
self.mutex.lock();
|
||||
defer self.mutex.unlock();
|
||||
_ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
|
||||
return self.child_allocator.rawFree(ptr, alignment, ret_addr);
|
||||
}
|
||||
|
||||
@@ -92,7 +92,7 @@ inline fn run(data: RunData) !void {
|
||||
const io = data.self.device.interface.io();
|
||||
|
||||
const shader = data.pipeline.stages.getPtrAssertContains(.compute);
|
||||
const rt = &shader.runtimes[data.batch_id];
|
||||
const rt = &shader.runtimes[data.batch_id].rt;
|
||||
|
||||
const entry = try rt.getEntryPointByName(shader.entry);
|
||||
|
||||
|
||||
@@ -65,6 +65,8 @@ pub const DrawCall = struct {
|
||||
render_pass: *SoftRenderPass,
|
||||
framebuffer: *SoftFramebuffer,
|
||||
|
||||
rasterizer_wait_group: std.Io.Group,
|
||||
|
||||
stats: struct {
|
||||
polygons_drawn: usize,
|
||||
},
|
||||
@@ -82,6 +84,7 @@ pub const DrawCall = struct {
|
||||
.depth_attachment = if (render_pass.interface.subpasses[0].depth_stencil_attachments) |desc| framebuffer.interface.attachments[desc.attachment] else null,
|
||||
.render_pass = render_pass,
|
||||
.framebuffer = framebuffer,
|
||||
.rasterizer_wait_group = .init,
|
||||
.stats = .{
|
||||
.polygons_drawn = 0,
|
||||
},
|
||||
|
||||
@@ -13,11 +13,18 @@ const VkError = base.VkError;
|
||||
const SpvRuntimeError = spv.Runtime.RuntimeError;
|
||||
|
||||
pub fn shaderInvocation(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, batch_id: usize, position: zm.F32x4, inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]const u8) SpvRuntimeError!zm.F32x4 {
|
||||
const io = draw_call.renderer.device.interface.io();
|
||||
|
||||
_ = position;
|
||||
const pipeline = draw_call.renderer.state.pipeline orelse return zm.f32x4s(0.0);
|
||||
|
||||
const shader = pipeline.stages.getPtrAssertContains(.fragment);
|
||||
const rt = &shader.runtimes[batch_id];
|
||||
const runtime = &shader.runtimes[batch_id];
|
||||
const mutex = &runtime.mutex;
|
||||
const rt = &runtime.rt;
|
||||
|
||||
mutex.lock(io) catch return SpvRuntimeError.Unknown;
|
||||
defer mutex.unlock(io);
|
||||
|
||||
const entry = try rt.getEntryPointByName(shader.entry);
|
||||
const output_result = try rt.getResultByLocation(0, .output);
|
||||
|
||||
@@ -5,17 +5,67 @@ const clip = @import("clip.zig");
|
||||
|
||||
const bresenham = @import("rasterizer/bresenham.zig");
|
||||
const edge_function = @import("rasterizer/edge_function.zig");
|
||||
const common = @import("rasterizer/common.zig");
|
||||
|
||||
const Renderer = @import("Renderer.zig");
|
||||
const Vertex = Renderer.Vertex;
|
||||
const DrawCall = Renderer.DrawCall;
|
||||
const SoftImage = @import("../SoftImage.zig");
|
||||
|
||||
const VkError = base.VkError;
|
||||
|
||||
pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void {
|
||||
const io = draw_call.renderer.device.interface.io();
|
||||
|
||||
const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
|
||||
const topology = pipeline_data.input_assembly.topology;
|
||||
|
||||
const color_attachment = if (draw_call.render_pass.interface.subpasses[0].color_attachments) |attachments| attachments[0].attachment else return VkError.InvalidAttachmentDrv;
|
||||
const render_target_view: *base.ImageView = draw_call.color_attachments[color_attachment];
|
||||
const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
|
||||
|
||||
const color_range = render_target_view.subresource_range;
|
||||
const color_format = render_target_view.format;
|
||||
|
||||
const color_attachment_subresource_offset = try render_target.getSubresourceOffset(
|
||||
color_range.aspect_mask,
|
||||
color_range.base_mip_level,
|
||||
color_range.base_array_layer,
|
||||
);
|
||||
const color_attachment_subresource_size = render_target.getLayerSize(color_range.aspect_mask);
|
||||
const color_attachment_access: common.RenderTargetAccess = .{
|
||||
.mutex = undefined,
|
||||
.base = try render_target.mapAsSliceWithAddedOffset(u8, color_attachment_subresource_offset, color_attachment_subresource_size),
|
||||
.row_pitch = render_target.getRowPitchMemSizeForMipLevelWithFormat(color_range.aspect_mask, color_range.base_mip_level, color_format),
|
||||
.texel_size = base.format.texelSize(color_format),
|
||||
.format = color_format,
|
||||
};
|
||||
|
||||
const depth_attachment_view: ?*base.ImageView = if (draw_call.depth_attachment) |view| view else null;
|
||||
const depth_attachment: ?*SoftImage = if (depth_attachment_view) |view| @alignCast(@fieldParentPtr("interface", view.image)) else null;
|
||||
|
||||
// Raw access descriptor for the depth attachment, or null when the subpass has none.
// Kept `var` because the rasterizer takes a mutable pointer to it (the mutex is locked through it).
var depth_attachment_access: ?common.RenderTargetAccess = blk: {
    const depth_image = depth_attachment orelse break :blk null;
    // depth_attachment is derived from depth_attachment_view, so the view is non-null here.
    const depth_view = depth_attachment_view.?;

    const depth_range = depth_view.subresource_range;
    const depth_format = depth_view.format;

    const attachment_subresource_offset = try depth_image.getSubresourceOffset(
        depth_range.aspect_mask,
        depth_range.base_mip_level,
        depth_range.base_array_layer,
    );
    const attachment_subresource_size = depth_image.getLayerSize(depth_range.aspect_mask);

    break :blk .{
        .mutex = .init,
        .base = try depth_image.mapAsSliceWithAddedOffset(u8, attachment_subresource_offset, attachment_subresource_size),
        // The pitch must come from the depth image itself: the colour target may
        // have a different width at this mip level, which would skew every row offset.
        .row_pitch = depth_image.getRowPitchMemSizeForMipLevelWithFormat(depth_range.aspect_mask, depth_range.base_mip_level, depth_format),
        .texel_size = base.format.texelSize(depth_format),
        .format = depth_format,
    };
};
|
||||
|
||||
switch (topology) {
|
||||
.triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| {
|
||||
const first_vertex = triangle_index * 3;
|
||||
@@ -23,7 +73,16 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
|
||||
const v1 = &draw_call.vertices[first_vertex + 1];
|
||||
const v2 = &draw_call.vertices[first_vertex + 2];
|
||||
|
||||
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
|
||||
try clipTransformAndRasterizeTriangle(
|
||||
renderer,
|
||||
allocator,
|
||||
draw_call,
|
||||
v0,
|
||||
v1,
|
||||
v2,
|
||||
&color_attachment_access,
|
||||
if (depth_attachment_access) |*access| access else null,
|
||||
);
|
||||
},
|
||||
.triangle_fan => if (draw_call.vertices.len >= 3) {
|
||||
const v0 = &draw_call.vertices[0];
|
||||
@@ -31,7 +90,16 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
|
||||
const v1 = &draw_call.vertices[vertex_index];
|
||||
const v2 = &draw_call.vertices[vertex_index + 1];
|
||||
|
||||
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
|
||||
try clipTransformAndRasterizeTriangle(
|
||||
renderer,
|
||||
allocator,
|
||||
draw_call,
|
||||
v0,
|
||||
v1,
|
||||
v2,
|
||||
&color_attachment_access,
|
||||
if (depth_attachment_access) |*access| access else null,
|
||||
);
|
||||
}
|
||||
},
|
||||
.triangle_strip => if (draw_call.vertices.len >= 3) {
|
||||
@@ -41,17 +109,46 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
|
||||
const v2 = &draw_call.vertices[vertex_index + 2];
|
||||
|
||||
if ((vertex_index & 1) == 0) {
|
||||
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
|
||||
try clipTransformAndRasterizeTriangle(
|
||||
renderer,
|
||||
allocator,
|
||||
draw_call,
|
||||
v0,
|
||||
v1,
|
||||
v2,
|
||||
&color_attachment_access,
|
||||
if (depth_attachment_access) |*access| access else null,
|
||||
);
|
||||
} else {
|
||||
try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v1, v0, v2);
|
||||
try clipTransformAndRasterizeTriangle(
|
||||
renderer,
|
||||
allocator,
|
||||
draw_call,
|
||||
v1,
|
||||
v0,
|
||||
v2,
|
||||
&color_attachment_access,
|
||||
if (depth_attachment_access) |*access| access else null,
|
||||
);
|
||||
}
|
||||
}
|
||||
},
|
||||
else => base.unsupported("primitive topology {any}", .{topology}),
|
||||
}
|
||||
|
||||
draw_call.rasterizer_wait_group.await(io) catch return VkError.DeviceLost;
|
||||
}
|
||||
|
||||
fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
|
||||
fn clipTransformAndRasterizeTriangle(
|
||||
renderer: *Renderer,
|
||||
allocator: std.mem.Allocator,
|
||||
draw_call: *DrawCall,
|
||||
v0: *Vertex,
|
||||
v1: *Vertex,
|
||||
v2: *Vertex,
|
||||
color_attachment_access: *const common.RenderTargetAccess,
|
||||
depth_attachment_access: ?*common.RenderTargetAccess,
|
||||
) VkError!void {
|
||||
const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2);
|
||||
|
||||
if (clipped_polygon.len < 3)
|
||||
@@ -66,11 +163,29 @@ fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.All
|
||||
clip.viewportTransformVertex(draw_call.viewport, &tv1);
|
||||
clip.viewportTransformVertex(draw_call.viewport, &tv2);
|
||||
|
||||
try rasterizeTriangle(renderer, allocator, draw_call, &tv0, &tv1, &tv2);
|
||||
try rasterizeTriangle(
|
||||
renderer,
|
||||
allocator,
|
||||
draw_call,
|
||||
&tv0,
|
||||
&tv1,
|
||||
&tv2,
|
||||
color_attachment_access,
|
||||
depth_attachment_access,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
|
||||
fn rasterizeTriangle(
|
||||
renderer: *Renderer,
|
||||
allocator: std.mem.Allocator,
|
||||
draw_call: *DrawCall,
|
||||
v0: *Vertex,
|
||||
v1: *Vertex,
|
||||
v2: *Vertex,
|
||||
color_attachment_access: *const common.RenderTargetAccess,
|
||||
depth_attachment_access: ?*common.RenderTargetAccess,
|
||||
) VkError!void {
|
||||
if (try triangleIsCulled(renderer, v0, v1, v2))
|
||||
return;
|
||||
|
||||
@@ -78,7 +193,15 @@ fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_cal
|
||||
|
||||
const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
|
||||
switch (pipeline_data.rasterization.polygon_mode) {
|
||||
.fill => try edge_function.drawTriangle(allocator, draw_call, v0, v1, v2),
|
||||
.fill => try edge_function.drawTriangle(
|
||||
allocator,
|
||||
draw_call,
|
||||
v0,
|
||||
v1,
|
||||
v2,
|
||||
color_attachment_access,
|
||||
depth_attachment_access,
|
||||
),
|
||||
.line => {
|
||||
try bresenham.drawLine(allocator, draw_call, v0, v1);
|
||||
try bresenham.drawLine(allocator, draw_call, v1, v2);
|
||||
|
||||
@@ -9,6 +9,14 @@ const Renderer = @import("../Renderer.zig");
|
||||
const VkError = base.VkError;
|
||||
const F32x4 = zm.F32x4;
|
||||
|
||||
/// Direct byte-level view of one attachment subresource, so the rasterizer can
/// read and write texels without going through the image helpers per pixel.
/// A texel at (x, y) is addressed at `x * texel_size + y * row_pitch` in `base`.
pub const RenderTargetAccess = struct {
|
||||
/// Held by the triangle rasterizer around the post-shading depth test and the
/// writes that follow it. Left `undefined` for the colour attachment, so it
/// must only be locked on the depth access.
mutex: std.Io.Mutex,
|
||||
/// Mapped bytes of the subresource (offset already applied by the caller).
base: []u8,
|
||||
/// Distance between the starts of two consecutive rows, in units of `base` elements.
row_pitch: usize,
|
||||
/// Size of one texel of `format`, as returned by `base.format.texelSize`.
texel_size: usize,
|
||||
/// Format handed to the blitter when decoding or encoding a texel.
format: vk.Format,
|
||||
};
|
||||
|
||||
pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool {
|
||||
const min_x: i64 = @as(i64, scissor.offset.x);
|
||||
const min_y: i64 = @as(i64, scissor.offset.y);
|
||||
|
||||
@@ -6,9 +6,9 @@ const zm = base.zm;
|
||||
|
||||
const common = @import("common.zig");
|
||||
const fragment = @import("../fragment.zig");
|
||||
const blitter = @import("../blitter.zig");
|
||||
|
||||
const Renderer = @import("../Renderer.zig");
|
||||
const SoftImage = @import("../../SoftImage.zig");
|
||||
|
||||
const VkError = base.VkError;
|
||||
const SpvRuntimeError = spv.Runtime.RuntimeError;
|
||||
@@ -23,12 +23,22 @@ const RunData = struct {
|
||||
min_y: i32,
|
||||
max_y: i32,
|
||||
area: f32,
|
||||
v0: Renderer.Vertex,
|
||||
v1: Renderer.Vertex,
|
||||
v2: Renderer.Vertex,
|
||||
color_attachment_access: *const common.RenderTargetAccess,
|
||||
depth_attachment_access: ?*common.RenderTargetAccess,
|
||||
};
|
||||
|
||||
pub fn drawTriangle(
|
||||
allocator: std.mem.Allocator,
|
||||
draw_call: *Renderer.DrawCall,
|
||||
v0: *Renderer.Vertex,
|
||||
v1: *Renderer.Vertex,
|
||||
v2: *Renderer.Vertex,
|
||||
};
|
||||
|
||||
pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void {
|
||||
color_attachment_access: *const common.RenderTargetAccess,
|
||||
depth_attachment_access: ?*common.RenderTargetAccess,
|
||||
) VkError!void {
|
||||
const io = draw_call.renderer.device.interface.io();
|
||||
|
||||
const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0])));
|
||||
@@ -43,7 +53,7 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
|
||||
const pipeline = draw_call.renderer.state.pipeline orelse return;
|
||||
|
||||
const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
|
||||
const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
|
||||
const grid_size: usize = @intFromFloat(@ceil(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
|
||||
|
||||
const width: usize = @intCast(max_x - min_x + 1);
|
||||
const height: usize = @intCast(max_y - min_y + 1);
|
||||
@@ -53,7 +63,6 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
|
||||
|
||||
var batch_id: usize = 0;
|
||||
|
||||
var wg: std.Io.Group = .init;
|
||||
for (0..grid_size) |gy| {
|
||||
for (0..grid_size) |gx| {
|
||||
defer batch_id = @mod(batch_id + 1, runtimes_count);
|
||||
@@ -78,20 +87,25 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
|
||||
.allocator = allocator,
|
||||
.draw_call = draw_call,
|
||||
.batch_id = batch_id,
|
||||
.v0 = v0,
|
||||
.v1 = v1,
|
||||
.v2 = v2,
|
||||
.v0 = v0.*,
|
||||
.v1 = v1.*,
|
||||
.v2 = v2.*,
|
||||
.area = area,
|
||||
.min_x = run_min_x,
|
||||
.max_x = run_max_x,
|
||||
.min_y = run_min_y,
|
||||
.max_y = run_max_y,
|
||||
.color_attachment_access = color_attachment_access,
|
||||
.depth_attachment_access = depth_attachment_access,
|
||||
};
|
||||
|
||||
wg.async(io, runWrapper, .{run_data});
|
||||
draw_call.rasterizer_wait_group.async(io, runWrapper, .{run_data});
|
||||
}
|
||||
}
|
||||
wg.await(io) catch return VkError.DeviceLost;
|
||||
|
||||
// Without a depth buffer nothing orders overlapping fragments, so finish this triangle before the next one starts to keep primitive order
|
||||
if (depth_attachment_access == null)
|
||||
draw_call.rasterizer_wait_group.await(io) catch return VkError.DeviceLost;
|
||||
}
|
||||
|
||||
inline fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 {
|
||||
@@ -108,12 +122,7 @@ fn runWrapper(data: RunData) void {
|
||||
}
|
||||
|
||||
inline fn run(data: RunData) !void {
|
||||
const color_attachment = if (data.draw_call.render_pass.interface.subpasses[0].color_attachments) |attachments| attachments[0].attachment else return VkError.InvalidAttachmentDrv;
|
||||
const render_target_view: *base.ImageView = data.draw_call.color_attachments[color_attachment];
|
||||
const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
|
||||
|
||||
const depth_attachment_view: ?*base.ImageView = if (data.draw_call.depth_attachment) |view| view else null;
|
||||
const depth_attachment: ?*SoftImage = if (depth_attachment_view) |view| @alignCast(@fieldParentPtr("interface", view.image)) else null;
|
||||
const io = data.draw_call.renderer.device.interface.io();
|
||||
|
||||
var y = data.min_y;
|
||||
while (y <= data.max_y) : (y += 1) {
|
||||
@@ -142,38 +151,12 @@ inline fn run(data: RunData) !void {
|
||||
const b2 = w2 / data.area;
|
||||
const z = (b0 * data.v0.position[2]) + (b1 * data.v1.position[2]) + (b2 * data.v2.position[2]);
|
||||
|
||||
if (depth_attachment) |depth| {
|
||||
const depth_value = try depth.readFloat4(
|
||||
.{
|
||||
.x = x,
|
||||
.y = y,
|
||||
.z = 0,
|
||||
},
|
||||
.{
|
||||
.aspect_mask = depth_attachment_view.?.subresource_range.aspect_mask,
|
||||
.mip_level = depth_attachment_view.?.subresource_range.base_mip_level,
|
||||
.array_layer = depth_attachment_view.?.subresource_range.base_array_layer,
|
||||
},
|
||||
depth_attachment_view.?.format,
|
||||
);
|
||||
|
||||
// Early depth test to avoid unnecessary computations
|
||||
if (data.depth_attachment_access) |depth| {
|
||||
const offset = @as(usize, @intCast(x)) * depth.texel_size + @as(usize, @intCast(y)) * depth.row_pitch;
|
||||
const depth_value = blitter.readFloat4(depth.base[offset..], depth.format);
|
||||
if (z >= depth_value[0])
|
||||
continue;
|
||||
|
||||
try depth.writeFloat4(
|
||||
.{
|
||||
.x = x,
|
||||
.y = y,
|
||||
.z = 0,
|
||||
},
|
||||
.{
|
||||
.aspect_mask = depth_attachment_view.?.subresource_range.aspect_mask,
|
||||
.mip_level = depth_attachment_view.?.subresource_range.base_mip_level,
|
||||
.array_layer = depth_attachment_view.?.subresource_range.base_array_layer,
|
||||
},
|
||||
depth_attachment_view.?.format,
|
||||
zm.f32x4s(z),
|
||||
);
|
||||
}
|
||||
|
||||
const pixel = fragment.shaderInvocation(
|
||||
@@ -181,7 +164,7 @@ inline fn run(data: RunData) !void {
|
||||
data.draw_call,
|
||||
data.batch_id,
|
||||
zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0),
|
||||
try common.interpolateVertexOutputs(data.allocator, data.v0, data.v1, data.v2, b0, b1, b2),
|
||||
try common.interpolateVertexOutputs(data.allocator, &data.v0, &data.v1, &data.v2, b0, b1, b2),
|
||||
) catch |err| {
|
||||
std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
|
||||
if (@errorReturnTrace()) |trace| {
|
||||
@@ -190,20 +173,23 @@ inline fn run(data: RunData) !void {
|
||||
return;
|
||||
};
|
||||
|
||||
try render_target.writeFloat4(
|
||||
.{
|
||||
.x = x,
|
||||
.y = y,
|
||||
.z = 0,
|
||||
},
|
||||
.{
|
||||
.aspect_mask = render_target_view.subresource_range.aspect_mask,
|
||||
.mip_level = render_target_view.subresource_range.base_mip_level,
|
||||
.array_layer = render_target_view.subresource_range.base_array_layer,
|
||||
},
|
||||
render_target_view.format,
|
||||
pixel,
|
||||
);
|
||||
const color_offset = @as(usize, @intCast(x)) * data.color_attachment_access.texel_size + @as(usize, @intCast(y)) * data.color_attachment_access.row_pitch;
|
||||
|
||||
// Repeat the depth test under the lock after shading: another task may have written this depth texel while the fragment shader was running
|
||||
if (data.depth_attachment_access) |depth| {
|
||||
const depth_offset = @as(usize, @intCast(x)) * depth.texel_size + @as(usize, @intCast(y)) * depth.row_pitch;
|
||||
|
||||
depth.mutex.lock(io) catch return VkError.DeviceLost;
|
||||
defer depth.mutex.unlock(io);
|
||||
|
||||
const depth_value = blitter.readFloat4(depth.base[depth_offset..], depth.format);
|
||||
if (z >= depth_value[0])
|
||||
continue;
|
||||
blitter.writeFloat4(zm.f32x4s(z), depth.base[depth_offset..], depth.format);
|
||||
blitter.writeFloat4(pixel, data.color_attachment_access.base[color_offset..], data.color_attachment_access.format);
|
||||
} else {
|
||||
blitter.writeFloat4(pixel, data.color_attachment_access.base[color_offset..], data.color_attachment_access.format);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ pub fn runWrapper(data: RunData) void {
|
||||
|
||||
inline fn run(data: RunData) !void {
|
||||
const shader = data.pipeline.stages.getPtrAssertContains(.vertex);
|
||||
const rt = &shader.runtimes[data.batch_id];
|
||||
const rt = &shader.runtimes[data.batch_id].rt;
|
||||
try rt.populatePushConstants(data.draw_call.renderer.state.push_constant_blob[0..]);
|
||||
|
||||
const entry = try rt.getEntryPointByName(shader.entry);
|
||||
@@ -79,19 +79,6 @@ inline fn run(data: RunData) !void {
|
||||
const output: *Renderer.Vertex = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index];
|
||||
try rt.readBuiltIn(std.mem.asBytes(&output.position), .Position);
|
||||
|
||||
if (invocation_index == 0) {
|
||||
const io = data.draw_call.renderer.device.interface.io();
|
||||
const file = try std.Io.Dir.cwd().createFile(
|
||||
io,
|
||||
"vertex_result_table_dump.txt",
|
||||
.{ .truncate = true },
|
||||
);
|
||||
defer file.close(io);
|
||||
var buffer = [_]u8{0} ** 1024;
|
||||
var writer = file.writer(io, buffer[0..]);
|
||||
try rt.dumpResultsTable(data.allocator, &writer.interface);
|
||||
}
|
||||
|
||||
for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
|
||||
const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) {
|
||||
SpvRuntimeError.NotFound => continue,
|
||||
|
||||
Reference in New Issue
Block a user