From d38cbfac9be4a598da632e2858d29844ee63989b Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Mon, 27 Apr 2026 23:52:00 +0200 Subject: [PATCH] adding face culling, better varying management and more primitive topologies --- src/soft/device/Renderer.zig | 102 +++++++++++++++++++++--- src/soft/device/fragment_dispatcher.zig | 4 +- src/soft/device/rasterizer.zig | 20 ++--- src/soft/device/vertex_dispatcher.zig | 11 ++- 4 files changed, 106 insertions(+), 31 deletions(-) diff --git a/src/soft/device/Renderer.zig b/src/soft/device/Renderer.zig index 2694773..bd18886 100644 --- a/src/soft/device/Renderer.zig +++ b/src/soft/device/Renderer.zig @@ -40,7 +40,10 @@ pub const DynamicState = struct { pub const Vertex = struct { position: F32x4, - outputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS]?[]u8, + outputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS]?struct { + interpolation_type: enum { smooth, flat, noperspective }, + blob: []u8, + }, }; pub const Fragment = struct { @@ -99,7 +102,7 @@ pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_verte }; for (draw_call.vertices) |*vertex| { - vertex.outputs = [_]?[]u8{null} ** spv.SPIRV_MAX_OUTPUT_LOCATIONS; + @memset(vertex.outputs[0..], null); } self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count) catch |err| { @@ -215,21 +218,34 @@ fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Dra const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; const topology = pipeline_data.input_assembly.topology; switch (topology) { - .triangle_list => for (0..@divExact(draw_call.vertices.len, 3)) |triangle_index| { + .triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| { const first_vertex = triangle_index * 3; const v0 = &draw_call.vertices[first_vertex + 0]; const v1 = &draw_call.vertices[first_vertex + 1]; const v2 = &draw_call.vertices[first_vertex + 2]; - switch (pipeline_data.rasterization.polygon_mode) { - .fill => try rasterizer.drawTriangleFilled(allocator, &fragments, v0, v1, v2), - .line => { - try rasterizer.drawLineBresenham(allocator, &fragments, v0, v1); - try rasterizer.drawLineBresenham(allocator, &fragments, v1, v2); - try rasterizer.drawLineBresenham(allocator, &fragments, v2, v0); - }, - .point => {}, - else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}), + try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); + }, + .triangle_fan => if (draw_call.vertices.len >= 3) { + const v0 = &draw_call.vertices[0]; + for (1..(draw_call.vertices.len - 1)) |vertex_index| { + const v1 = &draw_call.vertices[vertex_index]; + const v2 = &draw_call.vertices[vertex_index + 1]; + + try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); + } + }, + .triangle_strip => if (draw_call.vertices.len >= 3) { + for (0..(draw_call.vertices.len - 2)) |vertex_index| { + const v0 = &draw_call.vertices[vertex_index + 0]; + const v1 = &draw_call.vertices[vertex_index + 1]; + const v2 = &draw_call.vertices[vertex_index + 2]; + + if ((vertex_index & 1) == 0) { + try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); + } else { + try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v1, v0, v2); + } } }, else => base.unsupported("primitive topology {any}", .{topology}), @@ -238,6 +254,68 @@ fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Dra draw_call.fragments = fragments.toOwnedSlice(allocator) catch return VkError.OutOfDeviceMemory; } +fn triangleArea2(v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) f32 { + const x0 = v0.position[0]; + const y0 = v0.position[1]; + const x1 = v1.position[0]; + const y1 = v1.position[1]; + const x2 = v2.position[0]; + const y2 = v2.position[1]; + + return ((x1 - x0) * (y2 - y0)) - ((y1 - y0) * (x2 - x0)); +} + +fn triangleIsCulled(self: *Self, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!bool { + const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + const rasterization = pipeline_data.rasterization; + const cull_mode = rasterization.cull_mode; + + if (!cull_mode.front_bit and !cull_mode.back_bit) + return false; + + if (cull_mode.front_bit and cull_mode.back_bit) + return true; + + const area = triangleArea2(v0, v1, v2); + if (area == 0.0) + return true; + + const front_face = switch (rasterization.front_face) { + .counter_clockwise => area < 0.0, + .clockwise => area > 0.0, + else => return false, + }; + + return (cull_mode.front_bit and front_face) or (cull_mode.back_bit and !front_face); +} + +fn rasterizeTriangle( + self: *Self, + allocator: std.mem.Allocator, + fragments: *std.ArrayList(Fragment), + v0: *Vertex, + v1: *Vertex, + v2: *Vertex, + cull_v0: *const Vertex, + cull_v1: *const Vertex, + cull_v2: *const Vertex, +) VkError!void { + if (try self.triangleIsCulled(cull_v0, cull_v1, cull_v2)) + return; + + const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + switch (pipeline_data.rasterization.polygon_mode) { + .fill => try rasterizer.drawTriangleFilled(allocator, fragments, v0, v1, v2), + .line => { + try rasterizer.drawLineBresenham(allocator, fragments, v0, v1); + try rasterizer.drawLineBresenham(allocator, fragments, v1, v2); + try rasterizer.drawLineBresenham(allocator, fragments, v2, v0); + }, + .point => {}, + else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}), + } +} + fn fragmentShaderStage(self: *Self, draw_call: *DrawCall) !void { const pipeline = self.state.pipeline orelse return; const batch_size = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len; diff --git a/src/soft/device/fragment_dispatcher.zig b/src/soft/device/fragment_dispatcher.zig index 7826db8..c3912d8 100644 --- a/src/soft/device/fragment_dispatcher.zig +++ b/src/soft/device/fragment_dispatcher.zig @@ -48,9 +48,7 @@ inline fn run(data: RunData) !void { SpvRuntimeError.NotFound => continue, else => return err, }; - if (result_word != 0) { - try rt.writeInput(fragment.inputs[location], result_word); - } + try rt.writeInput(fragment.inputs[location], result_word); } rt.callEntryPoint(allocator, entry) catch |err| switch (err) { diff --git a/src/soft/device/rasterizer.zig b/src/soft/device/rasterizer.zig index 5b51fa0..3f0fbb0 100644 --- a/src/soft/device/rasterizer.zig +++ b/src/soft/device/rasterizer.zig @@ -37,31 +37,31 @@ fn interpolateVertexOutputs( const out1 = v1.outputs[location] orelse continue; const out2 = v2.outputs[location] orelse continue; - if (out0.len == 0) { - inputs[location] = out0; + if (out0.interpolation_type == .flat or out0.blob.len == 0) { + inputs[location] = out0.blob; continue; } - const len = @min(out0.len, out1.len, out2.len); + const len = @min(out0.blob.len, out1.blob.len, out2.blob.len); const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory; var byte_index: usize = 0; while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) { - const value0 = std.mem.bytesToValue(F32x4, out0[byte_index..]); - const value1 = std.mem.bytesToValue(F32x4, out1[byte_index..]); - const value2 = std.mem.bytesToValue(F32x4, out2[byte_index..]); + const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]); + const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]); + const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]); writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2)); } while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) { - const value0 = std.mem.bytesToValue(f32, out0[byte_index..]); - const value1 = std.mem.bytesToValue(f32, out1[byte_index..]); - const value2 = std.mem.bytesToValue(f32, out2[byte_index..]); + const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]); + const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]); + const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]); writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2)); } if (byte_index < len) - @memcpy(input[byte_index..], out0[byte_index..len]); + @memcpy(input[byte_index..], out0.blob[byte_index..len]); inputs[location] = input; } diff --git a/src/soft/device/vertex_dispatcher.zig b/src/soft/device/vertex_dispatcher.zig index 074ddd0..05d01de 100644 --- a/src/soft/device/vertex_dispatcher.zig +++ b/src/soft/device/vertex_dispatcher.zig @@ -80,12 +80,11 @@ inline fn run(data: RunData) !void { SpvRuntimeError.NotFound => continue, else => return err, }; - if (result_word == 0) - continue; - const value = rt.results[result_word].getConstValue() catch continue; - const needed_size = try value.getPlainMemorySize(); - output.outputs[location] = data.allocator.alloc(u8, needed_size) catch return VkError.OutOfDeviceMemory; - try rt.readOutput(output.outputs[location].?, result_word); + output.outputs[location] = .{ + .interpolation_type = if (rt.hasResultDecoration(result_word, .Flat)) .flat else .smooth, // TODO : handle noperspective + .blob = data.allocator.alloc(u8, try rt.getResultMemorySize(result_word)) catch return VkError.OutOfDeviceMemory, + }; + try rt.readOutput(output.outputs[location].?.blob, result_word); } } }