adding pipeline dynamic state and vertex output interpolation
Build / build (push) Successful in 2m40s
Test / build_and_test (push) Successful in 33m34s

This commit is contained in:
2026-04-27 19:39:49 +02:00
parent f35bce907e
commit 02bb54b841
12 changed files with 286 additions and 80 deletions
+54 -19
View File
@@ -3,6 +3,7 @@ const vk = @import("vulkan");
const base = @import("base");
const zm = base.zm;
const lib = @import("../lib.zig");
const spv = @import("spv");
pub const F32x4 = zm.F32x4;
@@ -32,18 +33,24 @@ pub const VertexBuffer = struct {
};
pub const DynamicState = struct {
viewport: vk.Viewport,
scissor: vk.Rect2D,
line_width: f32,
viewports: ?[]const vk.Viewport,
scissor: ?[]vk.Rect2D,
line_width: ?f32,
};
/// Per-vertex record of a draw call: a position plus optional raw output
/// buffers, one slot per SPIR-V output location.
pub const Vertex = struct {
/// Four-component float position of the vertex.
position: F32x4,
/// Raw bytes recorded for each output location; `null` means nothing was
/// recorded for that location.
outputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS]?[]u8,
};
/// Per-fragment record of a draw call: a position, a color and one raw input
/// buffer per SPIR-V location.
pub const Fragment = struct {
/// Four-component float position of the fragment.
position: F32x4,
/// Four-component float color of the fragment.
color: F32x4,
/// Raw bytes for each location. The slots are not optional, so an absent
/// location cannot be told apart by type alone.
/// NOTE(review): producers presumably must initialise every slot — confirm.
inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8,
};
pub const DrawCall = struct {
vertices: []F32x4,
vertices: []Vertex,
fragments: []Fragment,
};
@@ -60,11 +67,17 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self {
.state = state,
.render_pass = null,
.framebuffer = null,
.dynamic_state = undefined,
.dynamic_state = .{
.viewports = null,
.scissor = null,
.line_width = null,
},
};
}
pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void {
const io = self.device.interface.io();
const render_target_view: *base.ImageView = (self.framebuffer orelse return).interface.attachments[0];
const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
const render_target_memory = if (render_target.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
@@ -73,20 +86,32 @@ pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_verte
defer arena.deinit();
const allocator = arena.allocator();
const timer = std.Io.Timestamp.now(io, .real);
defer if (comptime base.config.logs) {
const duration = timer.untilNow(io, .real);
const ms = duration.toMicroseconds();
std.log.scoped(.SoftwareRenderer).debug("Drawcall stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, @divTrunc(arena.queryCapacity(), 1000) });
};
var draw_call: DrawCall = .{
.vertices = allocator.alloc(F32x4, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory,
.vertices = allocator.alloc(Vertex, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory,
.fragments = undefined,
};
self.vertexShaderStage(&draw_call, vertex_count, instance_count) catch |err| {
for (draw_call.vertices) |*vertex| {
vertex.outputs = [_]?[]u8{null} ** spv.SPIRV_MAX_OUTPUT_LOCATIONS;
}
self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count) catch |err| {
std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)});
if (@errorReturnTrace()) |trace| {
std.debug.dumpErrorReturnTrace(trace);
}
};
self.primitiveAssemblyStage(&draw_call);
try self.primitiveAssemblyStage(&draw_call);
try self.rasterizationStage(allocator, &draw_call);
self.fragmentShaderStage(&draw_call) catch |err| {
std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
if (@errorReturnTrace()) |trace| {
@@ -121,7 +146,7 @@ pub fn deinit(self: *Self) void {
_ = self;
}
fn vertexShaderStage(self: *Self, draw_call: *DrawCall, vertex_count: usize, instance_count: usize) !void {
fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall, vertex_count: usize, instance_count: usize) !void {
const pipeline = self.state.pipeline orelse return;
const batch_size = (pipeline.stages.getPtr(.vertex) orelse return).runtimes.len;
@@ -129,6 +154,7 @@ fn vertexShaderStage(self: *Self, draw_call: *DrawCall, vertex_count: usize, ins
for (0..instance_count) |instance_index| {
for (0..@min(batch_size, vertex_count)) |batch_id| {
const run_data: vertex_dispatcher.RunData = .{
.allocator = allocator,
.renderer = self,
.pipeline = pipeline,
.batch_id = batch_id,
@@ -144,14 +170,23 @@ fn vertexShaderStage(self: *Self, draw_call: *DrawCall, vertex_count: usize, ins
wg.await(self.device.interface.io()) catch return VkError.DeviceLost;
}
fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) void {
const viewport = (self.state.pipeline orelse return).interface.mode.graphics.viewport_state.viewports[0];
fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) VkError!void {
const viewport = blk: {
const pipeline_data = &(self.state.pipeline orelse return VkError.InvalidPipelineDrv).interface.mode.graphics;
if (pipeline_data.dynamic_state.viewport) {
if (self.dynamic_state.viewports) |viewports|
break :blk viewports[0];
}
if (pipeline_data.viewport_state.viewports) |viewports|
break :blk viewports[0];
return VkError.Unknown;
};
for (draw_call.vertices) |*vertex| {
const x = vertex[0];
const y = vertex[1];
const z = vertex[2];
const w = vertex[3];
const x = vertex.position[0];
const y = vertex.position[1];
const z = vertex.position[2];
const w = vertex.position[3];
// Perspective division.
const x_ndc = x / w;
@@ -170,7 +205,7 @@ fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) void {
const y_screen = ((p_y / 2.0) * y_ndc) + o_y;
const z_screen = (p_z * z_ndc) + o_z;
vertex.* = zm.f32x4(x_screen, y_screen, z_screen, 1.0);
vertex.position = zm.f32x4(x_screen, y_screen, z_screen, 1.0);
}
}
@@ -182,9 +217,9 @@ fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Dra
switch (topology) {
.triangle_list => for (0..@divExact(draw_call.vertices.len, 3)) |triangle_index| {
const first_vertex = triangle_index * 3;
const v0 = draw_call.vertices[first_vertex + 0];
const v1 = draw_call.vertices[first_vertex + 1];
const v2 = draw_call.vertices[first_vertex + 2];
const v0 = &draw_call.vertices[first_vertex + 0];
const v1 = &draw_call.vertices[first_vertex + 1];
const v2 = &draw_call.vertices[first_vertex + 2];
switch (pipeline_data.rasterization.polygon_mode) {
.fill => try rasterizer.drawTriangleFilled(allocator, &fragments, v0, v1, v2),
+14 -3
View File
@@ -41,6 +41,18 @@ inline fn run(data: RunData) !void {
var invocation_index: usize = data.batch_id;
while (invocation_index < data.fragment_count) : (invocation_index += data.batch_size) {
const fragment: *Renderer.Fragment = &data.draw_call.fragments[invocation_index];
for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) {
SpvRuntimeError.NotFound => continue,
else => return err,
};
if (result_word != 0) {
try rt.writeInput(fragment.inputs[location], result_word);
}
}
rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
// Some errors can be safely ignored
SpvRuntimeError.OutOfBounds,
@@ -49,8 +61,7 @@ inline fn run(data: RunData) !void {
else => return err,
};
const output: *F32x4 = &data.draw_call.fragments[invocation_index].color;
try rt.readOutput(std.mem.asBytes(output), output_result);
output.* = std.math.clamp(output.*, zm.f32x4s(0.0), zm.f32x4s(1.0));
try rt.readOutput(std.mem.asBytes(&fragment.color), output_result);
fragment.color = std.math.clamp(fragment.color, zm.f32x4s(0.0), zm.f32x4s(1.0));
}
}
+90 -18
View File
@@ -8,14 +8,76 @@ const VkError = base.VkError;
const lib = @import("../lib.zig");
const Renderer = @import("Renderer.zig");
const spv = @import("spv");
pub const F32x4 = zm.F32x4;
pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: F32x4, v1: F32x4) VkError!void {
var x0: i32 = @intFromFloat(v0[0]);
var y0: i32 = @intFromFloat(v0[1]);
var x1: i32 = @intFromFloat(v1[0]);
var y1: i32 = @intFromFloat(v1[1]);
/// Stores the in-memory byte representation of `value` into the first
/// `@sizeOf(T)` bytes of `bytes`. `bytes` must be at least `@sizeOf(T)` long;
/// any bytes past that prefix are left untouched.
fn writePacked(comptime T: type, bytes: []u8, value: T) void {
    const size = @sizeOf(T);
    // Slicing with comptime-known bounds yields a pointer to a fixed-size
    // array, so the value can be stored with a single bit-cast assignment.
    const destination: *[size]u8 = bytes[0..size];
    destination.* = @bitCast(value);
}
/// Returns the component-wise weighted sum
/// `value0 * b0 + value1 * b1 + value2 * b2`.
fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
    // Broadcast each scalar weight across all four lanes.
    const weight0: F32x4 = @splat(b0);
    const weight1: F32x4 = @splat(b1);
    const weight2: F32x4 = @splat(b2);

    const term0 = value0 * weight0;
    const term1 = value1 * weight1;
    const term2 = value2 * weight2;
    return (term0 + term1) + term2;
}
/// Blends the per-location outputs of three vertices into one set of
/// per-location input buffers, using the weights `b0`, `b1` and `b2`.
///
/// For each location present on all three vertices, a buffer of
/// `min(len0, len1, len2)` bytes is allocated from `allocator` and filled in
/// three passes: whole `F32x4` chunks are blended, then the remaining whole
/// `f32` values, and any trailing bytes are copied verbatim from `v0`.
/// Every payload is blended as float data; nothing here distinguishes integer
/// or non-interpolated outputs.
///
/// Locations missing on at least one vertex are returned as empty slices, so
/// every entry of the result is always a valid slice.
///
/// Returns `VkError.OutOfDeviceMemory` if an allocation fails. The buffers
/// are not freed by this function.
fn interpolateVertexOutputs(
    allocator: std.mem.Allocator,
    v0: *const Renderer.Vertex,
    v1: *const Renderer.Vertex,
    v2: *const Renderer.Vertex,
    b0: f32,
    b1: f32,
    b2: f32,
) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
    // Start from empty slices instead of `undefined`: locations skipped below
    // would otherwise be handed back uninitialised, and consumers index this
    // array by location without knowing which entries were written.
    var inputs = [_][]u8{&.{}} ** spv.SPIRV_MAX_OUTPUT_LOCATIONS;

    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
        const out0 = v0.outputs[location] orelse continue;
        const out1 = v1.outputs[location] orelse continue;
        const out2 = v2.outputs[location] orelse continue;

        if (out0.len == 0) {
            inputs[location] = out0;
            continue;
        }

        // Only the bytes available on all three vertices can be blended.
        const len = @min(out0.len, out1.len, out2.len);
        const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;

        var byte_index: usize = 0;

        // Pass 1: blend as many whole 4-float vectors as fit.
        while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) {
            const value0 = std.mem.bytesToValue(F32x4, out0[byte_index..]);
            const value1 = std.mem.bytesToValue(F32x4, out1[byte_index..]);
            const value2 = std.mem.bytesToValue(F32x4, out2[byte_index..]);
            writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
        }

        // Pass 2: blend the remaining whole scalars.
        while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
            const value0 = std.mem.bytesToValue(f32, out0[byte_index..]);
            const value1 = std.mem.bytesToValue(f32, out1[byte_index..]);
            const value2 = std.mem.bytesToValue(f32, out2[byte_index..]);
            writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
        }

        // Pass 3: a tail shorter than one f32 cannot be blended; take it from
        // the first vertex unchanged.
        if (byte_index < len)
            @memcpy(input[byte_index..], out0[byte_index..len]);

        inputs[location] = input;
    }

    return inputs;
}
/// Blends the outputs of the two end points of a line at parameter `t`:
/// `v0` is weighted by `1.0 - t` and `v1` by `t`.
///
/// Delegates to `interpolateVertexOutputs`, so the same allocation behavior
/// and `VkError` results apply.
fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
    const start_weight = 1.0 - t;
    const end_weight = t;
    // The three-vertex routine needs a third vertex; `v0` is passed again
    // with a zero weight.
    const unused_weight = 0.0;
    return interpolateVertexOutputs(allocator, v0, v1, v0, start_weight, end_weight, unused_weight);
}
pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex) VkError!void {
var x0: i32 = @intFromFloat(v0.position[0]);
var y0: i32 = @intFromFloat(v0.position[1]);
var x1: i32 = @intFromFloat(v1.position[0]);
var y1: i32 = @intFromFloat(v1.position[1]);
const steep = blk: {
if (@abs(y1 - y0) > @abs(x1 - x0)) {
@@ -26,9 +88,12 @@ pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList
break :blk false;
};
var start_vertex = v0;
var end_vertex = v1;
if (x0 > x1) {
std.mem.swap(i32, &x0, &x1);
std.mem.swap(i32, &y0, &y1);
std.mem.swap(*Renderer.Vertex, &start_vertex, &end_vertex);
}
const d_err = @abs(y1 - y0);
@@ -42,10 +107,14 @@ pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList
while (x <= x1) : (x += 1) {
const x_fragment: f32 = @floatFromInt(if (steep) y else x);
const y_fragment: f32 = @floatFromInt(if (steep) x else y);
const t = @as(f32, @floatFromInt(x - x0)) / @as(f32, @floatFromInt(@max(d_x, 1)));
const z = ((1.0 - t) * start_vertex.position[2]) + (t * end_vertex.position[2]);
fragments.append(allocator, .{
.position = zm.f32x4(x_fragment, y_fragment, 0.0, 1.0),
.position = zm.f32x4(x_fragment, y_fragment, z, 1.0),
.color = zm.f32x4(1.0, 1.0, 1.0, 1.0),
.inputs = try interpolateLineOutputs(allocator, start_vertex, end_vertex, t),
}) catch return VkError.OutOfDeviceMemory;
err -= @intCast(d_err);
@@ -60,14 +129,15 @@ fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 {
return ((p[0] - a[0]) * (b[1] - a[1])) - ((p[1] - a[1]) * (b[0] - a[0]));
}
pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: F32x4, v1: F32x4, v2: F32x4) VkError!void {
const min_x: i32 = @intFromFloat(@floor(@min(v0[0], @min(v1[0], v2[0]))));
const max_x: i32 = @intFromFloat(@ceil(@max(v0[0], @max(v1[0], v2[0]))));
const min_y: i32 = @intFromFloat(@floor(@min(v0[1], @min(v1[1], v2[1]))));
const max_y: i32 = @intFromFloat(@ceil(@max(v0[1], @max(v1[1], v2[1]))));
pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void {
const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0])));
const max_x: i32 = @intFromFloat(@ceil(@max(v0.position[0], v1.position[0], v2.position[0])));
const min_y: i32 = @intFromFloat(@floor(@min(v0.position[1], v1.position[1], v2.position[1])));
const max_y: i32 = @intFromFloat(@ceil(@max(v0.position[1], v1.position[1], v2.position[1])));
const area = edgeFunction(v0, v1, v2);
if (area == 0.0) return;
const area = edgeFunction(v0.position, v1.position, v2.position);
if (area == 0.0)
return;
var y = min_y;
while (y <= max_y) : (y += 1) {
@@ -75,25 +145,27 @@ pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayLis
while (x <= max_x) : (x += 1) {
const p = zm.f32x4(@as(f32, @floatFromInt(x)) + 0.5, @as(f32, @floatFromInt(y)) + 0.5, 0.0, 1.0);
const w0 = edgeFunction(v1, v2, p);
const w1 = edgeFunction(v2, v0, p);
const w2 = edgeFunction(v0, v1, p);
const w0 = edgeFunction(v1.position, v2.position, p);
const w1 = edgeFunction(v2.position, v0.position, p);
const w2 = edgeFunction(v0.position, v1.position, p);
const inside = if (area > 0.0)
w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0
else
w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0;
if (!inside) continue;
if (!inside)
continue;
const b0 = w0 / area;
const b1 = w1 / area;
const b2 = w2 / area;
const z = (b0 * v0[2]) + (b1 * v1[2]) + (b2 * v2[2]);
const z = (b0 * v0.position[2]) + (b1 * v1.position[2]) + (b2 * v2.position[2]);
fragments.append(allocator, .{
.position = zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0),
.color = zm.f32x4(1.0, 1.0, 1.0, 1.0),
.inputs = try interpolateVertexOutputs(allocator, v0, v1, v2, b0, b1, b2),
}) catch return VkError.OutOfDeviceMemory;
}
}
+28 -12
View File
@@ -12,6 +12,7 @@ const SoftPipeline = @import("../SoftPipeline.zig");
const VkError = base.VkError;
pub const RunData = struct {
allocator: std.mem.Allocator,
renderer: *Renderer,
pipeline: *SoftPipeline,
batch_id: usize,
@@ -45,20 +46,22 @@ inline fn run(data: RunData) !void {
else => return err,
};
for (data.pipeline.interface.mode.graphics.input_assembly.attribute_description orelse return) |attribute| {
const location_result = try rt.getResultByLocation(attribute.location, .input);
if (data.pipeline.interface.mode.graphics.input_assembly.attribute_description) |attributes| {
for (attributes) |attribute| {
const location_result = try rt.getResultByLocation(attribute.location, .input);
const binding_info = (data.pipeline.interface.mode.graphics.input_assembly.binding_description orelse return)[attribute.binding];
const binding_info = (data.pipeline.interface.mode.graphics.input_assembly.binding_description orelse return)[attribute.binding];
const vertex_buffer = data.renderer.state.data.graphics.vertex_buffers[attribute.binding];
const buffer = vertex_buffer.buffer;
const buffer_memory_size = base.format.texelSize(attribute.format);
const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
const offset = buffer.interface.offset + (binding_info.stride * invocation_index) + attribute.offset;
const vertex_buffer = data.renderer.state.data.graphics.vertex_buffers[attribute.binding];
const buffer = vertex_buffer.buffer;
const buffer_memory_size = base.format.texelSize(attribute.format);
const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
const offset = buffer.interface.offset + (binding_info.stride * invocation_index) + attribute.offset;
const buffer_memory_map: []u8 = @as([*]u8, @ptrCast(@alignCast(try buffer_memory.map(offset, buffer_memory_size))))[0..buffer_memory_size];
const buffer_memory_map: []u8 = @as([*]u8, @ptrCast(@alignCast(try buffer_memory.map(offset, buffer_memory_size))))[0..buffer_memory_size];
try rt.writeInput(buffer_memory_map, location_result);
try rt.writeInput(buffer_memory_map, location_result);
}
}
rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
@@ -69,8 +72,21 @@ inline fn run(data: RunData) !void {
else => return err,
};
const output: *F32x4 = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index];
try rt.readBuiltIn(std.mem.asBytes(output), .Position);
const output: *Renderer.Vertex = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index];
try rt.readBuiltIn(std.mem.asBytes(&output.position), .Position);
for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) {
SpvRuntimeError.NotFound => continue,
else => return err,
};
if (result_word == 0)
continue;
const value = rt.results[result_word].getConstValue() catch continue;
const needed_size = try value.getPlainMemorySize();
output.outputs[location] = data.allocator.alloc(u8, needed_size) catch return VkError.OutOfDeviceMemory;
try rt.readOutput(output.outputs[location].?, result_word);
}
}
}