From f35bce907e31101c90cf8987ac03df6bf6b34e03 Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Sun, 26 Apr 2026 22:29:30 +0200 Subject: [PATCH] first triangle rendering ! --- src/soft/SoftCommandBuffer.zig | 21 ++- src/soft/device/Renderer.zig | 205 ++++++++++++++++++------ src/soft/device/blitter.zig | 64 ++++---- src/soft/device/fragment_dispatcher.zig | 56 +++++++ src/soft/device/rasterizer.zig | 100 ++++++++++++ src/soft/device/vertex_dispatcher.zig | 44 ++++- src/vulkan/Pipeline.zig | 34 ++++ 7 files changed, 434 insertions(+), 90 deletions(-) create mode 100644 src/soft/device/fragment_dispatcher.zig create mode 100644 src/soft/device/rasterizer.zig diff --git a/src/soft/SoftCommandBuffer.zig b/src/soft/SoftCommandBuffer.zig index 139d255..f384b89 100644 --- a/src/soft/SoftCommandBuffer.zig +++ b/src/soft/SoftCommandBuffer.zig @@ -133,6 +133,25 @@ pub fn beginRenderPass(interface: *Interface, render_pass: *base.RenderPass, fra const impl: *Impl = @ptrCast(@alignCast(context)); device.renderer.render_pass = impl.render_pass; device.renderer.framebuffer = impl.framebuffer; + + for (impl.render_pass.interface.attachments, impl.framebuffer.interface.attachments, 0..) |desc, attachment, index| { + const image: *SoftImage = @alignCast(@fieldParentPtr("interface", attachment.image)); + const clear_format = try image.getClearFormat(); + + switch (desc.load_op) { + .clear => { + try blitter.clear( + (impl.clear_values orelse return VkError.Unknown)[index], + clear_format, + image, + attachment.format, + attachment.subresource_range, + null, + ); + }, + else => {}, + } + } } }; @@ -142,7 +161,7 @@ pub fn beginRenderPass(interface: *Interface, render_pass: *base.RenderPass, fra .render_pass = @alignCast(@fieldParentPtr("interface", render_pass)), .framebuffer = @alignCast(@fieldParentPtr("interface", framebuffer)), .render_area = render_area, - .clear_values = clear_values, + .clear_values = if (clear_values) |values| allocator.dupe(vk.ClearValue, values) catch return VkError.OutOfHostMemory else null, // Will be freed on cmdbuf reset or destroy }; self.commands.append(allocator, .{ .ptr = cmd, .vtable = &.{ .execute = CommandImpl.execute } }) catch return VkError.OutOfHostMemory; } diff --git a/src/soft/device/Renderer.zig b/src/soft/device/Renderer.zig index 28da44b..19462d3 100644 --- a/src/soft/device/Renderer.zig +++ b/src/soft/device/Renderer.zig @@ -1,10 +1,10 @@ const std = @import("std"); const vk = @import("vulkan"); const base = @import("base"); -const zm = @import("zmath"); +const zm = base.zm; const lib = @import("../lib.zig"); -const F32x4 = zm.F32x4; +pub const F32x4 = zm.F32x4; const PipelineState = @import("Device.zig").PipelineState; @@ -12,10 +12,14 @@ const SoftBuffer = @import("../SoftBuffer.zig"); const SoftDescriptorSet = @import("../SoftDescriptorSet.zig"); const SoftDevice = @import("../SoftDevice.zig"); const SoftFramebuffer = @import("../SoftFramebuffer.zig"); +const SoftImage = @import("../SoftImage.zig"); const SoftPipeline = @import("../SoftPipeline.zig"); const SoftRenderPass = @import("../SoftRenderPass.zig"); +const blitter = @import("blitter.zig"); +const rasterizer = @import("rasterizer.zig"); const vertex_dispatcher = @import("vertex_dispatcher.zig"); +const fragment_dispatcher = @import("fragment_dispatcher.zig"); const VkError = base.VkError; @@ -33,6 +37,16 @@ pub const DynamicState = struct { line_width: f32, }; +pub const Fragment = struct { + position: F32x4, + color: F32x4, +}; + +pub const DrawCall = struct { + vertices: []F32x4, + fragments: []Fragment, +}; + device: *SoftDevice, state: *PipelineState, @@ -51,72 +65,161 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self { } pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void { + const render_target_view: *base.ImageView = (self.framebuffer orelse return).interface.attachments[0]; + const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); + const render_target_memory = if (render_target.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; + + var arena: std.heap.ArenaAllocator = .init(self.device.device_allocator.allocator()); + defer arena.deinit(); + const allocator = arena.allocator(); + + var draw_call: DrawCall = .{ + .vertices = allocator.alloc(F32x4, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory, + .fragments = undefined, + }; + + self.vertexShaderStage(&draw_call, vertex_count, instance_count) catch |err| { + std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + }; + + self.primitiveAssemblyStage(&draw_call); + try self.rasterizationStage(allocator, &draw_call); + self.fragmentShaderStage(&draw_call) catch |err| { + std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + }; + + const texel_size = base.format.texelSize(render_target_view.format); + + for (draw_call.fragments) |fragment| { + const texel_offset = try render_target.getTexelMemoryOffset( + .{ + .x = @intFromFloat(fragment.position[0]), + .y = @intFromFloat(fragment.position[1]), + .z = @intFromFloat(fragment.position[2]), + }, + .{ + .aspect_mask = render_target_view.subresource_range.aspect_mask, + .mip_level = render_target_view.subresource_range.base_mip_level, + .array_layer = render_target_view.subresource_range.base_array_layer, + }, + ); + const map: []u8 = @as([*]u8, @ptrCast(try render_target_memory.map(render_target.interface.memory_offset + texel_offset, texel_size)))[0..texel_size]; + blitter.writeFloat4(fragment.color, map, render_target_view.format); + } + _ = first_vertex; _ = first_instance; - - self.inputAssemblyStage() catch |err| { - std.log.scoped(.@"Input assembly stage").err("catched a '{s}'", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpErrorReturnTrace(trace); - } - }; - - self.vertexShaderStage(vertex_count, instance_count) catch |err| { - std.log.scoped(.@"Input assembly stage").err("catched a '{s}'", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpErrorReturnTrace(trace); - } - }; - - self.primitiveAssemblyStage(); - self.fragmentShaderStage(); } pub fn deinit(self: *Self) void { _ = self; } -fn inputAssemblyStage(self: *Self) !void { - const pipeline = self.state.pipeline orelse return; - for ((pipeline.stages.getPtr(.vertex) orelse return).runtimes) |*rt| { - for (pipeline.interface.mode.graphics.input_assembly.attribute_description orelse return) |attribute| { - const location_result = try rt.getResultByLocation(attribute.location, .input); - - const vertex_buffer = self.state.data.graphics.vertex_buffers[attribute.binding]; - const buffer = vertex_buffer.buffer; - const buffer_memory_size = base.format.texelSize(attribute.format); - const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; - const buffer_memory_map: []u8 = @as([*]u8, @ptrCast(@alignCast(try buffer_memory.map(buffer.interface.offset + attribute.offset, buffer_memory_size))))[0..buffer_memory_size]; - - try rt.writeInput(buffer_memory_map, location_result); - } - } -} - -fn vertexShaderStage(self: *Self, vertex_count: usize, instance_count: usize) !void { - const invocation_count = vertex_count * instance_count; +fn vertexShaderStage(self: *Self, draw_call: *DrawCall, vertex_count: usize, instance_count: usize) !void { const pipeline = self.state.pipeline orelse return; const batch_size = (pipeline.stages.getPtr(.vertex) orelse return).runtimes.len; var wg: std.Io.Group = .init; - for (0..@min(batch_size, invocation_count)) |batch_id| { - const run_data: vertex_dispatcher.RunData = .{ - .renderer = self, - .pipeline = pipeline, - .batch_id = batch_id, - .batch_size = batch_size, - .invocation_count = invocation_count, - }; + for (0..instance_count) |instance_index| { + for (0..@min(batch_size, vertex_count)) |batch_id| { + const run_data: vertex_dispatcher.RunData = .{ + .renderer = self, + .pipeline = pipeline, + .batch_id = batch_id, + .batch_size = batch_size, + .vertex_count = vertex_count, + .instance_index = instance_index, + .draw_call = draw_call, + }; - wg.async(self.device.interface.io(), vertex_dispatcher.runWrapper, .{run_data}); + wg.async(self.device.interface.io(), vertex_dispatcher.runWrapper, .{run_data}); + } } wg.await(self.device.interface.io()) catch return VkError.DeviceLost; } -fn primitiveAssemblyStage(self: *Self) void { - _ = self; +fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) void { + const viewport = (self.state.pipeline orelse return).interface.mode.graphics.viewport_state.viewports[0]; + + for (draw_call.vertices) |*vertex| { + const x = vertex[0]; + const y = vertex[1]; + const z = vertex[2]; + const w = vertex[3]; + + // Perspective division. + const x_ndc = x / w; + const y_ndc = y / w; + const z_ndc = z / w; + + const p_x = viewport.width; + const p_y = viewport.height; + const p_z = viewport.max_depth - viewport.min_depth; + + const o_x = viewport.x + viewport.width / 2.0; + const o_y = viewport.y + viewport.height / 2.0; + const o_z = viewport.min_depth; + + const x_screen = ((p_x / 2.0) * x_ndc) + o_x; + const y_screen = ((p_y / 2.0) * y_ndc) + o_y; + const z_screen = (p_z * z_ndc) + o_z; + + vertex.* = zm.f32x4(x_screen, y_screen, z_screen, 1.0); + } } -fn fragmentShaderStage(self: *Self) void { - _ = self; +fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void { + var fragments: std.ArrayList(Fragment) = .empty; + + const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + const topology = pipeline_data.input_assembly.topology; + switch (topology) { + .triangle_list => for (0..@divExact(draw_call.vertices.len, 3)) |triangle_index| { + const first_vertex = triangle_index * 3; + const v0 = draw_call.vertices[first_vertex + 0]; + const v1 = draw_call.vertices[first_vertex + 1]; + const v2 = draw_call.vertices[first_vertex + 2]; + + switch (pipeline_data.rasterization.polygon_mode) { + .fill => try rasterizer.drawTriangleFilled(allocator, &fragments, v0, v1, v2), + .line => { + try rasterizer.drawLineBresenham(allocator, &fragments, v0, v1); + try rasterizer.drawLineBresenham(allocator, &fragments, v1, v2); + try rasterizer.drawLineBresenham(allocator, &fragments, v2, v0); + }, + .point => {}, + else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}), + } + }, + else => base.unsupported("primitive topology {any}", .{topology}), + } + + draw_call.fragments = fragments.toOwnedSlice(allocator) catch return VkError.OutOfDeviceMemory; +} + +fn fragmentShaderStage(self: *Self, draw_call: *DrawCall) !void { + const pipeline = self.state.pipeline orelse return; + const batch_size = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len; + const fragment_count = draw_call.fragments.len; + + var wg: std.Io.Group = .init; + for (0..@min(batch_size, fragment_count)) |batch_id| { + const run_data: fragment_dispatcher.RunData = .{ + .renderer = self, + .pipeline = pipeline, + .batch_id = batch_id, + .batch_size = batch_size, + .fragment_count = fragment_count, + .draw_call = draw_call, + }; + + wg.async(self.device.interface.io(), fragment_dispatcher.runWrapper, .{run_data}); + } + wg.await(self.device.interface.io()) catch return VkError.DeviceLost; } diff --git a/src/soft/device/blitter.zig b/src/soft/device/blitter.zig index 91237b4..8820f3c 100644 --- a/src/soft/device/blitter.zig +++ b/src/soft/device/blitter.zig @@ -269,7 +269,7 @@ fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_byt const src_map = src[computeOffset3D(x, y, z, slice_bytes, pitch_bytes, src_texel_size)..]; - color = readFloat4(src_map, state); + color = readFloat4(src_map, state.src_format); } else { var x: f32 = pos[0]; var y: f32 = pos[1]; @@ -304,14 +304,14 @@ fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_byt const sample_1_0_1 = src[computeOffset3D(ix0, iy1, iz1, slice_bytes, pitch_bytes, src_texel_size)..]; const sample_1_1_1 = src[computeOffset3D(ix1, iy1, iz1, slice_bytes, pitch_bytes, src_texel_size)..]; - const pixel_0_0_0 = readFloat4(sample_0_0_0, state); - const pixel_0_1_0 = readFloat4(sample_0_1_0, state); - const pixel_1_0_0 = readFloat4(sample_1_0_0, state); - const pixel_1_1_0 = readFloat4(sample_1_1_0, state); - const pixel_0_0_1 = readFloat4(sample_0_0_1, state); - const pixel_0_1_1 = readFloat4(sample_0_1_1, state); - const pixel_1_0_1 = readFloat4(sample_1_0_1, state); - const pixel_1_1_1 = readFloat4(sample_1_1_1, state); + const pixel_0_0_0 = readFloat4(sample_0_0_0, state.src_format); + const pixel_0_1_0 = readFloat4(sample_0_1_0, state.src_format); + const pixel_1_0_0 = readFloat4(sample_1_0_0, state.src_format); + const pixel_1_1_0 = readFloat4(sample_1_1_0, state.src_format); + const pixel_0_0_1 = readFloat4(sample_0_0_1, state.src_format); + const pixel_0_1_1 = readFloat4(sample_0_1_1, state.src_format); + const pixel_1_0_1 = readFloat4(sample_1_0_1, state.src_format); + const pixel_1_1_1 = readFloat4(sample_1_1_1, state.src_format); const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0))); const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0))); @@ -328,10 +328,10 @@ fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_byt const sample_1_0 = src[computeOffset3D(ix0, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..]; const sample_1_1 = src[computeOffset3D(ix1, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..]; - const pixel_0_0 = readFloat4(sample_0_0, state); - const pixel_0_1 = readFloat4(sample_0_1, state); - const pixel_1_0 = readFloat4(sample_1_0, state); - const pixel_1_1 = readFloat4(sample_1_1, state); + const pixel_0_0 = readFloat4(sample_0_0, state.src_format); + const pixel_0_1 = readFloat4(sample_0_1, state.src_format); + const pixel_1_0 = readFloat4(sample_1_0, state.src_format); + const pixel_1_1 = readFloat4(sample_1_1, state.src_format); const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0))); const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0))); @@ -468,9 +468,9 @@ fn blit(state: State, data: BlitData) void { var clear_color_f: ?F32x4 = null; if (state.clear) { if (are_both_int) { - clear_color_i = readInt4(data.src_map, state); + clear_color_i = readInt4(data.src_map, state.src_format); } else { - clear_color_f = applyScaleAndClamp(readFloat4(data.src_map, state), state); + clear_color_f = applyScaleAndClamp(readFloat4(data.src_map, state.src_format), state); } } @@ -488,12 +488,12 @@ fn blit(state: State, data: BlitData) void { if (clear_color_i) |color| { for (0..state.dst_samples) |_| { - writeInt4(color, dst_pixel, state); + writeInt4(color, dst_pixel, state.dst_format); dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..]; } } else if (clear_color_f) |color| { for (0..state.dst_samples) |_| { - writeFloat4(color, dst_pixel, state); + writeFloat4(color, dst_pixel, state.dst_format); dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..]; } } else if (are_both_int) { @@ -509,15 +509,15 @@ fn blit(state: State, data: BlitData) void { const src_map = data.src_map[computeOffset3D(ix, iy, iz, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, base.format.texelSize(state.src_format))..]; - const color = readInt4(src_map, state); + const color = readInt4(src_map, state.src_format); for (0..state.dst_samples) |_| { - writeInt4(color, dst_pixel, state); + writeInt4(color, dst_pixel, state.dst_format); dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..]; } } else { const color = sample(data.src_map, .{ x, y, z, 0.0 }, data.dim, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, state); for (0..state.dst_samples) |_| { - writeFloat4(color, dst_pixel, state); + writeFloat4(color, dst_pixel, state.dst_format); dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..]; } } @@ -548,10 +548,10 @@ fn applyScaleAndClamp(base_color: F32x4, state: State) F32x4 { return color; } -fn readFloat4(map: []const u8, state: State) F32x4 { +pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { var c: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; - switch (state.src_format) { + switch (src_format) { .r8_snorm, .r8_unorm, => c[0] = @as(f32, @floatFromInt(map[0])) / 255.0, @@ -584,14 +584,14 @@ fn readFloat4(map: []const u8, state: State) F32x4 { .r32g32b32a32_sfloat => c = std.mem.bytesToValue(F32x4, map), - else => base.unsupported("Blitter: read float from source format {any}", .{state.src_format}), + else => base.unsupported("Blitter: read float from source format {any}", .{src_format}), } return c; } -fn writeFloat4(color: F32x4, map: []u8, state: State) void { - switch (state.dst_format) { +pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void { + switch (dst_format) { .r8_snorm, .r8_unorm, => map[0] = @intFromFloat(@round(color[0] * 255.0)), @@ -634,14 +634,14 @@ fn writeFloat4(color: F32x4, map: []u8, state: State) void { .r32g32b32a32_sfloat => std.mem.bytesAsValue(F32x4, map).* = color, - else => base.unsupported("Blitter: write float to destination format {any}", .{state.dst_format}), + else => base.unsupported("Blitter: write float to destination format {any}", .{dst_format}), } } -fn readInt4(map: []const u8, state: State) U32x4 { +pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { var c: U32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; - switch (state.src_format) { + switch (src_format) { .r8_sint, .r8_uint, => c[0] = map[0], @@ -674,14 +674,14 @@ fn readInt4(map: []const u8, state: State) U32x4 { .r32g32b32a32_uint, => c = std.mem.bytesToValue(U32x4, map), - else => base.unsupported("Blitter: read int from source format {any}", .{state.src_format}), + else => base.unsupported("Blitter: read int from source format {any}", .{src_format}), } return c; } -fn writeInt4(color: U32x4, map: []u8, state: State) void { - switch (state.dst_format) { +pub fn writeInt4(color: U32x4, map: []u8, dst_format: vk.Format) void { + switch (dst_format) { .r8_sint, .r8_uint, => map[0] = @truncate(color[0]), @@ -716,6 +716,6 @@ fn writeInt4(color: U32x4, map: []u8, state: State) void { .r32g32b32a32_uint, => std.mem.bytesAsValue(U32x4, map).* = color, - else => base.unsupported("Blitter: write int to destination format {any}", .{state.dst_format}), + else => base.unsupported("Blitter: write int to destination format {any}", .{dst_format}), } } diff --git a/src/soft/device/fragment_dispatcher.zig b/src/soft/device/fragment_dispatcher.zig new file mode 100644 index 0000000..1fcbe96 --- /dev/null +++ b/src/soft/device/fragment_dispatcher.zig @@ -0,0 +1,56 @@ +const std = @import("std"); +const spv = @import("spv"); +const base = @import("base"); +const zm = base.zm; + +const F32x4 = Renderer.F32x4; + +const SpvRuntimeError = spv.Runtime.RuntimeError; + +const Renderer = @import("Renderer.zig"); +const SoftPipeline = @import("../SoftPipeline.zig"); + +const VkError = base.VkError; + +pub const RunData = struct { + renderer: *Renderer, + pipeline: *SoftPipeline, + batch_id: usize, + batch_size: usize, + fragment_count: usize, + draw_call: *Renderer.DrawCall, +}; + +pub fn runWrapper(data: RunData) void { + @call(.always_inline, run, .{data}) catch |err| { + std.log.scoped(.@"SPIR-V runtime").err("SPIR-V runtime catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + }; +} + +inline fn run(data: RunData) !void { + const allocator = data.renderer.device.device_allocator.allocator(); + + const shader = data.pipeline.stages.getPtrAssertContains(.fragment); + const rt = &shader.runtimes[data.batch_id]; + + const entry = try rt.getEntryPointByName(shader.entry); + const output_result = try rt.getResultByLocation(0, .output); + + var invocation_index: usize = data.batch_id; + while (invocation_index < data.fragment_count) : (invocation_index += data.batch_size) { + rt.callEntryPoint(allocator, entry) catch |err| switch (err) { + // Some errors can be safely ignored + SpvRuntimeError.OutOfBounds, + SpvRuntimeError.Killed, + => {}, + else => return err, + }; + + const output: *F32x4 = &data.draw_call.fragments[invocation_index].color; + try rt.readOutput(std.mem.asBytes(output), output_result); + output.* = std.math.clamp(output.*, zm.f32x4s(0.0), zm.f32x4s(1.0)); + } +} diff --git a/src/soft/device/rasterizer.zig b/src/soft/device/rasterizer.zig new file mode 100644 index 0000000..0ec7740 --- /dev/null +++ b/src/soft/device/rasterizer.zig @@ -0,0 +1,100 @@ +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const zm = base.zm; + +const VkError = base.VkError; + +const lib = @import("../lib.zig"); + +const Renderer = @import("Renderer.zig"); + +pub const F32x4 = zm.F32x4; + +pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: F32x4, v1: F32x4) VkError!void { + var x0: i32 = @intFromFloat(v0[0]); + var y0: i32 = @intFromFloat(v0[1]); + var x1: i32 = @intFromFloat(v1[0]); + var y1: i32 = @intFromFloat(v1[1]); + + const steep = blk: { + if (@abs(y1 - y0) > @abs(x1 - x0)) { + std.mem.swap(i32, &x0, &y0); + std.mem.swap(i32, &x1, &y1); + break :blk true; + } + break :blk false; + }; + + if (x0 > x1) { + std.mem.swap(i32, &x0, &x1); + std.mem.swap(i32, &y0, &y1); + } + + const d_err = @abs(y1 - y0); + const d_x = x1 - x0; + const y_step: i32 = if (y0 > y1) -1 else 1; + + var err = @divTrunc(d_x, 2); // Pixel center. + var y = y0; + + var x = x0; + while (x <= x1) : (x += 1) { + const x_fragment: f32 = @floatFromInt(if (steep) y else x); + const y_fragment: f32 = @floatFromInt(if (steep) x else y); + + fragments.append(allocator, .{ + .position = zm.f32x4(x_fragment, y_fragment, 0.0, 1.0), + .color = zm.f32x4(1.0, 1.0, 1.0, 1.0), + }) catch return VkError.OutOfDeviceMemory; + + err -= @intCast(d_err); + if (err < 0) { + y += y_step; + err += d_x; + } + } +} + +fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 { + return ((p[0] - a[0]) * (b[1] - a[1])) - ((p[1] - a[1]) * (b[0] - a[0])); +} + +pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: F32x4, v1: F32x4, v2: F32x4) VkError!void { + const min_x: i32 = @intFromFloat(@floor(@min(v0[0], @min(v1[0], v2[0])))); + const max_x: i32 = @intFromFloat(@ceil(@max(v0[0], @max(v1[0], v2[0])))); + const min_y: i32 = @intFromFloat(@floor(@min(v0[1], @min(v1[1], v2[1])))); + const max_y: i32 = @intFromFloat(@ceil(@max(v0[1], @max(v1[1], v2[1])))); + + const area = edgeFunction(v0, v1, v2); + if (area == 0.0) return; + + var y = min_y; + while (y <= max_y) : (y += 1) { + var x = min_x; + while (x <= max_x) : (x += 1) { + const p = zm.f32x4(@as(f32, @floatFromInt(x)) + 0.5, @as(f32, @floatFromInt(y)) + 0.5, 0.0, 1.0); + + const w0 = edgeFunction(v1, v2, p); + const w1 = edgeFunction(v2, v0, p); + const w2 = edgeFunction(v0, v1, p); + + const inside = if (area > 0.0) + w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0 + else + w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0; + + if (!inside) continue; + + const b0 = w0 / area; + const b1 = w1 / area; + const b2 = w2 / area; + const z = (b0 * v0[2]) + (b1 * v1[2]) + (b2 * v2[2]); + + fragments.append(allocator, .{ + .position = zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0), + .color = zm.f32x4(1.0, 1.0, 1.0, 1.0), + }) catch return VkError.OutOfDeviceMemory; + } + } +} diff --git a/src/soft/device/vertex_dispatcher.zig b/src/soft/device/vertex_dispatcher.zig index 28bc5f5..05ecd10 100644 --- a/src/soft/device/vertex_dispatcher.zig +++ b/src/soft/device/vertex_dispatcher.zig @@ -1,17 +1,24 @@ const std = @import("std"); const spv = @import("spv"); +const base = @import("base"); + +const F32x4 = Renderer.F32x4; const SpvRuntimeError = spv.Runtime.RuntimeError; const Renderer = @import("Renderer.zig"); const SoftPipeline = @import("../SoftPipeline.zig"); +const VkError = base.VkError; + pub const RunData = struct { renderer: *Renderer, pipeline: *SoftPipeline, batch_id: usize, batch_size: usize, - invocation_count: usize, + vertex_count: usize, + instance_index: usize, + draw_call: *Renderer.DrawCall, }; pub fn runWrapper(data: RunData) void { @@ -32,17 +39,42 @@ inline fn run(data: RunData) !void { const entry = try rt.getEntryPointByName(shader.entry); var invocation_index: usize = data.batch_id; - while (invocation_index < data.invocation_count) : (invocation_index += data.batch_size) { + while (invocation_index < data.vertex_count) : (invocation_index += data.batch_size) { + setupBuiltins(rt, invocation_index, data.instance_index) catch |err| switch (err) { + SpvRuntimeError.NotFound => {}, + else => return err, + }; + + for (data.pipeline.interface.mode.graphics.input_assembly.attribute_description orelse return) |attribute| { + const location_result = try rt.getResultByLocation(attribute.location, .input); + + const binding_info = (data.pipeline.interface.mode.graphics.input_assembly.binding_description orelse return)[attribute.binding]; + + const vertex_buffer = data.renderer.state.data.graphics.vertex_buffers[attribute.binding]; + const buffer = vertex_buffer.buffer; + const buffer_memory_size = base.format.texelSize(attribute.format); + const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; + const offset = buffer.interface.offset + (binding_info.stride * invocation_index) + attribute.offset; + + const buffer_memory_map: []u8 = @as([*]u8, @ptrCast(@alignCast(try buffer_memory.map(offset, buffer_memory_size))))[0..buffer_memory_size]; + + try rt.writeInput(buffer_memory_map, location_result); + } + rt.callEntryPoint(allocator, entry) catch |err| switch (err) { - // Some errors can be ignored + // Some errors can be safely ignored SpvRuntimeError.OutOfBounds, SpvRuntimeError.Killed, => {}, else => return err, }; - var output: [4]f32 = undefined; - try rt.readBuiltIn(std.mem.asBytes(output[0..output.len]), .Position); - std.debug.print("Output: Vec4{any}\n", .{output}); + const output: *F32x4 = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index]; + try rt.readBuiltIn(std.mem.asBytes(output), .Position); } } + +fn setupBuiltins(rt: *spv.Runtime, invocation_index: usize, instance_index: usize) !void { + try rt.writeBuiltIn(std.mem.asBytes(&invocation_index), .VertexIndex); + try rt.writeBuiltIn(std.mem.asBytes(&instance_index), .InstanceIndex); +} diff --git a/src/vulkan/Pipeline.zig b/src/vulkan/Pipeline.zig index 262efb1..d18957d 100644 --- a/src/vulkan/Pipeline.zig +++ b/src/vulkan/Pipeline.zig @@ -24,6 +24,16 @@ mode: union(enum) { attribute_description: ?[]vk.VertexInputAttributeDescription, topology: vk.PrimitiveTopology, }, + viewport_state: struct { + viewports: []vk.Viewport, + scissor: []vk.Rect2D, + }, + rasterization: struct { + polygon_mode: vk.PolygonMode, + cull_mode: vk.CullModeFlags, + front_face: vk.FrontFace, + line_width: f32, + }, }, }, @@ -84,6 +94,30 @@ pub fn initGraphics(device: *Device, allocator: std.mem.Allocator, cache: ?*Pipe }, .topology = if (info.p_input_assembly_state) |state| state.topology else return VkError.ValidationFailed, }, + .viewport_state = .{ + .viewports = blk: { + if (info.p_viewport_state) |viewport_state| { + if (viewport_state.p_viewports) |viewports| { + break :blk allocator.dupe(vk.Viewport, viewports[0..viewport_state.viewport_count]) catch return VkError.OutOfHostMemory; + } + } + return VkError.ValidationFailed; + }, + .scissor = blk: { + if (info.p_viewport_state) |viewport_state| { + if (viewport_state.p_scissors) |scissors| { + break :blk allocator.dupe(vk.Rect2D, scissors[0..viewport_state.scissor_count]) catch return VkError.OutOfHostMemory; + } + } + return VkError.ValidationFailed; + }, + }, + .rasterization = .{ + .polygon_mode = if (info.p_rasterization_state) |state| state.polygon_mode else return VkError.ValidationFailed, + .cull_mode = if (info.p_rasterization_state) |state| state.cull_mode else return VkError.ValidationFailed, + .front_face = if (info.p_rasterization_state) |state| state.front_face else return VkError.ValidationFailed, + .line_width = if (info.p_rasterization_state) |state| state.line_width else return VkError.ValidationFailed, + }, }, }, };