From b5b05776d895d7e65de07454a7446b2182df4f7a Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Wed, 13 May 2026 22:05:25 +0200 Subject: [PATCH] refactoring renderer --- build.zig.zon | 14 +- src/soft/SoftCommandBuffer.zig | 26 ++ ...renaAllocator.zig => BoundedAllocator.zig} | 36 +- src/soft/device/Renderer.zig | 318 ++++-------------- src/soft/device/clip.zig | 191 +++++++++++ src/soft/device/fragment.zig | 45 +++ src/soft/device/fragment_dispatcher.zig | 65 ---- src/soft/device/rasterizer.zig | 257 ++++++-------- src/soft/device/rasterizer/bresenham.zig | 169 ++++++++++ src/soft/device/rasterizer/common.zig | 87 +++++ src/soft/device/rasterizer/edge_function.zig | 170 ++++++++++ src/soft/device/vertex_dispatcher.zig | 29 +- src/vulkan/CommandBuffer.zig | 5 + src/vulkan/lib_vulkan.zig | 8 +- src/vulkan/logger.zig | 2 +- 15 files changed, 915 insertions(+), 507 deletions(-) rename src/soft/device/{BoundedArenaAllocator.zig => BoundedAllocator.zig} (52%) create mode 100644 src/soft/device/clip.zig create mode 100644 src/soft/device/fragment.zig delete mode 100644 src/soft/device/fragment_dispatcher.zig create mode 100644 src/soft/device/rasterizer/bresenham.zig create mode 100644 src/soft/device/rasterizer/common.zig create mode 100644 src/soft/device/rasterizer/edge_function.zig diff --git a/build.zig.zon b/build.zig.zon index 529fb08..0d5f865 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -30,16 +30,16 @@ .hash = "cpuinfo-0.0.1-RLgIQYrTMgGqfQMOd1nAa2EuglXOh5gR9bNzwMzQTemt", .lazy = true, }, - //.SPIRV_Interpreter = .{ - // .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#ca33cfe3e997503208f031d270018c10d0611989", - // .hash = "SPIRV_Interpreter-0.0.1-ajmpnwBCBQBqUKcAUTwaaxbvYmX2s0KFzF_9Bc_ntqs4", - // .lazy = true, - //}, .SPIRV_Interpreter = .{ - // For development - .path = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#3139f3cfdd844818ab42ebce1d430a1f524025d5", + .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#c0825d53158cd5a5fc38f12d155d1158efc9b371", + .hash = "SPIRV_Interpreter-0.0.1-ajmpn0RFBQBe3oaZ5-aVNJQ7FMancJXlmCNt7mYUP5WP", .lazy = true, }, + //.SPIRV_Interpreter = .{ + // // For development + // .path = "../SPIRV-Interpreter", + // .lazy = true, + //}, }, .paths = .{ diff --git a/src/soft/SoftCommandBuffer.zig b/src/soft/SoftCommandBuffer.zig index 838867a..b07e239 100644 --- a/src/soft/SoftCommandBuffer.zig +++ b/src/soft/SoftCommandBuffer.zig @@ -72,6 +72,7 @@ pub fn create(device: *base.Device, allocator: std.mem.Allocator, info: *const v .reset = reset, .resetEvent = resetEvent, .setEvent = setEvent, + .setScissor = setScissor, .setViewport = setViewport, .waitEvent = waitEvent, }; @@ -899,6 +900,31 @@ pub fn setEvent(interface: *Interface, event: *base.Event, stage: vk.PipelineSta self.commands.append(allocator, .{ .ptr = cmd, .vtable = &.{ .execute = CommandImpl.execute } }) catch return VkError.OutOfHostMemory; } +pub fn setScissor(interface: *Interface, first: u32, scissor: []const vk.Rect2D) VkError!void { + const self: *Self = @alignCast(@fieldParentPtr("interface", interface)); + const allocator = self.command_allocator.allocator(); + + const CommandImpl = struct { + const Impl = @This(); + + first: u32, + scissor: []const vk.Rect2D, + + pub fn execute(context: *anyopaque, device: *ExecutionDevice) VkError!void { + const impl: *Impl = @ptrCast(@alignCast(context)); + device.renderer.dynamic_state.scissor = impl.scissor; // Unsafe + } + }; + + const cmd = allocator.create(CommandImpl) catch return VkError.OutOfHostMemory; + errdefer allocator.destroy(cmd); + cmd.* = .{ + .first = first, + .scissor = allocator.dupe(vk.Rect2D, scissor) catch return VkError.OutOfHostMemory, // Will be freed on cmdbuf reset or destroy + }; + self.commands.append(allocator, .{ .ptr = cmd, .vtable = &.{ .execute = CommandImpl.execute } }) catch return VkError.OutOfHostMemory; +} + pub fn setViewport(interface: *Interface, first: u32, viewports: []const vk.Viewport) VkError!void { const self: *Self = @alignCast(@fieldParentPtr("interface", interface)); const allocator = self.command_allocator.allocator(); diff --git a/src/soft/device/BoundedArenaAllocator.zig b/src/soft/device/BoundedAllocator.zig similarity index 52% rename from src/soft/device/BoundedArenaAllocator.zig rename to src/soft/device/BoundedAllocator.zig index 4c74c67..37d3257 100644 --- a/src/soft/device/BoundedArenaAllocator.zig +++ b/src/soft/device/BoundedAllocator.zig @@ -7,21 +7,21 @@ const Allocator = std.mem.Allocator; const Alignment = std.mem.Alignment; mutex: base.SpinMutex, -arena: std.heap.ArenaAllocator, +child_allocator: std.mem.Allocator, bound: usize, +total_bytes_allocated: std.atomic.Value(usize), +current_bytes_allocated: std.atomic.Value(usize), pub fn init(child_allocator: Allocator, bound: usize) Self { return .{ .mutex = .{}, - .arena = .init(child_allocator), + .child_allocator = child_allocator, .bound = bound, + .total_bytes_allocated = std.atomic.Value(usize).init(0), + .current_bytes_allocated = std.atomic.Value(usize).init(0), }; } -pub fn deinit(self: *Self) void { - self.arena.deinit(); -} - pub fn allocator(self: *const Self) Allocator { return .{ .ptr = @ptrCast(@constCast(self)), // Ugly const cast for convenience @@ -34,40 +34,46 @@ pub fn allocator(self: *const Self) Allocator { }; } -pub inline fn queryCapacity(self: *Self) usize { - return self.arena.queryCapacity(); +pub inline fn queryFootprint(self: *Self) usize { + return self.total_bytes_allocated.load(.monotonic); } fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 { const self: *Self = @ptrCast(@alignCast(context)); self.mutex.lock(); defer self.mutex.unlock(); - if (self.arena.queryCapacity() >= self.bound) + if (self.current_bytes_allocated.fetchAdd(len, .monotonic) >= self.bound) return null; - return self.arena.allocator().rawAlloc(len, alignment, ret_addr); + _ = self.total_bytes_allocated.fetchAdd(len, .monotonic); + return self.child_allocator.rawAlloc(len, alignment, ret_addr); } fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool { const self: *Self = @ptrCast(@alignCast(context)); self.mutex.lock(); defer self.mutex.unlock(); - if (self.arena.queryCapacity() >= self.bound) + _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic); + if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound) return false; - return self.arena.allocator().rawResize(ptr, alignment, new_len, ret_addr); + _ = self.total_bytes_allocated.fetchAdd(new_len, .monotonic); + return self.child_allocator.rawResize(ptr, alignment, new_len, ret_addr); } fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 { const self: *Self = @ptrCast(@alignCast(context)); self.mutex.lock(); defer self.mutex.unlock(); - if (self.arena.queryCapacity() >= self.bound) + _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic); + if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound) return null; - return self.arena.allocator().rawRemap(ptr, alignment, new_len, ret_addr); + _ = self.total_bytes_allocated.fetchAdd(new_len, .monotonic); + return self.child_allocator.rawRemap(ptr, alignment, new_len, ret_addr); } fn free(context: *anyopaque, ptr: []u8, alignment: Alignment, ret_addr: usize) void { const self: *Self = @ptrCast(@alignCast(context)); self.mutex.lock(); defer self.mutex.unlock(); - return self.arena.allocator().rawFree(ptr, alignment, ret_addr); + _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic); + return self.child_allocator.rawFree(ptr, alignment, ret_addr); } diff --git a/src/soft/device/Renderer.zig b/src/soft/device/Renderer.zig index 8689ab7..a7d2b39 100644 --- a/src/soft/device/Renderer.zig +++ b/src/soft/device/Renderer.zig @@ -2,28 +2,25 @@ const std = @import("std"); const vk = @import("vulkan"); const base = @import("base"); const zm = base.zm; -const lib = @import("../lib.zig"); const spv = @import("spv"); -pub const F32x4 = zm.F32x4; - const PipelineState = @import("Device.zig").PipelineState; -const BoundedArenaAllocator = @import("BoundedArenaAllocator.zig"); +const BoundedAllocator = @import("BoundedAllocator.zig"); const SoftBuffer = @import("../SoftBuffer.zig"); const SoftDescriptorSet = @import("../SoftDescriptorSet.zig"); const SoftDevice = @import("../SoftDevice.zig"); const SoftFramebuffer = @import("../SoftFramebuffer.zig"); -const SoftImage = @import("../SoftImage.zig"); const SoftPipeline = @import("../SoftPipeline.zig"); const SoftRenderPass = @import("../SoftRenderPass.zig"); const blitter = @import("blitter.zig"); const rasterizer = @import("rasterizer.zig"); const vertex_dispatcher = @import("vertex_dispatcher.zig"); -const fragment_dispatcher = @import("fragment_dispatcher.zig"); +const clip = @import("clip.zig"); const VkError = base.VkError; +const F32x4 = zm.F32x4; const Self = @This(); @@ -43,7 +40,7 @@ pub const IndexBuffer = struct { pub const DynamicState = struct { viewports: ?[]const vk.Viewport, - scissor: ?[]vk.Rect2D, + scissor: ?[]const vk.Rect2D, line_width: ?f32, }; @@ -55,20 +52,19 @@ pub const Vertex = struct { }, }; -pub const Fragment = struct { - position: F32x4, - color: F32x4, - inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8, -}; - pub const DrawCall = struct { + renderer: *Self, vertices: []Vertex, - fragments: []Fragment, - pub fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize) VkError!@This() { + viewport: vk.Viewport, + scissor: vk.Rect2D, + + pub fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize, renderer: *Self) VkError!@This() { const self: @This() = .{ .vertices = allocator.alloc(Vertex, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory, - .fragments = undefined, + .renderer = renderer, + .viewport = undefined, + .scissor = undefined, }; for (self.vertices) |*vertex| { @@ -100,20 +96,35 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self { }; } +pub fn deinit(self: *Self) void { + _ = self; +} + pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void { + var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB"); + try self.drawCall(&bounded_allocator, vertex_count, instance_count, first_vertex, first_instance, null); +} + +pub fn drawIndexed(self: *Self, index_count: usize, instance_count: usize, first_index: usize, first_instance: usize, vertex_offset: i32) VkError!void { + var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB"); + const allocator = bounded_allocator.allocator(); + + const indices = try self.readIndexBuffer(allocator, index_count, first_index, vertex_offset); + + try self.drawCall(&bounded_allocator, index_count, instance_count, 0, first_instance, indices); +} + +fn drawCall(self: *Self, bounded_allocator: *BoundedAllocator, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize, indices: ?[]const i32) VkError!void { const io = self.device.interface.io(); + const allocator = bounded_allocator.allocator(); - var arena: BoundedArenaAllocator = .init(self.device.device_allocator.allocator(), @"1GiB"); - defer arena.deinit(); - const allocator = arena.allocator(); - - var draw_call = try DrawCall.init(allocator, vertex_count, instance_count); + var draw_call = try DrawCall.init(allocator, vertex_count, instance_count, self); const timer = std.Io.Timestamp.now(io, .real); defer if (comptime base.config.logs != .none) { const duration = timer.untilNow(io, .real); const ms = duration.toMicroseconds(); - const memory_footprint = @divTrunc(arena.queryCapacity(), 1000); + const memory_footprint = @divTrunc(bounded_allocator.queryFootprint(), 1000); const logger = std.log.scoped(.SoftwareRenderer); if (memory_footprint > 256_000) logger.warn("Drawcall stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint }) @@ -121,50 +132,18 @@ pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_verte logger.debug("Drawcall stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint }); }; - self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, null) catch |err| { + self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, indices) catch |err| { std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)}); if (@errorReturnTrace()) |trace| { std.debug.dumpErrorReturnTrace(trace); } + return VkError.Unknown; }; - try self.postVertexDraw(allocator, &draw_call); -} + draw_call.viewport = try self.resolveViewport(0); + draw_call.scissor = try self.resolveScissor(0); -pub fn drawIndexed(self: *Self, index_count: usize, instance_count: usize, first_index: usize, first_instance: usize, vertex_offset: i32) VkError!void { - const io = self.device.interface.io(); - - var arena: BoundedArenaAllocator = .init(self.device.device_allocator.allocator(), @"1GiB"); - defer arena.deinit(); - const allocator = arena.allocator(); - - var draw_call = try DrawCall.init(allocator, index_count, instance_count); - const indices = try self.readIndexBuffer(allocator, index_count, first_index, vertex_offset); - - const timer = std.Io.Timestamp.now(io, .real); - defer if (comptime base.config.logs != .none) { - const duration = timer.untilNow(io, .real); - const ms = duration.toMicroseconds(); - const memory_footprint = @divTrunc(arena.queryCapacity(), 1000); - const logger = std.log.scoped(.SoftwareRenderer); - if (memory_footprint > 256_000) - logger.warn("Drawcall indexed stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint }) - else - logger.debug("Drawcall indexed stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint }); - }; - - self.vertexShaderStage(allocator, &draw_call, index_count, instance_count, 0, first_instance, indices) catch |err| { - std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpErrorReturnTrace(trace); - } - }; - - try self.postVertexDraw(allocator, &draw_call); -} - -pub fn deinit(self: *Self) void { - _ = self; + try rasterizer.processThenFragmentStage(self, allocator, &draw_call); } fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize, indices: ?[]const i32) !void { @@ -176,7 +155,6 @@ fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Draw for (0..@min(batch_size, vertex_count)) |batch_id| { const run_data: vertex_dispatcher.RunData = .{ .allocator = allocator, - .renderer = self, .pipeline = pipeline, .batch_id = batch_id, .batch_size = batch_size, @@ -194,167 +172,6 @@ fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Draw wg.await(self.device.interface.io()) catch return VkError.DeviceLost; } -fn postVertexDraw(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void { - const render_target_view: *base.ImageView = (self.framebuffer orelse return).interface.attachments[0]; - const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); - - try self.primitiveAssemblyStage(draw_call); - try self.rasterizationStage(allocator, draw_call); - - self.fragmentShaderStage(draw_call) catch |err| { - std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpErrorReturnTrace(trace); - } - }; - - for (draw_call.fragments) |fragment| { - try render_target.writeFloat4( - .{ - .x = @intFromFloat(fragment.position[0]), - .y = @intFromFloat(fragment.position[1]), - .z = 0, // FIXME - }, - .{ - .aspect_mask = render_target_view.subresource_range.aspect_mask, - .mip_level = render_target_view.subresource_range.base_mip_level, - .array_layer = render_target_view.subresource_range.base_array_layer, - }, - render_target_view.format, - fragment.color, - ); - } -} - -fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) VkError!void { - const viewport = blk: { - const pipeline_data = &(self.state.pipeline orelse return VkError.InvalidPipelineDrv).interface.mode.graphics; - if (pipeline_data.dynamic_state.viewport) { - if (self.dynamic_state.viewports) |viewports| - break :blk viewports[0]; - } - if (pipeline_data.viewport_state.viewports) |viewports| - break :blk viewports[0]; - return VkError.Unknown; - }; - - for (draw_call.vertices) |*vertex| { - const x = vertex.position[0]; - const y = vertex.position[1]; - const z = vertex.position[2]; - const w = vertex.position[3]; - - // Perspective division. - const x_ndc = x / w; - const y_ndc = y / w; - const z_ndc = z / w; - - const p_x = viewport.width; - const p_y = viewport.height; - const p_z = viewport.max_depth - viewport.min_depth; - - const o_x = viewport.x + viewport.width / 2.0; - const o_y = viewport.y + viewport.height / 2.0; - const o_z = viewport.min_depth; - - const x_screen = ((p_x / 2.0) * x_ndc) + o_x; - const y_screen = ((p_y / 2.0) * y_ndc) + o_y; - const z_screen = (p_z * z_ndc) + o_z; - - vertex.position = zm.f32x4(x_screen, y_screen, z_screen, 1.0); - } -} - -fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void { - var fragments: std.ArrayList(Fragment) = .empty; - - const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; - const topology = pipeline_data.input_assembly.topology; - switch (topology) { - .triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| { - const first_vertex = triangle_index * 3; - const v0 = &draw_call.vertices[first_vertex + 0]; - const v1 = &draw_call.vertices[first_vertex + 1]; - const v2 = &draw_call.vertices[first_vertex + 2]; - - try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); - }, - .triangle_fan => if (draw_call.vertices.len >= 3) { - const v0 = &draw_call.vertices[0]; - for (1..(draw_call.vertices.len - 1)) |vertex_index| { - const v1 = &draw_call.vertices[vertex_index]; - const v2 = &draw_call.vertices[vertex_index + 1]; - - try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); - } - }, - .triangle_strip => if (draw_call.vertices.len >= 3) { - for (0..(draw_call.vertices.len - 2)) |vertex_index| { - const v0 = &draw_call.vertices[vertex_index + 0]; - const v1 = &draw_call.vertices[vertex_index + 1]; - const v2 = &draw_call.vertices[vertex_index + 2]; - - if ((vertex_index & 1) == 0) { - try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2); - } else { - try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v1, v0, v2); - } - } - }, - else => base.unsupported("primitive topology {any}", .{topology}), - } - - draw_call.fragments = fragments.toOwnedSlice(allocator) catch return VkError.OutOfDeviceMemory; -} - -fn rasterizeTriangle( - self: *Self, - allocator: std.mem.Allocator, - fragments: *std.ArrayList(Fragment), - v0: *Vertex, - v1: *Vertex, - v2: *Vertex, - cull_v0: *const Vertex, - cull_v1: *const Vertex, - cull_v2: *const Vertex, -) VkError!void { - if (try self.triangleIsCulled(cull_v0, cull_v1, cull_v2)) - return; - - const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; - switch (pipeline_data.rasterization.polygon_mode) { - .fill => try rasterizer.drawTriangleFilled(allocator, fragments, v0, v1, v2), - .line => { - try rasterizer.drawLineBresenham(allocator, fragments, v0, v1); - try rasterizer.drawLineBresenham(allocator, fragments, v1, v2); - try rasterizer.drawLineBresenham(allocator, fragments, v2, v0); - }, - .point => {}, - else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}), - } -} - -fn fragmentShaderStage(self: *Self, draw_call: *DrawCall) !void { - const pipeline = self.state.pipeline orelse return; - const batch_size = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len; - const fragment_count = draw_call.fragments.len; - - var wg: std.Io.Group = .init; - for (0..@min(batch_size, fragment_count)) |batch_id| { - const run_data: fragment_dispatcher.RunData = .{ - .renderer = self, - .pipeline = pipeline, - .batch_id = batch_id, - .batch_size = batch_size, - .fragment_count = fragment_count, - .draw_call = draw_call, - }; - - wg.async(self.device.interface.io(), fragment_dispatcher.runWrapper, .{run_data}); - } - wg.await(self.device.interface.io()) catch return VkError.DeviceLost; -} - fn readIndexBuffer(self: *Self, allocator: std.mem.Allocator, index_count: usize, first_index: usize, vertex_offset: i32) VkError![]i32 { const index_buffer = self.state.data.graphics.index_buffer; const buffer = index_buffer.buffer; @@ -392,37 +209,44 @@ fn indexTypeSize(index_type: vk.IndexType) ?usize { }; } -fn triangleArea2(v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) f32 { - const x0 = v0.position[0]; - const y0 = v0.position[1]; - const x1 = v1.position[0]; - const y1 = v1.position[1]; - const x2 = v2.position[0]; - const y2 = v2.position[1]; +fn resolveViewport(self: *Self, viewport_index: usize) VkError!vk.Viewport { + const pipeline_data = + &(self.state.pipeline orelse return VkError.InvalidPipelineDrv).interface.mode.graphics; - return ((x1 - x0) * (y2 - y0)) - ((y1 - y0) * (x2 - x0)); + if (pipeline_data.dynamic_state.viewport) { + if (self.dynamic_state.viewports) |viewports| { + if (viewport_index < viewports.len) + return viewports[viewport_index]; + } + + return VkError.Unknown; + } + + if (pipeline_data.viewport_state.viewports) |viewports| { + if (viewport_index < viewports.len) + return viewports[viewport_index]; + } + + return VkError.Unknown; } -fn triangleIsCulled(self: *Self, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!bool { - const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; - const rasterization = pipeline_data.rasterization; - const cull_mode = rasterization.cull_mode; +fn resolveScissor(self: *Self, scissor_index: usize) VkError!vk.Rect2D { + const pipeline_data = + &(self.state.pipeline orelse return VkError.InvalidPipelineDrv).interface.mode.graphics; - if (!cull_mode.front_bit and !cull_mode.back_bit) - return false; + if (pipeline_data.dynamic_state.scissor) { + if (self.dynamic_state.scissor) |scissor| { + if (scissor_index < scissor.len) + return scissor[scissor_index]; + } - if (cull_mode.front_bit and cull_mode.back_bit) - return true; + return VkError.Unknown; + } - const area = triangleArea2(v0, v1, v2); - if (area == 0.0) - return true; + if (pipeline_data.viewport_state.scissor) |scissor| { + if (scissor_index < scissor.len) + return scissor[scissor_index]; + } - const front_face = switch (rasterization.front_face) { - .counter_clockwise => area < 0.0, - .clockwise => area > 0.0, - else => return false, - }; - - return (cull_mode.front_bit and front_face) or (cull_mode.back_bit and !front_face); + return VkError.Unknown; } diff --git a/src/soft/device/clip.zig b/src/soft/device/clip.zig new file mode 100644 index 0000000..7de6999 --- /dev/null +++ b/src/soft/device/clip.zig @@ -0,0 +1,191 @@ +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const zm = base.zm; +const lib = @import("../lib.zig"); +const spv = @import("spv"); + +pub const F32x4 = zm.F32x4; + +const Renderer = @import("Renderer.zig"); +const Vertex = Renderer.Vertex; + +const VkError = base.VkError; + +const ClipPlane = enum { + Left, + Right, + Bottom, + Top, + Near, + Far, +}; + +const MAX_CLIPPED_POLYGON_VERTICES = 16; + +const ClippedPolygon = struct { + vertices: [MAX_CLIPPED_POLYGON_VERTICES]Vertex = undefined, + len: usize = 0, + + fn append(self: *@This(), vertex: Vertex) VkError!void { + if (self.len >= self.vertices.len) + return VkError.OutOfDeviceMemory; + + self.vertices[self.len] = vertex; + self.len += 1; + } +}; + +fn clipDistance(position: F32x4, plane: ClipPlane) f32 { + const x = position[0]; + const y = position[1]; + const z = position[2]; + const w = position[3]; + + return switch (plane) { + .Left => x + w, + .Right => w - x, + .Bottom => y + w, + .Top => w - y, + .Near => z, + .Far => w - z, + }; +} + +fn vertexInsidePlane(vertex: *const Vertex, plane: ClipPlane) bool { + return clipDistance(vertex.position, plane) >= 0.0; +} + +fn copyBlob(allocator: std.mem.Allocator, blob: []const u8) VkError![]u8 { + const result = allocator.alloc(u8, blob.len) catch return VkError.OutOfDeviceMemory; + @memcpy(result, blob); + return result; +} + +fn writePacked(comptime T: type, bytes: []u8, value: T) void { + const raw: [@sizeOf(T)]u8 = @bitCast(value); + @memcpy(bytes[0..@sizeOf(T)], raw[0..]); +} + +fn interpolateBlob(allocator: std.mem.Allocator, a: []const u8, b: []const u8, t: f32) VkError![]u8 { + const len = @min(a.len, b.len); + const result = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory; + + var byte_index: usize = 0; + while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) { + const value_a = std.mem.bytesToValue(F32x4, a[byte_index..]); + const value_b = std.mem.bytesToValue(F32x4, b[byte_index..]); + writePacked(F32x4, result[byte_index..], value_a + ((value_b - value_a) * @as(F32x4, @splat(t)))); + } + + while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) { + const value_a = std.mem.bytesToValue(f32, a[byte_index..]); + const value_b = std.mem.bytesToValue(f32, b[byte_index..]); + writePacked(f32, result[byte_index..], value_a + ((value_b - value_a) * t)); + } + + if (byte_index < len) + @memcpy(result[byte_index..], a[byte_index..len]); + + return result; +} + +fn interpolateVertexForClipping(allocator: std.mem.Allocator, a: *const Vertex, b: *const Vertex, t: f32) VkError!Vertex { + var result: Vertex = .{ + .position = a.position + ((b.position - a.position) * @as(F32x4, @splat(t))), + .outputs = undefined, + }; + + @memset(result.outputs[0..], null); + + for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { + const out_a = a.outputs[location] orelse continue; + const out_b = b.outputs[location] orelse continue; + + result.outputs[location] = .{ + .interpolation_type = out_a.interpolation_type, + .blob = if (out_a.interpolation_type == .flat) + try copyBlob(allocator, out_a.blob) + else + try interpolateBlob(allocator, out_a.blob, out_b.blob, t), + }; + } + + return result; +} + +fn clipPolygonAgainstPlane(allocator: std.mem.Allocator, input: *const ClippedPolygon, plane: ClipPlane) VkError!ClippedPolygon { + var output: ClippedPolygon = .{}; + + if (input.len == 0) + return output; + + var previous = input.vertices[input.len - 1]; + var previous_inside = vertexInsidePlane(&previous, plane); + var previous_distance = clipDistance(previous.position, plane); + + for (input.vertices[0..input.len]) |current| { + const current_inside = vertexInsidePlane(¤t, plane); + const current_distance = clipDistance(current.position, plane); + + if (current_inside != previous_inside) { + const t = previous_distance / (previous_distance - current_distance); + try output.append(try interpolateVertexForClipping(allocator, &previous, ¤t, t)); + } + + if (current_inside) + try output.append(current); + + previous = current; + previous_inside = current_inside; + previous_distance = current_distance; + } + + return output; +} + +pub fn clipTriangle(allocator: std.mem.Allocator, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!ClippedPolygon { + var polygon: ClippedPolygon = .{}; + try polygon.append(v0.*); + try polygon.append(v1.*); + try polygon.append(v2.*); + + const planes = [_]ClipPlane{ + .Left, + .Right, + .Bottom, + .Top, + .Near, + .Far, + }; + + for (planes) |plane| { + polygon = try clipPolygonAgainstPlane(allocator, &polygon, plane); + if (polygon.len < 3) + return polygon; + } + + return polygon; +} + +pub fn viewportTransformVertex(viewport: vk.Viewport, vertex: *Vertex) void { + const x, const y, const z, const w = vertex.position; + + const x_ndc = x / w; + const y_ndc = y / w; + const z_ndc = z / w; + + const p_x = viewport.width; + const p_y = viewport.height; + const p_z = viewport.max_depth - viewport.min_depth; + + const o_x = viewport.x + viewport.width / 2.0; + const o_y = viewport.y + viewport.height / 2.0; + const o_z = viewport.min_depth; + + const x_screen = ((p_x / 2.0) * x_ndc) + o_x; + const y_screen = ((p_y / 2.0) * y_ndc) + o_y; + const z_screen = (p_z * z_ndc) + o_z; + + vertex.position = zm.f32x4(x_screen, y_screen, z_screen, w); +} diff --git a/src/soft/device/fragment.zig b/src/soft/device/fragment.zig new file mode 100644 index 0000000..9e4c0ae --- /dev/null +++ b/src/soft/device/fragment.zig @@ -0,0 +1,45 @@ +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const zm = base.zm; +const spv = @import("spv"); + +const lib = @import("../lib.zig"); + +const Renderer = @import("Renderer.zig"); +const SoftImage = @import("../SoftImage.zig"); + +const VkError = base.VkError; +const SpvRuntimeError = spv.Runtime.RuntimeError; + +pub fn shaderInvocation(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, batch_id: usize, position: zm.F32x4, inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]const u8) SpvRuntimeError!zm.F32x4 { + _ = position; + const pipeline = draw_call.renderer.state.pipeline orelse return zm.f32x4s(0.0); + + const shader = pipeline.stages.getPtrAssertContains(.fragment); + const rt = &shader.runtimes[batch_id]; + + const entry = try rt.getEntryPointByName(shader.entry); + const output_result = try rt.getResultByLocation(0, .output); + + for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { + const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) { + SpvRuntimeError.NotFound => continue, + else => return err, + }; + try rt.writeInput(inputs[location], result_word); + allocator.free(inputs[location]); + } + + rt.callEntryPoint(allocator, entry) catch |err| switch (err) { + // Some errors can be safely ignored + SpvRuntimeError.OutOfBounds, + SpvRuntimeError.Killed, + => {}, + else => return err, + }; + + var color = zm.f32x4s(0.0); + try rt.readOutput(std.mem.asBytes(&color), output_result); + return std.math.clamp(color, zm.f32x4s(0.0), zm.f32x4s(1.0)); +} diff --git a/src/soft/device/fragment_dispatcher.zig b/src/soft/device/fragment_dispatcher.zig deleted file mode 100644 index c3912d8..0000000 --- a/src/soft/device/fragment_dispatcher.zig +++ /dev/null @@ -1,65 +0,0 @@ -const std = @import("std"); -const spv = @import("spv"); -const base = @import("base"); -const zm = base.zm; - -const F32x4 = Renderer.F32x4; - -const SpvRuntimeError = spv.Runtime.RuntimeError; - -const Renderer = @import("Renderer.zig"); -const SoftPipeline = @import("../SoftPipeline.zig"); - -const VkError = base.VkError; - -pub const RunData = struct { - renderer: *Renderer, - pipeline: *SoftPipeline, - batch_id: usize, - batch_size: usize, - fragment_count: usize, - draw_call: *Renderer.DrawCall, -}; - -pub fn runWrapper(data: RunData) void { - @call(.always_inline, run, .{data}) catch |err| { - std.log.scoped(.@"SPIR-V runtime").err("SPIR-V runtime catched a '{s}'", .{@errorName(err)}); - if (@errorReturnTrace()) |trace| { - std.debug.dumpErrorReturnTrace(trace); - } - }; -} - -inline fn run(data: RunData) !void { - const allocator = data.renderer.device.device_allocator.allocator(); - - const shader = data.pipeline.stages.getPtrAssertContains(.fragment); - const rt = &shader.runtimes[data.batch_id]; - - const entry = try rt.getEntryPointByName(shader.entry); - const output_result = try rt.getResultByLocation(0, .output); - - var invocation_index: usize = data.batch_id; - while (invocation_index < data.fragment_count) : (invocation_index += data.batch_size) { - const fragment: *Renderer.Fragment = &data.draw_call.fragments[invocation_index]; - - for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { - const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) { - SpvRuntimeError.NotFound => continue, - else => return err, - }; - try rt.writeInput(fragment.inputs[location], result_word); - } - - rt.callEntryPoint(allocator, entry) catch |err| switch (err) { - // Some errors can be safely ignored - SpvRuntimeError.OutOfBounds, - SpvRuntimeError.Killed, - => {}, - else => return err, - }; - - try rt.readOutput(std.mem.asBytes(&fragment.color), output_result); - fragment.color = std.math.clamp(fragment.color, zm.f32x4s(0.0), zm.f32x4s(1.0)); - } -} diff --git a/src/soft/device/rasterizer.zig b/src/soft/device/rasterizer.zig index 0f37221..c899da8 100644 --- a/src/soft/device/rasterizer.zig +++ b/src/soft/device/rasterizer.zig @@ -1,176 +1,119 @@ const std = @import("std"); -const vk = @import("vulkan"); const base = @import("base"); -const zm = base.zm; + +const clip = @import("clip.zig"); + +const bresenham = @import("rasterizer/bresenham.zig"); +const edge_function = @import("rasterizer/edge_function.zig"); + +const Renderer = @import("Renderer.zig"); +const Vertex = Renderer.Vertex; +const DrawCall = Renderer.DrawCall; const VkError = base.VkError; -const lib = @import("../lib.zig"); +pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void { + const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + const topology = pipeline_data.input_assembly.topology; -const Renderer = @import("Renderer.zig"); -const spv = @import("spv"); + switch (topology) { + .triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| { + const first_vertex = triangle_index * 3; + const v0 = &draw_call.vertices[first_vertex + 0]; + const v1 = &draw_call.vertices[first_vertex + 1]; + const v2 = &draw_call.vertices[first_vertex + 2]; -pub const F32x4 = zm.F32x4; + try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2); + }, + .triangle_fan => if (draw_call.vertices.len >= 3) { + const v0 = &draw_call.vertices[0]; + for (1..(draw_call.vertices.len - 1)) |vertex_index| { + const v1 = &draw_call.vertices[vertex_index]; + const v2 = &draw_call.vertices[vertex_index + 1]; -fn writePacked(comptime T: type, bytes: []u8, value: T) void { - const raw: [@sizeOf(T)]u8 = @bitCast(value); - @memcpy(bytes[0..@sizeOf(T)], raw[0..]); -} + try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2); + } + }, + .triangle_strip => if (draw_call.vertices.len >= 3) { + for (0..(draw_call.vertices.len - 2)) |vertex_index| { + const v0 = &draw_call.vertices[vertex_index + 0]; + const v1 = &draw_call.vertices[vertex_index + 1]; + const v2 = &draw_call.vertices[vertex_index + 2]; -fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 { - return (value0 * @as(F32x4, @splat(b0))) + (value1 * @as(F32x4, @splat(b1))) + (value2 * @as(F32x4, @splat(b2))); -} - -var calls: usize = 0; - -fn interpolateVertexOutputs( - allocator: std.mem.Allocator, - v0: *const Renderer.Vertex, - v1: *const Renderer.Vertex, - v2: *const Renderer.Vertex, - b0: f32, - b1: f32, - b2: f32, -) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 { - var inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 = undefined; - - for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { - const out0 = v0.outputs[location] orelse continue; - const out1 = v1.outputs[location] orelse continue; - const out2 = v2.outputs[location] orelse continue; - - if (out0.interpolation_type == .flat or out0.blob.len == 0) { - inputs[location] = out0.blob; - continue; - } - - const len = @min(out0.blob.len, out1.blob.len, out2.blob.len); - calls += 1; - std.debug.print("test {d}\n", .{calls}); - const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory; - - var byte_index: usize = 0; - while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) { - const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]); - const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]); - const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]); - writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2)); - } - - while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) { - const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]); - const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]); - const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]); - writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2)); - } - - if (byte_index < len) - @memcpy(input[byte_index..], out0.blob[byte_index..len]); - - inputs[location] = input; - } - - return inputs; -} - -fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 { - return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0); -} - -pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex) VkError!void { - var x0: i32 = @intFromFloat(v0.position[0]); - var y0: i32 = @intFromFloat(v0.position[1]); - var x1: i32 = @intFromFloat(v1.position[0]); - var y1: i32 = @intFromFloat(v1.position[1]); - - const steep = blk: { - if (@abs(y1 - y0) > @abs(x1 - x0)) { - std.mem.swap(i32, &x0, &y0); - std.mem.swap(i32, &x1, &y1); - break :blk true; - } - break :blk false; - }; - - var start_vertex = v0; - var end_vertex = v1; - if (x0 > x1) { - std.mem.swap(i32, &x0, &x1); - std.mem.swap(i32, &y0, &y1); - std.mem.swap(*Renderer.Vertex, &start_vertex, &end_vertex); - } - - const d_err = @abs(y1 - y0); - const d_x = x1 - x0; - const y_step: i32 = if (y0 > y1) -1 else 1; - - var err = @divTrunc(d_x, 2); // Pixel center. - var y = y0; - - var x = x0; - while (x <= x1) : (x += 1) { - const x_fragment: f32 = @floatFromInt(if (steep) y else x); - const y_fragment: f32 = @floatFromInt(if (steep) x else y); - const t = @as(f32, @floatFromInt(x - x0)) / @as(f32, @floatFromInt(@max(d_x, 1))); - - const z = ((1.0 - t) * start_vertex.position[2]) + (t * end_vertex.position[2]); - - fragments.append(allocator, .{ - .position = zm.f32x4(x_fragment, y_fragment, z, 1.0), - .color = zm.f32x4(1.0, 1.0, 1.0, 1.0), - .inputs = try interpolateLineOutputs(allocator, start_vertex, end_vertex, t), - }) catch return VkError.OutOfDeviceMemory; - - err -= @intCast(d_err); - if (err < 0) { - y += y_step; - err += d_x; - } + if ((vertex_index & 1) == 0) { + try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2); + } else { + try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v1, v0, v2); + } + } + }, + else => base.unsupported("primitive topology {any}", .{topology}), } } -fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 { - return ((p[0] - a[0]) * (b[1] - a[1])) - ((p[1] - a[1]) * (b[0] - a[0])); -} +fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!void { + const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2); -pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void { - const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0]))); - const max_x: i32 = @intFromFloat(@ceil(@max(v0.position[0], v1.position[0], v2.position[0]))); - const min_y: i32 = @intFromFloat(@floor(@min(v0.position[1], v1.position[1], v2.position[1]))); - const max_y: i32 = @intFromFloat(@ceil(@max(v0.position[1], v1.position[1], v2.position[1]))); - - const area = edgeFunction(v0.position, v1.position, v2.position); - if (area == 0.0) + if (clipped_polygon.len < 3) return; - var y = min_y; - while (y <= max_y) : (y += 1) { - var x = min_x; - while (x <= max_x) : (x += 1) { - const p = zm.f32x4(@as(f32, @floatFromInt(x)) + 0.5, @as(f32, @floatFromInt(y)) + 0.5, 0.0, 1.0); + for (1..(clipped_polygon.len - 1)) |vertex_index| { + var tv0 = clipped_polygon.vertices[0]; + var tv1 = clipped_polygon.vertices[vertex_index]; + var tv2 = clipped_polygon.vertices[vertex_index + 1]; - const w0 = edgeFunction(v1.position, v2.position, p); - const w1 = edgeFunction(v2.position, v0.position, p); - const w2 = edgeFunction(v0.position, v1.position, p); + clip.viewportTransformVertex(draw_call.viewport, &tv0); + clip.viewportTransformVertex(draw_call.viewport, &tv1); + clip.viewportTransformVertex(draw_call.viewport, &tv2); - const inside = if (area > 0.0) - w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0 - else - w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0; - - if (!inside) - continue; - - const b0 = w0 / area; - const b1 = w1 / area; - const b2 = w2 / area; - const z = (b0 * v0.position[2]) + (b1 * v1.position[2]) + (b2 * v2.position[2]); - - fragments.append(allocator, .{ - .position = zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0), - .color = zm.f32x4(1.0, 1.0, 1.0, 1.0), - .inputs = try interpolateVertexOutputs(allocator, v0, v1, v2, b0, b1, b2), - }) catch return VkError.OutOfDeviceMemory; - } + try rasterizeTriangle(renderer, allocator, draw_call, &tv0, &tv1, &tv2); } } + +fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void { + if (try triangleIsCulled(renderer, v0, v1, v2)) + return; + + const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + switch (pipeline_data.rasterization.polygon_mode) { + .fill => try edge_function.drawTriangle(allocator, draw_call, v0, v1, v2), + .line => { + try bresenham.drawLine(allocator, draw_call, v0, v1); + try bresenham.drawLine(allocator, draw_call, v1, v2); + try bresenham.drawLine(allocator, draw_call, v2, v0); + }, + .point => {}, // TODO + else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}), + } +} + +fn triangleIsCulled(renderer: *Renderer, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!bool { + const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; + const rasterization = pipeline_data.rasterization; + const cull_mode = rasterization.cull_mode; + + if (!cull_mode.front_bit and !cull_mode.back_bit) + return false; + + if (cull_mode.front_bit and cull_mode.back_bit) + return true; + + const area = triangleArea(v0, v1, v2); + if (area == 0.0) + return true; + + const front_face = switch (rasterization.front_face) { + .counter_clockwise => area < 0.0, + .clockwise => area > 0.0, + else => return false, + }; + + return (cull_mode.front_bit and front_face) or (cull_mode.back_bit and !front_face); +} + +inline fn triangleArea(v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) f32 { + const x0, const y0, _, _ = v0.position; + const x1, const y1, _, _ = v1.position; + const x2, const y2, _, _ = v2.position; + return ((x1 - x0) * (y2 - y0)) - ((y1 - y0) * (x2 - x0)); +} diff --git a/src/soft/device/rasterizer/bresenham.zig b/src/soft/device/rasterizer/bresenham.zig new file mode 100644 index 0000000..2ac1a22 --- /dev/null +++ b/src/soft/device/rasterizer/bresenham.zig @@ -0,0 +1,169 @@ +const std = @import("std"); +const base = @import("base"); +const spv = @import("spv"); +const zm = base.zm; + +const common = @import("common.zig"); +const fragment = @import("../fragment.zig"); + +const Renderer = @import("../Renderer.zig"); +const SoftImage = @import("../../SoftImage.zig"); + +const VkError = base.VkError; +const SpvRuntimeError = spv.Runtime.RuntimeError; +const F32x4 = zm.F32x4; + +const RunData = struct { + allocator: std.mem.Allocator, + draw_call: *Renderer.DrawCall, + batch_id: usize, + x0: i32, + y0: i32, + d_x: i32, + d_err: i32, + y_step: i32, + steep: bool, + start_vertex: *Renderer.Vertex, + end_vertex: *Renderer.Vertex, + start_step: usize, + end_step: usize, +}; + +pub fn drawLine(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex) VkError!void { + const io = draw_call.renderer.device.interface.io(); + + var x0: i32 = @intFromFloat(v0.position[0]); + var y0: i32 = @intFromFloat(v0.position[1]); + var x1: i32 = @intFromFloat(v1.position[0]); + var y1: i32 = @intFromFloat(v1.position[1]); + + const steep = blk: { + if (@abs(y1 - y0) > @abs(x1 - x0)) { + std.mem.swap(i32, &x0, &y0); + std.mem.swap(i32, &x1, &y1); + break :blk true; + } + break :blk false; + }; + + var start_vertex = v0; + var end_vertex = v1; + if (x0 > x1) { + std.mem.swap(i32, &x0, &x1); + std.mem.swap(i32, &y0, &y1); + std.mem.swap(*Renderer.Vertex, &start_vertex, &end_vertex); + } + + const d_err: i32 = @intCast(@abs(y1 - y0)); + const d_x = x1 - x0; + const y_step: i32 = if (y0 > y1) -1 else 1; + + const pipeline = draw_call.renderer.state.pipeline orelse return; + + var wg: std.Io.Group = .init; + const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len; + if (runtimes_count == 0) + return; + + const step_count: usize = @as(usize, @intCast(d_x)) + 1; + const runs_count = @min(runtimes_count, step_count); + const steps_per_run = @divTrunc(step_count + runs_count - 1, runs_count); + + var batch_id: usize = 0; + for (0..runs_count) |run_index| { + defer batch_id = @mod(batch_id + 1, runtimes_count); + + const start_step = run_index * steps_per_run; + if (start_step >= step_count) + continue; + + const end_step = @min(start_step + steps_per_run - 1, step_count - 1); + + const run_data: RunData = .{ + .allocator = allocator, + .draw_call = draw_call, + .batch_id = batch_id, + .x0 = x0, + .y0 = y0, + .d_x = d_x, + .d_err = d_err, + .y_step = y_step, + .steep = steep, + .start_vertex = start_vertex, + .end_vertex = end_vertex, + .start_step = start_step, + .end_step = end_step, + }; + + wg.async(io, runWrapper, .{run_data}); + } + wg.await(io) catch return VkError.DeviceLost; +} + +inline fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 { + if (d_x == 0) + return y0; + + const numerator = (@as(i64, @intCast(step)) * @as(i64, d_err)) + @as(i64, @divTrunc(d_x - 1, 2)); + const y_offset: i32 = @intCast(@divTrunc(numerator, @as(i64, d_x))); + return y0 + (y_step * y_offset); +} + +fn runWrapper(data: RunData) void { + @call(.always_inline, run, .{data}) catch |err| { + std.log.scoped(.@"Rasterization stage").err("line fill mode catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + }; +} + +inline fn run(data: RunData) !void { + const render_target_view: *base.ImageView = (data.draw_call.renderer.framebuffer orelse return).interface.attachments[0]; + const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); + + var step = data.start_step; + while (step <= data.end_step) : (step += 1) { + const x = data.x0 + @as(i32, @intCast(step)); + const y = bresenhamYAtStep(data.y0, data.d_x, data.d_err, data.y_step, step); + + const pixel_x = if (data.steep) y else x; + const pixel_y = if (data.steep) x else y; + + if (!common.scissorContainsPixel(data.draw_call.scissor, pixel_x, pixel_y)) { + continue; + } + + const t = @as(f32, @floatFromInt(step)) / @as(f32, @floatFromInt(@max(data.d_x, 1))); + const z = ((1.0 - t) * data.start_vertex.position[2]) + (t * data.end_vertex.position[2]); + + const pixel = fragment.shaderInvocation( + data.allocator, + data.draw_call, + data.batch_id, + zm.f32x4(@floatFromInt(pixel_x), @floatFromInt(pixel_y), z, 1.0), + try common.interpolateLineOutputs(data.allocator, data.start_vertex, data.end_vertex, t), + ) catch |err| { + std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + return; + }; + + try render_target.writeFloat4( + .{ + .x = pixel_x, + .y = pixel_y, + .z = 0, // FIXME + }, + .{ + .aspect_mask = render_target_view.subresource_range.aspect_mask, + .mip_level = render_target_view.subresource_range.base_mip_level, + .array_layer = render_target_view.subresource_range.base_array_layer, + }, + render_target_view.format, + pixel, + ); + } +} diff --git a/src/soft/device/rasterizer/common.zig b/src/soft/device/rasterizer/common.zig new file mode 100644 index 0000000..3e75a63 --- /dev/null +++ b/src/soft/device/rasterizer/common.zig @@ -0,0 +1,87 @@ +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const zm = base.zm; +const spv = @import("spv"); + +const Renderer = @import("../Renderer.zig"); + +const VkError = base.VkError; +const F32x4 = zm.F32x4; + +pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool { + const min_x: i64 = @as(i64, scissor.offset.x); + const min_y: i64 = @as(i64, scissor.offset.y); + + const max_x: i64 = min_x + @as(i64, @intCast(scissor.extent.width)); + const max_y: i64 = min_y + @as(i64, @intCast(scissor.extent.height)); + + const pixel_x: i64 = @as(i64, x); + const pixel_y: i64 = @as(i64, y); + + return pixel_x >= min_x and + pixel_x < max_x and + pixel_y >= min_y and + pixel_y < max_y; +} + +fn writePacked(comptime T: type, bytes: []u8, value: T) void { + const raw: [@sizeOf(T)]u8 = @bitCast(value); + @memcpy(bytes[0..@sizeOf(T)], raw[0..]); +} + +fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 { + return (value0 * @as(F32x4, @splat(b0))) + (value1 * @as(F32x4, @splat(b1))) + (value2 * @as(F32x4, @splat(b2))); +} + +pub fn interpolateVertexOutputs( + allocator: std.mem.Allocator, + v0: *const Renderer.Vertex, + v1: *const Renderer.Vertex, + v2: *const Renderer.Vertex, + b0: f32, + b1: f32, + b2: f32, +) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 { + var inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 = undefined; + + for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { + const out0 = v0.outputs[location] orelse continue; + const out1 = v1.outputs[location] orelse continue; + const out2 = v2.outputs[location] orelse continue; + + if (out0.interpolation_type == .flat or out0.blob.len == 0) { + inputs[location] = out0.blob; + continue; + } + + const len = @min(out0.blob.len, out1.blob.len, out2.blob.len); + const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory; + + var byte_index: usize = 0; + while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) { + const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]); + const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]); + const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]); + writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2)); + } + + while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) { + const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]); + const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]); + const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]); + writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2)); + } + + if (byte_index < len) + @memcpy(input[byte_index..], out0.blob[byte_index..len]); + + inputs[location] = input; + } + + return inputs; +} + +pub fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 { + return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0); +} diff --git a/src/soft/device/rasterizer/edge_function.zig b/src/soft/device/rasterizer/edge_function.zig new file mode 100644 index 0000000..8dc2a9b --- /dev/null +++ b/src/soft/device/rasterizer/edge_function.zig @@ -0,0 +1,170 @@ +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const spv = @import("spv"); +const zm = base.zm; + +const common = @import("common.zig"); +const fragment = @import("../fragment.zig"); + +const Renderer = @import("../Renderer.zig"); +const SoftImage = @import("../../SoftImage.zig"); + +const VkError = base.VkError; +const SpvRuntimeError = spv.Runtime.RuntimeError; +const F32x4 = zm.F32x4; + +const RunData = struct { + allocator: std.mem.Allocator, + draw_call: *Renderer.DrawCall, + batch_id: usize, + min_x: i32, + max_x: i32, + min_y: i32, + max_y: i32, + area: f32, + v0: *Renderer.Vertex, + v1: *Renderer.Vertex, + v2: *Renderer.Vertex, +}; + +pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void { + const io = draw_call.renderer.device.interface.io(); + + const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0]))); + const max_x: i32 = @intFromFloat(@ceil(@max(v0.position[0], v1.position[0], v2.position[0]))); + const min_y: i32 = @intFromFloat(@floor(@min(v0.position[1], v1.position[1], v2.position[1]))); + const max_y: i32 = @intFromFloat(@ceil(@max(v0.position[1], v1.position[1], v2.position[1]))); + + const area = edgeFunction(v0.position, v1.position, v2.position); + if (area == 0.0) + return; + + const pipeline = draw_call.renderer.state.pipeline orelse return; + + var wg: std.Io.Group = .init; + const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len; + const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count))))); + + const width: usize = @intCast(max_x - min_x + 1); + const height: usize = @intCast(max_y - min_y + 1); + + const cols_per_run = @divTrunc(width + grid_size - 1, grid_size); + const rows_per_run = @divTrunc(height + grid_size - 1, grid_size); + + var batch_id: usize = 0; + for (0..grid_size) |gy| { + for (0..grid_size) |gx| { + defer batch_id = @mod(batch_id + 1, runtimes_count); + + const run_min_x = min_x + @as(i32, @intCast(gx * cols_per_run)); + const run_min_y = min_y + @as(i32, @intCast(gy * rows_per_run)); + + if (run_min_x > max_x or run_min_y > max_y) + continue; + + const run_max_x = @min( + run_min_x + @as(i32, @intCast(cols_per_run)) - 1, + max_x, + ); + + const run_max_y = @min( + run_min_y + @as(i32, @intCast(rows_per_run)) - 1, + max_y, + ); + + const run_data: RunData = .{ + .allocator = allocator, + .draw_call = draw_call, + .batch_id = batch_id, + .v0 = v0, + .v1 = v1, + .v2 = v2, + .area = area, + .min_x = run_min_x, + .max_x = run_max_x, + .min_y = run_min_y, + .max_y = run_max_y, + }; + + wg.async(io, runWrapper, .{run_data}); + } + } + wg.await(io) catch return VkError.DeviceLost; +} + +inline fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 { + return ((p[0] - a[0]) * (b[1] - a[1])) - ((p[1] - a[1]) * (b[0] - a[0])); +} + +fn runWrapper(data: RunData) void { + @call(.always_inline, run, .{data}) catch |err| { + std.log.scoped(.@"Rasterization stage").err("triangle fill mode catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + }; +} + +inline fn run(data: RunData) !void { + const render_target_view: *base.ImageView = (data.draw_call.renderer.framebuffer orelse return).interface.attachments[0]; + const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); + + var y = data.min_y; + while (y <= data.max_y) : (y += 1) { + var x = data.min_x; + while (x <= data.max_x) : (x += 1) { + if (!common.scissorContainsPixel(data.draw_call.scissor, x, y)) { + continue; + } + + const p = zm.f32x4(@as(f32, @floatFromInt(x)) + 0.5, @as(f32, @floatFromInt(y)) + 0.5, 0.0, 1.0); + + const w0 = edgeFunction(data.v1.position, data.v2.position, p); + const w1 = edgeFunction(data.v2.position, data.v0.position, p); + const w2 = edgeFunction(data.v0.position, data.v1.position, p); + + const inside = if (data.area > 0.0) + w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0 + else + w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0; + + if (!inside) + continue; + + const b0 = w0 / data.area; + const b1 = w1 / data.area; + const b2 = w2 / data.area; + const z = (b0 * data.v0.position[2]) + (b1 * data.v1.position[2]) + (b2 * data.v2.position[2]); + + const pixel = fragment.shaderInvocation( + data.allocator, + data.draw_call, + data.batch_id, + zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0), + try common.interpolateVertexOutputs(data.allocator, data.v0, data.v1, data.v2, b0, b1, b2), + ) catch |err| { + std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpErrorReturnTrace(trace); + } + return; + }; + + try render_target.writeFloat4( + .{ + .x = x, + .y = y, + .z = 0, // FIXME + }, + .{ + .aspect_mask = render_target_view.subresource_range.aspect_mask, + .mip_level = render_target_view.subresource_range.base_mip_level, + .array_layer = render_target_view.subresource_range.base_array_layer, + }, + render_target_view.format, + pixel, + ); + } + } +} diff --git a/src/soft/device/vertex_dispatcher.zig b/src/soft/device/vertex_dispatcher.zig index 717f17f..87c355a 100644 --- a/src/soft/device/vertex_dispatcher.zig +++ b/src/soft/device/vertex_dispatcher.zig @@ -13,7 +13,6 @@ const VkError = base.VkError; pub const RunData = struct { allocator: std.mem.Allocator, - renderer: *Renderer, pipeline: *SoftPipeline, batch_id: usize, batch_size: usize, @@ -35,11 +34,9 @@ pub fn runWrapper(data: RunData) void { } inline fn run(data: RunData) !void { - const allocator = data.renderer.device.device_allocator.allocator(); - const shader = data.pipeline.stages.getPtrAssertContains(.vertex); const rt = &shader.runtimes[data.batch_id]; - try rt.populatePushConstants(data.renderer.state.push_constant_blob[0..]); + try rt.populatePushConstants(data.draw_call.renderer.state.push_constant_blob[0..]); const entry = try rt.getEntryPointByName(shader.entry); @@ -59,7 +56,7 @@ inline fn run(data: RunData) !void { const binding_info = (data.pipeline.interface.mode.graphics.input_assembly.binding_description orelse return)[attribute.binding]; - const vertex_buffer = data.renderer.state.data.graphics.vertex_buffers[attribute.binding]; + const vertex_buffer = data.draw_call.renderer.state.data.graphics.vertex_buffers[attribute.binding]; const buffer = vertex_buffer.buffer; const buffer_memory_size = base.format.texelSize(attribute.format); const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; @@ -71,7 +68,7 @@ inline fn run(data: RunData) !void { } } - rt.callEntryPoint(allocator, entry) catch |err| switch (err) { + rt.callEntryPoint(data.allocator, entry) catch |err| switch (err) { // Some errors can be safely ignored SpvRuntimeError.OutOfBounds, SpvRuntimeError.Killed, @@ -82,6 +79,19 @@ inline fn run(data: RunData) !void { const output: *Renderer.Vertex = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index]; try rt.readBuiltIn(std.mem.asBytes(&output.position), .Position); + if (invocation_index == 0) { + const io = data.draw_call.renderer.device.interface.io(); + const file = try std.Io.Dir.cwd().createFile( + io, + "vertex_result_table_dump.txt", + .{ .truncate = true }, + ); + defer file.close(io); + var buffer = [_]u8{0} ** 1024; + var writer = file.writer(io, buffer[0..]); + try rt.dumpResultsTable(data.allocator, &writer.interface); + } + for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) { SpvRuntimeError.NotFound => continue, @@ -97,6 +107,9 @@ inline fn run(data: RunData) !void { } fn setupBuiltins(rt: *spv.Runtime, vertex_index: usize, instance_index: usize) !void { - try rt.writeBuiltIn(std.mem.asBytes(&vertex_index), .VertexIndex); - try rt.writeBuiltIn(std.mem.asBytes(&instance_index), .InstanceIndex); + const vertex_index_u32: u32 = @intCast(vertex_index); + const instance_index_u32: u32 = @intCast(instance_index); + + try rt.writeBuiltIn(std.mem.asBytes(&vertex_index_u32), .VertexIndex); + try rt.writeBuiltIn(std.mem.asBytes(&instance_index_u32), .InstanceIndex); } diff --git a/src/vulkan/CommandBuffer.zig b/src/vulkan/CommandBuffer.zig index 82474bb..c5d8e65 100644 --- a/src/vulkan/CommandBuffer.zig +++ b/src/vulkan/CommandBuffer.zig @@ -66,6 +66,7 @@ pub const DispatchTable = struct { reset: *const fn (*Self, vk.CommandBufferResetFlags) VkError!void, resetEvent: *const fn (*Self, *Event, vk.PipelineStageFlags) VkError!void, setEvent: *const fn (*Self, *Event, vk.PipelineStageFlags) VkError!void, + setScissor: *const fn (*Self, u32, []const vk.Rect2D) VkError!void, setViewport: *const fn (*Self, u32, []const vk.Viewport) VkError!void, waitEvent: *const fn (*Self, *Event, vk.PipelineStageFlags, vk.PipelineStageFlags, []const vk.MemoryBarrier, []const vk.BufferMemoryBarrier, []const vk.ImageMemoryBarrier) VkError!void, }; @@ -266,6 +267,10 @@ pub inline fn setEvent(self: *Self, event: *Event, stage: vk.PipelineStageFlags) try self.dispatch_table.setEvent(self, event, stage); } +pub inline fn setScissor(self: *Self, first: u32, scissor: []const vk.Rect2D) VkError!void { + try self.dispatch_table.setScissor(self, first, scissor); +} + pub inline fn setViewport(self: *Self, first: u32, viewports: []const vk.Viewport) VkError!void { try self.dispatch_table.setViewport(self, first, viewports); } diff --git a/src/vulkan/lib_vulkan.zig b/src/vulkan/lib_vulkan.zig index d79a0e1..3e29cb5 100644 --- a/src/vulkan/lib_vulkan.zig +++ b/src/vulkan/lib_vulkan.zig @@ -2083,13 +2083,7 @@ pub export fn strollCmdSetScissor(p_cmd: vk.CommandBuffer, first: u32, count: u3 defer entryPointEndLogTrace(); const cmd = Dispatchable(CommandBuffer).fromHandleObject(p_cmd) catch |err| return errorLogger(err); - - notImplementedWarning(); - - _ = cmd; - _ = first; - _ = count; - _ = scissors; + cmd.setScissor(first, scissors[0..count]) catch |err| return errorLogger(err); } pub export fn strollCmdSetStencilCompareMask(p_cmd: vk.CommandBuffer, face_mask: vk.StencilFaceFlags, compare_mask: u32) callconv(vk.vulkan_call_conv) void { diff --git a/src/vulkan/logger.zig b/src/vulkan/logger.zig index 7f9b5ec..a27a7fa 100644 --- a/src/vulkan/logger.zig +++ b/src/vulkan/logger.zig @@ -59,7 +59,7 @@ pub fn log(comptime level: std.log.Level, comptime scope: @EnumLiteral(), compti file.lock(io, .exclusive) catch {}; defer file.unlock(io); - const now = std.Io.Timestamp.now(io, .cpu_process).toMicroseconds(); + const now = std.Io.Timestamp.now(io, .real).toMicroseconds(); const now_us: u16 = @intCast(@mod(now, 1000)); const now_ms: u16 = @intCast(@mod(@divTrunc(now, 1000), std.time.ms_per_s));