refactoring renderer

implementing push constants
2026-05-13 22:05:25 +02:00 · 2026-05-12 03:01:17 +02:00
22 changed files with 979 additions and 536 deletions
@@ -31,8 +31,8 @@
            .lazy = true,
        },
        .SPIRV_Interpreter = .{
-            .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#9d20363ae852e1b400cb62508cb672bcfd5b3716",
-            .hash = "SPIRV_Interpreter-0.0.1-ajmpn6QuBQDDfo3uv6HauRc4DLNp2b0pZkfwyuzF-w9d",
+            .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#c0825d53158cd5a5fc38f12d155d1158efc9b371",
+            .hash = "SPIRV_Interpreter-0.0.1-ajmpn0RFBQBe3oaZ5-aVNJQ7FMancJXlmCNt7mYUP5WP",
            .lazy = true,
        },
        //.SPIRV_Interpreter = .{
@@ -13,9 +13,6 @@ pub const Interface = base.Buffer;
 interface: Interface,

 pub fn create(device: *base.Device, allocator: std.mem.Allocator, info: *const vk.BufferCreateInfo) VkError!*Self {
-    if (info.size > lib.MAX_MEMORY_ALLOCATION_SIZE)
-        return VkError.OutOfDeviceMemory;
-
    const self = allocator.create(Self) catch return VkError.OutOfHostMemory;
    errdefer allocator.destroy(self);

@@ -68,9 +68,11 @@ pub fn create(device: *base.Device, allocator: std.mem.Allocator, info: *const v
        .executeCommands = executeCommands,
        .fillBuffer = fillBuffer,
        .pipelineBarrier = pipelineBarrier,
+        .pushConstants = pushConstants,
        .reset = reset,
        .resetEvent = resetEvent,
        .setEvent = setEvent,
+        .setScissor = setScissor,
        .setViewport = setViewport,
        .waitEvent = waitEvent,
    };
@@ -150,7 +152,7 @@ pub fn beginRenderPass(interface: *Interface, render_pass: *base.RenderPass, fra
                }

                switch (desc.stencil_load_op) {
-                    .clear => clear_mask = .{ .stencil_bit = true },
+                    .clear => clear_mask.stencil_bit = true,
                    else => {},
                }

@@ -813,6 +815,41 @@ pub fn pipelineBarrier(interface: *Interface, src_stage: vk.PipelineStageFlags,
    _ = image_barriers;
 }

+/// Records a push-constant update into this command buffer.
+///
+/// `blob` is duplicated with the command allocator, so the caller's slice does
+/// not have to outlive this call. When the command executes, the bytes are
+/// written at `offset` into the push-constant storage of the graphics state
+/// (vertex or fragment stage bit set) and/or the compute state (compute stage
+/// bit set). Bytes that would land past `lib.PUSH_CONSTANT_SIZE` are dropped.
+///
+/// Returns `VkError.OutOfHostMemory` when recording the command fails to allocate.
+pub fn pushConstants(interface: *Interface, stages: vk.ShaderStageFlags, offset: u32, blob: []const u8) VkError!void {
+    const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
+    const allocator = self.command_allocator.allocator();
+
+    const CommandImpl = struct {
+        const Impl = @This();
+
+        stages: vk.ShaderStageFlags,
+        offset: u32,
+        blob: []const u8,
+
+        pub fn execute(context: *anyopaque, device: *ExecutionDevice) VkError!void {
+            const impl: *Impl = @ptrCast(@alignCast(context));
+
+            // Nothing of the range fits in the storage; this check also keeps
+            // the subtraction below from underflowing.
+            if (impl.offset >= lib.PUSH_CONSTANT_SIZE)
+                return;
+
+            const size = @min(lib.PUSH_CONSTANT_SIZE - impl.offset, impl.blob.len);
+            const bytes = impl.blob[0..size];
+            // TODO: pipeline layout offset
+            // `[offset..][0..size]` makes the destination exactly as long as
+            // the source, which `@memcpy` requires.
+            if (impl.stages.vertex_bit or impl.stages.fragment_bit) {
+                @memcpy(device.pipeline_states[ExecutionDevice.GRAPHICS_PIPELINE_STATE].push_constant_blob[impl.offset..][0..size], bytes);
+            }
+            if (impl.stages.compute_bit) {
+                @memcpy(device.pipeline_states[ExecutionDevice.COMPUTE_PIPELINE_STATE].push_constant_blob[impl.offset..][0..size], bytes);
+            }
+        }
+    };
+
+    const cmd = allocator.create(CommandImpl) catch return VkError.OutOfHostMemory;
+    errdefer allocator.destroy(cmd);
+    cmd.* = .{
+        .stages = stages,
+        .offset = offset,
+        .blob = allocator.dupe(u8, blob) catch return VkError.OutOfHostMemory, // Will be freed on cmdbuf reset or destroy
+    };
+    self.commands.append(allocator, .{ .ptr = cmd, .vtable = &.{ .execute = CommandImpl.execute } }) catch return VkError.OutOfHostMemory;
+}
+
 pub fn resetEvent(interface: *Interface, event: *base.Event, stage: vk.PipelineStageFlags) VkError!void {
    const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
    const allocator = self.command_allocator.allocator();
@@ -863,6 +900,31 @@ pub fn setEvent(interface: *Interface, event: *base.Event, stage: vk.PipelineSta
    self.commands.append(allocator, .{ .ptr = cmd, .vtable = &.{ .execute = CommandImpl.execute } }) catch return VkError.OutOfHostMemory;
 }

+/// Records a dynamic scissor update into this command buffer.
+///
+/// The rectangles are duplicated with the command allocator. On execution the
+/// copy becomes the renderer's dynamic scissor slice. `first` is stored with
+/// the command but is not read by `execute`.
+///
+/// Returns `VkError.OutOfHostMemory` when recording the command fails to allocate.
+pub fn setScissor(interface: *Interface, first: u32, scissor: []const vk.Rect2D) VkError!void {
+    const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
+    const allocator = self.command_allocator.allocator();
+
+    const Command = struct {
+        first: u32,
+        rects: []const vk.Rect2D,
+
+        pub fn execute(context: *anyopaque, device: *ExecutionDevice) VkError!void {
+            const recorded: *@This() = @ptrCast(@alignCast(context));
+            device.renderer.dynamic_state.scissor = recorded.rects; // Unsafe
+        }
+    };
+
+    const command = allocator.create(Command) catch return VkError.OutOfHostMemory;
+    errdefer allocator.destroy(command);
+
+    // Will be freed on cmdbuf reset or destroy
+    const rects = allocator.dupe(vk.Rect2D, scissor) catch return VkError.OutOfHostMemory;
+    command.* = .{ .first = first, .rects = rects };
+
+    self.commands.append(allocator, .{ .ptr = command, .vtable = &.{ .execute = Command.execute } }) catch return VkError.OutOfHostMemory;
+}
+
 pub fn setViewport(interface: *Interface, first: u32, viewports: []const vk.Viewport) VkError!void {
    const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
    const allocator = self.command_allocator.allocator();
@@ -13,9 +13,6 @@ interface: Interface,
 data: []u8,

 pub fn create(device: *SoftDevice, allocator: std.mem.Allocator, size: vk.DeviceSize, memory_type_index: u32) VkError!*Self {
-    if (size > lib.MAX_MEMORY_ALLOCATION_SIZE)
-        return VkError.OutOfDeviceMemory;
-
    const self = allocator.create(Self) catch return VkError.OutOfHostMemory;
    errdefer allocator.destroy(self);

@@ -68,8 +68,8 @@ pub fn create(allocator: std.mem.Allocator, instance: *base.Instance) VkError!*S
        .max_texel_buffer_elements = 65536,
        .max_uniform_buffer_range = 16384,
        .max_storage_buffer_range = 134217728,
-        .max_push_constants_size = 128,
-        .max_memory_allocation_count = lib.MAX_ALLOCATION_COUNT,
+        .max_push_constants_size = lib.PUSH_CONSTANT_SIZE,
+        .max_memory_allocation_count = std.math.maxInt(u32),
        .max_sampler_allocation_count = 4096,
        .buffer_image_granularity = 131072,
        .sparse_address_space_size = 0,
@@ -180,7 +180,7 @@ pub fn create(allocator: std.mem.Allocator, instance: *base.Instance) VkError!*S
    };
    interface.mem_props.memory_heap_count = 1;
    interface.mem_props.memory_heaps[0] = .{
-        .size = lib.PHYSICAL_DEVICE_HEAP_SIZE,
+        .size = std.process.totalSystemMemory() catch lib.PHYSICAL_DEVICE_FALLBACK_HEAP_SIZE,
        .flags = .{ .device_local_bit = true },
    };

@@ -189,8 +189,8 @@ pub fn create(allocator: std.mem.Allocator, instance: *base.Instance) VkError!*S
        .shader_float_64 = .true,
        .shader_int_64 = .true,
        .shader_int_16 = .true,
-        .texture_compression_etc2 = .true,
-        .texture_compression_bc = .true,
+        .texture_compression_etc2 = .false,
+        .texture_compression_bc = .false,
    };

    var queue_family_props = [_]vk.QueueFamilyProperties{
@@ -7,21 +7,21 @@ const Allocator = std.mem.Allocator;
 const Alignment = std.mem.Alignment;

 mutex: base.SpinMutex,
-arena: std.heap.ArenaAllocator,
+child_allocator: std.mem.Allocator,
 bound: usize,
+total_bytes_allocated: std.atomic.Value(usize),
+current_bytes_allocated: std.atomic.Value(usize),

 pub fn init(child_allocator: Allocator, bound: usize) Self {
    return .{
        .mutex = .{},
-        .arena = .init(child_allocator),
+        .child_allocator = child_allocator,
        .bound = bound,
+        .total_bytes_allocated = std.atomic.Value(usize).init(0),
+        .current_bytes_allocated = std.atomic.Value(usize).init(0),
    };
 }

-pub fn deinit(self: *Self) void {
-    self.arena.deinit();
-}
-
 pub fn allocator(self: *const Self) Allocator {
    return .{
        .ptr = @ptrCast(@constCast(self)), // Ugly const cast for convenience
@@ -34,40 +34,46 @@ pub fn allocator(self: *const Self) Allocator {
    };
 }

-pub inline fn queryCapacity(self: *Self) usize {
-    return self.arena.queryCapacity();
+/// Returns the running total held in `total_bytes_allocated`.
+pub inline fn queryFootprint(self: *Self) usize {
+    const counter = &self.total_bytes_allocated;
+    return counter.load(.monotonic);
 }

 fn alloc(context: *anyopaque, len: usize, alignment: Alignment, ret_addr: usize) ?[*]u8 {
    const self: *Self = @ptrCast(@alignCast(context));
    self.mutex.lock();
    defer self.mutex.unlock();
-    if (self.arena.queryCapacity() >= self.bound)
+    if (self.current_bytes_allocated.fetchAdd(len, .monotonic) >= self.bound)
        return null;
-    return self.arena.allocator().rawAlloc(len, alignment, ret_addr);
+    _ = self.total_bytes_allocated.fetchAdd(len, .monotonic);
+    return self.child_allocator.rawAlloc(len, alignment, ret_addr);
 }

 fn resize(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) bool {
    const self: *Self = @ptrCast(@alignCast(context));
    self.mutex.lock();
    defer self.mutex.unlock();
-    if (self.arena.queryCapacity() >= self.bound)
+    _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
+    if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
        return false;
-    return self.arena.allocator().rawResize(ptr, alignment, new_len, ret_addr);
+    _ = self.total_bytes_allocated.fetchAdd(new_len, .monotonic);
+    return self.child_allocator.rawResize(ptr, alignment, new_len, ret_addr);
 }

 fn remap(context: *anyopaque, ptr: []u8, alignment: Alignment, new_len: usize, ret_addr: usize) ?[*]u8 {
    const self: *Self = @ptrCast(@alignCast(context));
    self.mutex.lock();
    defer self.mutex.unlock();
-    if (self.arena.queryCapacity() >= self.bound)
+    _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
+    if (self.current_bytes_allocated.fetchAdd(new_len, .monotonic) >= self.bound)
        return null;
-    return self.arena.allocator().rawRemap(ptr, alignment, new_len, ret_addr);
+    _ = self.total_bytes_allocated.fetchAdd(new_len, .monotonic);
+    return self.child_allocator.rawRemap(ptr, alignment, new_len, ret_addr);
 }

 fn free(context: *anyopaque, ptr: []u8, alignment: Alignment, ret_addr: usize) void {
    const self: *Self = @ptrCast(@alignCast(context));
    self.mutex.lock();
    defer self.mutex.unlock();
-    return self.arena.allocator().rawFree(ptr, alignment, ret_addr);
+    _ = self.current_bytes_allocated.fetchSub(ptr.len, .monotonic);
+    return self.child_allocator.rawFree(ptr, alignment, ret_addr);
 }
@@ -22,6 +22,7 @@ pub const COMPUTE_PIPELINE_STATE = 1;
 pub const PipelineState = struct {
    pipeline: ?*SoftPipeline,
    sets: [base.VULKAN_MAX_DESCRIPTOR_SETS]?*SoftDescriptorSet,
+    push_constant_blob: [lib.PUSH_CONSTANT_SIZE]u8,
    data: union {
        compute: struct {},
        graphics: struct {
@@ -43,6 +44,7 @@ pub fn init(self: *Self, device: *SoftDevice) void {
        state.* = .{
            .pipeline = null,
            .sets = [_]?*SoftDescriptorSet{null} ** base.VULKAN_MAX_DESCRIPTOR_SETS,
+            .push_constant_blob = @splat(0),
            .data = switch (i) {
                GRAPHICS_PIPELINE_STATE => .{
                    .graphics = .{
@@ -2,28 +2,25 @@ const std = @import("std");
 const vk = @import("vulkan");
 const base = @import("base");
 const zm = base.zm;
-const lib = @import("../lib.zig");
 const spv = @import("spv");

-pub const F32x4 = zm.F32x4;
-
 const PipelineState = @import("Device.zig").PipelineState;
-const BoundedArenaAllocator = @import("BoundedArenaAllocator.zig");
+const BoundedAllocator = @import("BoundedAllocator.zig");

 const SoftBuffer = @import("../SoftBuffer.zig");
 const SoftDescriptorSet = @import("../SoftDescriptorSet.zig");
 const SoftDevice = @import("../SoftDevice.zig");
 const SoftFramebuffer = @import("../SoftFramebuffer.zig");
-const SoftImage = @import("../SoftImage.zig");
 const SoftPipeline = @import("../SoftPipeline.zig");
 const SoftRenderPass = @import("../SoftRenderPass.zig");

 const blitter = @import("blitter.zig");
 const rasterizer = @import("rasterizer.zig");
 const vertex_dispatcher = @import("vertex_dispatcher.zig");
-const fragment_dispatcher = @import("fragment_dispatcher.zig");
+const clip = @import("clip.zig");

 const VkError = base.VkError;
+const F32x4 = zm.F32x4;

 const Self = @This();

@@ -43,7 +40,7 @@ pub const IndexBuffer = struct {

 pub const DynamicState = struct {
    viewports: ?[]const vk.Viewport,
-    scissor: ?[]vk.Rect2D,
+    scissor: ?[]const vk.Rect2D,
    line_width: ?f32,
 };

@@ -55,20 +52,19 @@ pub const Vertex = struct {
    },
 };

-pub const Fragment = struct {
-    position: F32x4,
-    color: F32x4,
-    inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8,
-};
-
 pub const DrawCall = struct {
+    renderer: *Self,
    vertices: []Vertex,
-    fragments: []Fragment,

-    pub fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize) VkError!@This() {
+    viewport: vk.Viewport,
+    scissor: vk.Rect2D,
+
+    pub fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize, renderer: *Self) VkError!@This() {
        const self: @This() = .{
            .vertices = allocator.alloc(Vertex, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory,
-            .fragments = undefined,
+            .renderer = renderer,
+            .viewport = undefined,
+            .scissor = undefined,
        };

        for (self.vertices) |*vertex| {
@@ -100,20 +96,35 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self {
    };
 }

+pub fn deinit(self: *Self) void { // Currently a no-op.
+    _ = self; // The parameter is only discarded; nothing is released here.
+}
+
 pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void {
+    var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
+    try self.drawCall(&bounded_allocator, vertex_count, instance_count, first_vertex, first_instance, null);
+}
+
+/// Runs an indexed draw.
+///
+/// Reads `index_count` indices through `readIndexBuffer`, then forwards to
+/// `drawCall` with `first_vertex` set to 0 and the index list attached.
+pub fn drawIndexed(self: *Self, index_count: usize, instance_count: usize, first_index: usize, first_instance: usize, vertex_offset: i32) VkError!void {
+    var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
+    const allocator = bounded_allocator.allocator();
+
+    const indices = try self.readIndexBuffer(allocator, index_count, first_index, vertex_offset);
+    // No arena is torn down at the end of the draw any more, so the index
+    // list has to be released explicitly.
+    // NOTE(review): assumes `readIndexBuffer` allocates its result with the
+    // allocator it is given and that nothing else frees it; confirm.
+    defer allocator.free(indices);
+
+    try self.drawCall(&bounded_allocator, index_count, instance_count, 0, first_instance, indices);
+}
+
+fn drawCall(self: *Self, bounded_allocator: *BoundedAllocator, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize, indices: ?[]const i32) VkError!void {
    const io = self.device.interface.io();
+    const allocator = bounded_allocator.allocator();

-    var arena: BoundedArenaAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
-    defer arena.deinit();
-    const allocator = arena.allocator();
-
-    var draw_call = try DrawCall.init(allocator, vertex_count, instance_count);
+    var draw_call = try DrawCall.init(allocator, vertex_count, instance_count, self);

    const timer = std.Io.Timestamp.now(io, .real);
    defer if (comptime base.config.logs != .none) {
        const duration = timer.untilNow(io, .real);
        const ms = duration.toMicroseconds();
-        const memory_footprint = @divTrunc(arena.queryCapacity(), 1000);
+        const memory_footprint = @divTrunc(bounded_allocator.queryFootprint(), 1000);
        const logger = std.log.scoped(.SoftwareRenderer);
        if (memory_footprint > 256_000)
            logger.warn("Drawcall stats:\n>   Took {d}us\n>   Allocated {d} KB", .{ ms, memory_footprint })
@@ -121,50 +132,18 @@ pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_verte
            logger.debug("Drawcall stats:\n>   Took {d}us\n>   Allocated {d} KB", .{ ms, memory_footprint });
    };

-    self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, null) catch |err| {
+    self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, indices) catch |err| {
        std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)});
        if (@errorReturnTrace()) |trace| {
            std.debug.dumpErrorReturnTrace(trace);
        }
+        return VkError.Unknown;
    };

-    try self.postVertexDraw(allocator, &draw_call);
-}
+    draw_call.viewport = try self.resolveViewport(0);
+    draw_call.scissor = try self.resolveScissor(0);

-pub fn drawIndexed(self: *Self, index_count: usize, instance_count: usize, first_index: usize, first_instance: usize, vertex_offset: i32) VkError!void {
-    const io = self.device.interface.io();
-
-    var arena: BoundedArenaAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
-    defer arena.deinit();
-    const allocator = arena.allocator();
-
-    var draw_call = try DrawCall.init(allocator, index_count, instance_count);
-    const indices = try self.readIndexBuffer(allocator, index_count, first_index, vertex_offset);
-
-    const timer = std.Io.Timestamp.now(io, .real);
-    defer if (comptime base.config.logs != .none) {
-        const duration = timer.untilNow(io, .real);
-        const ms = duration.toMicroseconds();
-        const memory_footprint = @divTrunc(arena.queryCapacity(), 1000);
-        const logger = std.log.scoped(.SoftwareRenderer);
-        if (memory_footprint > 256_000)
-            logger.warn("Drawcall indexed stats:\n>   Took {d}us\n>   Allocated {d} KB", .{ ms, memory_footprint })
-        else
-            logger.debug("Drawcall indexed stats:\n>   Took {d}us\n>   Allocated {d} KB", .{ ms, memory_footprint });
-    };
-
-    self.vertexShaderStage(allocator, &draw_call, index_count, instance_count, 0, first_instance, indices) catch |err| {
-        std.log.scoped(.@"Vertex stage").err("catched a '{s}'", .{@errorName(err)});
-        if (@errorReturnTrace()) |trace| {
-            std.debug.dumpErrorReturnTrace(trace);
-        }
-    };
-
-    try self.postVertexDraw(allocator, &draw_call);
-}
-
-pub fn deinit(self: *Self) void {
-    _ = self;
+    try rasterizer.processThenFragmentStage(self, allocator, &draw_call);
 }

 fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize, indices: ?[]const i32) !void {
@@ -176,7 +155,6 @@ fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Draw
        for (0..@min(batch_size, vertex_count)) |batch_id| {
            const run_data: vertex_dispatcher.RunData = .{
                .allocator = allocator,
-                .renderer = self,
                .pipeline = pipeline,
                .batch_id = batch_id,
                .batch_size = batch_size,
@@ -194,167 +172,6 @@ fn vertexShaderStage(self: *Self, allocator: std.mem.Allocator, draw_call: *Draw
    wg.await(self.device.interface.io()) catch return VkError.DeviceLost;
 }

-fn postVertexDraw(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void {
-    const render_target_view: *base.ImageView = (self.framebuffer orelse return).interface.attachments[0];
-    const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
-
-    try self.primitiveAssemblyStage(draw_call);
-    try self.rasterizationStage(allocator, draw_call);
-
-    self.fragmentShaderStage(draw_call) catch |err| {
-        std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
-        if (@errorReturnTrace()) |trace| {
-            std.debug.dumpErrorReturnTrace(trace);
-        }
-    };
-
-    for (draw_call.fragments) |fragment| {
-        try render_target.writeFloat4(
-            .{
-                .x = @intFromFloat(fragment.position[0]),
-                .y = @intFromFloat(fragment.position[1]),
-                .z = 0, // FIXME
-            },
-            .{
-                .aspect_mask = render_target_view.subresource_range.aspect_mask,
-                .mip_level = render_target_view.subresource_range.base_mip_level,
-                .array_layer = render_target_view.subresource_range.base_array_layer,
-            },
-            render_target_view.format,
-            fragment.color,
-        );
-    }
-}
-
-fn primitiveAssemblyStage(self: *Self, draw_call: *DrawCall) VkError!void {
-    const viewport = blk: {
-        const pipeline_data = &(self.state.pipeline orelse return VkError.InvalidPipelineDrv).interface.mode.graphics;
-        if (pipeline_data.dynamic_state.viewport) {
-            if (self.dynamic_state.viewports) |viewports|
-                break :blk viewports[0];
-        }
-        if (pipeline_data.viewport_state.viewports) |viewports|
-            break :blk viewports[0];
-        return VkError.Unknown;
-    };
-
-    for (draw_call.vertices) |*vertex| {
-        const x = vertex.position[0];
-        const y = vertex.position[1];
-        const z = vertex.position[2];
-        const w = vertex.position[3];
-
-        // Perspective division.
-        const x_ndc = x / w;
-        const y_ndc = y / w;
-        const z_ndc = z / w;
-
-        const p_x = viewport.width;
-        const p_y = viewport.height;
-        const p_z = viewport.max_depth - viewport.min_depth;
-
-        const o_x = viewport.x + viewport.width / 2.0;
-        const o_y = viewport.y + viewport.height / 2.0;
-        const o_z = viewport.min_depth;
-
-        const x_screen = ((p_x / 2.0) * x_ndc) + o_x;
-        const y_screen = ((p_y / 2.0) * y_ndc) + o_y;
-        const z_screen = (p_z * z_ndc) + o_z;
-
-        vertex.position = zm.f32x4(x_screen, y_screen, z_screen, 1.0);
-    }
-}
-
-fn rasterizationStage(self: *Self, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void {
-    var fragments: std.ArrayList(Fragment) = .empty;
-
-    const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
-    const topology = pipeline_data.input_assembly.topology;
-    switch (topology) {
-        .triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| {
-            const first_vertex = triangle_index * 3;
-            const v0 = &draw_call.vertices[first_vertex + 0];
-            const v1 = &draw_call.vertices[first_vertex + 1];
-            const v2 = &draw_call.vertices[first_vertex + 2];
-
-            try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2);
-        },
-        .triangle_fan => if (draw_call.vertices.len >= 3) {
-            const v0 = &draw_call.vertices[0];
-            for (1..(draw_call.vertices.len - 1)) |vertex_index| {
-                const v1 = &draw_call.vertices[vertex_index];
-                const v2 = &draw_call.vertices[vertex_index + 1];
-
-                try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2);
-            }
-        },
-        .triangle_strip => if (draw_call.vertices.len >= 3) {
-            for (0..(draw_call.vertices.len - 2)) |vertex_index| {
-                const v0 = &draw_call.vertices[vertex_index + 0];
-                const v1 = &draw_call.vertices[vertex_index + 1];
-                const v2 = &draw_call.vertices[vertex_index + 2];
-
-                if ((vertex_index & 1) == 0) {
-                    try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v0, v1, v2);
-                } else {
-                    try self.rasterizeTriangle(allocator, &fragments, v0, v1, v2, v1, v0, v2);
-                }
-            }
-        },
-        else => base.unsupported("primitive topology {any}", .{topology}),
-    }
-
-    draw_call.fragments = fragments.toOwnedSlice(allocator) catch return VkError.OutOfDeviceMemory;
-}
-
-fn rasterizeTriangle(
-    self: *Self,
-    allocator: std.mem.Allocator,
-    fragments: *std.ArrayList(Fragment),
-    v0: *Vertex,
-    v1: *Vertex,
-    v2: *Vertex,
-    cull_v0: *const Vertex,
-    cull_v1: *const Vertex,
-    cull_v2: *const Vertex,
-) VkError!void {
-    if (try self.triangleIsCulled(cull_v0, cull_v1, cull_v2))
-        return;
-
-    const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
-    switch (pipeline_data.rasterization.polygon_mode) {
-        .fill => try rasterizer.drawTriangleFilled(allocator, fragments, v0, v1, v2),
-        .line => {
-            try rasterizer.drawLineBresenham(allocator, fragments, v0, v1);
-            try rasterizer.drawLineBresenham(allocator, fragments, v1, v2);
-            try rasterizer.drawLineBresenham(allocator, fragments, v2, v0);
-        },
-        .point => {},
-        else => base.unsupported("polygon mode {any}", .{pipeline_data.rasterization.polygon_mode}),
-    }
-}
-
-fn fragmentShaderStage(self: *Self, draw_call: *DrawCall) !void {
-    const pipeline = self.state.pipeline orelse return;
-    const batch_size = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
-    const fragment_count = draw_call.fragments.len;
-
-    var wg: std.Io.Group = .init;
-    for (0..@min(batch_size, fragment_count)) |batch_id| {
-        const run_data: fragment_dispatcher.RunData = .{
-            .renderer = self,
-            .pipeline = pipeline,
-            .batch_id = batch_id,
-            .batch_size = batch_size,
-            .fragment_count = fragment_count,
-            .draw_call = draw_call,
-        };
-
-        wg.async(self.device.interface.io(), fragment_dispatcher.runWrapper, .{run_data});
-    }
-    wg.await(self.device.interface.io()) catch return VkError.DeviceLost;
-}
-
 fn readIndexBuffer(self: *Self, allocator: std.mem.Allocator, index_count: usize, first_index: usize, vertex_offset: i32) VkError![]i32 {
    const index_buffer = self.state.data.graphics.index_buffer;
    const buffer = index_buffer.buffer;
@@ -392,37 +209,44 @@ fn indexTypeSize(index_type: vk.IndexType) ?usize {
    };
 }

-fn triangleArea2(v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) f32 {
-    const x0 = v0.position[0];
-    const y0 = v0.position[1];
-    const x1 = v1.position[0];
-    const y1 = v1.position[1];
-    const x2 = v2.position[0];
-    const y2 = v2.position[1];
+/// Picks the viewport at `viewport_index` for the bound graphics pipeline.
+///
+/// When the pipeline declares the viewport as dynamic state, only the
+/// renderer's dynamic viewports are consulted; otherwise the pipeline's static
+/// viewport state is used. Returns `VkError.InvalidPipelineDrv` when no
+/// pipeline is bound and `VkError.Unknown` when the chosen list is absent or
+/// too short.
+fn resolveViewport(self: *Self, viewport_index: usize) VkError!vk.Viewport {
+    const pipeline = self.state.pipeline orelse return VkError.InvalidPipelineDrv;
+    const graphics = &pipeline.interface.mode.graphics;

-    return ((x1 - x0) * (y2 - y0)) - ((y1 - y0) * (x2 - x0));
+    // Exactly one source is considered; there is no fallback between them.
+    const source: ?[]const vk.Viewport = if (graphics.dynamic_state.viewport)
+        self.dynamic_state.viewports
+    else
+        graphics.viewport_state.viewports;
+
+    const viewports = source orelse return VkError.Unknown;
+    if (viewport_index >= viewports.len)
+        return VkError.Unknown;
+
+    return viewports[viewport_index];
 }

-fn triangleIsCulled(self: *Self, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!bool {
-    const pipeline_data = (self.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
-    const rasterization = pipeline_data.rasterization;
-    const cull_mode = rasterization.cull_mode;
+/// Picks the scissor rectangle at `scissor_index` for the bound graphics pipeline.
+///
+/// When the pipeline declares the scissor as dynamic state, only the
+/// renderer's dynamic scissor list is consulted; otherwise the pipeline's
+/// static viewport state is used. Returns `VkError.InvalidPipelineDrv` when no
+/// pipeline is bound and `VkError.Unknown` when the chosen list is absent or
+/// too short.
+fn resolveScissor(self: *Self, scissor_index: usize) VkError!vk.Rect2D {
+    const pipeline = self.state.pipeline orelse return VkError.InvalidPipelineDrv;
+    const graphics = &pipeline.interface.mode.graphics;

-    if (!cull_mode.front_bit and !cull_mode.back_bit)
-        return false;
+    // Exactly one source is considered; there is no fallback between them.
+    const source: ?[]const vk.Rect2D = if (graphics.dynamic_state.scissor)
+        self.dynamic_state.scissor
+    else
+        graphics.viewport_state.scissor;

-    if (cull_mode.front_bit and cull_mode.back_bit)
-        return true;
+    const rects = source orelse return VkError.Unknown;

-    const area = triangleArea2(v0, v1, v2);
-    if (area == 0.0)
-        return true;
+    if (scissor_index >= rects.len)
+        return VkError.Unknown;

-    const front_face = switch (rasterization.front_face) {
-        .counter_clockwise => area < 0.0,
-        .clockwise => area > 0.0,
-        else => return false,
-    };
-
-    return (cull_mode.front_bit and front_face) or (cull_mode.back_bit and !front_face);
+    return rects[scissor_index];
 }
@@ -0,0 +1,191 @@
+const std = @import("std");
+const vk = @import("vulkan");
+const base = @import("base");
+const zm = base.zm;
+const lib = @import("../lib.zig");
+const spv = @import("spv");
+
+pub const F32x4 = zm.F32x4;
+
+const Renderer = @import("Renderer.zig");
+const Vertex = Renderer.Vertex;
+
+const VkError = base.VkError;
+
+const ClipPlane = enum { // One boundary of the clip volume, as tested by `clipDistance`.
+    Left, // inside when x + w >= 0
+    Right, // inside when w - x >= 0
+    Bottom, // inside when y + w >= 0
+    Top, // inside when w - y >= 0
+    Near, // inside when z >= 0
+    Far, // inside when w - z >= 0
+};
+
+const MAX_CLIPPED_POLYGON_VERTICES = 16;
+
+/// Fixed-capacity vertex list holding at most `MAX_CLIPPED_POLYGON_VERTICES`
+/// entries; only the first `len` slots are meaningful.
+const ClippedPolygon = struct {
+    vertices: [MAX_CLIPPED_POLYGON_VERTICES]Vertex = undefined,
+    len: usize = 0,
+
+    /// Stores `vertex` in the next free slot, or fails with
+    /// `VkError.OutOfDeviceMemory` once every slot is taken.
+    fn append(self: *@This(), vertex: Vertex) VkError!void {
+        const slot = self.len;
+        if (slot < self.vertices.len) {
+            self.vertices[slot] = vertex;
+            self.len = slot + 1;
+            return;
+        }
+
+        return VkError.OutOfDeviceMemory;
+    }
+};
+
+/// Signed distance of a clip-space `position` to `plane`; a value that is
+/// zero or positive means the position is on the kept side.
+fn clipDistance(position: F32x4, plane: ClipPlane) f32 {
+    const x, const y, const z, const w = position;
+
+    switch (plane) {
+        .Left => return x + w,
+        .Right => return w - x,
+        .Bottom => return y + w,
+        .Top => return w - y,
+        .Near => return z,
+        .Far => return w - z,
+    }
+}
+
+/// Tells whether `vertex` lies on the kept side of `plane` (distance >= 0).
+fn vertexInsidePlane(vertex: *const Vertex, plane: ClipPlane) bool {
+    const distance = clipDistance(vertex.position, plane);
+    return distance >= 0.0;
+}
+
+/// Returns a freshly allocated copy of `blob`; the caller owns the result.
+/// Allocation failure is reported as `VkError.OutOfDeviceMemory`.
+fn copyBlob(allocator: std.mem.Allocator, blob: []const u8) VkError![]u8 {
+    return allocator.dupe(u8, blob) catch return VkError.OutOfDeviceMemory;
+}
+
+/// Writes the bytes of `value` to the first `@sizeOf(T)` bytes of `bytes`.
+/// `bytes` must be at least that long; no alignment is required.
+fn writePacked(comptime T: type, bytes: []u8, value: T) void {
+    const size = @sizeOf(T);
+    const raw: [size]u8 = @bitCast(value);
+    bytes[0..size].* = raw;
+}
+
+/// Linearly interpolates two byte blobs, treating them as packed `f32` data.
+///
+/// The result is as long as the shorter input and is owned by the caller.
+/// Data is processed in `F32x4` chunks first, then single `f32` values; any
+/// trailing bytes too short for an `f32` are copied from `a`.
+/// Allocation failure is reported as `VkError.OutOfDeviceMemory`.
+fn interpolateBlob(allocator: std.mem.Allocator, a: []const u8, b: []const u8, t: f32) VkError![]u8 {
+    const vector_size = @sizeOf(F32x4);
+    const scalar_size = @sizeOf(f32);
+
+    const len = @min(a.len, b.len);
+    const result = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;
+    const factor: F32x4 = @splat(t);
+
+    var cursor: usize = 0;
+
+    // Four lanes at a time while a full vector still fits.
+    while (len - cursor >= vector_size) : (cursor += vector_size) {
+        const from = std.mem.bytesToValue(F32x4, a[cursor..]);
+        const to = std.mem.bytesToValue(F32x4, b[cursor..]);
+        writePacked(F32x4, result[cursor..], from + ((to - from) * factor));
+    }
+
+    // Remaining whole floats, one at a time.
+    while (len - cursor >= scalar_size) : (cursor += scalar_size) {
+        const from = std.mem.bytesToValue(f32, a[cursor..]);
+        const to = std.mem.bytesToValue(f32, b[cursor..]);
+        writePacked(f32, result[cursor..], from + ((to - from) * t));
+    }
+
+    // Leftover bytes cannot be interpolated; take them from `a` verbatim.
+    if (cursor < len)
+        @memcpy(result[cursor..], a[cursor..len]);
+
+    return result;
+}
+
+/// Builds the vertex located at parameter `t` on the edge from `a` to `b`.
+///
+/// The position is interpolated linearly. An output location is produced only
+/// when both vertices carry it: flat outputs are copied from `a`, all others
+/// are interpolated. Every produced blob is newly allocated and owned by the
+/// returned vertex; if an allocation fails, the blobs produced so far are
+/// released before the error is returned.
+fn interpolateVertexForClipping(allocator: std.mem.Allocator, a: *const Vertex, b: *const Vertex, t: f32) VkError!Vertex {
+    var result: Vertex = .{
+        .position = a.position + ((b.position - a.position) * @as(F32x4, @splat(t))),
+        .outputs = undefined,
+    };
+
+    @memset(result.outputs[0..], null);
+
+    // The partially built vertex is not returned on failure, so its blobs
+    // would otherwise be unreachable.
+    errdefer {
+        for (result.outputs) |maybe_output| {
+            if (maybe_output) |output| allocator.free(output.blob);
+        }
+    }
+
+    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
+        const out_a = a.outputs[location] orelse continue;
+        const out_b = b.outputs[location] orelse continue;
+
+        result.outputs[location] = .{
+            .interpolation_type = out_a.interpolation_type,
+            .blob = if (out_a.interpolation_type == .flat)
+                try copyBlob(allocator, out_a.blob)
+            else
+                try interpolateBlob(allocator, out_a.blob, out_b.blob, t),
+        };
+    }
+
+    return result;
+}
+
+/// Clips `input` against a single `plane` and returns the resulting polygon.
+///
+/// Walks every edge (previous vertex to current vertex, starting with the
+/// closing edge): an edge that crosses the plane contributes a new
+/// interpolated vertex, and each vertex on the kept side is carried over.
+fn clipPolygonAgainstPlane(allocator: std.mem.Allocator, input: *const ClippedPolygon, plane: ClipPlane) VkError!ClippedPolygon {
+    var output: ClippedPolygon = .{};
+
+    const ring = input.vertices[0..input.len];
+    if (ring.len == 0)
+        return output;
+
+    var previous: *const Vertex = &ring[ring.len - 1];
+    var previous_distance = clipDistance(previous.position, plane);
+
+    for (ring) |*current| {
+        const current_distance = clipDistance(current.position, plane);
+        const current_inside = current_distance >= 0.0;
+        const previous_inside = previous_distance >= 0.0;
+
+        // The edge crosses the plane: emit the intersection point first.
+        if (current_inside != previous_inside) {
+            const t = previous_distance / (previous_distance - current_distance);
+            const crossing = try interpolateVertexForClipping(allocator, previous, current, t);
+            try output.append(crossing);
+        }
+
+        if (current_inside)
+            try output.append(current.*);
+
+        previous = current;
+        previous_distance = current_distance;
+    }
+
+    return output;
+}
+
+/// Clips the triangle `v0`, `v1`, `v2` against the six clip planes in the
+/// order left, right, bottom, top, near, far.
+///
+/// Clipping stops as soon as fewer than three vertices remain, so the caller
+/// must check `len` on the returned polygon.
+pub fn clipTriangle(allocator: std.mem.Allocator, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!ClippedPolygon {
+    var polygon: ClippedPolygon = .{};
+    for ([_]*const Vertex{ v0, v1, v2 }) |corner|
+        try polygon.append(corner.*);
+
+    for ([_]ClipPlane{ .Left, .Right, .Bottom, .Top, .Near, .Far }) |plane| {
+        polygon = try clipPolygonAgainstPlane(allocator, &polygon, plane);
+        if (polygon.len < 3)
+            break;
+    }
+
+    return polygon;
+}
+
+/// Converts `vertex.position` in place from clip space to viewport
+/// coordinates: divides x, y and z by w, then scales and offsets them with
+/// the viewport rectangle and depth range. The original w is kept in the
+/// fourth lane.
+pub fn viewportTransformVertex(viewport: vk.Viewport, vertex: *Vertex) void {
+    const x, const y, const z, const w = vertex.position;
+
+    const half_width = viewport.width / 2.0;
+    const half_height = viewport.height / 2.0;
+    const depth_range = viewport.max_depth - viewport.min_depth;
+
+    vertex.position = zm.f32x4(
+        (half_width * (x / w)) + (viewport.x + half_width),
+        (half_height * (y / w)) + (viewport.y + half_height),
+        (depth_range * (z / w)) + viewport.min_depth,
+        w,
+    );
+}
@@ -0,0 +1,45 @@
+const std = @import("std");
+const vk = @import("vulkan");
+const base = @import("base");
+const zm = base.zm;
+const spv = @import("spv");
+
+const lib = @import("../lib.zig");
+
+const Renderer = @import("Renderer.zig");
+const SoftImage = @import("../SoftImage.zig");
+
+const VkError = base.VkError;
+const SpvRuntimeError = spv.Runtime.RuntimeError;
+
+/// Runs the fragment shader once on the runtime selected by `batch_id` and
+/// returns the value read from output location 0, clamped to [0, 1].
+///
+/// For every input location the shader declares, `inputs[location]` is written
+/// to the runtime and then released with `allocator`, whether or not the write
+/// succeeds. `position` is currently unused. Returns zero when no pipeline is
+/// bound.
+///
+/// NOTE(review): slices at locations the shader does not declare are never
+/// released here; confirm who owns them.
+pub fn shaderInvocation(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, batch_id: usize, position: zm.F32x4, inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]const u8) SpvRuntimeError!zm.F32x4 {
+    _ = position;
+    const pipeline = draw_call.renderer.state.pipeline orelse return zm.f32x4s(0.0);
+
+    const shader = pipeline.stages.getPtrAssertContains(.fragment);
+    const rt = &shader.runtimes[batch_id];
+
+    const entry = try rt.getEntryPointByName(shader.entry);
+    const output_result = try rt.getResultByLocation(0, .output);
+
+    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
+        const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) {
+            SpvRuntimeError.NotFound => continue,
+            else => return err,
+        };
+        // Runs at the end of this iteration, so the blob is released even when
+        // `writeInput` fails instead of leaking on the error path.
+        defer allocator.free(inputs[location]);
+        try rt.writeInput(inputs[location], result_word);
+    }
+
+    rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
+        // Some errors can be safely ignored
+        SpvRuntimeError.OutOfBounds,
+        SpvRuntimeError.Killed,
+        => {},
+        else => return err,
+    };
+
+    var color = zm.f32x4s(0.0);
+    try rt.readOutput(std.mem.asBytes(&color), output_result);
+    return std.math.clamp(color, zm.f32x4s(0.0), zm.f32x4s(1.0));
+}
@@ -1,65 +0,0 @@
-const std = @import("std");
-const spv = @import("spv");
-const base = @import("base");
-const zm = base.zm;
-
-const F32x4 = Renderer.F32x4;
-
-const SpvRuntimeError = spv.Runtime.RuntimeError;
-
-const Renderer = @import("Renderer.zig");
-const SoftPipeline = @import("../SoftPipeline.zig");
-
-const VkError = base.VkError;
-
-pub const RunData = struct {
-    renderer: *Renderer,
-    pipeline: *SoftPipeline,
-    batch_id: usize,
-    batch_size: usize,
-    fragment_count: usize,
-    draw_call: *Renderer.DrawCall,
-};
-
-pub fn runWrapper(data: RunData) void {
-    @call(.always_inline, run, .{data}) catch |err| {
-        std.log.scoped(.@"SPIR-V runtime").err("SPIR-V runtime catched a '{s}'", .{@errorName(err)});
-        if (@errorReturnTrace()) |trace| {
-            std.debug.dumpErrorReturnTrace(trace);
-        }
-    };
-}
-
-inline fn run(data: RunData) !void {
-    const allocator = data.renderer.device.device_allocator.allocator();
-
-    const shader = data.pipeline.stages.getPtrAssertContains(.fragment);
-    const rt = &shader.runtimes[data.batch_id];
-
-    const entry = try rt.getEntryPointByName(shader.entry);
-    const output_result = try rt.getResultByLocation(0, .output);
-
-    var invocation_index: usize = data.batch_id;
-    while (invocation_index < data.fragment_count) : (invocation_index += data.batch_size) {
-        const fragment: *Renderer.Fragment = &data.draw_call.fragments[invocation_index];
-
-        for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
-            const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) {
-                SpvRuntimeError.NotFound => continue,
-                else => return err,
-            };
-            try rt.writeInput(fragment.inputs[location], result_word);
-        }
-
-        rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
-            // Some errors can be safely ignored
-            SpvRuntimeError.OutOfBounds,
-            SpvRuntimeError.Killed,
-            => {},
-            else => return err,
-        };
-
-        try rt.readOutput(std.mem.asBytes(&fragment.color), output_result);
-        fragment.color = std.math.clamp(fragment.color, zm.f32x4s(0.0), zm.f32x4s(1.0));
-    }
-}
@@ -1,172 +1,119 @@
 const std = @import("std");
-const vk = @import("vulkan");
 const base = @import("base");
-const zm = base.zm;
+
+const clip = @import("clip.zig");
+
+const bresenham = @import("rasterizer/bresenham.zig");
+const edge_function = @import("rasterizer/edge_function.zig");
+
+const Renderer = @import("Renderer.zig");
+const Vertex = Renderer.Vertex;
+const DrawCall = Renderer.DrawCall;

 const VkError = base.VkError;

-const lib = @import("../lib.zig");
+pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall) VkError!void {
+    const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics;
+    const topology = pipeline_data.input_assembly.topology;

-const Renderer = @import("Renderer.zig");
-const spv = @import("spv");
+    switch (topology) {
+        .triangle_list => for (0..@divTrunc(draw_call.vertices.len, 3)) |triangle_index| {
+            const first_vertex = triangle_index * 3;
+            const v0 = &draw_call.vertices[first_vertex + 0];
+            const v1 = &draw_call.vertices[first_vertex + 1];
+            const v2 = &draw_call.vertices[first_vertex + 2];

-pub const F32x4 = zm.F32x4;
+            try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
+        },
+        .triangle_fan => if (draw_call.vertices.len >= 3) {
+            const v0 = &draw_call.vertices[0];
+            for (1..(draw_call.vertices.len - 1)) |vertex_index| {
+                const v1 = &draw_call.vertices[vertex_index];
+                const v2 = &draw_call.vertices[vertex_index + 1];

-fn writePacked(comptime T: type, bytes: []u8, value: T) void {
-    const raw: [@sizeOf(T)]u8 = @bitCast(value);
-    @memcpy(bytes[0..@sizeOf(T)], raw[0..]);
-}
+                try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
+            }
+        },
+        .triangle_strip => if (draw_call.vertices.len >= 3) {
+            for (0..(draw_call.vertices.len - 2)) |vertex_index| {
+                const v0 = &draw_call.vertices[vertex_index + 0];
+                const v1 = &draw_call.vertices[vertex_index + 1];
+                const v2 = &draw_call.vertices[vertex_index + 2];

-fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
-    return (value0 * @as(F32x4, @splat(b0))) + (value1 * @as(F32x4, @splat(b1))) + (value2 * @as(F32x4, @splat(b2)));
-}
-
-fn interpolateVertexOutputs(
-    allocator: std.mem.Allocator,
-    v0: *const Renderer.Vertex,
-    v1: *const Renderer.Vertex,
-    v2: *const Renderer.Vertex,
-    b0: f32,
-    b1: f32,
-    b2: f32,
-) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
-    var inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 = undefined;
-
-    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
-        const out0 = v0.outputs[location] orelse continue;
-        const out1 = v1.outputs[location] orelse continue;
-        const out2 = v2.outputs[location] orelse continue;
-
-        if (out0.interpolation_type == .flat or out0.blob.len == 0) {
-            inputs[location] = out0.blob;
-            continue;
-        }
-
-        const len = @min(out0.blob.len, out1.blob.len, out2.blob.len);
-        const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;
-
-        var byte_index: usize = 0;
-        while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) {
-            const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]);
-            const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]);
-            const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]);
-            writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
-        }
-
-        while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
-            const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]);
-            const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]);
-            const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]);
-            writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
-        }
-
-        if (byte_index < len)
-            @memcpy(input[byte_index..], out0.blob[byte_index..len]);
-
-        inputs[location] = input;
-    }
-
-    return inputs;
-}
-
-fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
-    return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0);
-}
-
-pub fn drawLineBresenham(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex) VkError!void {
-    var x0: i32 = @intFromFloat(v0.position[0]);
-    var y0: i32 = @intFromFloat(v0.position[1]);
-    var x1: i32 = @intFromFloat(v1.position[0]);
-    var y1: i32 = @intFromFloat(v1.position[1]);
-
-    const steep = blk: {
-        if (@abs(y1 - y0) > @abs(x1 - x0)) {
-            std.mem.swap(i32, &x0, &y0);
-            std.mem.swap(i32, &x1, &y1);
-            break :blk true;
-        }
-        break :blk false;
-    };
-
-    var start_vertex = v0;
-    var end_vertex = v1;
-    if (x0 > x1) {
-        std.mem.swap(i32, &x0, &x1);
-        std.mem.swap(i32, &y0, &y1);
-        std.mem.swap(*Renderer.Vertex, &start_vertex, &end_vertex);
-    }
-
-    const d_err = @abs(y1 - y0);
-    const d_x = x1 - x0;
-    const y_step: i32 = if (y0 > y1) -1 else 1;
-
-    var err = @divTrunc(d_x, 2); // Pixel center.
-    var y = y0;
-
-    var x = x0;
-    while (x <= x1) : (x += 1) {
-        const x_fragment: f32 = @floatFromInt(if (steep) y else x);
-        const y_fragment: f32 = @floatFromInt(if (steep) x else y);
-        const t = @as(f32, @floatFromInt(x - x0)) / @as(f32, @floatFromInt(@max(d_x, 1)));
-
-        const z = ((1.0 - t) * start_vertex.position[2]) + (t * end_vertex.position[2]);
-
-        fragments.append(allocator, .{
-            .position = zm.f32x4(x_fragment, y_fragment, z, 1.0),
-            .color = zm.f32x4(1.0, 1.0, 1.0, 1.0),
-            .inputs = try interpolateLineOutputs(allocator, start_vertex, end_vertex, t),
-        }) catch return VkError.OutOfDeviceMemory;
-
-        err -= @intCast(d_err);
-        if (err < 0) {
-            y += y_step;
-            err += d_x;
-        }
+                if ((vertex_index & 1) == 0) {
+                    try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v0, v1, v2);
+                } else {
+                    try clipTransformAndRasterizeTriangle(renderer, allocator, draw_call, v1, v0, v2);
+                }
+            }
+        },
+        else => base.unsupported("primitive topology {any}", .{topology}),
    }
 }

-fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 {
-    return ((p[0] - a[0]) * (b[1] - a[1])) - ((p[1] - a[1]) * (b[0] - a[0]));
-}
+fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!void {
+    const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2);

-pub fn drawTriangleFilled(allocator: std.mem.Allocator, fragments: *std.ArrayList(Renderer.Fragment), v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void {
-    const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0])));
-    const max_x: i32 = @intFromFloat(@ceil(@max(v0.position[0], v1.position[0], v2.position[0])));
-    const min_y: i32 = @intFromFloat(@floor(@min(v0.position[1], v1.position[1], v2.position[1])));
-    const max_y: i32 = @intFromFloat(@ceil(@max(v0.position[1], v1.position[1], v2.position[1])));
-
-    const area = edgeFunction(v0.position, v1.position, v2.position);
-    if (area == 0.0)
+    if (clipped_polygon.len < 3)
        return;

-    var y = min_y;
-    while (y <= max_y) : (y += 1) {
-        var x = min_x;
-        while (x <= max_x) : (x += 1) {
-            const p = zm.f32x4(@as(f32, @floatFromInt(x)) + 0.5, @as(f32, @floatFromInt(y)) + 0.5, 0.0, 1.0);
+    for (1..(clipped_polygon.len - 1)) |vertex_index| {
+        var tv0 = clipped_polygon.vertices[0];
+        var tv1 = clipped_polygon.vertices[vertex_index];
+        var tv2 = clipped_polygon.vertices[vertex_index + 1];

-            const w0 = edgeFunction(v1.position, v2.position, p);
-            const w1 = edgeFunction(v2.position, v0.position, p);
-            const w2 = edgeFunction(v0.position, v1.position, p);
+        clip.viewportTransformVertex(draw_call.viewport, &tv0);
+        clip.viewportTransformVertex(draw_call.viewport, &tv1);
+        clip.viewportTransformVertex(draw_call.viewport, &tv2);

-            const inside = if (area > 0.0)
-                w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0
-            else
-                w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0;
-
-            if (!inside)
-                continue;
-
-            const b0 = w0 / area;
-            const b1 = w1 / area;
-            const b2 = w2 / area;
-            const z = (b0 * v0.position[2]) + (b1 * v1.position[2]) + (b2 * v2.position[2]);
-
-            fragments.append(allocator, .{
-                .position = zm.f32x4(@floatFromInt(x), @floatFromInt(y), z, 1.0),
-                .color = zm.f32x4(1.0, 1.0, 1.0, 1.0),
-                .inputs = try interpolateVertexOutputs(allocator, v0, v1, v2, b0, b1, b2),
-            }) catch return VkError.OutOfDeviceMemory;
-        }
+        try rasterizeTriangle(renderer, allocator, draw_call, &tv0, &tv1, &tv2);
    }
 }
+
+/// Rasterizes one viewport-space triangle according to the bound pipeline's
+/// polygon mode, unless face culling rejects it. `.fill` draws the filled
+/// triangle, `.line` draws its three edges, `.point` is not implemented yet.
+fn rasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
+    const culled = try triangleIsCulled(renderer, v0, v1, v2);
+    if (culled)
+        return;
+
+    const pipeline = renderer.state.pipeline orelse return VkError.InvalidHandleDrv;
+    const polygon_mode = pipeline.interface.mode.graphics.rasterization.polygon_mode;
+
+    switch (polygon_mode) {
+        .fill => try edge_function.drawTriangle(allocator, draw_call, v0, v1, v2),
+        .line => {
+            // Edges are drawn in the order v0-v1, v1-v2, v2-v0.
+            const edges = [_][2]*Vertex{ .{ v0, v1 }, .{ v1, v2 }, .{ v2, v0 } };
+            for (edges) |edge| {
+                try bresenham.drawLine(allocator, draw_call, edge[0], edge[1]);
+            }
+        },
+        .point => {}, // TODO
+        else => base.unsupported("polygon mode {any}", .{polygon_mode}),
+    }
+}
+
+/// Tells whether the triangle must be discarded by the bound pipeline's cull
+/// mode. Facing is derived from the sign of `triangleArea` and the pipeline's
+/// `front_face`; zero-area triangles are culled whenever exactly one face is
+/// culled. Fails with `InvalidHandleDrv` when no pipeline is bound.
+fn triangleIsCulled(renderer: *Renderer, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!bool {
+    const pipeline = renderer.state.pipeline orelse return VkError.InvalidHandleDrv;
+    const rasterization = pipeline.interface.mode.graphics.rasterization;
+    const cull_front = rasterization.cull_mode.front_bit;
+    const cull_back = rasterization.cull_mode.back_bit;
+
+    if (!(cull_front or cull_back))
+        return false;
+    if (cull_front and cull_back)
+        return true;
+
+    const signed_area = triangleArea(v0, v1, v2);
+    if (signed_area == 0.0)
+        return true;
+
+    const is_front_facing = switch (rasterization.front_face) {
+        .counter_clockwise => signed_area < 0.0,
+        .clockwise => signed_area > 0.0,
+        else => return false,
+    };
+
+    // Exactly one face is culled at this point: pick the bit matching the facing.
+    return if (is_front_facing) cull_front else cull_back;
+}
+
+/// Returns the 2D cross product of the edges v0->v1 and v0->v2, using only the
+/// x and y components of the positions (twice the signed triangle area).
+inline fn triangleArea(v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) f32 {
+    const edge01_x = v1.position[0] - v0.position[0];
+    const edge01_y = v1.position[1] - v0.position[1];
+    const edge02_x = v2.position[0] - v0.position[0];
+    const edge02_y = v2.position[1] - v0.position[1];
+    return (edge01_x * edge02_y) - (edge01_y * edge02_x);
+}
@@ -0,0 +1,169 @@
+const std = @import("std");
+const base = @import("base");
+const spv = @import("spv");
+const zm = base.zm;
+
+const common = @import("common.zig");
+const fragment = @import("../fragment.zig");
+
+const Renderer = @import("../Renderer.zig");
+const SoftImage = @import("../../SoftImage.zig");
+
+const VkError = base.VkError;
+const SpvRuntimeError = spv.Runtime.RuntimeError;
+const F32x4 = zm.F32x4;
+
+/// Work description handed to one asynchronous line-rasterization task.
+const RunData = struct {
+    /// Allocator forwarded to output interpolation and the fragment shader.
+    allocator: std.mem.Allocator,
+    draw_call: *Renderer.DrawCall,
+    /// Index of the fragment runtime used by this task.
+    batch_id: usize,
+    /// Major-axis start coordinate, after `drawLine`'s steep/ordering swaps.
+    x0: i32,
+    /// Minor-axis start coordinate, after `drawLine`'s steep/ordering swaps.
+    y0: i32,
+    /// Major-axis extent of the line (`x1 - x0`).
+    d_x: i32,
+    /// Absolute minor-axis extent of the line (`|y1 - y0|`).
+    d_err: i32,
+    /// Direction of travel on the minor axis: -1 or 1.
+    y_step: i32,
+    /// True when x and y were swapped; pixels are swapped back before writing.
+    steep: bool,
+    /// Vertex matching step 0 of the line.
+    start_vertex: *Renderer.Vertex,
+    /// Vertex matching the last step of the line.
+    end_vertex: *Renderer.Vertex,
+    /// First step (inclusive) processed by this task.
+    start_step: usize,
+    /// Last step (inclusive) processed by this task.
+    end_step: usize,
+};
+
+/// Rasterizes the line from `v0` to `v1` with a Bresenham walk along its major
+/// axis. The steps are split into contiguous ranges, at most one per fragment
+/// runtime, each processed by an asynchronous task; the function waits for all
+/// of them. Returns without drawing when no pipeline or fragment stage is
+/// bound or when the stage has no runtime, and `VkError.DeviceLost` when
+/// waiting on the task group fails.
+pub fn drawLine(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex) VkError!void {
+    const io = draw_call.renderer.device.interface.io();
+
+    var x0: i32 = @intFromFloat(v0.position[0]);
+    var y0: i32 = @intFromFloat(v0.position[1]);
+    var x1: i32 = @intFromFloat(v1.position[0]);
+    var y1: i32 = @intFromFloat(v1.position[1]);
+
+    // Walk along the axis with the larger extent: swap x and y for steep lines.
+    const steep = @abs(y1 - y0) > @abs(x1 - x0);
+    if (steep) {
+        std.mem.swap(i32, &x0, &y0);
+        std.mem.swap(i32, &x1, &y1);
+    }
+
+    // Always walk towards increasing x, keeping the vertices in matching order.
+    var start_vertex = v0;
+    var end_vertex = v1;
+    if (x0 > x1) {
+        std.mem.swap(i32, &x0, &x1);
+        std.mem.swap(i32, &y0, &y1);
+        std.mem.swap(*Renderer.Vertex, &start_vertex, &end_vertex);
+    }
+
+    const d_x = x1 - x0;
+    const d_err: i32 = @intCast(@abs(y1 - y0));
+    const y_step: i32 = if (y0 > y1) -1 else 1;
+
+    const pipeline = draw_call.renderer.state.pipeline orelse return;
+    const fragment_stage = pipeline.stages.getPtr(.fragment) orelse return;
+    const runtimes_count = fragment_stage.runtimes.len;
+    if (runtimes_count == 0)
+        return;
+
+    const step_count: usize = @as(usize, @intCast(d_x)) + 1;
+    const runs_count = @min(runtimes_count, step_count);
+    const steps_per_run = @divTrunc(step_count + runs_count - 1, runs_count);
+
+    var wg: std.Io.Group = .init;
+    for (0..runs_count) |run_index| {
+        const start_step = run_index * steps_per_run;
+        if (start_step >= step_count)
+            continue;
+
+        const end_step = @min(start_step + steps_per_run - 1, step_count - 1);
+
+        const run_data: RunData = .{
+            .allocator = allocator,
+            .draw_call = draw_call,
+            // `runs_count` never exceeds `runtimes_count`, so the run index is
+            // directly usable as the runtime index.
+            .batch_id = run_index,
+            .x0 = x0,
+            .y0 = y0,
+            .d_x = d_x,
+            .d_err = d_err,
+            .y_step = y_step,
+            .steep = steep,
+            .start_vertex = start_vertex,
+            .end_vertex = end_vertex,
+            .start_step = start_step,
+            .end_step = end_step,
+        };
+
+        wg.async(io, runWrapper, .{run_data});
+    }
+    wg.await(io) catch return VkError.DeviceLost;
+}
+
+/// Returns the minor-axis coordinate of the line at major-axis step `step`,
+/// computed in closed form so that any step can be evaluated independently.
+/// `d_x` is the major-axis extent, `d_err` the absolute minor-axis extent and
+/// `y_step` the minor-axis direction. A zero-length major axis yields `y0`.
+inline fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 {
+    if (d_x == 0)
+        return y0;
+
+    // 64-bit intermediates keep `step * d_err` from overflowing.
+    const run: i64 = d_x;
+    const rise: i64 = d_err;
+    const advanced: i64 = @intCast(step);
+    const rounding_bias: i64 = @divTrunc(d_x - 1, 2);
+
+    const crossings: i32 = @intCast(@divTrunc((advanced * rise) + rounding_bias, run));
+    return y0 + (y_step * crossings);
+}
+
+/// Task entry point: runs `run` and logs any error it returns, together with
+/// the error return trace when one is available, instead of propagating it.
+fn runWrapper(data: RunData) void {
+    const log = std.log.scoped(.@"Rasterization stage");
+    @call(.always_inline, run, .{data}) catch |err| {
+        log.err("line fill mode catched a '{s}'", .{@errorName(err)});
+        if (@errorReturnTrace()) |trace|
+            std.debug.dumpErrorReturnTrace(trace);
+    };
+}
+
+/// Shades and writes the pixels of the steps `start_step..=end_step` of the
+/// line described by `data` into the framebuffer's first attachment. Pixels
+/// outside the draw call's scissor are skipped. A fragment shader failure is
+/// logged and ends this task early. Does nothing when no framebuffer is bound.
+inline fn run(data: RunData) !void {
+    const framebuffer = data.draw_call.renderer.framebuffer orelse return;
+    const render_target_view: *base.ImageView = framebuffer.interface.attachments[0];
+    const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
+
+    for (data.start_step..data.end_step + 1) |step| {
+        const major = data.x0 + @as(i32, @intCast(step));
+        const minor = bresenhamYAtStep(data.y0, data.d_x, data.d_err, data.y_step, step);
+
+        // Undo the x/y swap done for steep lines.
+        const pixel_x = if (data.steep) minor else major;
+        const pixel_y = if (data.steep) major else minor;
+
+        if (!common.scissorContainsPixel(data.draw_call.scissor, pixel_x, pixel_y))
+            continue;
+
+        // Interpolation parameter along the line; a single-pixel line uses 0.
+        const t = @as(f32, @floatFromInt(step)) / @as(f32, @floatFromInt(@max(data.d_x, 1)));
+        const z = ((1.0 - t) * data.start_vertex.position[2]) + (t * data.end_vertex.position[2]);
+
+        const pixel = fragment.shaderInvocation(
+            data.allocator,
+            data.draw_call,
+            data.batch_id,
+            zm.f32x4(@floatFromInt(pixel_x), @floatFromInt(pixel_y), z, 1.0),
+            try common.interpolateLineOutputs(data.allocator, data.start_vertex, data.end_vertex, t),
+        ) catch |err| {
+            std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
+            if (@errorReturnTrace()) |trace|
+                std.debug.dumpErrorReturnTrace(trace);
+            return;
+        };
+
+        const range = render_target_view.subresource_range;
+        try render_target.writeFloat4(
+            .{
+                .x = pixel_x,
+                .y = pixel_y,
+                .z = 0, // FIXME
+            },
+            .{
+                .aspect_mask = range.aspect_mask,
+                .mip_level = range.base_mip_level,
+                .array_layer = range.base_array_layer,
+            },
+            render_target_view.format,
+            pixel,
+        );
+    }
+}
@@ -0,0 +1,87 @@
+const std = @import("std");
+const vk = @import("vulkan");
+const base = @import("base");
+const zm = base.zm;
+const spv = @import("spv");
+
+const Renderer = @import("../Renderer.zig");
+
+const VkError = base.VkError;
+const F32x4 = zm.F32x4;
+
+/// Tells whether pixel (`x`, `y`) lies inside `scissor`. The rectangle includes
+/// its offset and excludes `offset + extent`. Computations are done on 64-bit
+/// integers so that `offset + extent` cannot overflow.
+pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool {
+    const left: i64 = scissor.offset.x;
+    const top: i64 = scissor.offset.y;
+    const right: i64 = left + @as(i64, @intCast(scissor.extent.width));
+    const bottom: i64 = top + @as(i64, @intCast(scissor.extent.height));
+
+    const px: i64 = x;
+    const py: i64 = y;
+
+    if (px < left or px >= right)
+        return false;
+    if (py < top or py >= bottom)
+        return false;
+    return true;
+}
+
+/// Copies the in-memory bytes of `value` to the first `@sizeOf(T)` bytes of
+/// `bytes`, which must be at least that long. No alignment is required.
+fn writePacked(comptime T: type, bytes: []u8, value: T) void {
+    const size = @sizeOf(T);
+    @memcpy(bytes[0..size], std.mem.asBytes(&value));
+}
+
+/// Returns the weighted sum `value0 * b0 + value1 * b1 + value2 * b2`, applied
+/// to each of the four lanes.
+fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
+    const weight0: F32x4 = @splat(b0);
+    const weight1: F32x4 = @splat(b1);
+    const weight2: F32x4 = @splat(b2);
+
+    const term0 = value0 * weight0;
+    const term1 = value1 * weight1;
+    const term2 = value2 * weight2;
+    return (term0 + term1) + term2;
+}
+
+/// Builds the fragment shader inputs for one fragment from the outputs of the
+/// three vertices `v0`, `v1`, `v2` and the weights `b0`, `b1`, `b2`.
+///
+/// For each location that all three vertices provide:
+/// - flat outputs are copied from `v0`;
+/// - other outputs are interpolated as a sequence of `f32` values (four at a
+///   time, then one at a time); trailing bytes that do not fill an `f32` are
+///   copied from `v0`.
+///
+/// Every other location holds an empty slice. Each non-empty slice of the
+/// result is allocated with `allocator` and owned by the caller, so it can be
+/// released uniformly without touching the vertices' own blobs. Returns
+/// `VkError.OutOfDeviceMemory` when an allocation fails; slices allocated so
+/// far are released in that case.
+pub fn interpolateVertexOutputs(
+    allocator: std.mem.Allocator,
+    v0: *const Renderer.Vertex,
+    v1: *const Renderer.Vertex,
+    v2: *const Renderer.Vertex,
+    b0: f32,
+    b1: f32,
+    b2: f32,
+) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
+    // Start from empty slices so that skipped locations are never left
+    // undefined: consumers index this array by shader input location.
+    var inputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 = @splat(@as([]u8, &.{}));
+    errdefer {
+        // Freeing an empty slice is a no-op, so every entry can be released.
+        for (inputs) |input| allocator.free(input);
+    }
+
+    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
+        const out0 = v0.outputs[location] orelse continue;
+        const out1 = v1.outputs[location] orelse continue;
+        const out2 = v2.outputs[location] orelse continue;
+
+        if (out0.blob.len == 0)
+            continue;
+
+        if (out0.interpolation_type == .flat) {
+            // Hand out a copy: the consumer frees its inputs, and the vertex
+            // blob is still needed by the other fragments of the primitive.
+            inputs[location] = allocator.dupe(u8, out0.blob) catch return VkError.OutOfDeviceMemory;
+            continue;
+        }
+
+        const len = @min(out0.blob.len, out1.blob.len, out2.blob.len);
+        const input = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;
+        inputs[location] = input;
+
+        var byte_index: usize = 0;
+        while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) {
+            const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]);
+            const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]);
+            const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]);
+            writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
+        }
+
+        while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
+            const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]);
+            const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]);
+            const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]);
+            writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
+        }
+
+        // Bytes that do not fill a whole f32 cannot be interpolated.
+        if (byte_index < len)
+            @memcpy(input[byte_index..], out0.blob[byte_index..len]);
+    }
+
+    return inputs;
+}
+
+/// Builds the fragment shader inputs at parameter `t` along the line from `v0`
+/// (`t == 0`) to `v1` (`t == 1`). Implemented as a triangle interpolation whose
+/// third vertex is `v0` with a zero weight.
+pub fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
+    const start_weight = 1.0 - t;
+    const end_weight = t;
+    return interpolateVertexOutputs(allocator, v0, v1, v0, start_weight, end_weight, 0.0);
+}
@@ -0,0 +1,170 @@
+const std = @import("std");
+const vk = @import("vulkan");
+const base = @import("base");
+const spv = @import("spv");
+const zm = base.zm;
+
+const common = @import("common.zig");
+const fragment = @import("../fragment.zig");
+
+const Renderer = @import("../Renderer.zig");
+const SoftImage = @import("../../SoftImage.zig");
+
+const VkError = base.VkError;
+const SpvRuntimeError = spv.Runtime.RuntimeError;
+const F32x4 = zm.F32x4;
+
+/// Work description handed to one asynchronous triangle-rasterization task.
+const RunData = struct {
+    /// Allocator forwarded to output interpolation and the fragment shader.
+    allocator: std.mem.Allocator,
+    draw_call: *Renderer.DrawCall,
+    /// Index of the fragment runtime used by this task.
+    batch_id: usize,
+    /// Inclusive pixel bounds of the bounding-box cell covered by this task.
+    min_x: i32,
+    max_x: i32,
+    min_y: i32,
+    max_y: i32,
+    /// Edge function of the whole triangle, used to normalize the weights.
+    area: f32,
+    /// Triangle vertices, already in screen space.
+    v0: *Renderer.Vertex,
+    v1: *Renderer.Vertex,
+    v2: *Renderer.Vertex,
+};
+
+/// Fills the screen-space triangle (`v0`, `v1`, `v2`).
+///
+/// The integer bounding box of the triangle is split into a `grid_size` x
+/// `grid_size` grid, where `grid_size` is the floored square root of the number
+/// of fragment runtimes. One asynchronous task is spawned per non-empty cell,
+/// each with its own `batch_id`, and the function waits for all of them.
+///
+/// Returns without drawing when the triangle has zero area, when no pipeline
+/// or fragment stage is bound, or when the stage has no runtime. Returns
+/// `VkError.DeviceLost` when waiting on the task group fails.
+pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex) VkError!void {
+    const io = draw_call.renderer.device.interface.io();
+
+    const min_x: i32 = @intFromFloat(@floor(@min(v0.position[0], v1.position[0], v2.position[0])));
+    const max_x: i32 = @intFromFloat(@ceil(@max(v0.position[0], v1.position[0], v2.position[0])));
+    const min_y: i32 = @intFromFloat(@floor(@min(v0.position[1], v1.position[1], v2.position[1])));
+    const max_y: i32 = @intFromFloat(@ceil(@max(v0.position[1], v1.position[1], v2.position[1])));
+
+    const area = edgeFunction(v0.position, v1.position, v2.position);
+    if (area == 0.0)
+        return;
+
+    const pipeline = draw_call.renderer.state.pipeline orelse return;
+
+    const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
+    // Without a runtime `grid_size` would be 0 and the divisions below would
+    // divide by zero; there is nothing to shade with anyway.
+    if (runtimes_count == 0)
+        return;
+
+    var wg: std.Io.Group = .init;
+    const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
+
+    const width: usize = @intCast(max_x - min_x + 1);
+    const height: usize = @intCast(max_y - min_y + 1);
+
+    // Ceiling divisions: the grid always covers the whole bounding box.
+    const cols_per_run = @divTrunc(width + grid_size - 1, grid_size);
+    const rows_per_run = @divTrunc(height + grid_size - 1, grid_size);
+
+    var batch_id: usize = 0;
+    for (0..grid_size) |gy| {
+        for (0..grid_size) |gx| {
+            defer batch_id = @mod(batch_id + 1, runtimes_count);
+
+            const run_min_x = min_x + @as(i32, @intCast(gx * cols_per_run));
+            const run_min_y = min_y + @as(i32, @intCast(gy * rows_per_run));
+
+            // Cells starting past the bounding box are empty.
+            if (run_min_x > max_x or run_min_y > max_y)
+                continue;
+
+            const run_max_x = @min(
+                run_min_x + @as(i32, @intCast(cols_per_run)) - 1,
+                max_x,
+            );
+
+            const run_max_y = @min(
+                run_min_y + @as(i32, @intCast(rows_per_run)) - 1,
+                max_y,
+            );
+
+            const run_data: RunData = .{
+                .allocator = allocator,
+                .draw_call = draw_call,
+                .batch_id = batch_id,
+                .v0 = v0,
+                .v1 = v1,
+                .v2 = v2,
+                .area = area,
+                .min_x = run_min_x,
+                .max_x = run_max_x,
+                .min_y = run_min_y,
+                .max_y = run_max_y,
+            };
+
+            wg.async(io, runWrapper, .{run_data});
+        }
+    }
+    wg.await(io) catch return VkError.DeviceLost;
+}
+
+/// Returns the 2D cross product of `p - a` and `b - a`, using only the x and y
+/// components. Its sign tells on which side of the edge a->b the point `p` is.
+inline fn edgeFunction(a: F32x4, b: F32x4, p: F32x4) f32 {
+    const ab_x = b[0] - a[0];
+    const ab_y = b[1] - a[1];
+    const ap_x = p[0] - a[0];
+    const ap_y = p[1] - a[1];
+    return (ap_x * ab_y) - (ap_y * ab_x);
+}
+
+/// Task entry point: runs `run` and logs any error it returns, together with
+/// the error return trace when one is available, instead of propagating it.
+fn runWrapper(data: RunData) void {
+    const log = std.log.scoped(.@"Rasterization stage");
+    @call(.always_inline, run, .{data}) catch |err| {
+        log.err("triangle fill mode catched a '{s}'", .{@errorName(err)});
+        if (@errorReturnTrace()) |trace|
+            std.debug.dumpErrorReturnTrace(trace);
+    };
+}
+
+/// Shades and writes every pixel of the cell `[min_x, max_x] x [min_y, max_y]`
+/// whose center lies inside the triangle, into the framebuffer's first
+/// attachment. Pixels outside the draw call's scissor are skipped. A fragment
+/// shader failure is logged and ends this task early. Does nothing when no
+/// framebuffer is bound.
+inline fn run(data: RunData) !void {
+    const framebuffer = data.draw_call.renderer.framebuffer orelse return;
+    const render_target_view: *base.ImageView = framebuffer.interface.attachments[0];
+    const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image));
+
+    var y = data.min_y;
+    while (y <= data.max_y) : (y += 1) {
+        var x = data.min_x;
+        while (x <= data.max_x) : (x += 1) {
+            if (!common.scissorContainsPixel(data.draw_call.scissor, x, y))
+                continue;
+
+            // Coverage is evaluated at the pixel center.
+            const column: f32 = @floatFromInt(x);
+            const row: f32 = @floatFromInt(y);
+            const sample = zm.f32x4(column + 0.5, row + 0.5, 0.0, 1.0);
+
+            const w0 = edgeFunction(data.v1.position, data.v2.position, sample);
+            const w1 = edgeFunction(data.v2.position, data.v0.position, sample);
+            const w2 = edgeFunction(data.v0.position, data.v1.position, sample);
+
+            // The three edge functions must share the sign of the triangle area.
+            const covered = if (data.area > 0.0)
+                w0 >= 0.0 and w1 >= 0.0 and w2 >= 0.0
+            else
+                w0 <= 0.0 and w1 <= 0.0 and w2 <= 0.0;
+            if (!covered)
+                continue;
+
+            // Barycentric weights of the sample.
+            const b0 = w0 / data.area;
+            const b1 = w1 / data.area;
+            const b2 = w2 / data.area;
+            const z = (b0 * data.v0.position[2]) + (b1 * data.v1.position[2]) + (b2 * data.v2.position[2]);
+
+            const pixel = fragment.shaderInvocation(
+                data.allocator,
+                data.draw_call,
+                data.batch_id,
+                zm.f32x4(column, row, z, 1.0),
+                try common.interpolateVertexOutputs(data.allocator, data.v0, data.v1, data.v2, b0, b1, b2),
+            ) catch |err| {
+                std.log.scoped(.@"Fragment stage").err("catched a '{s}'", .{@errorName(err)});
+                if (@errorReturnTrace()) |trace|
+                    std.debug.dumpErrorReturnTrace(trace);
+                return;
+            };
+
+            const range = render_target_view.subresource_range;
+            try render_target.writeFloat4(
+                .{
+                    .x = x,
+                    .y = y,
+                    .z = 0, // FIXME
+                },
+                .{
+                    .aspect_mask = range.aspect_mask,
+                    .mip_level = range.base_mip_level,
+                    .array_layer = range.base_array_layer,
+                },
+                render_target_view.format,
+                pixel,
+            );
+        }
+    }
+}
@@ -13,7 +13,6 @@ const VkError = base.VkError;

 pub const RunData = struct {
    allocator: std.mem.Allocator,
-    renderer: *Renderer,
    pipeline: *SoftPipeline,
    batch_id: usize,
    batch_size: usize,
@@ -35,10 +34,9 @@ pub fn runWrapper(data: RunData) void {
 }

 inline fn run(data: RunData) !void {
-    const allocator = data.renderer.device.device_allocator.allocator();
-
    const shader = data.pipeline.stages.getPtrAssertContains(.vertex);
    const rt = &shader.runtimes[data.batch_id];
+    try rt.populatePushConstants(data.draw_call.renderer.state.push_constant_blob[0..]);

    const entry = try rt.getEntryPointByName(shader.entry);

@@ -58,7 +56,7 @@ inline fn run(data: RunData) !void {

                const binding_info = (data.pipeline.interface.mode.graphics.input_assembly.binding_description orelse return)[attribute.binding];

-                const vertex_buffer = data.renderer.state.data.graphics.vertex_buffers[attribute.binding];
+                const vertex_buffer = data.draw_call.renderer.state.data.graphics.vertex_buffers[attribute.binding];
                const buffer = vertex_buffer.buffer;
                const buffer_memory_size = base.format.texelSize(attribute.format);
                const buffer_memory = if (buffer.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
@@ -70,7 +68,7 @@ inline fn run(data: RunData) !void {
            }
        }

-        rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
+        rt.callEntryPoint(data.allocator, entry) catch |err| switch (err) {
            // Some errors can be safely ignored
            SpvRuntimeError.OutOfBounds,
            SpvRuntimeError.Killed,
@@ -81,6 +79,19 @@ inline fn run(data: RunData) !void {
        const output: *Renderer.Vertex = &data.draw_call.vertices[(data.instance_index * data.vertex_count) + invocation_index];
        try rt.readBuiltIn(std.mem.asBytes(&output.position), .Position);

+        if (invocation_index == 0) {
+            // Debug aid: dump the runtime's results table for invocation 0 into the working directory.
+            const io = data.draw_call.renderer.device.interface.io();
+            const file = try std.Io.Dir.cwd().createFile(io, "vertex_result_table_dump.txt", .{ .truncate = true });
+            defer file.close(io);
+
+            var buffer = [_]u8{0} ** 1024;
+            var writer = file.writer(io, buffer[0..]);
+            try rt.dumpResultsTable(data.allocator, &writer.interface);
+            // The writer is buffered and closing the file does not drain it: flush explicitly.
+            try writer.interface.flush();
+        }
+
        for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
            const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) {
                SpvRuntimeError.NotFound => continue,
@@ -96,6 +107,9 @@ inline fn run(data: RunData) !void {
 }

 fn setupBuiltins(rt: *spv.Runtime, vertex_index: usize, instance_index: usize) !void {
-    try rt.writeBuiltIn(std.mem.asBytes(&vertex_index), .VertexIndex);
-    try rt.writeBuiltIn(std.mem.asBytes(&instance_index), .InstanceIndex);
+    const vertex_index_u32: u32 = @intCast(vertex_index);
+    const instance_index_u32: u32 = @intCast(instance_index);
+
+    try rt.writeBuiltIn(std.mem.asBytes(&vertex_index_u32), .VertexIndex);
+    try rt.writeBuiltIn(std.mem.asBytes(&instance_index_u32), .InstanceIndex);
 }
@@ -60,15 +60,15 @@ pub const MIN_STORAGE_BUFFER_ALIGNMENT = 256;
 pub const MAX_VERTEX_INPUT_BINDINGS = 16;
 pub const MAX_VERTEX_INPUT_ATTRIBUTES = 32;

+pub const PUSH_CONSTANT_SIZE = 256;
+
 pub const MAX_IMAGE_LEVELS_1D = 15;
 pub const MAX_IMAGE_LEVELS_2D = 15;
 pub const MAX_IMAGE_LEVELS_3D = 12;
 pub const MAX_IMAGE_LEVELS_CUBE = 15;
 pub const MAX_IMAGE_ARRAY_LAYERS = 2048;

-pub const PHYSICAL_DEVICE_HEAP_SIZE = 0x80000000; // 2 GiB
-pub const MAX_MEMORY_ALLOCATION_SIZE = 0x80000000; // 2 GiB
-pub const MAX_ALLOCATION_COUNT = 4096;
+pub const PHYSICAL_DEVICE_FALLBACK_HEAP_SIZE = 0x10000000; // 256 MiB

 pub const std_options = base.std_options;

@@ -62,9 +62,11 @@ pub const DispatchTable = struct {
    executeCommands: *const fn (*Self, *Self) VkError!void,
    fillBuffer: *const fn (*Self, *Buffer, vk.DeviceSize, vk.DeviceSize, u32) VkError!void,
    pipelineBarrier: *const fn (*Self, vk.PipelineStageFlags, vk.PipelineStageFlags, vk.DependencyFlags, []const vk.MemoryBarrier, []const vk.BufferMemoryBarrier, []const vk.ImageMemoryBarrier) VkError!void,
+    pushConstants: *const fn (*Self, vk.ShaderStageFlags, u32, []const u8) VkError!void,
    reset: *const fn (*Self, vk.CommandBufferResetFlags) VkError!void,
    resetEvent: *const fn (*Self, *Event, vk.PipelineStageFlags) VkError!void,
    setEvent: *const fn (*Self, *Event, vk.PipelineStageFlags) VkError!void,
+    setScissor: *const fn (*Self, u32, []const vk.Rect2D) VkError!void,
    setViewport: *const fn (*Self, u32, []const vk.Viewport) VkError!void,
    waitEvent: *const fn (*Self, *Event, vk.PipelineStageFlags, vk.PipelineStageFlags, []const vk.MemoryBarrier, []const vk.BufferMemoryBarrier, []const vk.ImageMemoryBarrier) VkError!void,
 };
@@ -253,6 +255,10 @@ pub inline fn pipelineBarrier(
    try self.dispatch_table.pipelineBarrier(self, src_stage, dst_stage, dependency, memory_barriers, buffer_barriers, image_barriers);
 }

+pub inline fn pushConstants(self: *Self, stages: vk.ShaderStageFlags, offset: u32, blob: []const u8) VkError!void {
+    try self.dispatch_table.pushConstants(self, stages, offset, blob);
+}
+
 pub inline fn resetEvent(self: *Self, event: *Event, stage: vk.PipelineStageFlags) VkError!void {
    try self.dispatch_table.resetEvent(self, event, stage);
 }
@@ -261,6 +267,10 @@ pub inline fn setEvent(self: *Self, event: *Event, stage: vk.PipelineStageFlags)
    try self.dispatch_table.setEvent(self, event, stage);
 }

+pub inline fn setScissor(self: *Self, first: u32, scissor: []const vk.Rect2D) VkError!void {
+    try self.dispatch_table.setScissor(self, first, scissor);
+}
+
 pub inline fn setViewport(self: *Self, first: u32, viewports: []const vk.Viewport) VkError!void {
    try self.dispatch_table.setViewport(self, first, viewports);
 }
@@ -1,12 +1,13 @@
 const std = @import("std");
 const vk = @import("vulkan");

-const NonDispatchable = @import("NonDispatchable.zig");
+const NonDispatchable = @import("NonDispatchable.zig").NonDispatchable;

 const VkError = @import("error_set.zig").VkError;

 const Device = @import("Device.zig");
 const PipelineCache = @import("PipelineCache.zig");
+const PipelineLayout = @import("PipelineLayout.zig");

 const Self = @This();
 pub const ObjectType: vk.ObjectType = .pipeline;
@@ -28,6 +29,7 @@ owner: *Device,
 vtable: *const VTable,
 bind_point: vk.PipelineBindPoint,
 stages: vk.ShaderStageFlags,
+layout: *PipelineLayout,
 mode: union(enum) {
    compute: struct {},
    graphics: struct {
@@ -55,14 +57,18 @@ pub const VTable = struct {
 };

 pub fn initCompute(device: *Device, allocator: std.mem.Allocator, cache: ?*PipelineCache, info: *const vk.ComputePipelineCreateInfo) VkError!Self {
-    _ = allocator;
    _ = cache;

+    const layout = try NonDispatchable(PipelineLayout).fromHandleObject(info.layout);
+    layout.ref();
+    errdefer layout.unref(allocator);
+
    return .{
        .owner = device,
        .vtable = undefined,
        .bind_point = .compute,
        .stages = info.stage.stage,
+        .layout = layout,
        .mode = .{ .compute = .{} },
    };
 }
@@ -70,6 +76,10 @@ pub fn initCompute(device: *Device, allocator: std.mem.Allocator, cache: ?*Pipel
 pub fn initGraphics(device: *Device, allocator: std.mem.Allocator, cache: ?*PipelineCache, info: *const vk.GraphicsPipelineCreateInfo) VkError!Self {
    _ = cache;

+    const layout = try NonDispatchable(PipelineLayout).fromHandleObject(info.layout);
+    layout.ref();
+    errdefer layout.unref(allocator);
+
    var stages: vk.ShaderStageFlags = .{};
    if (info.p_stages) |p_stages| {
        for (p_stages[0..info.stage_count]) |stage| {
@@ -82,6 +92,7 @@ pub fn initGraphics(device: *Device, allocator: std.mem.Allocator, cache: ?*Pipe
        .vtable = undefined,
        .bind_point = .graphics,
        .stages = stages,
+        .layout = layout,
        .mode = .{
            .graphics = .{
                .input_assembly = .{
@@ -172,5 +183,6 @@ pub inline fn destroy(self: *Self, allocator: std.mem.Allocator) void {
            }
        },
    }
+    self.layout.unref(allocator);
    self.vtable.destroy(self, allocator);
 }
@@ -23,19 +23,6 @@ dynamic_descriptor_offsets: [lib.VULKAN_MAX_DESCRIPTOR_SETS]usize,
 push_ranges_count: usize,
 push_ranges: [lib.VULKAN_MAX_PUSH_CONSTANT_RANGES]vk.PushConstantRange,

-/// Mesa's common Vulkan runtime states:
-///
-/// It's often necessary to store a pointer to the descriptor set layout in
-/// the descriptor so that any entrypoint which has access to a descriptor
-/// set also has the layout. While layouts are often passed into various
-/// entrypoints, they're notably missing from vkUpdateDescriptorSets(). In
-/// order to implement descriptor writes, you either need to stash a pointer
-/// to the descriptor set layout in the descriptor set or you need to copy
-/// all of the relevant information.  Storing a pointer is a lot cheaper.
-///
-/// Because descriptor set layout lifetimes and descriptor set lifetimes are
-/// not guaranteed to coincide, we have to reference count if we're going to
-/// do this.
 ref_count: std.atomic.Value(usize),

 vtable: *const VTable,
@@ -1957,20 +1957,14 @@ pub export fn strollCmdPipelineBarrier(
    ) catch |err| return errorLogger(err);
 }

-pub export fn strollCmdPushConstants(p_cmd: vk.CommandBuffer, layout: vk.PipelineLayout, flags: vk.ShaderStageFlags, offset: u32, size: u32, values: *const anyopaque) callconv(vk.vulkan_call_conv) void {
+pub export fn strollCmdPushConstants(p_cmd: vk.CommandBuffer, layout: vk.PipelineLayout, flags: vk.ShaderStageFlags, offset: u32, size: u32, data: [*]const u8) callconv(vk.vulkan_call_conv) void {
    entryPointBeginLogTrace(.vkCmdPushConstants);
    defer entryPointEndLogTrace();

    const cmd = Dispatchable(CommandBuffer).fromHandleObject(p_cmd) catch |err| return errorLogger(err);
+    cmd.pushConstants(flags, offset, data[0..size]) catch |err| return errorLogger(err);

-    notImplementedWarning();
-
-    _ = cmd;
-    _ = layout;
-    _ = flags;
-    _ = offset;
-    _ = size;
-    _ = values;
+    _ = layout; // Unused: pipelines embed their own layout, which is more trustworthy
 }

 pub export fn strollCmdResetQueryPool(p_cmd: vk.CommandBuffer, p_pool: vk.QueryPool, first: u32, count: u32) callconv(vk.vulkan_call_conv) void {
@@ -2089,13 +2083,7 @@ pub export fn strollCmdSetScissor(p_cmd: vk.CommandBuffer, first: u32, count: u3
    defer entryPointEndLogTrace();

    const cmd = Dispatchable(CommandBuffer).fromHandleObject(p_cmd) catch |err| return errorLogger(err);
-
-    notImplementedWarning();
-
-    _ = cmd;
-    _ = first;
-    _ = count;
-    _ = scissors;
+    cmd.setScissor(first, scissors[0..count]) catch |err| return errorLogger(err);
 }

 pub export fn strollCmdSetStencilCompareMask(p_cmd: vk.CommandBuffer, face_mask: vk.StencilFaceFlags, compare_mask: u32) callconv(vk.vulkan_call_conv) void {
@@ -59,7 +59,7 @@ pub fn log(comptime level: std.log.Level, comptime scope: @EnumLiteral(), compti
    file.lock(io, .exclusive) catch {};
    defer file.unlock(io);

-    const now = std.Io.Timestamp.now(io, .cpu_process).toMicroseconds();
+    const now = std.Io.Timestamp.now(io, .real).toMicroseconds();

    const now_us: u16 = @intCast(@mod(now, 1000));
    const now_ms: u16 = @intCast(@mod(@divTrunc(now, 1000), std.time.ms_per_s));
Author	SHA1	Message	Date
kbz_8	b5b05776d8	refactoring renderer Test / build_and_test (push) Successful in 35s Details Build / build (push) Successful in 1m20s Details	2026-05-13 22:05:25 +02:00
kbz_8	faae8e86e0	implementing push constants	2026-05-12 03:01:17 +02:00