fixing slow memory leak

2026-05-14 00:23:46 +02:00
parent b5b05776d8
commit 124ea12d2e
12 changed files with 185 additions and 200 deletions
@@ -100,7 +100,7 @@ pub fn drawLine(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0:
    wg.await(io) catch return VkError.DeviceLost;
 }

-inline fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 {
+fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 {
    if (d_x == 0)
        return y0;

@@ -25,15 +25,6 @@ pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool {
        pixel_y < max_y;
 }

-fn writePacked(comptime T: type, bytes: []u8, value: T) void {
-    const raw: [@sizeOf(T)]u8 = @bitCast(value);
-    @memcpy(bytes[0..@sizeOf(T)], raw[0..]);
-}
-
-fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
-    return (value0 * @as(F32x4, @splat(b0))) + (value1 * @as(F32x4, @splat(b1))) + (value2 * @as(F32x4, @splat(b2)));
-}
-
 pub fn interpolateVertexOutputs(
    allocator: std.mem.Allocator,
    v0: *const Renderer.Vertex,
@@ -63,14 +54,14 @@ pub fn interpolateVertexOutputs(
            const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]);
            const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]);
            const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]);
-            writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
+            base.utils.writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
        }

        while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
            const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]);
            const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]);
            const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]);
-            writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
+            base.utils.writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
        }

        if (byte_index < len)
@@ -85,3 +76,7 @@ pub fn interpolateVertexOutputs(
 pub fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
    return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0);
 }
+
+inline fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
+    return (value0 * zm.f32x4s(b0)) + (value1 * zm.f32x4s(b1)) + (value2 * zm.f32x4s(b2));
+}
@@ -42,7 +42,6 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,

    const pipeline = draw_call.renderer.state.pipeline orelse return;

-    var wg: std.Io.Group = .init;
    const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
    const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));

@@ -53,6 +52,8 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
    const rows_per_run = @divTrunc(height + grid_size - 1, grid_size);

    var batch_id: usize = 0;
+
+    var wg: std.Io.Group = .init;
    for (0..grid_size) |gy| {
        for (0..grid_size) |gx| {
            defer batch_id = @mod(batch_id + 1, runtimes_count);