fixing slow memory leak
This commit is contained in:
+59
-57
@@ -54,9 +54,13 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
|
|||||||
|
|
||||||
const device_allocator = soft_device.device_allocator.allocator();
|
const device_allocator = soft_device.device_allocator.allocator();
|
||||||
|
|
||||||
var runtimes_allocator_arena: std.heap.ArenaAllocator = .init(device_allocator);
|
self.* = .{
|
||||||
errdefer runtimes_allocator_arena.deinit();
|
.interface = interface,
|
||||||
const runtimes_allocator = runtimes_allocator_arena.allocator();
|
.runtimes_allocator = .init(device_allocator),
|
||||||
|
.stages = std.EnumMap(Stages, Shader).init(.{}),
|
||||||
|
};
|
||||||
|
errdefer self.runtimes_allocator.deinit();
|
||||||
|
const runtimes_allocator = self.runtimes_allocator.allocator();
|
||||||
|
|
||||||
const instance: *SoftInstance = @alignCast(@fieldParentPtr("interface", device.instance));
|
const instance: *SoftInstance = @alignCast(@fieldParentPtr("interface", device.instance));
|
||||||
const runtimes_count = switch (instance.threaded.async_limit) {
|
const runtimes_count = switch (instance.threaded.async_limit) {
|
||||||
@@ -68,57 +72,51 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache:
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
self.* = .{
|
self.stages.put(.compute, blk: {
|
||||||
.interface = interface,
|
var shader: Shader = undefined;
|
||||||
.runtimes_allocator = runtimes_allocator_arena,
|
soft_module.ref();
|
||||||
.stages = std.EnumMap(Stages, Shader).init(.{
|
shader.module = soft_module;
|
||||||
.compute = blk: {
|
|
||||||
var shader: Shader = undefined;
|
|
||||||
soft_module.ref();
|
|
||||||
shader.module = soft_module;
|
|
||||||
|
|
||||||
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
|
const runtimes = runtimes_allocator.alloc(spv.Runtime, runtimes_count) catch return VkError.OutOfDeviceMemory;
|
||||||
|
|
||||||
for (runtimes) |*runtime| {
|
for (runtimes) |*runtime| {
|
||||||
runtime.* = spv.Runtime.init(
|
runtime.* = spv.Runtime.init(
|
||||||
runtimes_allocator,
|
runtimes_allocator,
|
||||||
&soft_module.module,
|
&soft_module.module,
|
||||||
.{
|
.{
|
||||||
.readImageFloat4 = readImageFloat4,
|
.readImageFloat4 = readImageFloat4,
|
||||||
.readImageInt4 = readImageInt4,
|
.readImageInt4 = readImageInt4,
|
||||||
.writeImageFloat4 = writeImageFloat4,
|
.writeImageFloat4 = writeImageFloat4,
|
||||||
.writeImageInt4 = writeImageInt4,
|
.writeImageInt4 = writeImageInt4,
|
||||||
},
|
},
|
||||||
) catch |err| {
|
) catch |err| {
|
||||||
std.log.scoped(.SpvRuntimeInit).err("SPIR-V Runtime failed to initialize, {s}", .{@errorName(err)});
|
std.log.scoped(.SpvRuntimeInit).err("SPIR-V Runtime failed to initialize, {s}", .{@errorName(err)});
|
||||||
return VkError.Unknown;
|
return VkError.Unknown;
|
||||||
};
|
};
|
||||||
if (info.stage.p_specialization_info) |specialization| {
|
if (info.stage.p_specialization_info) |specialization| {
|
||||||
if (specialization.p_map_entries) |map| {
|
if (specialization.p_map_entries) |map| {
|
||||||
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
|
const data: []const u8 = @as([*]const u8, @ptrCast(@alignCast(specialization.p_data)))[0..specialization.data_size];
|
||||||
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
|
for (map[0..], 0..specialization.map_entry_count) |entry, _| {
|
||||||
runtime.addSpecializationInfo(
|
runtime.addSpecializationInfo(
|
||||||
runtimes_allocator,
|
runtimes_allocator,
|
||||||
.{
|
.{
|
||||||
.id = @intCast(entry.constant_id),
|
.id = @intCast(entry.constant_id),
|
||||||
.offset = @intCast(entry.offset),
|
.offset = @intCast(entry.offset),
|
||||||
.size = @intCast(entry.size),
|
.size = @intCast(entry.size),
|
||||||
},
|
},
|
||||||
data,
|
data,
|
||||||
) catch return VkError.OutOfDeviceMemory;
|
) catch return VkError.OutOfDeviceMemory;
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
shader.runtimes = runtimes;
|
shader.runtimes = runtimes;
|
||||||
shader.entry = runtimes_allocator.dupe(u8, std.mem.span(info.stage.p_name)) catch return VkError.OutOfDeviceMemory;
|
shader.entry = runtimes_allocator.dupe(u8, std.mem.span(info.stage.p_name)) catch return VkError.OutOfDeviceMemory;
|
||||||
|
|
||||||
std.log.scoped(.ComputePipeline).debug("Created {d} runtimes for compute stage", .{runtimes_count});
|
std.log.scoped(.ComputePipeline).debug("Created {d} runtimes for compute stage", .{runtimes_count});
|
||||||
break :blk shader;
|
break :blk shader;
|
||||||
},
|
});
|
||||||
}),
|
|
||||||
};
|
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,9 +133,13 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
|
|||||||
const soft_device: *SoftDevice = @alignCast(@fieldParentPtr("interface", device));
|
const soft_device: *SoftDevice = @alignCast(@fieldParentPtr("interface", device));
|
||||||
const device_allocator = soft_device.device_allocator.allocator();
|
const device_allocator = soft_device.device_allocator.allocator();
|
||||||
|
|
||||||
var runtimes_allocator_arena: std.heap.ArenaAllocator = .init(device_allocator);
|
self.* = .{
|
||||||
errdefer runtimes_allocator_arena.deinit();
|
.interface = interface,
|
||||||
const runtimes_allocator = runtimes_allocator_arena.allocator();
|
.runtimes_allocator = .init(device_allocator),
|
||||||
|
.stages = std.EnumMap(Stages, Shader).init(.{}),
|
||||||
|
};
|
||||||
|
errdefer self.runtimes_allocator.deinit();
|
||||||
|
const runtimes_allocator = self.runtimes_allocator.allocator();
|
||||||
|
|
||||||
const instance: *SoftInstance = @alignCast(@fieldParentPtr("interface", device.instance));
|
const instance: *SoftInstance = @alignCast(@fieldParentPtr("interface", device.instance));
|
||||||
const runtimes_count = switch (instance.threaded.async_limit) {
|
const runtimes_count = switch (instance.threaded.async_limit) {
|
||||||
@@ -149,12 +151,6 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
|
|||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
|
||||||
self.* = .{
|
|
||||||
.interface = interface,
|
|
||||||
.runtimes_allocator = runtimes_allocator_arena,
|
|
||||||
.stages = std.EnumMap(Stages, Shader).init(.{}),
|
|
||||||
};
|
|
||||||
|
|
||||||
if (info.p_stages) |stages| {
|
if (info.p_stages) |stages| {
|
||||||
for (stages[0..], 0..info.stage_count) |stage, _| {
|
for (stages[0..], 0..info.stage_count) |stage, _| {
|
||||||
var shader: Shader = undefined;
|
var shader: Shader = undefined;
|
||||||
@@ -228,9 +224,15 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache:
|
|||||||
|
|
||||||
pub fn destroy(interface: *Interface, allocator: std.mem.Allocator) void {
|
pub fn destroy(interface: *Interface, allocator: std.mem.Allocator) void {
|
||||||
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
||||||
|
const soft_device: *SoftDevice = @alignCast(@fieldParentPtr("interface", interface.owner));
|
||||||
|
const device_allocator = soft_device.device_allocator.allocator();
|
||||||
|
|
||||||
var it = self.stages.iterator();
|
var it = self.stages.iterator();
|
||||||
while (it.next()) |entry| {
|
while (it.next()) |entry| {
|
||||||
entry.value.module.unref(allocator);
|
entry.value.module.unref(allocator);
|
||||||
|
for (entry.value.runtimes) |*rt| {
|
||||||
|
rt.function_stack.clearAndFree(device_allocator); // Hacky to avoid leaks
|
||||||
|
}
|
||||||
}
|
}
|
||||||
self.runtimes_allocator.deinit();
|
self.runtimes_allocator.deinit();
|
||||||
allocator.destroy(self);
|
allocator.destroy(self);
|
||||||
|
|||||||
@@ -95,8 +95,7 @@ fn taskRunner(self: *Self, info: Interface.SubmitInfo, p_fence: ?*base.Fence, ru
|
|||||||
}
|
}
|
||||||
|
|
||||||
var execution_device: ExecutionDevice = undefined;
|
var execution_device: ExecutionDevice = undefined;
|
||||||
execution_device.init(soft_device);
|
execution_device.setup(soft_device);
|
||||||
defer execution_device.deinit();
|
|
||||||
|
|
||||||
for (info.command_buffers.items) |command_buffer| {
|
for (info.command_buffers.items) |command_buffer| {
|
||||||
const soft_command_buffer: *SoftCommandBuffer = @alignCast(@fieldParentPtr("interface", command_buffer));
|
const soft_command_buffer: *SoftCommandBuffer = @alignCast(@fieldParentPtr("interface", command_buffer));
|
||||||
|
|||||||
@@ -45,10 +45,6 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
|
||||||
_ = self;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count_z: u32) VkError!void {
|
pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count_z: u32) VkError!void {
|
||||||
const group_count: usize = @intCast(group_count_x * group_count_y * group_count_z);
|
const group_count: usize = @intCast(group_count_x * group_count_y * group_count_z);
|
||||||
|
|
||||||
|
|||||||
@@ -39,7 +39,7 @@ pipeline_states: [2]PipelineState,
|
|||||||
|
|
||||||
/// Initializes an execution device in place instead of
|
/// Initializes an execution device in place instead of
|
||||||
/// returning a new one, to avoid dangling pointers
|
/// returning a new one, to avoid dangling pointers
|
||||||
pub fn init(self: *Self, device: *SoftDevice) void {
|
pub fn setup(self: *Self, device: *SoftDevice) void {
|
||||||
for (self.pipeline_states[0..], 0..) |*state, i| {
|
for (self.pipeline_states[0..], 0..) |*state, i| {
|
||||||
state.* = .{
|
state.* = .{
|
||||||
.pipeline = null,
|
.pipeline = null,
|
||||||
@@ -60,8 +60,3 @@ pub fn init(self: *Self, device: *SoftDevice) void {
|
|||||||
self.compute = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.compute)]);
|
self.compute = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.compute)]);
|
||||||
self.renderer = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.graphics)]);
|
self.renderer = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.graphics)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
|
||||||
self.compute.deinit();
|
|
||||||
self.renderer.deinit();
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -59,7 +59,7 @@ pub const DrawCall = struct {
|
|||||||
viewport: vk.Viewport,
|
viewport: vk.Viewport,
|
||||||
scissor: vk.Rect2D,
|
scissor: vk.Rect2D,
|
||||||
|
|
||||||
pub fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize, renderer: *Self) VkError!@This() {
|
fn init(allocator: std.mem.Allocator, vertex_count: usize, instance_count: usize, renderer: *Self) VkError!@This() {
|
||||||
const self: @This() = .{
|
const self: @This() = .{
|
||||||
.vertices = allocator.alloc(Vertex, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory,
|
.vertices = allocator.alloc(Vertex, vertex_count * instance_count) catch return VkError.OutOfDeviceMemory,
|
||||||
.renderer = renderer,
|
.renderer = renderer,
|
||||||
@@ -73,6 +73,17 @@ pub const DrawCall = struct {
|
|||||||
|
|
||||||
return self;
|
return self;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Frees everything this draw call allocated through `allocator`:
/// the blob of every populated vertex output slot, then the vertex array itself.
fn deinit(self: *@This(), allocator: std.mem.Allocator) void {
    for (self.vertices) |*vertex| {
        // Only the first SPIRV_MAX_OUTPUT_LOCATIONS slots are inspected; empty slots are skipped.
        for (vertex.outputs[0..spv.SPIRV_MAX_OUTPUT_LOCATIONS]) |maybe_output| {
            const output = maybe_output orelse continue;
            allocator.free(output.blob);
        }
    }

    allocator.free(self.vertices);
}
|
||||||
};
|
};
|
||||||
|
|
||||||
device: *SoftDevice,
|
device: *SoftDevice,
|
||||||
@@ -96,10 +107,6 @@ pub fn init(device: *SoftDevice, state: *PipelineState) Self {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn deinit(self: *Self) void {
|
|
||||||
_ = self;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void {
|
pub fn draw(self: *Self, vertex_count: usize, instance_count: usize, first_vertex: usize, first_instance: usize) VkError!void {
|
||||||
var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
|
var bounded_allocator: BoundedAllocator = .init(self.device.device_allocator.allocator(), @"1GiB");
|
||||||
try self.drawCall(&bounded_allocator, vertex_count, instance_count, first_vertex, first_instance, null);
|
try self.drawCall(&bounded_allocator, vertex_count, instance_count, first_vertex, first_instance, null);
|
||||||
@@ -119,17 +126,18 @@ fn drawCall(self: *Self, bounded_allocator: *BoundedAllocator, vertex_count: usi
|
|||||||
const allocator = bounded_allocator.allocator();
|
const allocator = bounded_allocator.allocator();
|
||||||
|
|
||||||
var draw_call = try DrawCall.init(allocator, vertex_count, instance_count, self);
|
var draw_call = try DrawCall.init(allocator, vertex_count, instance_count, self);
|
||||||
|
defer draw_call.deinit(allocator);
|
||||||
|
|
||||||
const timer = std.Io.Timestamp.now(io, .real);
|
const timer = std.Io.Timestamp.now(io, .real);
|
||||||
defer if (comptime base.config.logs != .none) {
|
defer if (comptime base.config.logs != .none) {
|
||||||
const duration = timer.untilNow(io, .real);
|
const duration = timer.untilNow(io, .real);
|
||||||
const ms = duration.toMicroseconds();
|
const ms: f32 = @floatFromInt(duration.toMicroseconds());
|
||||||
const memory_footprint = @divTrunc(bounded_allocator.queryFootprint(), 1000);
|
const memory_footprint = @divTrunc(bounded_allocator.queryFootprint(), 1000);
|
||||||
const logger = std.log.scoped(.SoftwareRenderer);
|
const logger = std.log.scoped(.SoftwareRenderer);
|
||||||
if (memory_footprint > 256_000)
|
if (memory_footprint > 256_000)
|
||||||
logger.warn("Drawcall stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint })
|
logger.warn("Drawcall stats:\n> Took {d:.3}ms\n> Allocated {d} KB", .{ ms / 1000, memory_footprint })
|
||||||
else
|
else
|
||||||
logger.debug("Drawcall stats:\n> Took {d}us\n> Allocated {d} KB", .{ ms, memory_footprint });
|
logger.debug("Drawcall stats:\n> Took {d:.3}ms\n> Allocated {d} KB", .{ ms / 1000, memory_footprint });
|
||||||
};
|
};
|
||||||
|
|
||||||
self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, indices) catch |err| {
|
self.vertexShaderStage(allocator, &draw_call, vertex_count, instance_count, first_vertex, first_instance, indices) catch |err| {
|
||||||
|
|||||||
+93
-109
@@ -2,7 +2,6 @@ const std = @import("std");
|
|||||||
const vk = @import("vulkan");
|
const vk = @import("vulkan");
|
||||||
const base = @import("base");
|
const base = @import("base");
|
||||||
const zm = base.zm;
|
const zm = base.zm;
|
||||||
const lib = @import("../lib.zig");
|
|
||||||
const spv = @import("spv");
|
const spv = @import("spv");
|
||||||
|
|
||||||
pub const F32x4 = zm.F32x4;
|
pub const F32x4 = zm.F32x4;
|
||||||
@@ -36,114 +35,6 @@ const ClippedPolygon = struct {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
fn clipDistance(position: F32x4, plane: ClipPlane) f32 {
|
|
||||||
const x = position[0];
|
|
||||||
const y = position[1];
|
|
||||||
const z = position[2];
|
|
||||||
const w = position[3];
|
|
||||||
|
|
||||||
return switch (plane) {
|
|
||||||
.Left => x + w,
|
|
||||||
.Right => w - x,
|
|
||||||
.Bottom => y + w,
|
|
||||||
.Top => w - y,
|
|
||||||
.Near => z,
|
|
||||||
.Far => w - z,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
fn vertexInsidePlane(vertex: *const Vertex, plane: ClipPlane) bool {
|
|
||||||
return clipDistance(vertex.position, plane) >= 0.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn copyBlob(allocator: std.mem.Allocator, blob: []const u8) VkError![]u8 {
|
|
||||||
const result = allocator.alloc(u8, blob.len) catch return VkError.OutOfDeviceMemory;
|
|
||||||
@memcpy(result, blob);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn writePacked(comptime T: type, bytes: []u8, value: T) void {
|
|
||||||
const raw: [@sizeOf(T)]u8 = @bitCast(value);
|
|
||||||
@memcpy(bytes[0..@sizeOf(T)], raw[0..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn interpolateBlob(allocator: std.mem.Allocator, a: []const u8, b: []const u8, t: f32) VkError![]u8 {
|
|
||||||
const len = @min(a.len, b.len);
|
|
||||||
const result = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;
|
|
||||||
|
|
||||||
var byte_index: usize = 0;
|
|
||||||
while (byte_index + @sizeOf(F32x4) <= len) : (byte_index += @sizeOf(F32x4)) {
|
|
||||||
const value_a = std.mem.bytesToValue(F32x4, a[byte_index..]);
|
|
||||||
const value_b = std.mem.bytesToValue(F32x4, b[byte_index..]);
|
|
||||||
writePacked(F32x4, result[byte_index..], value_a + ((value_b - value_a) * @as(F32x4, @splat(t))));
|
|
||||||
}
|
|
||||||
|
|
||||||
while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
|
|
||||||
const value_a = std.mem.bytesToValue(f32, a[byte_index..]);
|
|
||||||
const value_b = std.mem.bytesToValue(f32, b[byte_index..]);
|
|
||||||
writePacked(f32, result[byte_index..], value_a + ((value_b - value_a) * t));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (byte_index < len)
|
|
||||||
@memcpy(result[byte_index..], a[byte_index..len]);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn interpolateVertexForClipping(allocator: std.mem.Allocator, a: *const Vertex, b: *const Vertex, t: f32) VkError!Vertex {
|
|
||||||
var result: Vertex = .{
|
|
||||||
.position = a.position + ((b.position - a.position) * @as(F32x4, @splat(t))),
|
|
||||||
.outputs = undefined,
|
|
||||||
};
|
|
||||||
|
|
||||||
@memset(result.outputs[0..], null);
|
|
||||||
|
|
||||||
for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
|
|
||||||
const out_a = a.outputs[location] orelse continue;
|
|
||||||
const out_b = b.outputs[location] orelse continue;
|
|
||||||
|
|
||||||
result.outputs[location] = .{
|
|
||||||
.interpolation_type = out_a.interpolation_type,
|
|
||||||
.blob = if (out_a.interpolation_type == .flat)
|
|
||||||
try copyBlob(allocator, out_a.blob)
|
|
||||||
else
|
|
||||||
try interpolateBlob(allocator, out_a.blob, out_b.blob, t),
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clipPolygonAgainstPlane(allocator: std.mem.Allocator, input: *const ClippedPolygon, plane: ClipPlane) VkError!ClippedPolygon {
|
|
||||||
var output: ClippedPolygon = .{};
|
|
||||||
|
|
||||||
if (input.len == 0)
|
|
||||||
return output;
|
|
||||||
|
|
||||||
var previous = input.vertices[input.len - 1];
|
|
||||||
var previous_inside = vertexInsidePlane(&previous, plane);
|
|
||||||
var previous_distance = clipDistance(previous.position, plane);
|
|
||||||
|
|
||||||
for (input.vertices[0..input.len]) |current| {
|
|
||||||
const current_inside = vertexInsidePlane(¤t, plane);
|
|
||||||
const current_distance = clipDistance(current.position, plane);
|
|
||||||
|
|
||||||
if (current_inside != previous_inside) {
|
|
||||||
const t = previous_distance / (previous_distance - current_distance);
|
|
||||||
try output.append(try interpolateVertexForClipping(allocator, &previous, ¤t, t));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (current_inside)
|
|
||||||
try output.append(current);
|
|
||||||
|
|
||||||
previous = current;
|
|
||||||
previous_inside = current_inside;
|
|
||||||
previous_distance = current_distance;
|
|
||||||
}
|
|
||||||
|
|
||||||
return output;
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn clipTriangle(allocator: std.mem.Allocator, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!ClippedPolygon {
|
pub fn clipTriangle(allocator: std.mem.Allocator, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!ClippedPolygon {
|
||||||
var polygon: ClippedPolygon = .{};
|
var polygon: ClippedPolygon = .{};
|
||||||
try polygon.append(v0.*);
|
try polygon.append(v0.*);
|
||||||
@@ -189,3 +80,96 @@ pub fn viewportTransformVertex(viewport: vk.Viewport, vertex: *Vertex) void {
|
|||||||
|
|
||||||
vertex.position = zm.f32x4(x_screen, y_screen, z_screen, w);
|
vertex.position = zm.f32x4(x_screen, y_screen, z_screen, w);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Evaluates the expression associated with `plane` on the components of
/// `position` (x, y, z, w). `isVertexInsidePlane` treats a result that is
/// greater than or equal to zero as "inside".
fn clipDistance(position: F32x4, plane: ClipPlane) f32 {
    const w = position[3];

    return switch (plane) {
        .Left => position[0] + w,
        .Right => w - position[0],
        .Bottom => position[1] + w,
        .Top => w - position[1],
        .Near => position[2],
        .Far => w - position[2],
    };
}
|
||||||
|
|
||||||
|
/// Returns true when the clip distance of `vertex` against `plane` is
/// greater than or equal to zero.
fn isVertexInsidePlane(vertex: *const Vertex, plane: ClipPlane) bool {
    const signed_distance = clipDistance(vertex.position, plane);
    return signed_distance >= 0.0;
}
|
||||||
|
|
||||||
|
/// Linearly interpolates two byte blobs, reading them as packed `f32` data.
///
/// Only the first `@min(a.len, b.len)` bytes take part. They are processed in
/// `F32x4`-sized chunks first, then one `f32` at a time; any remaining tail
/// (shorter than an `f32`) is copied from `a` unchanged.
///
/// The result is allocated from `allocator`; `VkError.OutOfDeviceMemory` is
/// returned when that allocation fails.
fn interpolateBlob(allocator: std.mem.Allocator, a: []const u8, b: []const u8, t: f32) VkError![]u8 {
    const len = @min(a.len, b.len);
    const result = allocator.alloc(u8, len) catch return VkError.OutOfDeviceMemory;

    const wide_step = @sizeOf(F32x4);
    const scalar_step = @sizeOf(f32);
    const factor = zm.f32x4s(t);

    var offset: usize = 0;

    // Wide pass: four floats per iteration.
    while (offset + wide_step <= len) {
        const from = std.mem.bytesToValue(F32x4, a[offset..]);
        const to = std.mem.bytesToValue(F32x4, b[offset..]);
        base.utils.writePacked(F32x4, result[offset..], from + ((to - from) * factor));
        offset += wide_step;
    }

    // Scalar pass: whatever whole floats are left.
    while (offset + scalar_step <= len) {
        const from = std.mem.bytesToValue(f32, a[offset..]);
        const to = std.mem.bytesToValue(f32, b[offset..]);
        base.utils.writePacked(f32, result[offset..], from + ((to - from) * t));
        offset += scalar_step;
    }

    // Trailing bytes that do not form a whole float are taken verbatim from `a`.
    if (offset < len) {
        @memcpy(result[offset..], a[offset..len]);
    }

    return result;
}
|
||||||
|
|
||||||
|
/// Builds the vertex located at parameter `t` on the edge from `a` to `b`.
///
/// The position is interpolated linearly. For every output location that is
/// populated on both `a` and `b`, a new blob is allocated from `allocator`:
/// a copy of `a`'s blob when `a`'s interpolation type is `.flat`, otherwise
/// the result of `interpolateBlob`. Locations missing on either side stay `null`.
///
/// Returns `VkError.OutOfDeviceMemory` when an allocation fails; in that case
/// every blob already allocated for the partially built vertex is freed.
fn interpolateVertexForClipping(allocator: std.mem.Allocator, a: *const Vertex, b: *const Vertex, t: f32) VkError!Vertex {
    var result: Vertex = .{
        .position = a.position + ((b.position - a.position) * zm.f32x4s(t)),
        .outputs = undefined,
    };

    @memset(result.outputs[0..], null);

    // Every non-null slot in `result` was allocated below, so on failure
    // they can all be released without touching `a` or `b`.
    errdefer {
        for (result.outputs[0..]) |maybe_output| {
            if (maybe_output) |output| allocator.free(output.blob);
        }
    }

    for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| {
        const out_a = a.outputs[location] orelse continue;
        const out_b = b.outputs[location] orelse continue;

        result.outputs[location] = .{
            .interpolation_type = out_a.interpolation_type,
            .blob = if (out_a.interpolation_type == .flat)
                allocator.dupe(u8, out_a.blob) catch return VkError.OutOfDeviceMemory
            else
                try interpolateBlob(allocator, out_a.blob, out_b.blob, t),
        };
    }

    return result;
}
|
||||||
|
|
||||||
|
/// Clips `input` against a single `plane` and returns the resulting polygon.
///
/// Each edge (previous vertex -> current vertex, starting with the edge that
/// closes the polygon) is visited once: when its endpoints lie on different
/// sides of the plane, a vertex interpolated at the crossing is appended, and
/// the current vertex is appended whenever it is inside. An empty input yields
/// an empty polygon.
fn clipPolygonAgainstPlane(allocator: std.mem.Allocator, input: *const ClippedPolygon, plane: ClipPlane) VkError!ClippedPolygon {
    var output: ClippedPolygon = .{};

    if (input.len == 0) {
        return output;
    }

    // Begin with the last vertex so the first edge handled is the closing one.
    var previous: *const Vertex = &input.vertices[input.len - 1];
    var previous_distance = clipDistance(previous.position, plane);
    var previous_inside = isVertexInsidePlane(previous, plane);

    for (input.vertices[0..input.len]) |*current| {
        const current_distance = clipDistance(current.position, plane);
        const current_inside = isVertexInsidePlane(current, plane);

        if (current_inside != previous_inside) {
            // The edge crosses the plane: emit the vertex at the crossing.
            const t = previous_distance / (previous_distance - current_distance);
            const crossing = try interpolateVertexForClipping(allocator, previous, current, t);
            try output.append(crossing);
        }

        if (current_inside) {
            try output.append(current.*);
        }

        previous = current;
        previous_distance = current_distance;
        previous_inside = current_inside;
    }

    return output;
}
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *const Vertex, v1: *const Vertex, v2: *const Vertex) VkError!void {
|
fn clipTransformAndRasterizeTriangle(renderer: *Renderer, allocator: std.mem.Allocator, draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, v2: *Vertex) VkError!void {
|
||||||
const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2);
|
const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2);
|
||||||
|
|
||||||
if (clipped_polygon.len < 3)
|
if (clipped_polygon.len < 3)
|
||||||
|
|||||||
@@ -100,7 +100,7 @@ pub fn drawLine(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall, v0:
|
|||||||
wg.await(io) catch return VkError.DeviceLost;
|
wg.await(io) catch return VkError.DeviceLost;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 {
|
fn bresenhamYAtStep(y0: i32, d_x: i32, d_err: i32, y_step: i32, step: usize) i32 {
|
||||||
if (d_x == 0)
|
if (d_x == 0)
|
||||||
return y0;
|
return y0;
|
||||||
|
|
||||||
|
|||||||
@@ -25,15 +25,6 @@ pub fn scissorContainsPixel(scissor: vk.Rect2D, x: i32, y: i32) bool {
|
|||||||
pixel_y < max_y;
|
pixel_y < max_y;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn writePacked(comptime T: type, bytes: []u8, value: T) void {
|
|
||||||
const raw: [@sizeOf(T)]u8 = @bitCast(value);
|
|
||||||
@memcpy(bytes[0..@sizeOf(T)], raw[0..]);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
|
|
||||||
return (value0 * @as(F32x4, @splat(b0))) + (value1 * @as(F32x4, @splat(b1))) + (value2 * @as(F32x4, @splat(b2)));
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn interpolateVertexOutputs(
|
pub fn interpolateVertexOutputs(
|
||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
v0: *const Renderer.Vertex,
|
v0: *const Renderer.Vertex,
|
||||||
@@ -63,14 +54,14 @@ pub fn interpolateVertexOutputs(
|
|||||||
const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]);
|
const value0 = std.mem.bytesToValue(F32x4, out0.blob[byte_index..]);
|
||||||
const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]);
|
const value1 = std.mem.bytesToValue(F32x4, out1.blob[byte_index..]);
|
||||||
const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]);
|
const value2 = std.mem.bytesToValue(F32x4, out2.blob[byte_index..]);
|
||||||
writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
|
base.utils.writePacked(F32x4, input[byte_index..], interpolateF32x4(value0, value1, value2, b0, b1, b2));
|
||||||
}
|
}
|
||||||
|
|
||||||
while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
|
while (byte_index + @sizeOf(f32) <= len) : (byte_index += @sizeOf(f32)) {
|
||||||
const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]);
|
const value0 = std.mem.bytesToValue(f32, out0.blob[byte_index..]);
|
||||||
const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]);
|
const value1 = std.mem.bytesToValue(f32, out1.blob[byte_index..]);
|
||||||
const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]);
|
const value2 = std.mem.bytesToValue(f32, out2.blob[byte_index..]);
|
||||||
writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
|
base.utils.writePacked(f32, input[byte_index..], (value0 * b0) + (value1 * b1) + (value2 * b2));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (byte_index < len)
|
if (byte_index < len)
|
||||||
@@ -85,3 +76,7 @@ pub fn interpolateVertexOutputs(
|
|||||||
pub fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
|
pub fn interpolateLineOutputs(allocator: std.mem.Allocator, v0: *const Renderer.Vertex, v1: *const Renderer.Vertex, t: f32) VkError![spv.SPIRV_MAX_OUTPUT_LOCATIONS][]u8 {
|
||||||
return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0);
|
return interpolateVertexOutputs(allocator, v0, v1, v0, 1.0 - t, t, 0.0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Weighted sum of three vectors: `value0 * b0 + value1 * b1 + value2 * b2`,
/// with each scalar weight broadcast across all four lanes.
inline fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, b1: f32, b2: f32) F32x4 {
    const weighted0 = value0 * zm.f32x4s(b0);
    const weighted1 = value1 * zm.f32x4s(b1);
    const weighted2 = value2 * zm.f32x4s(b2);

    // Keep the left-to-right summation order.
    return (weighted0 + weighted1) + weighted2;
}
|
||||||
|
|||||||
@@ -42,7 +42,6 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
|
|||||||
|
|
||||||
const pipeline = draw_call.renderer.state.pipeline orelse return;
|
const pipeline = draw_call.renderer.state.pipeline orelse return;
|
||||||
|
|
||||||
var wg: std.Io.Group = .init;
|
|
||||||
const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
|
const runtimes_count = (pipeline.stages.getPtr(.fragment) orelse return).runtimes.len;
|
||||||
const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
|
const grid_size: usize = @intFromFloat(@floor(@sqrt(@as(f32, @floatFromInt(runtimes_count)))));
|
||||||
|
|
||||||
@@ -53,6 +52,8 @@ pub fn drawTriangle(allocator: std.mem.Allocator, draw_call: *Renderer.DrawCall,
|
|||||||
const rows_per_run = @divTrunc(height + grid_size - 1, grid_size);
|
const rows_per_run = @divTrunc(height + grid_size - 1, grid_size);
|
||||||
|
|
||||||
var batch_id: usize = 0;
|
var batch_id: usize = 0;
|
||||||
|
|
||||||
|
var wg: std.Io.Group = .init;
|
||||||
for (0..grid_size) |gy| {
|
for (0..grid_size) |gy| {
|
||||||
for (0..grid_size) |gx| {
|
for (0..grid_size) |gx| {
|
||||||
defer batch_id = @mod(batch_id + 1, runtimes_count);
|
defer batch_id = @mod(batch_id + 1, runtimes_count);
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ pub const lib_vulkan = @import("lib_vulkan.zig");
|
|||||||
pub const logger = @import("logger.zig");
|
pub const logger = @import("logger.zig");
|
||||||
pub const format = @import("format.zig");
|
pub const format = @import("format.zig");
|
||||||
pub const config = @import("config");
|
pub const config = @import("config");
|
||||||
|
pub const utils = @import("utils.zig");
|
||||||
|
|
||||||
pub const Dispatchable = @import("Dispatchable.zig").Dispatchable;
|
pub const Dispatchable = @import("Dispatchable.zig").Dispatchable;
|
||||||
pub const fallback_host_allocator = @import("fallback_host_allocator.zig").fallback_host_allocator;
|
pub const fallback_host_allocator = @import("fallback_host_allocator.zig").fallback_host_allocator;
|
||||||
|
|||||||
@@ -0,0 +1,4 @@
|
|||||||
|
/// Stores the in-memory representation of `value` into the first
/// `@sizeOf(T)` bytes of `bytes`. No alignment is required of `bytes`.
/// `bytes` must hold at least `@sizeOf(T)` bytes.
pub fn writePacked(comptime T: type, bytes: []u8, value: T) void {
    // Slicing with comptime-known bounds yields a pointer to a fixed-size
    // array, so the reinterpreted value can be stored in one assignment.
    bytes[0..@sizeOf(T)].* = @bitCast(value);
}
|
||||||
Reference in New Issue
Block a user