reworking command buffers, adding soft compute routines
This commit is contained in:
@@ -12,16 +12,210 @@ const VkError = base.VkError;
|
||||
|
||||
const Self = @This();
|
||||
|
||||
const RunData = struct {
|
||||
self: *Self,
|
||||
batch_id: usize,
|
||||
group_count: usize,
|
||||
group_count_x: usize,
|
||||
group_count_y: usize,
|
||||
group_count_z: usize,
|
||||
subgroups_per_workgroup: usize,
|
||||
pipeline: *SoftPipeline,
|
||||
};
|
||||
|
||||
device: *SoftDevice,
|
||||
state: *PipelineState,
|
||||
batch_size: usize,
|
||||
|
||||
pub fn init(device: *SoftDevice, state: *PipelineState) Self {
|
||||
return .{
|
||||
.device = device,
|
||||
.state = state,
|
||||
.batch_size = 0,
|
||||
};
|
||||
}
|
||||
|
||||
pub fn destroy(self: *Self) void {
|
||||
_ = self;
|
||||
}
|
||||
|
||||
pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count_z: u32) VkError!void {
|
||||
const group_count: usize = @intCast(group_count_x * group_count_y * group_count_z);
|
||||
|
||||
const pipeline = self.state.pipeline orelse return VkError.InvalidPipelineDrv;
|
||||
const shader = pipeline.stages.getPtr(.compute) orelse return VkError.InvalidPipelineDrv;
|
||||
const spv_module = &shader.module.module;
|
||||
self.batch_size = shader.runtimes.len;
|
||||
|
||||
const invocations_per_subgroup = 4;
|
||||
const invocations_per_workgroup = spv_module.local_size_x * spv_module.local_size_y * spv_module.local_size_z;
|
||||
const subgroups_per_workgroup = @divTrunc(invocations_per_workgroup + invocations_per_subgroup - 1, invocations_per_subgroup);
|
||||
|
||||
var wg: std.Thread.WaitGroup = .{};
|
||||
for (0..@min(self.batch_size, group_count)) |batch_id| {
|
||||
self.device.workers.spawnWg(&wg, runWrapper, .{
|
||||
RunData{
|
||||
.self = self,
|
||||
.batch_id = batch_id,
|
||||
.group_count = group_count,
|
||||
.group_count_x = @as(usize, @intCast(group_count_x)),
|
||||
.group_count_y = @as(usize, @intCast(group_count_y)),
|
||||
.group_count_z = @as(usize, @intCast(group_count_z)),
|
||||
.subgroups_per_workgroup = subgroups_per_workgroup,
|
||||
.pipeline = pipeline,
|
||||
},
|
||||
});
|
||||
}
|
||||
self.device.workers.waitAndWork(&wg);
|
||||
}
|
||||
|
||||
fn runWrapper(data: RunData) void {
|
||||
@call(.always_inline, run, .{data}) catch |err| {
|
||||
std.log.scoped(.@"SPIR-V runtime").err("SPIR-V runtime catched a '{s}'", .{@errorName(err)});
|
||||
if (@errorReturnTrace()) |trace| {
|
||||
std.debug.dumpStackTrace(trace.*);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
inline fn run(data: RunData) !void {
|
||||
const allocator = data.self.device.device_allocator.allocator();
|
||||
|
||||
const shader = data.pipeline.stages.getPtrAssertContains(.compute);
|
||||
const rt = &shader.runtimes[data.batch_id];
|
||||
|
||||
const entry = try rt.getEntryPointByName(shader.entry);
|
||||
|
||||
var group_index: usize = data.batch_id;
|
||||
while (group_index < data.group_count) : (group_index += data.self.batch_size) {
|
||||
var modulo: usize = group_index;
|
||||
|
||||
const group_z = @divTrunc(modulo, data.group_count_x * data.group_count_y);
|
||||
|
||||
modulo -= group_z * data.group_count_x * data.group_count_y;
|
||||
const group_y = @divTrunc(modulo, data.group_count_x);
|
||||
|
||||
modulo -= group_y * data.group_count_x;
|
||||
const group_x = modulo;
|
||||
|
||||
try setupWorkgroupBuiltins(
|
||||
data.self,
|
||||
rt,
|
||||
.{
|
||||
@as(u32, @intCast(data.group_count_x)),
|
||||
@as(u32, @intCast(data.group_count_y)),
|
||||
@as(u32, @intCast(data.group_count_z)),
|
||||
},
|
||||
.{
|
||||
@as(u32, @intCast(group_x)),
|
||||
@as(u32, @intCast(group_y)),
|
||||
@as(u32, @intCast(group_z)),
|
||||
},
|
||||
);
|
||||
|
||||
for (0..data.subgroups_per_workgroup) |i| {
|
||||
try setupSubgroupBuiltins(
|
||||
data.self,
|
||||
rt,
|
||||
.{
|
||||
@as(u32, @intCast(group_x)),
|
||||
@as(u32, @intCast(group_y)),
|
||||
@as(u32, @intCast(group_z)),
|
||||
},
|
||||
i,
|
||||
);
|
||||
try data.self.syncDescriptorSets(allocator, rt, true);
|
||||
|
||||
rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
|
||||
spv.Runtime.RuntimeError.OutOfBounds => {},
|
||||
else => return err,
|
||||
};
|
||||
|
||||
try data.self.syncDescriptorSets(allocator, rt, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn syncDescriptorSets(self: *Self, allocator: std.mem.Allocator, rt: *spv.Runtime, write: bool) !void {
|
||||
sets: for (self.state.sets[0..], 0..) |set, set_index| {
|
||||
if (set == null)
|
||||
continue :sets;
|
||||
|
||||
bindings: for (set.?.descriptors[0..], 0..) |binding, binding_index| {
|
||||
switch (binding) {
|
||||
.buffer => |buffer_data| if (buffer_data.object) |buffer| {
|
||||
const memory = if (buffer.interface.memory) |memory| memory else continue :bindings;
|
||||
const map: []u8 = @as([*]u8, @ptrCast(try memory.map(buffer_data.offset, buffer_data.size)))[0..buffer_data.size];
|
||||
if (write) {
|
||||
try rt.writeDescriptorSet(
|
||||
allocator,
|
||||
map,
|
||||
@as(u32, @intCast(set_index)),
|
||||
@as(u32, @intCast(binding_index)),
|
||||
);
|
||||
} else {
|
||||
try rt.readDescriptorSet(
|
||||
map,
|
||||
@as(u32, @intCast(set_index)),
|
||||
@as(u32, @intCast(binding_index)),
|
||||
);
|
||||
}
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn setupWorkgroupBuiltins(
|
||||
self: *Self,
|
||||
rt: *spv.Runtime,
|
||||
group_count: [3]u32,
|
||||
group_id: [3]u32,
|
||||
) spv.Runtime.RuntimeError!void {
|
||||
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
||||
const workgroup_size = [3]u32{
|
||||
spv_module.local_size_x,
|
||||
spv_module.local_size_y,
|
||||
spv_module.local_size_z,
|
||||
};
|
||||
|
||||
rt.writeBuiltIn(std.mem.asBytes(&workgroup_size), .WorkgroupSize) catch {};
|
||||
rt.writeBuiltIn(std.mem.asBytes(&group_count), .NumWorkgroups) catch {};
|
||||
rt.writeBuiltIn(std.mem.asBytes(&group_id), .WorkgroupId) catch {};
|
||||
}
|
||||
|
||||
fn setupSubgroupBuiltins(
|
||||
self: *Self,
|
||||
rt: *spv.Runtime,
|
||||
group_id: [3]u32,
|
||||
local_invocation_index: usize,
|
||||
) spv.Runtime.RuntimeError!void {
|
||||
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
||||
const workgroup_size = [3]u32{
|
||||
spv_module.local_size_x,
|
||||
spv_module.local_size_y,
|
||||
spv_module.local_size_z,
|
||||
};
|
||||
const local_base = [3]u32{
|
||||
workgroup_size[0] * group_id[0],
|
||||
workgroup_size[1] * group_id[1],
|
||||
workgroup_size[2] * group_id[2],
|
||||
};
|
||||
var local_invocation = [3]u32{ 0, 0, 0 };
|
||||
|
||||
var idx: u32 = @intCast(local_invocation_index);
|
||||
local_invocation[2] = @divTrunc(idx, workgroup_size[0] * workgroup_size[1]);
|
||||
idx -= local_invocation[2] * workgroup_size[0] * workgroup_size[1];
|
||||
local_invocation[1] = @divTrunc(idx, workgroup_size[0]);
|
||||
idx -= local_invocation[1] * workgroup_size[0];
|
||||
local_invocation[0] = idx;
|
||||
|
||||
const global_invocation_index = [3]u32{
|
||||
local_base[0] + local_invocation[0],
|
||||
local_base[1] + local_invocation[1],
|
||||
local_base[2] + local_invocation[2],
|
||||
};
|
||||
|
||||
rt.writeBuiltIn(std.mem.asBytes(&global_invocation_index), .GlobalInvocationId) catch {};
|
||||
}
|
||||
|
||||
@@ -2,8 +2,6 @@ const std = @import("std");
|
||||
const vk = @import("vulkan");
|
||||
const base = @import("base");
|
||||
|
||||
const copy_routines = @import("copy_routines.zig");
|
||||
|
||||
const SoftDescriptorSet = @import("../SoftDescriptorSet.zig");
|
||||
const SoftDevice = @import("../SoftDevice.zig");
|
||||
const SoftPipeline = @import("../SoftPipeline.zig");
|
||||
@@ -11,50 +9,31 @@ const SoftPipeline = @import("../SoftPipeline.zig");
|
||||
const ComputeRoutines = @import("ComputeRoutines.zig");
|
||||
const PipelineState = @import("PipelineState.zig");
|
||||
|
||||
const cmd = base.commands;
|
||||
const VkError = base.VkError;
|
||||
|
||||
const Self = @This();
|
||||
|
||||
compute_routine: ComputeRoutines,
|
||||
compute_routines: ComputeRoutines,
|
||||
|
||||
/// .graphics = 0
|
||||
/// .compute = 1
|
||||
pipeline_states: [2]PipelineState,
|
||||
|
||||
pub fn init(device: *SoftDevice) Self {
|
||||
var self: Self = undefined;
|
||||
pub const init: Self = .{
|
||||
.compute_routines = undefined,
|
||||
.pipeline_states = undefined,
|
||||
};
|
||||
|
||||
pub fn setup(self: *Self, device: *SoftDevice) void {
|
||||
for (self.pipeline_states[0..]) |*state| {
|
||||
state.* = .{
|
||||
.pipeline = null,
|
||||
.sets = [_]?*SoftDescriptorSet{null} ** base.VULKAN_MAX_DESCRIPTOR_SETS,
|
||||
};
|
||||
}
|
||||
|
||||
self.compute_routine = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.compute)]);
|
||||
|
||||
return self;
|
||||
self.compute_routines = .init(device, &self.pipeline_states[@intFromEnum(vk.PipelineBindPoint.compute)]);
|
||||
}
|
||||
|
||||
pub fn deinit(self: *Self) void {
|
||||
self.compute_routine.destroy();
|
||||
}
|
||||
|
||||
pub fn execute(self: *Self, command: *const cmd.Command) VkError!void {
|
||||
switch (command.*) {
|
||||
.BindDescriptorSets => |data| {
|
||||
for (data.first_set.., data.sets[0..]) |i, set| {
|
||||
if (set == null) break;
|
||||
self.pipeline_states[@intCast(@intFromEnum(data.bind_point))].sets[i] = @alignCast(@fieldParentPtr("interface", set.?));
|
||||
}
|
||||
},
|
||||
.BindPipeline => |data| self.pipeline_states[@intCast(@intFromEnum(data.bind_point))].pipeline = @alignCast(@fieldParentPtr("interface", data.pipeline)),
|
||||
.ClearColorImage => |data| try copy_routines.clearColorImage(&data),
|
||||
.CopyBuffer => |data| try copy_routines.copyBuffer(&data),
|
||||
.CopyImage => |data| try copy_routines.copyImage(&data),
|
||||
.CopyImageToBuffer => |data| try copy_routines.copyImageToBuffer(&data),
|
||||
.FillBuffer => |data| try copy_routines.fillBuffer(&data),
|
||||
else => {},
|
||||
}
|
||||
self.compute_routines.destroy();
|
||||
}
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
const std = @import("std");
|
||||
const vk = @import("vulkan");
|
||||
const base = @import("base");
|
||||
|
||||
const cmd = base.commands;
|
||||
const VkError = base.VkError;
|
||||
|
||||
const SoftImage = @import("../SoftImage.zig");
|
||||
|
||||
pub fn clearColorImage(data: *const cmd.CommandClearColorImage) VkError!void {
|
||||
const soft_image: *SoftImage = @alignCast(@fieldParentPtr("interface", data.image));
|
||||
soft_image.clearRange(data.clear_color, data.range);
|
||||
}
|
||||
|
||||
pub fn copyBuffer(data: *const cmd.CommandCopyBuffer) VkError!void {
|
||||
for (data.regions) |region| {
|
||||
const src_memory = if (data.src.memory) |memory| memory else return VkError.ValidationFailed;
|
||||
const dst_memory = if (data.dst.memory) |memory| memory else return VkError.ValidationFailed;
|
||||
|
||||
const src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(region.src_offset, region.size)))[0..region.size];
|
||||
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(region.dst_offset, region.size)))[0..region.size];
|
||||
|
||||
@memcpy(dst_map, src_map);
|
||||
|
||||
src_memory.unmap();
|
||||
dst_memory.unmap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn copyImage(data: *const cmd.CommandCopyImage) VkError!void {
|
||||
_ = data;
|
||||
std.log.scoped(.commandExecutor).warn("FIXME: implement image to image copy", .{});
|
||||
}
|
||||
|
||||
pub fn copyImageToBuffer(data: *const cmd.CommandCopyImageToBuffer) VkError!void {
|
||||
for (data.regions) |region| {
|
||||
const src_memory = if (data.src.memory) |memory| memory else return VkError.ValidationFailed;
|
||||
const dst_memory = if (data.dst.memory) |memory| memory else return VkError.ValidationFailed;
|
||||
|
||||
const pixel_size: u32 = @intCast(data.src.getPixelSize());
|
||||
const image_row_pitch: u32 = data.src.extent.width * pixel_size;
|
||||
const image_size: u32 = @intCast(data.src.getTotalSize());
|
||||
|
||||
const buffer_row_length: u32 = if (region.buffer_row_length != 0) region.buffer_row_length else region.image_extent.width;
|
||||
const buffer_row_pitch: u32 = buffer_row_length * pixel_size;
|
||||
const buffer_size: u32 = buffer_row_pitch * region.image_extent.height * region.image_extent.depth;
|
||||
|
||||
const src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(0, image_size)))[0..image_size];
|
||||
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(region.buffer_offset, buffer_size)))[0..buffer_size];
|
||||
|
||||
const row_size = region.image_extent.width * pixel_size;
|
||||
for (0..data.src.extent.depth) |z| {
|
||||
for (0..data.src.extent.height) |y| {
|
||||
const z_as_u32: u32 = @intCast(z);
|
||||
const y_as_u32: u32 = @intCast(y);
|
||||
|
||||
const src_offset = ((@as(u32, @intCast(region.image_offset.z)) + z_as_u32) * data.src.extent.height + @as(u32, @intCast(region.image_offset.y)) + y_as_u32) * image_row_pitch + @as(u32, @intCast(region.image_offset.x)) * pixel_size;
|
||||
const dst_offset = (z_as_u32 * buffer_row_length * region.image_extent.height + y_as_u32 * buffer_row_length) * pixel_size;
|
||||
|
||||
const src_slice = src_map[src_offset..(src_offset + row_size)];
|
||||
const dst_slice = dst_map[dst_offset..(dst_offset + row_size)];
|
||||
@memcpy(dst_slice, src_slice);
|
||||
}
|
||||
}
|
||||
|
||||
src_memory.unmap();
|
||||
dst_memory.unmap();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn fillBuffer(data: *const cmd.CommandFillBuffer) VkError!void {
|
||||
const memory = if (data.buffer.memory) |memory| memory else return VkError.ValidationFailed;
|
||||
var memory_map: []u32 = @as([*]u32, @ptrCast(@alignCast(try memory.map(data.offset, data.size))))[0..data.size];
|
||||
|
||||
var bytes = if (data.size == vk.WHOLE_SIZE) memory.size - data.offset else data.size;
|
||||
|
||||
var i: usize = 0;
|
||||
while (bytes >= 4) : ({
|
||||
bytes -= 4;
|
||||
i += 1;
|
||||
}) {
|
||||
memory_map[i] = data.data;
|
||||
}
|
||||
|
||||
memory.unmap();
|
||||
}
|
||||
Reference in New Issue
Block a user