working compute pipelines
This commit is contained in:
@@ -90,6 +90,8 @@ pub fn build(b: *std.Build) !void {
|
|||||||
|
|
||||||
lib.step.dependOn(&icd_file.step);
|
lib.step.dependOn(&icd_file.step);
|
||||||
const lib_install = b.addInstallArtifact(lib, .{});
|
const lib_install = b.addInstallArtifact(lib, .{});
|
||||||
|
const install_step = b.step(impl.name, b.fmt("Build libvulkan_{s}", .{impl.name}));
|
||||||
|
install_step.dependOn(&lib_install.step);
|
||||||
|
|
||||||
const lib_tests = b.addTest(.{ .root_module = lib_mod });
|
const lib_tests = b.addTest(.{ .root_module = lib_mod });
|
||||||
|
|
||||||
|
|||||||
@@ -59,8 +59,8 @@
|
|||||||
.lazy = true,
|
.lazy = true,
|
||||||
},
|
},
|
||||||
.SPIRV_Interpreter = .{
|
.SPIRV_Interpreter = .{
|
||||||
.url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#2409ec726946a314f795b6edb5ae3ddd3eb7426c",
|
.url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#e09a41754ffa115ba1668f82698140b2b727b7fd",
|
||||||
.hash = "SPIRV_Interpreter-0.0.1-ajmpnyuJAwD5jM0piGGnEq07unzNZyEQ_GmBp_PVMg2X",
|
.hash = "SPIRV_Interpreter-0.0.1-ajmpn5eJAwCe-SNkaCifjF0UUqE6cww18VpqO0Qip_mp",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
||||||
|
|||||||
@@ -52,8 +52,8 @@ pub fn copyBuffer(self: *const Self, dst: *Self, regions: []const vk.BufferCopy)
|
|||||||
const src_memory = if (self.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
const src_memory = if (self.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
||||||
const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
||||||
|
|
||||||
const src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(region.src_offset, region.size)))[0..region.size];
|
const src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(self.interface.offset + region.src_offset, region.size)))[0..region.size];
|
||||||
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(region.dst_offset, region.size)))[0..region.size];
|
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.offset + region.dst_offset, region.size)))[0..region.size];
|
||||||
|
|
||||||
@memcpy(dst_map, src_map);
|
@memcpy(dst_map, src_map);
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ const SoftDevice = @import("../SoftDevice.zig");
|
|||||||
const SoftPipeline = @import("../SoftPipeline.zig");
|
const SoftPipeline = @import("../SoftPipeline.zig");
|
||||||
|
|
||||||
const VkError = base.VkError;
|
const VkError = base.VkError;
|
||||||
|
const SpvRuntimeError = spv.Runtime.RuntimeError;
|
||||||
|
|
||||||
const Self = @This();
|
const Self = @This();
|
||||||
|
|
||||||
@@ -19,7 +20,7 @@ const RunData = struct {
|
|||||||
group_count_x: usize,
|
group_count_x: usize,
|
||||||
group_count_y: usize,
|
group_count_y: usize,
|
||||||
group_count_z: usize,
|
group_count_z: usize,
|
||||||
subgroups_per_workgroup: usize,
|
invocations_per_workgroup: usize,
|
||||||
pipeline: *SoftPipeline,
|
pipeline: *SoftPipeline,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -47,13 +48,23 @@ pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count
|
|||||||
const spv_module = &shader.module.module;
|
const spv_module = &shader.module.module;
|
||||||
self.batch_size = shader.runtimes.len;
|
self.batch_size = shader.runtimes.len;
|
||||||
|
|
||||||
const invocations_per_subgroup = 4;
|
|
||||||
const invocations_per_workgroup = spv_module.local_size_x * spv_module.local_size_y * spv_module.local_size_z;
|
const invocations_per_workgroup = spv_module.local_size_x * spv_module.local_size_y * spv_module.local_size_z;
|
||||||
const subgroups_per_workgroup = @divTrunc(invocations_per_workgroup + invocations_per_subgroup - 1, invocations_per_subgroup);
|
|
||||||
|
|
||||||
var wg: std.Thread.WaitGroup = .{};
|
//var wg: std.Thread.WaitGroup = .{};
|
||||||
for (0..@min(self.batch_size, group_count)) |batch_id| {
|
for (0..@min(self.batch_size, group_count)) |batch_id| {
|
||||||
self.device.workers.spawnWg(&wg, runWrapper, .{
|
//self.device.workers.spawnWg(&wg, runWrapper, .{
|
||||||
|
// RunData{
|
||||||
|
// .self = self,
|
||||||
|
// .batch_id = batch_id,
|
||||||
|
// .group_count = group_count,
|
||||||
|
// .group_count_x = @as(usize, @intCast(group_count_x)),
|
||||||
|
// .group_count_y = @as(usize, @intCast(group_count_y)),
|
||||||
|
// .group_count_z = @as(usize, @intCast(group_count_z)),
|
||||||
|
// .invocations_per_workgroup = invocations_per_workgroup,
|
||||||
|
// .pipeline = pipeline,
|
||||||
|
// },
|
||||||
|
//});
|
||||||
|
runWrapper(
|
||||||
RunData{
|
RunData{
|
||||||
.self = self,
|
.self = self,
|
||||||
.batch_id = batch_id,
|
.batch_id = batch_id,
|
||||||
@@ -61,12 +72,12 @@ pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count
|
|||||||
.group_count_x = @as(usize, @intCast(group_count_x)),
|
.group_count_x = @as(usize, @intCast(group_count_x)),
|
||||||
.group_count_y = @as(usize, @intCast(group_count_y)),
|
.group_count_y = @as(usize, @intCast(group_count_y)),
|
||||||
.group_count_z = @as(usize, @intCast(group_count_z)),
|
.group_count_z = @as(usize, @intCast(group_count_z)),
|
||||||
.subgroups_per_workgroup = subgroups_per_workgroup,
|
.invocations_per_workgroup = invocations_per_workgroup,
|
||||||
.pipeline = pipeline,
|
.pipeline = pipeline,
|
||||||
},
|
},
|
||||||
});
|
);
|
||||||
}
|
}
|
||||||
self.device.workers.waitAndWork(&wg);
|
//self.device.workers.waitAndWork(&wg);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn runWrapper(data: RunData) void {
|
fn runWrapper(data: RunData) void {
|
||||||
@@ -86,6 +97,8 @@ inline fn run(data: RunData) !void {
|
|||||||
|
|
||||||
const entry = try rt.getEntryPointByName(shader.entry);
|
const entry = try rt.getEntryPointByName(shader.entry);
|
||||||
|
|
||||||
|
try data.self.syncDescriptorSets(allocator, rt, true);
|
||||||
|
|
||||||
var group_index: usize = data.batch_id;
|
var group_index: usize = data.batch_id;
|
||||||
while (group_index < data.group_count) : (group_index += data.self.batch_size) {
|
while (group_index < data.group_count) : (group_index += data.self.batch_size) {
|
||||||
var modulo: usize = group_index;
|
var modulo: usize = group_index;
|
||||||
@@ -98,43 +111,35 @@ inline fn run(data: RunData) !void {
|
|||||||
modulo -= group_y * data.group_count_x;
|
modulo -= group_y * data.group_count_x;
|
||||||
const group_x = modulo;
|
const group_x = modulo;
|
||||||
|
|
||||||
try setupWorkgroupBuiltins(
|
try setupWorkgroupBuiltins(data.self, rt, .{
|
||||||
data.self,
|
|
||||||
rt,
|
|
||||||
.{
|
|
||||||
@as(u32, @intCast(data.group_count_x)),
|
@as(u32, @intCast(data.group_count_x)),
|
||||||
@as(u32, @intCast(data.group_count_y)),
|
@as(u32, @intCast(data.group_count_y)),
|
||||||
@as(u32, @intCast(data.group_count_z)),
|
@as(u32, @intCast(data.group_count_z)),
|
||||||
},
|
}, .{
|
||||||
.{
|
|
||||||
@as(u32, @intCast(group_x)),
|
@as(u32, @intCast(group_x)),
|
||||||
@as(u32, @intCast(group_y)),
|
@as(u32, @intCast(group_y)),
|
||||||
@as(u32, @intCast(group_z)),
|
@as(u32, @intCast(group_z)),
|
||||||
},
|
});
|
||||||
);
|
|
||||||
|
|
||||||
for (0..data.subgroups_per_workgroup) |i| {
|
for (0..data.invocations_per_workgroup) |i| {
|
||||||
try setupSubgroupBuiltins(
|
try setupSubgroupBuiltins(data.self, rt, .{
|
||||||
data.self,
|
|
||||||
rt,
|
|
||||||
.{
|
|
||||||
@as(u32, @intCast(group_x)),
|
@as(u32, @intCast(group_x)),
|
||||||
@as(u32, @intCast(group_y)),
|
@as(u32, @intCast(group_y)),
|
||||||
@as(u32, @intCast(group_z)),
|
@as(u32, @intCast(group_z)),
|
||||||
},
|
}, i);
|
||||||
i,
|
|
||||||
);
|
|
||||||
try data.self.syncDescriptorSets(allocator, rt, true);
|
|
||||||
|
|
||||||
rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
|
rt.callEntryPoint(allocator, entry) catch |err| switch (err) {
|
||||||
spv.Runtime.RuntimeError.OutOfBounds => {},
|
// Some errors can be ignored
|
||||||
|
SpvRuntimeError.OutOfBounds,
|
||||||
|
SpvRuntimeError.Killed,
|
||||||
|
=> {},
|
||||||
else => return err,
|
else => return err,
|
||||||
};
|
};
|
||||||
|
}
|
||||||
|
|
||||||
try data.self.syncDescriptorSets(allocator, rt, false);
|
try data.self.syncDescriptorSets(allocator, rt, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
fn syncDescriptorSets(self: *Self, allocator: std.mem.Allocator, rt: *spv.Runtime, write: bool) !void {
|
fn syncDescriptorSets(self: *Self, allocator: std.mem.Allocator, rt: *spv.Runtime, write: bool) !void {
|
||||||
sets: for (self.state.sets[0..], 0..) |set, set_index| {
|
sets: for (self.state.sets[0..], 0..) |set, set_index| {
|
||||||
@@ -170,11 +175,11 @@ fn syncDescriptorSets(self: *Self, allocator: std.mem.Allocator, rt: *spv.Runtim
|
|||||||
fn setupWorkgroupBuiltins(
|
fn setupWorkgroupBuiltins(
|
||||||
self: *Self,
|
self: *Self,
|
||||||
rt: *spv.Runtime,
|
rt: *spv.Runtime,
|
||||||
group_count: [3]u32,
|
group_count: @Vector(3, u32),
|
||||||
group_id: [3]u32,
|
group_id: @Vector(3, u32),
|
||||||
) spv.Runtime.RuntimeError!void {
|
) spv.Runtime.RuntimeError!void {
|
||||||
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
||||||
const workgroup_size = [3]u32{
|
const workgroup_size = @Vector(3, u32){
|
||||||
spv_module.local_size_x,
|
spv_module.local_size_x,
|
||||||
spv_module.local_size_y,
|
spv_module.local_size_y,
|
||||||
spv_module.local_size_z,
|
spv_module.local_size_z,
|
||||||
@@ -188,21 +193,17 @@ fn setupWorkgroupBuiltins(
|
|||||||
fn setupSubgroupBuiltins(
|
fn setupSubgroupBuiltins(
|
||||||
self: *Self,
|
self: *Self,
|
||||||
rt: *spv.Runtime,
|
rt: *spv.Runtime,
|
||||||
group_id: [3]u32,
|
group_id: @Vector(3, u32),
|
||||||
local_invocation_index: usize,
|
local_invocation_index: usize,
|
||||||
) spv.Runtime.RuntimeError!void {
|
) spv.Runtime.RuntimeError!void {
|
||||||
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
const spv_module = &self.state.pipeline.?.stages.getPtrAssertContains(.compute).module.module;
|
||||||
const workgroup_size = [3]u32{
|
const workgroup_size = @Vector(3, u32){
|
||||||
spv_module.local_size_x,
|
spv_module.local_size_x,
|
||||||
spv_module.local_size_y,
|
spv_module.local_size_y,
|
||||||
spv_module.local_size_z,
|
spv_module.local_size_z,
|
||||||
};
|
};
|
||||||
const local_base = [3]u32{
|
const local_base = workgroup_size * group_id;
|
||||||
workgroup_size[0] * group_id[0],
|
var local_invocation = @Vector(3, u32){ 0, 0, 0 };
|
||||||
workgroup_size[1] * group_id[1],
|
|
||||||
workgroup_size[2] * group_id[2],
|
|
||||||
};
|
|
||||||
var local_invocation = [3]u32{ 0, 0, 0 };
|
|
||||||
|
|
||||||
var idx: u32 = @intCast(local_invocation_index);
|
var idx: u32 = @intCast(local_invocation_index);
|
||||||
local_invocation[2] = @divTrunc(idx, workgroup_size[0] * workgroup_size[1]);
|
local_invocation[2] = @divTrunc(idx, workgroup_size[0] * workgroup_size[1]);
|
||||||
@@ -211,11 +212,7 @@ fn setupSubgroupBuiltins(
|
|||||||
idx -= local_invocation[1] * workgroup_size[0];
|
idx -= local_invocation[1] * workgroup_size[0];
|
||||||
local_invocation[0] = idx;
|
local_invocation[0] = idx;
|
||||||
|
|
||||||
const global_invocation_index = [3]u32{
|
const global_invocation_index = local_base + local_invocation;
|
||||||
local_base[0] + local_invocation[0],
|
|
||||||
local_base[1] + local_invocation[1],
|
|
||||||
local_base[2] + local_invocation[2],
|
|
||||||
};
|
|
||||||
|
|
||||||
rt.writeBuiltIn(std.mem.asBytes(&global_invocation_index), .GlobalInvocationId) catch {};
|
rt.writeBuiltIn(std.mem.asBytes(&global_invocation_index), .GlobalInvocationId) catch {};
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,7 +40,7 @@ pub inline fn destroy(self: *Self, allocator: std.mem.Allocator) void {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub inline fn bindMemory(self: *Self, memory: *DeviceMemory, offset: vk.DeviceSize) VkError!void {
|
pub inline fn bindMemory(self: *Self, memory: *DeviceMemory, offset: vk.DeviceSize) VkError!void {
|
||||||
if (offset >= self.size or !self.allowed_memory_types.isSet(memory.memory_type_index)) {
|
if (offset > memory.size or !self.allowed_memory_types.isSet(memory.memory_type_index)) {
|
||||||
return VkError.ValidationFailed;
|
return VkError.ValidationFailed;
|
||||||
}
|
}
|
||||||
self.memory = memory;
|
self.memory = memory;
|
||||||
|
|||||||
Reference in New Issue
Block a user