fixing soft queues, improving blitter
This commit is contained in:
@@ -24,6 +24,7 @@ pub fn create(device: *base.Device, allocator: std.mem.Allocator, info: *const v
|
|||||||
interface.vtable = &.{
|
interface.vtable = &.{
|
||||||
.destroy = destroy,
|
.destroy = destroy,
|
||||||
.getMemoryRequirements = getMemoryRequirements,
|
.getMemoryRequirements = getMemoryRequirements,
|
||||||
|
.getSubresourceLayout = getSubresourceLayout,
|
||||||
.getTotalSizeForAspect = getTotalSizeForAspect,
|
.getTotalSizeForAspect = getTotalSizeForAspect,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -364,6 +365,27 @@ fn getTotalSizeForAspect(interface: *const Interface, aspect_mask: vk.ImageAspec
|
|||||||
return size * self.interface.array_layers;
|
return size * self.interface.array_layers;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Describes the memory layout of one subresource of this image.
///
/// Returns `VkError.ValidationFailed` when `subresource.aspect_mask` contains any
/// bit other than color, depth or stencil. Errors from `getSubresourceOffset`
/// are propagated unchanged.
fn getSubresourceLayout(interface: *const Interface, subresource: vk.ImageSubresource) VkError!vk.SubresourceLayout {
    const self: *const Self = @alignCast(@fieldParentPtr("interface", interface));

    const aspect = subresource.aspect_mask;
    const mip_level = subresource.mip_level;

    // Whatever remains after removing the supported aspects is a bit this
    // implementation cannot describe.
    if (aspect.subtract(.{
        .color_bit = true,
        .depth_bit = true,
        .stencil_bit = true,
    }).toInt() != 0) {
        base.unsupported("aspectMask {f}", .{aspect});
        return VkError.ValidationFailed;
    }

    return .{
        .offset = try self.getSubresourceOffset(aspect, mip_level, subresource.array_layer),
        .size = self.getMultiSampledLevelSize(aspect, mip_level),
        .row_pitch = self.getRowPitchMemSizeForMipLevel(aspect, mip_level),
        // array_pitch is the stride between consecutive array layers, i.e. the
        // size of a whole layer; depth_pitch is the stride between consecutive
        // slices of the selected mip level.
        .array_pitch = self.getLayerSize(aspect),
        .depth_pitch = self.getSliceMemSizeForMipLevel(aspect, mip_level),
    };
}
|
||||||
|
|
||||||
pub fn getLayerSize(self: *const Self, aspect_mask: vk.ImageAspectFlags) usize {
|
pub fn getLayerSize(self: *const Self, aspect_mask: vk.ImageAspectFlags) usize {
|
||||||
var size: usize = 0;
|
var size: usize = 0;
|
||||||
for (0..self.interface.mip_levels) |mip_level| {
|
for (0..self.interface.mip_levels) |mip_level| {
|
||||||
|
|||||||
@@ -12,6 +12,7 @@ pub const Interface = base.Instance;
|
|||||||
|
|
||||||
interface: Interface,
|
interface: Interface,
|
||||||
threaded: std.Io.Threaded,
|
threaded: std.Io.Threaded,
|
||||||
|
io_impl: std.Io,
|
||||||
allocator: std.mem.Allocator,
|
allocator: std.mem.Allocator,
|
||||||
|
|
||||||
fn castExtension(comptime ext: vk.ApiInfo) vk.ExtensionProperties {
|
fn castExtension(comptime ext: vk.ApiInfo) vk.ExtensionProperties {
|
||||||
@@ -33,6 +34,7 @@ pub fn create(allocator: std.mem.Allocator, infos: *const vk.InstanceCreateInfo)
|
|||||||
|
|
||||||
self.allocator = std.heap.smp_allocator;
|
self.allocator = std.heap.smp_allocator;
|
||||||
self.threaded = std.Io.Threaded.init(self.allocator, .{});
|
self.threaded = std.Io.Threaded.init(self.allocator, .{});
|
||||||
|
self.io_impl = self.threaded.io();
|
||||||
|
|
||||||
self.interface = try base.Instance.init(allocator, infos);
|
self.interface = try base.Instance.init(allocator, infos);
|
||||||
self.interface.dispatch_table = &.{
|
self.interface.dispatch_table = &.{
|
||||||
@@ -70,5 +72,5 @@ fn releasePhysicalDevices(interface: *Interface, allocator: std.mem.Allocator) V
|
|||||||
|
|
||||||
fn io(interface: *Interface) std.Io {
|
fn io(interface: *Interface) std.Io {
|
||||||
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
||||||
return self.threaded.io();
|
return self.io_impl;
|
||||||
}
|
}
|
||||||
|
|||||||
+4
-15
@@ -17,7 +17,7 @@ const Self = @This();
|
|||||||
pub const Interface = base.Queue;
|
pub const Interface = base.Queue;
|
||||||
|
|
||||||
interface: Interface,
|
interface: Interface,
|
||||||
lock: std.Io.RwLock,
|
group: std.Io.Group,
|
||||||
|
|
||||||
pub fn create(allocator: std.mem.Allocator, device: *base.Device, index: u32, family_index: u32, flags: vk.DeviceQueueCreateFlags) VkError!*Interface {
|
pub fn create(allocator: std.mem.Allocator, device: *base.Device, index: u32, family_index: u32, flags: vk.DeviceQueueCreateFlags) VkError!*Interface {
|
||||||
const self = allocator.create(Self) catch return VkError.OutOfHostMemory;
|
const self = allocator.create(Self) catch return VkError.OutOfHostMemory;
|
||||||
@@ -33,7 +33,7 @@ pub fn create(allocator: std.mem.Allocator, device: *base.Device, index: u32, fa
|
|||||||
|
|
||||||
self.* = .{
|
self.* = .{
|
||||||
.interface = interface,
|
.interface = interface,
|
||||||
.lock = .init,
|
.group = .init,
|
||||||
};
|
};
|
||||||
return &self.interface;
|
return &self.interface;
|
||||||
}
|
}
|
||||||
@@ -58,10 +58,6 @@ pub fn submit(interface: *Interface, infos: []Interface.SubmitInfo, p_fence: ?*b
|
|||||||
const allocator = soft_device.device_allocator.allocator();
|
const allocator = soft_device.device_allocator.allocator();
|
||||||
const io = soft_device.interface.io();
|
const io = soft_device.interface.io();
|
||||||
|
|
||||||
// Lock here to avoid acquiring it in `waitIdle` before runners start
|
|
||||||
self.lock.lockShared(io) catch return VkError.DeviceLost;
|
|
||||||
defer self.lock.unlockShared(io);
|
|
||||||
|
|
||||||
for (infos) |info| {
|
for (infos) |info| {
|
||||||
// Cloning info to keep them alive until command execution ends
|
// Cloning info to keep them alive until command execution ends
|
||||||
const cloned_info: Interface.SubmitInfo = .{
|
const cloned_info: Interface.SubmitInfo = .{
|
||||||
@@ -69,24 +65,17 @@ pub fn submit(interface: *Interface, infos: []Interface.SubmitInfo, p_fence: ?*b
|
|||||||
};
|
};
|
||||||
const runners_counter = allocator.create(RefCounter) catch return VkError.OutOfDeviceMemory;
|
const runners_counter = allocator.create(RefCounter) catch return VkError.OutOfDeviceMemory;
|
||||||
runners_counter.* = .init;
|
runners_counter.* = .init;
|
||||||
_ = soft_device.interface.io().async(Self.taskRunner, .{ self, cloned_info, p_fence, runners_counter });
|
self.group.async(io, Self.taskRunner, .{ self, cloned_info, p_fence, runners_counter });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn waitIdle(interface: *Interface) VkError!void {
|
pub fn waitIdle(interface: *Interface) VkError!void {
|
||||||
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
const self: *Self = @alignCast(@fieldParentPtr("interface", interface));
|
||||||
const io = interface.owner.io();
|
const io = interface.owner.io();
|
||||||
|
self.group.await(io) catch return VkError.DeviceLost;
|
||||||
self.lock.lock(io) catch return VkError.DeviceLost;
|
|
||||||
defer self.lock.unlock(io);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn taskRunner(self: *Self, info: Interface.SubmitInfo, p_fence: ?*base.Fence, runners_counter: *RefCounter) void {
|
fn taskRunner(self: *Self, info: Interface.SubmitInfo, p_fence: ?*base.Fence, runners_counter: *RefCounter) void {
|
||||||
const io = self.interface.owner.io();
|
|
||||||
|
|
||||||
self.lock.lockShared(io) catch return;
|
|
||||||
defer self.lock.unlockShared(io);
|
|
||||||
|
|
||||||
runners_counter.ref();
|
runners_counter.ref();
|
||||||
defer {
|
defer {
|
||||||
runners_counter.unref();
|
runners_counter.unref();
|
||||||
|
|||||||
+209
-75
@@ -16,6 +16,27 @@ const State = struct {
|
|||||||
filter: vk.Filter,
|
filter: vk.Filter,
|
||||||
allow_srgb_conversion: bool,
|
allow_srgb_conversion: bool,
|
||||||
clamp_to_edge: bool,
|
clamp_to_edge: bool,
|
||||||
|
dst_samples: usize,
|
||||||
|
};
|
||||||
|
|
||||||
|
const BlitData = struct {
|
||||||
|
src_map: []const u8,
|
||||||
|
dst_map: []u8,
|
||||||
|
|
||||||
|
src_slice_pitch_bytes: usize,
|
||||||
|
src_row_pitch_bytes: usize,
|
||||||
|
dst_slice_pitch_bytes: usize,
|
||||||
|
dst_row_pitch_bytes: usize,
|
||||||
|
|
||||||
|
pos: zm.F32x4,
|
||||||
|
dim: zm.F32x4,
|
||||||
|
|
||||||
|
dst_offset_0: vk.Offset3D,
|
||||||
|
dst_offset_1: vk.Offset3D,
|
||||||
|
|
||||||
|
depth_ratio: f32,
|
||||||
|
height_ratio: f32,
|
||||||
|
width_ratio: f32,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn computeOffset2D(x: usize, y: usize, pitch_bytes: usize, texel_bytes: usize) usize {
|
fn computeOffset2D(x: usize, y: usize, pitch_bytes: usize, texel_bytes: usize) usize {
|
||||||
@@ -26,17 +47,15 @@ fn computeOffset3D(x: usize, y: usize, z: usize, slice_bytes: usize, pitch_bytes
|
|||||||
return z * slice_bytes + y * pitch_bytes + x * texel_bytes;
|
return z * slice_bytes + y * pitch_bytes + x * texel_bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn clear(pixel: vk.ClearValue, format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, area: ?vk.Rect2D) VkError!void {
|
pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, area: ?vk.Rect2D) VkError!void {
|
||||||
const dst_format = base.format.fromAspect(view_format, range.aspect_mask);
|
const dst_format = base.format.fromAspect(view_format, range.aspect_mask);
|
||||||
if (dst_format == .undefined) {
|
if (dst_format == .undefined) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const view_format_value: c_uint = @intCast(@intFromEnum(view_format));
|
|
||||||
|
|
||||||
var clamped_pixel: vk.ClearValue = pixel;
|
var clamped_pixel: vk.ClearValue = pixel;
|
||||||
if (base.vku.vkuFormatIsSINT(view_format_value) or base.vku.vkuFormatIsUINT(view_format_value)) {
|
if (base.format.isSint(view_format) or base.format.isUint(view_format)) {
|
||||||
const min_value: f32 = if (base.vku.vkuFormatIsSNORM(view_format_value)) -1.0 else 0.0;
|
const min_value: f32 = if (base.format.isSnorm(view_format)) -1.0 else 0.0;
|
||||||
|
|
||||||
if (range.aspect_mask.color_bit) {
|
if (range.aspect_mask.color_bit) {
|
||||||
clamped_pixel.color.float_32[0] = std.math.clamp(pixel.color.float_32[0], min_value, 1.0);
|
clamped_pixel.color.float_32[0] = std.math.clamp(pixel.color.float_32[0], min_value, 1.0);
|
||||||
@@ -51,69 +70,117 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dest: *SoftImage, view_for
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (try fastClear(clamped_pixel, format, dest, dst_format, range, area)) {
|
if (try fastClear(clamped_pixel, format, dst, dst_format, range, area)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
base.logger.fixme("implement slow clear", .{});
|
base.logger.fixme("implement slow clear", .{});
|
||||||
}
|
}
|
||||||
|
|
||||||
fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool {
|
fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool {
|
||||||
_ = render_area;
|
|
||||||
_ = range;
|
|
||||||
|
|
||||||
if (clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) {
|
if (clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
const ClearValue = union {
|
const r, const g, const b, const a = clear_value.color.float_32;
|
||||||
rgba: struct { r: f32, g: f32, b: f32, a: f32 },
|
const d = clear_value.depth_stencil.depth;
|
||||||
rgb: [3]f32,
|
const s = clear_value.depth_stencil.stencil;
|
||||||
d: f32,
|
|
||||||
d_as_u32: u32,
|
|
||||||
s: u32,
|
|
||||||
};
|
|
||||||
|
|
||||||
const c: *const ClearValue = @ptrCast(&clear_value);
|
|
||||||
|
|
||||||
var pack: u32 = 0;
|
var pack: u32 = 0;
|
||||||
switch (view_format) {
|
switch (view_format) {
|
||||||
.r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) << 11),
|
.r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * b + 0.5)) |
|
||||||
.b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) << 11),
|
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
|
||||||
|
(@as(u16, @intFromFloat(31.0 * r + 0.5)) << 11),
|
||||||
|
.b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * r + 0.5)) |
|
||||||
|
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
|
||||||
|
(@as(u16, @intFromFloat(31.0 * b + 0.5)) << 11),
|
||||||
|
|
||||||
.a8b8g8r8_uint_pack32,
|
.a8b8g8r8_uint_pack32,
|
||||||
.a8b8g8r8_unorm_pack32,
|
.a8b8g8r8_unorm_pack32,
|
||||||
.r8g8b8a8_unorm,
|
.r8g8b8a8_unorm,
|
||||||
=> pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)),
|
=> pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * b + 0.5)) << 16) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * r + 0.5))),
|
||||||
|
|
||||||
.b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)),
|
.b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * r + 0.5)) << 16) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
|
||||||
|
(@as(u32, @intFromFloat(255.0 * b + 0.5))),
|
||||||
//.b10g11r11_ufloat_pack32 => pack = R11G11B10F(c.rgb),
|
//.b10g11r11_ufloat_pack32 => pack = R11G11B10F(c.rgb),
|
||||||
//.e5b9g9r9_ufloat_pack32 => pack = RGB9E5(c.rgb),
|
//.e5b9g9r9_ufloat_pack32 => pack = RGB9E5(c.rgb),
|
||||||
.d32_sfloat => {
|
.d32_sfloat => {
|
||||||
std.debug.assert(clear_format == .d32_sfloat);
|
std.debug.assert(clear_format == .d32_sfloat);
|
||||||
pack = c.d_as_u32; // float reinterpreted as uint32
|
pack = @bitCast(d); // float reinterpreted as uint32
|
||||||
},
|
},
|
||||||
.s8_uint => {
|
.s8_uint => {
|
||||||
std.debug.assert(clear_format == .s8_uint);
|
std.debug.assert(clear_format == .s8_uint);
|
||||||
pack = @as(u8, @intCast(c.s));
|
pack = @as(u8, @intCast(s));
|
||||||
},
|
},
|
||||||
else => return false,
|
else => return false,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (dest.interface.memory) |memory| {
|
var subresource: vk.ImageSubresource = .{
|
||||||
const image_size = try dest.interface.getTotalSize();
|
.aspect_mask = range.aspect_mask,
|
||||||
const memory_map = memory.map(dest.interface.memory_offset, image_size) catch return false;
|
.mip_level = range.base_mip_level,
|
||||||
defer memory.unmap();
|
.array_layer = range.base_array_layer,
|
||||||
|
};
|
||||||
|
const last_mip_level = dst.interface.getLastMipLevel(range);
|
||||||
|
const last_layer = dst.interface.getLastLayerIndex(range);
|
||||||
|
|
||||||
const memory_map_as_u32: []u32 = @as([*]u32, @ptrCast(@alignCast(memory_map)))[0..@divExact(image_size, 4)];
|
var area: vk.Rect2D = if (render_area) |ra| ra else .{
|
||||||
|
.offset = .{ .x = 0, .y = 0 },
|
||||||
|
.extent = .{ .width = 0, .height = 0 },
|
||||||
|
};
|
||||||
|
|
||||||
@memset(memory_map_as_u32, pack);
|
const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
||||||
|
|
||||||
|
while (subresource.mip_level <= last_mip_level) : (subresource.mip_level += 1) {
|
||||||
|
const dst_slice_pitch_bytes = dst.getSliceMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
|
||||||
|
const dst_row_pitch_bytes = dst.getRowPitchMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
|
||||||
|
const extent = dst.getMipLevelExtent(subresource.mip_level);
|
||||||
|
|
||||||
|
if (render_area == null) {
|
||||||
|
area.extent.width = extent.width;
|
||||||
|
area.extent.height = extent.height;
|
||||||
|
}
|
||||||
|
|
||||||
|
subresource.array_layer = range.base_array_layer;
|
||||||
|
while (subresource.array_layer <= last_layer) : (subresource.array_layer += 1) {
|
||||||
|
for (0..@intCast(extent.depth)) |depth| {
|
||||||
|
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = area.offset.x, .y = area.offset.y, .z = @intCast(depth) }, subresource);
|
||||||
|
const dst_size = try dst.interface.getTotalSizeForAspect(subresource.aspect_mask) - dst_texel_offset;
|
||||||
|
var dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size];
|
||||||
|
|
||||||
|
for (0..dst.interface.samples.toInt()) |_| {
|
||||||
|
var dst_pixel = dst_map[0..];
|
||||||
|
switch (base.format.texelSize(view_format)) {
|
||||||
|
4 => for (0..@intCast(area.extent.height)) |_| {
|
||||||
|
var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel);
|
||||||
|
@memset(dst_pixel_4bytes[0..area.extent.width], pack);
|
||||||
|
dst_pixel = dst_pixel[dst_row_pitch_bytes..];
|
||||||
|
},
|
||||||
|
2 => for (0..@intCast(area.extent.height)) |_| {
|
||||||
|
var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel);
|
||||||
|
@memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack)));
|
||||||
|
dst_pixel = dst_pixel[dst_row_pitch_bytes..];
|
||||||
|
},
|
||||||
|
1 => for (0..@intCast(area.extent.height)) |_| {
|
||||||
|
@memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack)));
|
||||||
|
dst_pixel = dst_pixel[dst_row_pitch_bytes..];
|
||||||
|
},
|
||||||
|
else => unreachable,
|
||||||
|
}
|
||||||
|
|
||||||
|
dst_map = dst_map[dst_slice_pitch_bytes..];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn sample(src: []const u8, pos: zm.F32x4, dims: zm.F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) zm.F32x4 {
|
fn sample(src: []const u8, pos: zm.F32x4, dim: zm.F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) zm.F32x4 {
|
||||||
var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
|
var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
|
||||||
const src_texel_size = base.format.texelSize(state.src_format);
|
const src_texel_size = base.format.texelSize(state.src_format);
|
||||||
|
|
||||||
@@ -123,14 +190,52 @@ fn sample(src: []const u8, pos: zm.F32x4, dims: zm.F32x4, slice_bytes: usize, pi
|
|||||||
var z: usize = @intFromFloat(pos[2]);
|
var z: usize = @intFromFloat(pos[2]);
|
||||||
|
|
||||||
if (state.clamp_to_edge) {
|
if (state.clamp_to_edge) {
|
||||||
x = std.math.clamp(x, 0, @as(usize, @intFromFloat(dims[0])) - 1);
|
x = std.math.clamp(x, 0, @as(usize, @intFromFloat(dim[0])) - 1);
|
||||||
y = std.math.clamp(y, 0, @as(usize, @intFromFloat(dims[1])) - 1);
|
y = std.math.clamp(y, 0, @as(usize, @intFromFloat(dim[1])) - 1);
|
||||||
z = std.math.clamp(z, 0, @as(usize, @intFromFloat(dims[2])) - 1);
|
z = std.math.clamp(z, 0, @as(usize, @intFromFloat(dim[2])) - 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
const src_map = src[computeOffset3D(x, y, z, slice_bytes, pitch_bytes, src_texel_size)..];
|
const src_map = src[computeOffset3D(x, y, z, slice_bytes, pitch_bytes, src_texel_size)..];
|
||||||
|
|
||||||
color = readFloat4(src_map, state);
|
color = readFloat4(src_map, state);
|
||||||
|
} else {
|
||||||
|
var x: f32 = pos[0];
|
||||||
|
var y: f32 = pos[1];
|
||||||
|
var z: f32 = pos[2];
|
||||||
|
|
||||||
|
if (state.clamp_to_edge) {
|
||||||
|
x = @min(@max(x, 0.5), dim[0] - 0.5);
|
||||||
|
y = @min(@max(y, 0.5), dim[1] - 0.5);
|
||||||
|
z = @min(@max(z, 0.5), dim[2] - 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
const fx0 = x - 0.5;
|
||||||
|
const fy0 = y - 0.5;
|
||||||
|
const fz0 = z - 0.5;
|
||||||
|
|
||||||
|
const ix0: usize = @intCast(@max(@as(i32, @intFromFloat(fx0)), 0));
|
||||||
|
const iy0: usize = @intCast(@max(@as(i32, @intFromFloat(fy0)), 0));
|
||||||
|
const iz0: usize = @intCast(@max(@as(i32, @intFromFloat(fz0)), 0));
|
||||||
|
|
||||||
|
const ix1 = if (ix0 + 1 >= @as(usize, @intFromFloat(dim[0]))) ix0 else ix0 + 1;
|
||||||
|
// The second row is bounded by the source height (dim[1]), not its width.
const iy1 = if (iy0 + 1 >= @as(usize, @intFromFloat(dim[1]))) iy0 else iy0 + 1;
|
||||||
|
|
||||||
|
const sample_0_0 = src[computeOffset3D(ix0, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
|
||||||
|
const sample_0_1 = src[computeOffset3D(ix1, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
|
||||||
|
const sample_1_0 = src[computeOffset3D(ix0, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
|
||||||
|
const sample_1_1 = src[computeOffset3D(ix1, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
|
||||||
|
|
||||||
|
const pixel_0_0 = readFloat4(sample_0_0, state);
|
||||||
|
const pixel_0_1 = readFloat4(sample_0_1, state);
|
||||||
|
const pixel_1_0 = readFloat4(sample_1_0, state);
|
||||||
|
const pixel_1_1 = readFloat4(sample_1_1, state);
|
||||||
|
|
||||||
|
const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0)));
|
||||||
|
const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0)));
|
||||||
|
const ix = zm.f32x4s(1.0) - fx;
|
||||||
|
const iy = zm.f32x4s(1.0) - fy;
|
||||||
|
|
||||||
|
color = (pixel_0_0 * ix + pixel_0_1 * fx) * iy + (pixel_1_0 * ix + pixel_1_1 * fx) * fy;
|
||||||
}
|
}
|
||||||
|
|
||||||
return applyScaleAndClamp(color, state);
|
return applyScaleAndClamp(color, state);
|
||||||
@@ -167,8 +272,8 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
|
|||||||
const z0 = @as(f32, @floatFromInt(src_offset_0.z)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.z))) * depth_ratio;
|
const z0 = @as(f32, @floatFromInt(src_offset_0.z)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.z))) * depth_ratio;
|
||||||
|
|
||||||
const src_slice_pitch_bytes = src.getSliceMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level);
|
const src_slice_pitch_bytes = src.getSliceMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level);
|
||||||
const dst_slice_pitch_bytes = dst.getSliceMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level);
|
|
||||||
const src_row_pitch_bytes = src.getRowPitchMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level);
|
const src_row_pitch_bytes = src.getRowPitchMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level);
|
||||||
|
const dst_slice_pitch_bytes = dst.getSliceMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level);
|
||||||
const dst_row_pitch_bytes = dst.getRowPitchMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level);
|
const dst_row_pitch_bytes = dst.getRowPitchMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level);
|
||||||
|
|
||||||
const src_format = base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask);
|
const src_format = base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask);
|
||||||
@@ -177,15 +282,6 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
|
|||||||
const apply_filter = (filter != .nearest);
|
const apply_filter = (filter != .nearest);
|
||||||
const allow_srgb_conversion = apply_filter or base.format.isSrgb(src_format) != base.format.isSrgb(dst_format);
|
const allow_srgb_conversion = apply_filter or base.format.isSrgb(src_format) != base.format.isSrgb(dst_format);
|
||||||
|
|
||||||
const is_src_int = base.format.isUint(src_format) or base.format.isSint(src_format);
|
|
||||||
const is_dst_int = base.format.isUint(dst_format) or base.format.isSint(dst_format);
|
|
||||||
const are_both_int = is_src_int and is_dst_int;
|
|
||||||
|
|
||||||
if (are_both_int) {
|
|
||||||
base.unsupported("Blit of only integer type images are not supported yet", .{});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
var src_subresource = vk.ImageSubresource{
|
var src_subresource = vk.ImageSubresource{
|
||||||
.aspect_mask = region.src_subresource.aspect_mask,
|
.aspect_mask = region.src_subresource.aspect_mask,
|
||||||
.mip_level = region.src_subresource.mip_level,
|
.mip_level = region.src_subresource.mip_level,
|
||||||
@@ -215,6 +311,7 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
|
|||||||
.filter = filter,
|
.filter = filter,
|
||||||
.allow_srgb_conversion = allow_srgb_conversion,
|
.allow_srgb_conversion = allow_srgb_conversion,
|
||||||
.clamp_to_edge = false,
|
.clamp_to_edge = false,
|
||||||
|
.dst_samples = dst.interface.samples.toInt(),
|
||||||
};
|
};
|
||||||
|
|
||||||
while (dst_subresource.array_layer <= last_layer) : ({
|
while (dst_subresource.array_layer <= last_layer) : ({
|
||||||
@@ -227,45 +324,68 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
|
|||||||
|
|
||||||
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = 0 }, dst_subresource);
|
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = 0 }, dst_subresource);
|
||||||
const dst_size = try dst.interface.getTotalSizeForAspect(dst_subresource.aspect_mask) - dst_texel_offset;
|
const dst_size = try dst.interface.getTotalSizeForAspect(dst_subresource.aspect_mask) - dst_texel_offset;
|
||||||
var dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size];
|
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size];
|
||||||
|
|
||||||
_ = &src_map;
|
blit(state, .{
|
||||||
_ = &dst_map;
|
.src_map = src_map,
|
||||||
|
.dst_map = dst_map,
|
||||||
|
|
||||||
for (@intCast(dst_offset_0.z)..@intCast(dst_offset_1.z)) |k| {
|
.src_slice_pitch_bytes = src_slice_pitch_bytes,
|
||||||
const z = z0 + @as(f32, @floatFromInt(k)) * depth_ratio;
|
.src_row_pitch_bytes = src_row_pitch_bytes,
|
||||||
var dst_slice = dst_map[(k * dst_slice_pitch_bytes)..];
|
.dst_slice_pitch_bytes = dst_slice_pitch_bytes,
|
||||||
|
.dst_row_pitch_bytes = dst_row_pitch_bytes,
|
||||||
|
|
||||||
for (@intCast(dst_offset_0.y)..@intCast(dst_offset_1.y)) |j| {
|
.pos = zm.f32x4(x0, y0, z0, 0.0),
|
||||||
const y = y0 + @as(f32, @floatFromInt(j)) * height_ratio;
|
.dim = zm.f32x4(@floatFromInt(src_extent.width), @floatFromInt(src_extent.height), @floatFromInt(src_extent.depth), 0.0),
|
||||||
var dst_line = dst_slice[(j * dst_row_pitch_bytes)..];
|
|
||||||
|
|
||||||
for (@intCast(dst_offset_0.x)..@intCast(dst_offset_1.x)) |i| {
|
.dst_offset_0 = dst_offset_0,
|
||||||
const x = x0 + @as(f32, @floatFromInt(i)) * width_ratio;
|
.dst_offset_1 = dst_offset_1,
|
||||||
var dst_pixel = dst_line[(i * base.format.texelSize(dst_format))..];
|
|
||||||
|
.depth_ratio = depth_ratio,
|
||||||
|
.height_ratio = height_ratio,
|
||||||
|
.width_ratio = width_ratio,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn blit(state: State, data: BlitData) void {
|
||||||
|
const is_src_int = base.format.isUint(state.src_format) or base.format.isSint(state.src_format);
|
||||||
|
const is_dst_int = base.format.isUint(state.dst_format) or base.format.isSint(state.dst_format);
|
||||||
|
const are_both_int = is_src_int and is_dst_int;
|
||||||
|
|
||||||
|
if (are_both_int) {
|
||||||
|
base.unsupported("Blit of only integer type images are not supported yet", .{});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (@intCast(data.dst_offset_0.z)..@intCast(data.dst_offset_1.z)) |k| {
|
||||||
|
const z = data.pos[2] + @as(f32, @floatFromInt(k)) * data.depth_ratio;
|
||||||
|
var dst_slice = data.dst_map[(k * data.dst_slice_pitch_bytes)..];
|
||||||
|
|
||||||
|
for (@intCast(data.dst_offset_0.y)..@intCast(data.dst_offset_1.y)) |j| {
|
||||||
|
const y = data.pos[1] + @as(f32, @floatFromInt(j)) * data.height_ratio;
|
||||||
|
var dst_line = dst_slice[(j * data.dst_row_pitch_bytes)..];
|
||||||
|
|
||||||
|
for (@intCast(data.dst_offset_0.x)..@intCast(data.dst_offset_1.x)) |i| {
|
||||||
|
const x = data.pos[0] + @as(f32, @floatFromInt(i)) * data.width_ratio;
|
||||||
|
var dst_pixel = dst_line[(i * base.format.texelSize(state.dst_format))..];
|
||||||
|
|
||||||
if (are_both_int) {
|
if (are_both_int) {
|
||||||
// TODO
|
// TODO
|
||||||
} else {
|
} else {
|
||||||
const color = sample(
|
const color = sample(
|
||||||
src_map,
|
data.src_map,
|
||||||
.{ x, y, z, 0.0 },
|
.{ x, y, z, 0.0 },
|
||||||
.{
|
data.dim,
|
||||||
@floatFromInt(src_extent.width),
|
data.src_slice_pitch_bytes,
|
||||||
@floatFromInt(src_extent.height),
|
data.src_row_pitch_bytes,
|
||||||
@floatFromInt(src_extent.depth),
|
|
||||||
0.0,
|
|
||||||
},
|
|
||||||
src_slice_pitch_bytes,
|
|
||||||
src_row_pitch_bytes,
|
|
||||||
state,
|
state,
|
||||||
);
|
);
|
||||||
for (0..dst.interface.samples.toInt()) |_| {
|
for (0..state.dst_samples) |_| {
|
||||||
writeFloat4(color, dst_pixel, state);
|
writeFloat4(color, dst_pixel, state);
|
||||||
if (dst_pixel.len < dst_slice_pitch_bytes)
|
if (dst_pixel.len < data.dst_slice_pitch_bytes)
|
||||||
break;
|
break;
|
||||||
dst_pixel = dst_pixel[dst_slice_pitch_bytes..];
|
dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -313,11 +433,25 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
|
|||||||
.b8g8r8a8_srgb,
|
.b8g8r8a8_srgb,
|
||||||
.b8g8r8a8_unorm,
|
.b8g8r8a8_unorm,
|
||||||
=> {
|
=> {
|
||||||
map[0] = @intFromFloat(color[1] * 255.0);
|
map[0] = @intFromFloat(color[2] * 255.0);
|
||||||
map[1] = @intFromFloat(color[2] * 255.0);
|
map[1] = @intFromFloat(color[1] * 255.0);
|
||||||
map[2] = @intFromFloat(color[0] * 255.0);
|
map[2] = @intFromFloat(color[0] * 255.0);
|
||||||
map[3] = @intFromFloat(color[3] * 255.0);
|
map[3] = @intFromFloat(color[3] * 255.0);
|
||||||
},
|
},
|
||||||
else => base.unsupported("Blitter destination format {any}", .{state.src_format}),
|
.a8b8g8r8_unorm_pack32,
|
||||||
|
.r8g8b8a8_unorm,
|
||||||
|
.a8b8g8r8_srgb_pack32,
|
||||||
|
.r8g8b8a8_srgb,
|
||||||
|
.a8b8g8r8_uint_pack32,
|
||||||
|
.r8g8b8a8_uint,
|
||||||
|
.r8g8b8a8_uscaled,
|
||||||
|
.a8b8g8r8_uscaled_pack32,
|
||||||
|
=> {
|
||||||
|
map[0] = @intFromFloat(color[0] * 255.0);
|
||||||
|
map[1] = @intFromFloat(color[1] * 255.0);
|
||||||
|
map[2] = @intFromFloat(color[2] * 255.0);
|
||||||
|
map[3] = @intFromFloat(color[3] * 255.0);
|
||||||
|
},
|
||||||
|
// Report the format that was actually rejected: the destination one.
else => base.unsupported("Blitter destination format {any}", .{state.dst_format}),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ vtable: *const VTable,
|
|||||||
pub const VTable = struct {
|
pub const VTable = struct {
|
||||||
destroy: *const fn (*Self, std.mem.Allocator) void,
|
destroy: *const fn (*Self, std.mem.Allocator) void,
|
||||||
getMemoryRequirements: *const fn (*Self, *vk.MemoryRequirements) VkError!void,
|
getMemoryRequirements: *const fn (*Self, *vk.MemoryRequirements) VkError!void,
|
||||||
|
getSubresourceLayout: *const fn (*const Self, vk.ImageSubresource) VkError!vk.SubresourceLayout,
|
||||||
getTotalSizeForAspect: *const fn (*const Self, vk.ImageAspectFlags) VkError!usize,
|
getTotalSizeForAspect: *const fn (*const Self, vk.ImageAspectFlags) VkError!usize,
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -95,3 +96,11 @@ pub inline fn formatToAspect(self: *const Self, aspect_mask: vk.ImageAspectFlags
|
|||||||
pub fn getLastLayerIndex(self: *const Self, range: vk.ImageSubresourceRange) u32 {
|
pub fn getLastLayerIndex(self: *const Self, range: vk.ImageSubresourceRange) u32 {
|
||||||
return (if (range.layer_count == vk.REMAINING_ARRAY_LAYERS) self.array_layers else range.base_array_layer + range.layer_count) - 1;
|
return (if (range.layer_count == vk.REMAINING_ARRAY_LAYERS) self.array_layers else range.base_array_layer + range.layer_count) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the index of the last mip level addressed by `range`.
///
/// When `range.level_count` is `vk.REMAINING_MIP_LEVELS` the range extends to
/// the image's final level; otherwise it ends `level_count` levels after
/// `base_mip_level`.
pub fn getLastMipLevel(self: *const Self, range: vk.ImageSubresourceRange) u32 {
    const one_past_last = if (range.level_count == vk.REMAINING_MIP_LEVELS)
        self.mip_levels
    else
        range.base_mip_level + range.level_count;

    return one_past_last - 1;
}
|
||||||
|
|
||||||
|
/// Forwards to the implementation's `getSubresourceLayout` vtable entry and
/// returns its result, error included.
pub inline fn getSubresourceLayout(self: *const Self, subresource: vk.ImageSubresource) VkError!vk.SubresourceLayout {
    const impl = self.vtable.getSubresourceLayout;
    return impl(self, subresource);
}
|
||||||
|
|||||||
@@ -1384,12 +1384,7 @@ pub export fn strollGetImageSubresourceLayout(p_device: vk.Device, p_image: vk.I
|
|||||||
Dispatchable(Device).checkHandleValidity(p_device) catch |err| return errorLogger(err);
|
Dispatchable(Device).checkHandleValidity(p_device) catch |err| return errorLogger(err);
|
||||||
|
|
||||||
const image = NonDispatchable(Image).fromHandleObject(p_image) catch |err| return errorLogger(err);
|
const image = NonDispatchable(Image).fromHandleObject(p_image) catch |err| return errorLogger(err);
|
||||||
|
layout.* = image.getSubresourceLayout(subresource.*) catch |err| return errorLogger(err);
|
||||||
notImplementedWarning();
|
|
||||||
|
|
||||||
_ = image;
|
|
||||||
_ = subresource;
|
|
||||||
_ = layout;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub export fn strollGetPipelineCacheData(p_device: vk.Device, p_cache: vk.PipelineCache, size: *usize, data: *anyopaque) callconv(vk.vulkan_call_conv) vk.Result {
|
pub export fn strollGetPipelineCacheData(p_device: vk.Device, p_cache: vk.PipelineCache, size: *usize, data: *anyopaque) callconv(vk.vulkan_call_conv) vk.Result {
|
||||||
|
|||||||
Reference in New Issue
Block a user