improving blitter
Build / build (push) Successful in 54s
Test / build_and_test (push) Successful in 41m32s

This commit is contained in:
2026-04-22 03:13:15 +02:00
parent aaeca6f854
commit 9b7728503b
3 changed files with 270 additions and 39 deletions
+4 -4
View File
@@ -50,8 +50,8 @@ vkCmdBindDescriptorSets | ✅ Implemented
vkCmdBindIndexBuffer | ⚙️ WIP vkCmdBindIndexBuffer | ⚙️ WIP
vkCmdBindPipeline | ✅ Implemented vkCmdBindPipeline | ✅ Implemented
vkCmdBindVertexBuffers | ⚙️ WIP vkCmdBindVertexBuffers | ⚙️ WIP
vkCmdBlitImage | ⚙️ WIP vkCmdBlitImage | ✅ Implemented
vkCmdClearAttachments | ⚙️ wip vkCmdClearAttachments | ⚙️ WIP
vkCmdClearColorImage | ⚙️ WIP vkCmdClearColorImage | ⚙️ WIP
vkCmdClearDepthStencilImage | ⚙️ WIP vkCmdClearDepthStencilImage | ⚙️ WIP
vkCmdCopyBuffer | ✅ Implemented vkCmdCopyBuffer | ✅ Implemented
@@ -70,7 +70,7 @@ vkCmdEndRenderPass | ⚙️ WIP
vkCmdExecuteCommands | ⚙️ WIP vkCmdExecuteCommands | ⚙️ WIP
vkCmdFillBuffer | ✅ Implemented vkCmdFillBuffer | ✅ Implemented
vkCmdNextSubpass | ⚙️ WIP vkCmdNextSubpass | ⚙️ WIP
vkCmdPipelineBarrier | ⚙️ WIP vkCmdPipelineBarrier | ✅ Implemented
vkCmdPushConstants | ⚙️ WIP vkCmdPushConstants | ⚙️ WIP
vkCmdResetEvent | ⚙️ WIP vkCmdResetEvent | ⚙️ WIP
vkCmdResetQueryPool | ⚙️ WIP vkCmdResetQueryPool | ⚙️ WIP
@@ -148,7 +148,7 @@ vkGetEventStatus | ⚙️ WIP
vkGetFenceStatus | ✅ Implemented vkGetFenceStatus | ✅ Implemented
vkGetImageMemoryRequirements | ✅ Implemented vkGetImageMemoryRequirements | ✅ Implemented
vkGetImageSparseMemoryRequirements | ⚙️ WIP vkGetImageSparseMemoryRequirements | ⚙️ WIP
vkGetImageSubresourceLayout | ⚙️ WIP vkGetImageSubresourceLayout | ✅ Implemented
vkGetInstanceProcAddr | ✅ Implemented vkGetInstanceProcAddr | ✅ Implemented
vkGetPhysicalDeviceFeatures | ✅ Implemented vkGetPhysicalDeviceFeatures | ✅ Implemented
vkGetPhysicalDeviceFormatProperties | ✅ Implemented vkGetPhysicalDeviceFormatProperties | ✅ Implemented
+95 -35
View File
@@ -17,6 +17,7 @@ const State = struct {
allow_srgb_conversion: bool, allow_srgb_conversion: bool,
clamp_to_edge: bool, clamp_to_edge: bool,
dst_samples: usize, dst_samples: usize,
filter_3D: bool,
}; };
const BlitData = struct { const BlitData = struct {
@@ -184,7 +185,7 @@ fn sample(src: []const u8, pos: zm.F32x4, dim: zm.F32x4, slice_bytes: usize, pit
var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
const src_texel_size = base.format.texelSize(state.src_format); const src_texel_size = base.format.texelSize(state.src_format);
if (state.filter != .linear or base.format.isUint(state.src_format)) { if (state.filter == .nearest or base.format.isUint(state.src_format)) {
var x: usize = @intFromFloat(pos[0]); var x: usize = @intFromFloat(pos[0]);
var y: usize = @intFromFloat(pos[1]); var y: usize = @intFromFloat(pos[1]);
var z: usize = @intFromFloat(pos[2]); var z: usize = @intFromFloat(pos[2]);
@@ -218,24 +219,56 @@ fn sample(src: []const u8, pos: zm.F32x4, dim: zm.F32x4, slice_bytes: usize, pit
const iz0: usize = @intCast(@max(@as(i32, @intFromFloat(fz0)), 0)); const iz0: usize = @intCast(@max(@as(i32, @intFromFloat(fz0)), 0));
const ix1 = if (ix0 + 1 >= @as(usize, @intFromFloat(dim[0]))) ix0 else ix0 + 1; const ix1 = if (ix0 + 1 >= @as(usize, @intFromFloat(dim[0]))) ix0 else ix0 + 1;
const iy1 = if (iy0 + 1 >= @as(usize, @intFromFloat(dim[0]))) iy0 else iy0 + 1; const iy1 = if (iy0 + 1 >= @as(usize, @intFromFloat(dim[1]))) iy0 else iy0 + 1;
const sample_0_0 = src[computeOffset3D(ix0, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..]; if (state.filter_3D) {
const sample_0_1 = src[computeOffset3D(ix1, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..]; const iz1 = if (iz0 + 1 >= @as(usize, @intFromFloat(dim[2]))) iz0 else iz0 + 1;
const sample_1_0 = src[computeOffset3D(ix0, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_1_1 = src[computeOffset3D(ix1, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const pixel_0_0 = readFloat4(sample_0_0, state); const sample_0_0_0 = src[computeOffset3D(ix0, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const pixel_0_1 = readFloat4(sample_0_1, state); const sample_0_1_0 = src[computeOffset3D(ix1, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const pixel_1_0 = readFloat4(sample_1_0, state); const sample_1_0_0 = src[computeOffset3D(ix0, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const pixel_1_1 = readFloat4(sample_1_1, state); const sample_1_1_0 = src[computeOffset3D(ix1, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_0_0_1 = src[computeOffset3D(ix0, iy0, iz1, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_0_1_1 = src[computeOffset3D(ix1, iy0, iz1, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_1_0_1 = src[computeOffset3D(ix0, iy1, iz1, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_1_1_1 = src[computeOffset3D(ix1, iy1, iz1, slice_bytes, pitch_bytes, src_texel_size)..];
const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0))); const pixel_0_0_0 = readFloat4(sample_0_0_0, state);
const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0))); const pixel_0_1_0 = readFloat4(sample_0_1_0, state);
const ix = zm.f32x4s(1.0) - fx; const pixel_1_0_0 = readFloat4(sample_1_0_0, state);
const iy = zm.f32x4s(1.0) - fy; const pixel_1_1_0 = readFloat4(sample_1_1_0, state);
const pixel_0_0_1 = readFloat4(sample_0_0_1, state);
const pixel_0_1_1 = readFloat4(sample_0_1_1, state);
const pixel_1_0_1 = readFloat4(sample_1_0_1, state);
const pixel_1_1_1 = readFloat4(sample_1_1_1, state);
color = (pixel_0_0 * ix + pixel_0_1 * fx) * iy + (pixel_1_0 * ix + pixel_1_1 * fx) * fy; const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0)));
const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0)));
const fz = zm.f32x4s(fz0 - @as(f32, @floatFromInt(iz0)));
const ix = zm.f32x4s(1.0) - fx;
const iy = zm.f32x4s(1.0) - fy;
const iz = zm.f32x4s(1.0) - fz;
color = ((pixel_0_0_0 * ix + pixel_0_1_0 * fx) * iy + (pixel_1_0_0 * ix + pixel_1_1_0 * fx) * fy) * iz +
((pixel_0_0_1 * ix + pixel_0_1_1 * fx) * iy + (pixel_1_0_1 * ix + pixel_1_1_1 * fx) * fy) * fz;
} else {
const sample_0_0 = src[computeOffset3D(ix0, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_0_1 = src[computeOffset3D(ix1, iy0, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_1_0 = src[computeOffset3D(ix0, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const sample_1_1 = src[computeOffset3D(ix1, iy1, iz0, slice_bytes, pitch_bytes, src_texel_size)..];
const pixel_0_0 = readFloat4(sample_0_0, state);
const pixel_0_1 = readFloat4(sample_0_1, state);
const pixel_1_0 = readFloat4(sample_1_0, state);
const pixel_1_1 = readFloat4(sample_1_1, state);
const fx = zm.f32x4s(fx0 - @as(f32, @floatFromInt(ix0)));
const fy = zm.f32x4s(fy0 - @as(f32, @floatFromInt(iy0)));
const ix = zm.f32x4s(1.0) - fx;
const iy = zm.f32x4s(1.0) - fy;
color = (pixel_0_0 * ix + pixel_0_1 * fx) * iy + (pixel_1_0 * ix + pixel_1_1 * fx) * fy;
}
} }
return applyScaleAndClamp(color, state); return applyScaleAndClamp(color, state);
@@ -310,8 +343,13 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
.dst_format = dst_format, .dst_format = dst_format,
.filter = filter, .filter = filter,
.allow_srgb_conversion = allow_srgb_conversion, .allow_srgb_conversion = allow_srgb_conversion,
.clamp_to_edge = false, .clamp_to_edge = src_offset_0.x < 0 or
src_offset_0.y < 0 or
@as(u32, @intCast(src_offset_1.x)) > src_extent.width or
@as(u32, @intCast(src_offset_1.y)) > src_extent.height or
(filter != .nearest and ((x0 < 0.5) or (y0 < 0.5))),
.dst_samples = dst.interface.samples.toInt(), .dst_samples = dst.interface.samples.toInt(),
.filter_3D = (src_offset_1.z - src_offset_0.z) != (dst_offset_1.z - dst_offset_0.z),
}; };
while (dst_subresource.array_layer <= last_layer) : ({ while (dst_subresource.array_layer <= last_layer) : ({
@@ -353,11 +391,6 @@ fn blit(state: State, data: BlitData) void {
const is_dst_int = base.format.isUint(state.dst_format) or base.format.isSint(state.dst_format); const is_dst_int = base.format.isUint(state.dst_format) or base.format.isSint(state.dst_format);
const are_both_int = is_src_int and is_dst_int; const are_both_int = is_src_int and is_dst_int;
if (are_both_int) {
base.unsupported("Blit of only integer type images are not supported yet", .{});
return;
}
for (@intCast(data.dst_offset_0.z)..@intCast(data.dst_offset_1.z)) |k| { for (@intCast(data.dst_offset_0.z)..@intCast(data.dst_offset_1.z)) |k| {
const z = data.pos[2] + @as(f32, @floatFromInt(k)) * data.depth_ratio; const z = data.pos[2] + @as(f32, @floatFromInt(k)) * data.depth_ratio;
var dst_slice = data.dst_map[(k * data.dst_slice_pitch_bytes)..]; var dst_slice = data.dst_map[(k * data.dst_slice_pitch_bytes)..];
@@ -371,16 +404,27 @@ fn blit(state: State, data: BlitData) void {
var dst_pixel = dst_line[(i * base.format.texelSize(state.dst_format))..]; var dst_pixel = dst_line[(i * base.format.texelSize(state.dst_format))..];
if (are_both_int) { if (are_both_int) {
// TODO var ix: usize = @intFromFloat(x);
var iy: usize = @intFromFloat(y);
var iz: usize = @intFromFloat(z);
if (state.clamp_to_edge) {
ix = std.math.clamp(ix, 0, @as(usize, @intFromFloat(data.dim[0])) - 1);
iy = std.math.clamp(iy, 0, @as(usize, @intFromFloat(data.dim[1])) - 1);
iz = std.math.clamp(iz, 0, @as(usize, @intFromFloat(data.dim[2])) - 1);
}
const src_map = data.src_map[computeOffset3D(ix, iy, iz, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, base.format.texelSize(state.src_format))..];
const color = readFloat4(src_map, state);
for (0..state.dst_samples) |_| {
writeFloat4(color, dst_pixel, state);
if (dst_pixel.len < data.dst_slice_pitch_bytes)
break;
dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
}
} else { } else {
const color = sample( const color = sample(data.src_map, .{ x, y, z, 0.0 }, data.dim, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, state);
data.src_map,
.{ x, y, z, 0.0 },
data.dim,
data.src_slice_pitch_bytes,
data.src_row_pitch_bytes,
state,
);
for (0..state.dst_samples) |_| { for (0..state.dst_samples) |_| {
writeFloat4(color, dst_pixel, state); writeFloat4(color, dst_pixel, state);
if (dst_pixel.len < data.dst_slice_pitch_bytes) if (dst_pixel.len < data.dst_slice_pitch_bytes)
@@ -396,11 +440,20 @@ fn blit(state: State, data: BlitData) void {
fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 { fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 {
var color: zm.F32x4 = base_color; var color: zm.F32x4 = base_color;
const unscale = base.format.getScale(state.src_format);
const scale = base.format.getScale(state.dst_format); const scale = base.format.getScale(state.dst_format);
if (std.simd.firstTrue(unscale != scale) != null) { if (base.format.isFloat(state.src_format) and !base.format.isFloat(state.dst_format)) {
color *= zm.f32x4(scale[0] / unscale[0], scale[1] / unscale[1], scale[2] / unscale[2], scale[3] / unscale[3]); color = @min(color, scale);
color = @max(color, zm.f32x4(
if (base.format.isUnsignedComponent(state.dst_format, 0)) 0.0 else -scale[0],
if (base.format.isUnsignedComponent(state.dst_format, 1)) 0.0 else -scale[1],
if (base.format.isUnsignedComponent(state.dst_format, 2)) 0.0 else -scale[2],
if (base.format.isUnsignedComponent(state.dst_format, 3)) 0.0 else -scale[3],
));
}
if (!base.format.isUnsigned(state.src_format) and base.format.isUnsigned(state.dst_format)) {
color = @max(color, zm.f32x4s(0.0));
} }
return color; return color;
@@ -422,7 +475,12 @@ fn readFloat4(map: []const u8, state: State) zm.F32x4 {
c[3] = @as(f32, @floatFromInt(map[3])) / 255.0; c[3] = @as(f32, @floatFromInt(map[3])) / 255.0;
}, },
else => base.unsupported("Blitter source format {any}", .{state.src_format}), .r32_uint => {
c[0] = std.mem.bytesToValue(f32, map);
c[3] = @as(f32, @floatFromInt(0xFFFFFFFF));
},
else => base.unsupported("Blitter: read from source format {any}", .{state.src_format}),
} }
return c; return c;
@@ -452,6 +510,8 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
map[2] = @intFromFloat(color[2] * 255.0); map[2] = @intFromFloat(color[2] * 255.0);
map[3] = @intFromFloat(color[3] * 255.0); map[3] = @intFromFloat(color[3] * 255.0);
}, },
else => base.unsupported("Blitter dstination format {any}", .{state.src_format}), .r32_sfloat => std.mem.bytesAsValue(f32, map).* = color[0],
.r32_uint => std.mem.bytesAsValue(u32, map).* = @intFromFloat(color[0]),
else => base.unsupported("Blitter: write to destination format {any}", .{state.dst_format}),
} }
} }
+171
View File
@@ -120,6 +120,10 @@ pub inline fn isUnorm(format: vk.Format) bool {
return lib.vku.vkuFormatIsUNORM(@intCast(@intFromEnum(format))); return lib.vku.vkuFormatIsUNORM(@intCast(@intFromEnum(format)));
} }
/// Returns true when `format` is reported as floating-point by either
/// `isSfloat` or `isUfloat`.
pub inline fn isFloat(format: vk.Format) bool {
    // Same short-circuit order as a plain `or`: `isUfloat` is only consulted
    // when `isSfloat` says no.
    if (isSfloat(format)) return true;
    return isUfloat(format);
}
pub fn getScale(format: vk.Format) zm.F32x4 { pub fn getScale(format: vk.Format) zm.F32x4 {
return switch (format) { return switch (format) {
.r4g4_unorm_pack8, .r4g4_unorm_pack8,
@@ -244,3 +248,170 @@ pub fn getScale(format: vk.Format) zm.F32x4 {
}, },
}; };
} }
/// Reports whether component `component` (0-based index) of `format` is
/// treated as unsigned.
///
/// - Formats in the first group are unsigned for every component.
/// - Formats in the second group are signed for every component.
/// - Signed formats with one, two or three stored components report the
///   stored components as signed and any higher component index as unsigned.
/// - Any format not listed calls `lib.unsupported` and yields `false`.
pub fn isUnsignedComponent(format: vk.Format, component: usize) bool {
    return switch (format) {
        // Unsigned for every component.
        .undefined,
        .r4g4_unorm_pack8,
        .r4g4b4a4_unorm_pack16,
        .b4g4r4a4_unorm_pack16,
        .a4r4g4b4_unorm_pack16,
        .a4b4g4r4_unorm_pack16,
        .r5g6b5_unorm_pack16,
        .b5g6r5_unorm_pack16,
        .r5g5b5a1_unorm_pack16,
        .b5g5r5a1_unorm_pack16,
        .a1r5g5b5_unorm_pack16,
        .r8_unorm,
        // r8_uscaled is an unsigned-scaled format; it belongs with the other
        // `*_uscaled` entries here rather than with the signed one-component
        // formats below.
        .r8_uscaled,
        .r8_uint,
        .r8_srgb,
        .r8g8_unorm,
        .r8g8_uscaled,
        .r8g8_uint,
        .r8g8_srgb,
        .r8g8b8a8_unorm,
        .r8g8b8a8_uscaled,
        .r8g8b8a8_uint,
        .r8g8b8a8_srgb,
        .b8g8r8a8_unorm,
        .b8g8r8a8_uscaled,
        .b8g8r8a8_uint,
        .b8g8r8a8_srgb,
        .a8b8g8r8_unorm_pack32,
        .a8b8g8r8_uscaled_pack32,
        .a8b8g8r8_uint_pack32,
        .a8b8g8r8_srgb_pack32,
        .a2r10g10b10_unorm_pack32,
        .a2r10g10b10_uscaled_pack32,
        .a2r10g10b10_uint_pack32,
        .a2b10g10r10_unorm_pack32,
        .a2b10g10r10_uscaled_pack32,
        .a2b10g10r10_uint_pack32,
        .r16_unorm,
        .r16_uscaled,
        .r16_uint,
        .r16g16_unorm,
        .r16g16_uscaled,
        .r16g16_uint,
        .r16g16b16_unorm,
        .r16g16b16_uscaled,
        .r16g16b16_uint,
        .r16g16b16a16_unorm,
        .r16g16b16a16_uscaled,
        .r16g16b16a16_uint,
        .r32_uint,
        .r32g32_uint,
        .r32g32b32_uint,
        .r32g32b32a32_uint,
        .r64_uint,
        .r64g64_uint,
        .r64g64b64_uint,
        .r64g64b64a64_uint,
        .b10g11r11_ufloat_pack32,
        .e5b9g9r9_ufloat_pack32,
        .d16_unorm,
        .x8_d24_unorm_pack32,
        .s8_uint,
        .d16_unorm_s8_uint,
        .d24_unorm_s8_uint,
        .d32_sfloat,
        .d32_sfloat_s8_uint,
        .bc1_rgb_unorm_block,
        .bc1_rgb_srgb_block,
        .bc1_rgba_unorm_block,
        .bc1_rgba_srgb_block,
        .bc2_unorm_block,
        .bc2_srgb_block,
        .bc3_unorm_block,
        .bc3_srgb_block,
        .bc4_unorm_block,
        .bc5_unorm_block,
        .bc6h_ufloat_block,
        .bc7_unorm_block,
        .bc7_srgb_block,
        .eac_r11_unorm_block,
        .eac_r11g11_unorm_block,
        .etc2_r8g8b8_unorm_block,
        .etc2_r8g8b8_srgb_block,
        .etc2_r8g8b8a1_unorm_block,
        .etc2_r8g8b8a1_srgb_block,
        .etc2_r8g8b8a8_unorm_block,
        .etc2_r8g8b8a8_srgb_block,
        => true,

        // Signed for every component.
        // NOTE(review): the multi-planar `*_unorm` formats at the end of this
        // group are reported as signed; confirm that this is intended.
        .r8g8b8a8_snorm,
        .r8g8b8a8_sscaled,
        .r8g8b8a8_sint,
        .b8g8r8a8_snorm,
        .b8g8r8a8_sscaled,
        .b8g8r8a8_sint,
        .a8b8g8r8_snorm_pack32,
        .a8b8g8r8_sscaled_pack32,
        .a8b8g8r8_sint_pack32,
        .a2r10g10b10_snorm_pack32,
        .a2r10g10b10_sscaled_pack32,
        .a2r10g10b10_sint_pack32,
        .a2b10g10r10_snorm_pack32,
        .a2b10g10r10_sscaled_pack32,
        .a2b10g10r10_sint_pack32,
        .r16g16b16a16_snorm,
        .r16g16b16a16_sscaled,
        .r16g16b16a16_sint,
        .r16g16b16a16_sfloat,
        .r32g32b32a32_sint,
        .r32g32b32a32_sfloat,
        .r64g64b64a64_sint,
        .r64g64b64a64_sfloat,
        .bc4_snorm_block,
        .bc5_snorm_block,
        .bc6h_sfloat_block,
        .eac_r11_snorm_block,
        .eac_r11g11_snorm_block,
        .g8_b8_r8_3plane_420_unorm,
        .g8_b8r8_2plane_420_unorm,
        .g10x6_b10x6r10x6_2plane_420_unorm_3pack16,
        => false,

        // One signed component: only indices >= 1 are reported unsigned.
        .r8_snorm,
        .r8_sscaled,
        .r8_sint,
        .r16_snorm,
        .r16_sscaled,
        .r16_sint,
        .r16_sfloat,
        .r32_sint,
        .r32_sfloat,
        .r64_sint,
        .r64_sfloat,
        => component >= 1,

        // Two signed components: only indices >= 2 are reported unsigned.
        .r8g8_snorm,
        .r8g8_sscaled,
        .r8g8_sint,
        .r16g16_snorm,
        .r16g16_sscaled,
        .r16g16_sint,
        .r16g16_sfloat,
        .r32g32_sint,
        .r32g32_sfloat,
        .r64g64_sint,
        .r64g64_sfloat,
        => component >= 2,

        // Three signed components: only indices >= 3 are reported unsigned.
        .r16g16b16_snorm,
        .r16g16b16_sscaled,
        .r16g16b16_sint,
        .r16g16b16_sfloat,
        .r32g32b32_sint,
        .r32g32b32_sfloat,
        .r64g64b64_sint,
        .r64g64b64_sfloat,
        => component >= 3,

        // Formats not listed above are reported through `lib.unsupported`
        // and treated as signed.
        else => blk: {
            lib.unsupported("Format unsigned component {any}", .{format});
            break :blk false;
        },
    };
}
/// Returns what `isUnsignedComponent` reports for component index 0 of
/// `format`.
pub inline fn isUnsigned(format: vk.Format) bool {
    const first_component: usize = 0;
    return isUnsignedComponent(format, first_component);
}