improving blitter
Build / build (push) Successful in 50s
Test / build_and_test (push) Successful in 41m27s

This commit is contained in:
2026-04-22 15:48:03 +02:00
parent 9b7728503b
commit 54ffc3f50d
2 changed files with 169 additions and 41 deletions
+24 -12
View File
@@ -158,12 +158,18 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
for (0..layer_count) |_| { for (0..layer_count) |_| {
if (is_single_row) { if (is_single_row) {
const copy_size = region.extent.width * bytes_per_block; const copy_size = region.extent.width * bytes_per_block;
if (dst_map.len < copy_size or src_map.len < copy_size)
break;
@memcpy(dst_map[0..copy_size], src_map[0..copy_size]); @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
} else if (is_entire_row and is_single_slice) { } else if (is_entire_row and is_single_slice) {
const copy_size = region.extent.height * src_row_pitch_bytes; const copy_size = region.extent.height * src_row_pitch_bytes;
if (dst_map.len < copy_size or src_map.len < copy_size)
break;
@memcpy(dst_map[0..copy_size], src_map[0..copy_size]); @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
} else if (is_entire_slice) { } else if (is_entire_slice) {
const copy_size = slice_count * src_depth_pitch_bytes; const copy_size = slice_count * src_depth_pitch_bytes;
if (dst_map.len < copy_size or src_map.len < copy_size)
break;
@memcpy(dst_map[0..copy_size], src_map[0..copy_size]); @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
} else if (is_entire_row) { } else if (is_entire_row) {
const slice_size = region.extent.height * src_row_pitch_bytes; const slice_size = region.extent.height * src_row_pitch_bytes;
@@ -171,9 +177,11 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
var dst_slice_memory = dst_map[0..]; var dst_slice_memory = dst_map[0..];
for (0..slice_count) |_| { for (0..slice_count) |_| {
if (dst_slice_memory.len < slice_size or src_slice_memory.len < slice_size)
break;
@memcpy(dst_slice_memory[0..slice_size], src_slice_memory[0..slice_size]); @memcpy(dst_slice_memory[0..slice_size], src_slice_memory[0..slice_size]);
src_slice_memory = src_slice_memory[src_depth_pitch_bytes..]; src_slice_memory = if (src_slice_memory.len < src_depth_pitch_bytes) break else src_slice_memory[src_depth_pitch_bytes..];
dst_slice_memory = dst_slice_memory[dst_depth_pitch_bytes..]; dst_slice_memory = if (dst_slice_memory.len < dst_depth_pitch_bytes) break else dst_slice_memory[dst_depth_pitch_bytes..];
} }
} else { } else {
const row_size = region.extent.width * bytes_per_block; const row_size = region.extent.width * bytes_per_block;
@@ -185,15 +193,17 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
var dst_row_memory = dst_slice_memory[0..]; var dst_row_memory = dst_slice_memory[0..];
for (0..region.extent.height) |_| { for (0..region.extent.height) |_| {
if (dst_row_memory.len < row_size or src_row_memory.len < row_size)
break;
@memcpy(dst_row_memory[0..row_size], src_row_memory[0..row_size]); @memcpy(dst_row_memory[0..row_size], src_row_memory[0..row_size]);
src_row_memory = src_row_memory[src_row_pitch_bytes..]; src_row_memory = if (src_row_memory.len < src_row_pitch_bytes) break else src_row_memory[src_row_pitch_bytes..];
dst_row_memory = dst_row_memory[dst_row_pitch_bytes..]; dst_row_memory = if (dst_row_memory.len < dst_row_pitch_bytes) break else dst_row_memory[dst_row_pitch_bytes..];
} }
} }
} }
src_map = src_map[src_layer_pitch..]; src_map = if (src_map.len < src_layer_pitch) break else src_map[src_layer_pitch..];
dst_map = dst_map[dst_layer_pitch..]; dst_map = if (dst_map.len < dst_layer_pitch) break else dst_map[dst_layer_pitch..];
} }
} }
@@ -292,15 +302,17 @@ pub fn copy(
var dst_slice_memory = dst_layer_memory[0..]; var dst_slice_memory = dst_layer_memory[0..];
for (0..image_extent.height) |_| { for (0..image_extent.height) |_| {
if (dst_slice_memory.len < copy_size or src_slice_memory.len < copy_size)
break;
@memcpy(dst_slice_memory[0..copy_size], src_slice_memory[0..copy_size]); @memcpy(dst_slice_memory[0..copy_size], src_slice_memory[0..copy_size]);
src_slice_memory = src_slice_memory[src_row_pitch_bytes..]; src_slice_memory = if (src_slice_memory.len < src_row_pitch_bytes) break else src_slice_memory[src_row_pitch_bytes..];
dst_slice_memory = dst_slice_memory[dst_row_pitch_bytes..]; dst_slice_memory = if (dst_slice_memory.len < dst_row_pitch_bytes) break else dst_slice_memory[dst_row_pitch_bytes..];
} }
src_layer_memory = src_layer_memory[src_slice_pitch_bytes..]; src_layer_memory = if (src_layer_memory.len < src_slice_pitch_bytes) break else src_layer_memory[src_slice_pitch_bytes..];
dst_layer_memory = dst_layer_memory[dst_slice_pitch_bytes..]; dst_layer_memory = if (dst_layer_memory.len < dst_slice_pitch_bytes) break else dst_layer_memory[dst_slice_pitch_bytes..];
} }
src_memory = src_memory[src_layer_size..]; src_memory = if (src_memory.len < src_layer_size) break else src_memory[src_layer_size..];
dst_memory = dst_memory[dst_layer_size..]; dst_memory = if (dst_memory.len < dst_layer_size) break else dst_memory[dst_layer_size..];
} }
} }
+145 -29
View File
@@ -10,12 +10,16 @@ const VkError = base.VkError;
pub const SoftImage = @import("../SoftImage.zig"); pub const SoftImage = @import("../SoftImage.zig");
pub const SoftImageView = @import("../SoftImageView.zig"); pub const SoftImageView = @import("../SoftImageView.zig");
const F32x4 = zm.F32x4;
const U32x4 = @Vector(4, u32);
const State = struct { const State = struct {
src_format: vk.Format, src_format: vk.Format,
dst_format: vk.Format, dst_format: vk.Format,
filter: vk.Filter, filter: vk.Filter,
allow_srgb_conversion: bool, allow_srgb_conversion: bool,
clamp_to_edge: bool, clamp_to_edge: bool,
src_samples: usize,
dst_samples: usize, dst_samples: usize,
filter_3D: bool, filter_3D: bool,
}; };
@@ -29,8 +33,8 @@ const BlitData = struct {
dst_slice_pitch_bytes: usize, dst_slice_pitch_bytes: usize,
dst_row_pitch_bytes: usize, dst_row_pitch_bytes: usize,
pos: zm.F32x4, pos: F32x4,
dim: zm.F32x4, dim: F32x4,
dst_offset_0: vk.Offset3D, dst_offset_0: vk.Offset3D,
dst_offset_1: vk.Offset3D, dst_offset_1: vk.Offset3D,
@@ -158,21 +162,21 @@ fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImag
4 => for (0..@intCast(area.extent.height)) |_| { 4 => for (0..@intCast(area.extent.height)) |_| {
var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel); var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel);
@memset(dst_pixel_4bytes[0..area.extent.width], pack); @memset(dst_pixel_4bytes[0..area.extent.width], pack);
dst_pixel = dst_pixel[dst_row_pitch_bytes..]; dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
}, },
2 => for (0..@intCast(area.extent.height)) |_| { 2 => for (0..@intCast(area.extent.height)) |_| {
var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel); var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel);
@memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack))); @memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack)));
dst_pixel = dst_pixel[dst_row_pitch_bytes..]; dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
}, },
1 => for (0..@intCast(area.extent.height)) |_| { 1 => for (0..@intCast(area.extent.height)) |_| {
@memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack))); @memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack)));
dst_pixel = dst_pixel[dst_row_pitch_bytes..]; dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
}, },
else => unreachable, else => unreachable,
} }
dst_map = dst_map[dst_slice_pitch_bytes..]; dst_map = if (dst_map.len < dst_slice_pitch_bytes) break else dst_map[dst_slice_pitch_bytes..];
} }
} }
} }
@@ -181,8 +185,8 @@ fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImag
return true; return true;
} }
fn sample(src: []const u8, pos: zm.F32x4, dim: zm.F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) zm.F32x4 { fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) F32x4 {
var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; var color: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
const src_texel_size = base.format.texelSize(state.src_format); const src_texel_size = base.format.texelSize(state.src_format);
if (state.filter == .nearest or base.format.isUint(state.src_format)) { if (state.filter == .nearest or base.format.isUint(state.src_format)) {
@@ -348,6 +352,7 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
@as(u32, @intCast(src_offset_1.x)) > src_extent.width or @as(u32, @intCast(src_offset_1.x)) > src_extent.width or
@as(u32, @intCast(src_offset_1.y)) > src_extent.height or @as(u32, @intCast(src_offset_1.y)) > src_extent.height or
(filter != .nearest and ((x0 < 0.5) or (y0 < 0.5))), (filter != .nearest and ((x0 < 0.5) or (y0 < 0.5))),
.src_samples = src.interface.samples.toInt(),
.dst_samples = dst.interface.samples.toInt(), .dst_samples = dst.interface.samples.toInt(),
.filter_3D = (src_offset_1.z - src_offset_0.z) != (dst_offset_1.z - dst_offset_0.z), .filter_3D = (src_offset_1.z - src_offset_0.z) != (dst_offset_1.z - dst_offset_0.z),
}; };
@@ -416,20 +421,16 @@ fn blit(state: State, data: BlitData) void {
const src_map = data.src_map[computeOffset3D(ix, iy, iz, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, base.format.texelSize(state.src_format))..]; const src_map = data.src_map[computeOffset3D(ix, iy, iz, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, base.format.texelSize(state.src_format))..];
const color = readFloat4(src_map, state); const color = readInt4(src_map, state);
for (0..state.dst_samples) |_| { for (0..state.dst_samples) |_| {
writeFloat4(color, dst_pixel, state); writeInt4(color, dst_pixel, state);
if (dst_pixel.len < data.dst_slice_pitch_bytes) dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..];
break;
dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
} }
} else { } else {
const color = sample(data.src_map, .{ x, y, z, 0.0 }, data.dim, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, state); const color = sample(data.src_map, .{ x, y, z, 0.0 }, data.dim, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, state);
for (0..state.dst_samples) |_| { for (0..state.dst_samples) |_| {
writeFloat4(color, dst_pixel, state); writeFloat4(color, dst_pixel, state);
if (dst_pixel.len < data.dst_slice_pitch_bytes) dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..];
break;
dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
} }
} }
} }
@@ -437,8 +438,8 @@ fn blit(state: State, data: BlitData) void {
} }
} }
fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 { fn applyScaleAndClamp(base_color: F32x4, state: State) F32x4 {
var color: zm.F32x4 = base_color; var color: F32x4 = base_color;
const scale = base.format.getScale(state.dst_format); const scale = base.format.getScale(state.dst_format);
@@ -459,10 +460,18 @@ fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 {
return color; return color;
} }
fn readFloat4(map: []const u8, state: State) zm.F32x4 { fn readFloat4(map: []const u8, state: State) F32x4 {
var c: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; var c: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
switch (state.src_format) { switch (state.src_format) {
.r8_snorm,
.r8_unorm,
=> c[0] = @as(f32, @floatFromInt(map[0])) / 255.0,
.r16_snorm,
.r16_unorm,
=> c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map))) / 255.0,
.r8g8b8a8_sint, .r8g8b8a8_sint,
.r8g8b8a8_snorm, .r8g8b8a8_snorm,
.r8g8b8a8_unorm, .r8g8b8a8_unorm,
@@ -475,19 +484,42 @@ fn readFloat4(map: []const u8, state: State) zm.F32x4 {
c[3] = @as(f32, @floatFromInt(map[3])) / 255.0; c[3] = @as(f32, @floatFromInt(map[3])) / 255.0;
}, },
.r32_uint => { .r16_sint,
c[0] = std.mem.bytesToValue(f32, map); .r16_uint,
c[3] = @as(f32, @floatFromInt(0xFFFFFFFF)); => c[0] = @floatFromInt(std.mem.bytesToValue(u16, map)),
},
else => base.unsupported("Blitter: read from source format {any}", .{state.src_format}), .r32_sint,
.r32_uint,
=> c[0] = @floatFromInt(std.mem.bytesToValue(u32, map)),
.r32_sfloat => c[0] = std.mem.bytesToValue(f32, map),
.r32g32b32a32_sfloat => c = std.mem.bytesToValue(F32x4, map),
else => base.unsupported("Blitter: read float from source format {any}", .{state.src_format}),
} }
return c; return c;
} }
fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void { fn writeFloat4(color: F32x4, map: []u8, state: State) void {
switch (state.dst_format) { switch (state.dst_format) {
.r8_snorm,
.r8_unorm,
=> map[0] = @intFromFloat(color[0] * 255.0),
.r16_sint,
.r16_uint,
=> std.mem.bytesAsValue(u16, map).* = @intFromFloat(color[0]),
.r16_sfloat => std.mem.bytesAsValue(f16, map).* = @floatCast(color[0]),
.r32_sint,
.r32_uint,
=> std.mem.bytesAsValue(u32, map).* = @intFromFloat(color[0]),
.r32_sfloat => std.mem.bytesAsValue(f32, map).* = color[0],
.b8g8r8a8_srgb, .b8g8r8a8_srgb,
.b8g8r8a8_unorm, .b8g8r8a8_unorm,
=> { => {
@@ -496,6 +528,7 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
map[2] = @intFromFloat(color[0] * 255.0); map[2] = @intFromFloat(color[0] * 255.0);
map[3] = @intFromFloat(color[3] * 255.0); map[3] = @intFromFloat(color[3] * 255.0);
}, },
.a8b8g8r8_unorm_pack32, .a8b8g8r8_unorm_pack32,
.r8g8b8a8_unorm, .r8g8b8a8_unorm,
.a8b8g8r8_srgb_pack32, .a8b8g8r8_srgb_pack32,
@@ -510,8 +543,91 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
map[2] = @intFromFloat(color[2] * 255.0); map[2] = @intFromFloat(color[2] * 255.0);
map[3] = @intFromFloat(color[3] * 255.0); map[3] = @intFromFloat(color[3] * 255.0);
}, },
.r32_sfloat => std.mem.bytesAsValue(f32, map).* = color[0],
.r32_uint => std.mem.bytesAsValue(u32, map).* = @intFromFloat(color[0]), .r32g32b32a32_sfloat => std.mem.bytesAsValue(F32x4, map).* = color,
else => base.unsupported("Blitter: write to destination format {any}", .{state.dst_format}),
else => base.unsupported("Blitter: write float to destination format {any}", .{state.dst_format}),
}
}
/// Reinterprets a signed integer component as two's complement and widens it
/// to 32 bits, returning the resulting bit pattern as a `u32`.
fn signExtendToU32(value: anytype) u32 {
    return @bitCast(@as(i32, value));
}

/// Reads one texel of an integer source format from the start of `map` and
/// returns its components widened to 32 bits each.
///
/// Components that the source format does not provide keep their defaults
/// of (0, 0, 0, 1). Components of `*_uint` formats are zero-extended, while
/// components of `*_sint` formats are sign-extended so that negative values
/// keep their meaning when the destination format is wider than the source.
/// Truncating the result back to the source width yields the original bits,
/// so same-width copies are unaffected by the extension.
///
/// Multi-byte components are read in native byte order. Formats without a
/// dedicated prong are reported through `base.unsupported`.
fn readInt4(map: []const u8, state: State) U32x4 {
    // Integer literals: the carrier is an integer vector, not a float one.
    var c: U32x4 = .{ 0, 0, 0, 1 };

    switch (state.src_format) {
        .r8_uint => c[0] = map[0],
        .r8_sint => c[0] = signExtendToU32(@as(i8, @bitCast(map[0]))),

        .r16_uint => c[0] = std.mem.bytesToValue(u16, map),
        .r16_sint => c[0] = signExtendToU32(std.mem.bytesToValue(i16, map)),

        // Already 32 bits wide: the bit pattern is carried over unchanged.
        .r32_sint,
        .r32_uint,
        => c[0] = std.mem.bytesToValue(u32, map),

        .r8g8b8a8_uint => inline for (0..4) |i| {
            c[i] = map[i];
        },
        .r8g8b8a8_sint => inline for (0..4) |i| {
            c[i] = signExtendToU32(@as(i8, @bitCast(map[i])));
        },

        .r16g16b16a16_uint => inline for (0..4) |i| {
            c[i] = std.mem.bytesToValue(u16, map[2 * i ..][0..2]);
        },
        .r16g16b16a16_sint => inline for (0..4) |i| {
            c[i] = signExtendToU32(std.mem.bytesToValue(i16, map[2 * i ..][0..2]));
        },

        .r32g32b32a32_sint,
        .r32g32b32a32_uint,
        => c = std.mem.bytesToValue(U32x4, map),

        else => base.unsupported("Blitter: read int from source format {any}", .{state.src_format}),
    }
    return c;
}
/// Stores the integer texel `color` at the start of `map`, encoded in the
/// destination format given by `state.dst_format`.
///
/// Only the components present in the destination format are written. Each
/// one is narrowed to the component width with `@truncate`, so the high bits
/// are discarded rather than clamped. Multi-byte components are written in
/// native byte order. Formats without a dedicated prong are reported through
/// `base.unsupported`.
fn writeInt4(color: U32x4, map: []u8, state: State) void {
    switch (state.dst_format) {
        // Single-component formats: only the red channel is stored.
        .r8_sint, .r8_uint => map[0] = @truncate(color[0]),
        .r16_sint, .r16_uint => std.mem.bytesAsValue(u16, map).* = @truncate(color[0]),
        .r32_sint, .r32_uint => std.mem.bytesAsValue(u32, map).* = color[0],

        // Four-component formats: channels are laid out back to back.
        .r8g8b8a8_sint, .r8g8b8a8_uint => inline for (0..4) |channel| {
            map[channel] = @truncate(color[channel]);
        },
        .r16g16b16a16_sint, .r16g16b16a16_uint => inline for (0..4) |channel| {
            std.mem.bytesAsValue(u16, map[2 * channel ..][0..2]).* = @truncate(color[channel]);
        },
        .r32g32b32a32_sint, .r32g32b32a32_uint => std.mem.bytesAsValue(U32x4, map).* = color,

        else => base.unsupported("Blitter: write int to destination format {any}", .{state.dst_format}),
    }
}