From 54ffc3f50d48227447bed5805e7b4164e64913ba Mon Sep 17 00:00:00 2001
From: Kbz-8
Date: Wed, 22 Apr 2026 15:48:03 +0200
Subject: [PATCH] improving blitter

---
Review notes (placed after the `---` separator, so they are not part of the
commit message):

- SoftImage.zig: every slice advance in copyToImageSingleAspect() and copy()
  is now guarded by a length check that leaves the enclosing loop instead of
  slicing past the end of the mapping.
- blitter.zig: adds the F32x4 / U32x4 aliases, State.src_samples, the integer
  readInt4() / writeInt4() pair, and more single-channel formats in
  readFloat4() / writeFloat4().
- readFloat4(): 16-bit normalized texels are divided by 65535.0. Dividing a
  u16 by 255.0 yields values up to 257.0 rather than 1.0.
- readInt4(): the U32x4 accumulator is initialised with integer literals.
- NOTE(review): the added *_snorm and *_sint cases still go through unsigned
  integer types (u8 / u16 / u32) without sign handling -- confirm this is
  intended.
- NOTE(review): blit() now calls readInt4() / writeInt4() on the path that
  used readFloat4() / writeFloat4(); those helpers only accept *_sint and
  *_uint formats -- confirm that path is never taken for normalized or float
  formats.

 src/soft/SoftImage.zig      |  36 +++++---
 src/soft/device/blitter.zig | 174 ++++++++++++++++++++++++++++++------
 2 files changed, 169 insertions(+), 41 deletions(-)

diff --git a/src/soft/SoftImage.zig b/src/soft/SoftImage.zig
index 99f932d..b7417ee 100644
--- a/src/soft/SoftImage.zig
+++ b/src/soft/SoftImage.zig
@@ -158,12 +158,18 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
     for (0..layer_count) |_| {
         if (is_single_row) {
             const copy_size = region.extent.width * bytes_per_block;
+            if (dst_map.len < copy_size or src_map.len < copy_size)
+                break;
             @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
         } else if (is_entire_row and is_single_slice) {
             const copy_size = region.extent.height * src_row_pitch_bytes;
+            if (dst_map.len < copy_size or src_map.len < copy_size)
+                break;
             @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
         } else if (is_entire_slice) {
             const copy_size = slice_count * src_depth_pitch_bytes;
+            if (dst_map.len < copy_size or src_map.len < copy_size)
+                break;
             @memcpy(dst_map[0..copy_size], src_map[0..copy_size]);
         } else if (is_entire_row) {
             const slice_size = region.extent.height * src_row_pitch_bytes;
@@ -171,9 +177,11 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
             var dst_slice_memory = dst_map[0..];
 
             for (0..slice_count) |_| {
+                if (dst_slice_memory.len < slice_size or src_slice_memory.len < slice_size)
+                    break;
                 @memcpy(dst_slice_memory[0..slice_size], src_slice_memory[0..slice_size]);
-                src_slice_memory = src_slice_memory[src_depth_pitch_bytes..];
-                dst_slice_memory = dst_slice_memory[dst_depth_pitch_bytes..];
+                src_slice_memory = if (src_slice_memory.len < src_depth_pitch_bytes) break else src_slice_memory[src_depth_pitch_bytes..];
+                dst_slice_memory = if (dst_slice_memory.len < dst_depth_pitch_bytes) break else dst_slice_memory[dst_depth_pitch_bytes..];
             }
         } else {
             const row_size = region.extent.width * bytes_per_block;
@@ -185,15 +193,17 @@ pub fn copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo
                 var dst_row_memory = dst_slice_memory[0..];
 
                 for (0..region.extent.height) |_| {
+                    if (dst_row_memory.len < row_size or src_row_memory.len < row_size)
+                        break;
                     @memcpy(dst_row_memory[0..row_size], src_row_memory[0..row_size]);
-                    src_row_memory = src_row_memory[src_row_pitch_bytes..];
-                    dst_row_memory = dst_row_memory[dst_row_pitch_bytes..];
+                    src_row_memory = if (src_row_memory.len < src_row_pitch_bytes) break else src_row_memory[src_row_pitch_bytes..];
+                    dst_row_memory = if (dst_row_memory.len < dst_row_pitch_bytes) break else dst_row_memory[dst_row_pitch_bytes..];
                 }
             }
         }
 
-        src_map = src_map[src_layer_pitch..];
-        dst_map = dst_map[dst_layer_pitch..];
+        src_map = if (src_map.len < src_layer_pitch) break else src_map[src_layer_pitch..];
+        dst_map = if (dst_map.len < dst_layer_pitch) break else dst_map[dst_layer_pitch..];
     }
 }
 
@@ -292,15 +302,17 @@ pub fn copy(
             var dst_slice_memory = dst_layer_memory[0..];
 
             for (0..image_extent.height) |_| {
+                if (dst_slice_memory.len < copy_size or src_slice_memory.len < copy_size)
+                    break;
                 @memcpy(dst_slice_memory[0..copy_size], src_slice_memory[0..copy_size]);
-                src_slice_memory = src_slice_memory[src_row_pitch_bytes..];
-                dst_slice_memory = dst_slice_memory[dst_row_pitch_bytes..];
+                src_slice_memory = if (src_slice_memory.len < src_row_pitch_bytes) break else src_slice_memory[src_row_pitch_bytes..];
+                dst_slice_memory = if (dst_slice_memory.len < dst_row_pitch_bytes) break else dst_slice_memory[dst_row_pitch_bytes..];
             }
-            src_layer_memory = src_layer_memory[src_slice_pitch_bytes..];
-            dst_layer_memory = dst_layer_memory[dst_slice_pitch_bytes..];
+            src_layer_memory = if (src_layer_memory.len < src_slice_pitch_bytes) break else src_layer_memory[src_slice_pitch_bytes..];
+            dst_layer_memory = if (dst_layer_memory.len < dst_slice_pitch_bytes) break else dst_layer_memory[dst_slice_pitch_bytes..];
         }
-        src_memory = src_memory[src_layer_size..];
-        dst_memory = dst_memory[dst_layer_size..];
+        src_memory = if (src_memory.len < src_layer_size) break else src_memory[src_layer_size..];
+        dst_memory = if (dst_memory.len < dst_layer_size) break else dst_memory[dst_layer_size..];
     }
 }
 
diff --git a/src/soft/device/blitter.zig b/src/soft/device/blitter.zig
index 772f745..234be5d 100644
--- a/src/soft/device/blitter.zig
+++ b/src/soft/device/blitter.zig
@@ -10,12 +10,16 @@ const VkError = base.VkError;
 pub const SoftImage = @import("../SoftImage.zig");
 pub const SoftImageView = @import("../SoftImageView.zig");
 
+const F32x4 = zm.F32x4;
+const U32x4 = @Vector(4, u32);
+
 const State = struct {
     src_format: vk.Format,
     dst_format: vk.Format,
     filter: vk.Filter,
     allow_srgb_conversion: bool,
     clamp_to_edge: bool,
+    src_samples: usize,
     dst_samples: usize,
     filter_3D: bool,
 };
@@ -29,8 +33,8 @@ const BlitData = struct {
     dst_slice_pitch_bytes: usize,
     dst_row_pitch_bytes: usize,
 
-    pos: zm.F32x4,
-    dim: zm.F32x4,
+    pos: F32x4,
+    dim: F32x4,
 
     dst_offset_0: vk.Offset3D,
     dst_offset_1: vk.Offset3D,
@@ -158,21 +162,21 @@ fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImag
                         4 => for (0..@intCast(area.extent.height)) |_| {
                             var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel);
                             @memset(dst_pixel_4bytes[0..area.extent.width], pack);
-                            dst_pixel = dst_pixel[dst_row_pitch_bytes..];
+                            dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
                         },
                         2 => for (0..@intCast(area.extent.height)) |_| {
                             var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel);
                             @memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack)));
-                            dst_pixel = dst_pixel[dst_row_pitch_bytes..];
+                            dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
                         },
                         1 => for (0..@intCast(area.extent.height)) |_| {
                             @memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack)));
-                            dst_pixel = dst_pixel[dst_row_pitch_bytes..];
+                            dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
                         },
                         else => unreachable,
                     }
 
-                    dst_map = dst_map[dst_slice_pitch_bytes..];
+                    dst_map = if (dst_map.len < dst_slice_pitch_bytes) break else dst_map[dst_slice_pitch_bytes..];
                 }
             }
         }
@@ -181,8 +185,8 @@ fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImag
     return true;
 }
 
-fn sample(src: []const u8, pos: zm.F32x4, dim: zm.F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) zm.F32x4 {
-    var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
+fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) F32x4 {
+    var color: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
 
     const src_texel_size = base.format.texelSize(state.src_format);
     if (state.filter == .nearest or base.format.isUint(state.src_format)) {
@@ -348,6 +352,7 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit,
             @as(u32, @intCast(src_offset_1.x)) > src_extent.width or
             @as(u32, @intCast(src_offset_1.y)) > src_extent.height or
             (filter != .nearest and ((x0 < 0.5) or (y0 < 0.5))),
+        .src_samples = src.interface.samples.toInt(),
         .dst_samples = dst.interface.samples.toInt(),
         .filter_3D = (src_offset_1.z - src_offset_0.z) != (dst_offset_1.z - dst_offset_0.z),
     };
@@ -416,20 +421,16 @@ fn blit(state: State, data: BlitData) void {
 
                     const src_map = data.src_map[computeOffset3D(ix, iy, iz, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, base.format.texelSize(state.src_format))..];
 
-                    const color = readFloat4(src_map, state);
+                    const color = readInt4(src_map, state);
                     for (0..state.dst_samples) |_| {
-                        writeFloat4(color, dst_pixel, state);
-                        if (dst_pixel.len < data.dst_slice_pitch_bytes)
-                            break;
-                        dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
+                        writeInt4(color, dst_pixel, state);
+                        dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..];
                     }
                 } else {
                     const color = sample(data.src_map, .{ x, y, z, 0.0 }, data.dim, data.src_slice_pitch_bytes, data.src_row_pitch_bytes, state);
                     for (0..state.dst_samples) |_| {
                         writeFloat4(color, dst_pixel, state);
-                        if (dst_pixel.len < data.dst_slice_pitch_bytes)
-                            break;
-                        dst_pixel = dst_pixel[data.dst_slice_pitch_bytes..];
+                        dst_pixel = if (dst_pixel.len < data.dst_slice_pitch_bytes) break else dst_pixel[data.dst_slice_pitch_bytes..];
                     }
                 }
             }
@@ -437,8 +438,8 @@ fn blit(state: State, data: BlitData) void {
     }
 }
 
-fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 {
-    var color: zm.F32x4 = base_color;
+fn applyScaleAndClamp(base_color: F32x4, state: State) F32x4 {
+    var color: F32x4 = base_color;
 
     const scale = base.format.getScale(state.dst_format);
 
@@ -459,10 +460,18 @@ fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 {
     return color;
 }
 
-fn readFloat4(map: []const u8, state: State) zm.F32x4 {
-    var c: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
+fn readFloat4(map: []const u8, state: State) F32x4 {
+    var c: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
 
     switch (state.src_format) {
+        .r8_snorm,
+        .r8_unorm,
+        => c[0] = @as(f32, @floatFromInt(map[0])) / 255.0,
+
+        .r16_snorm,
+        .r16_unorm,
+        => c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map))) / 65535.0,
+
         .r8g8b8a8_sint,
         .r8g8b8a8_snorm,
         .r8g8b8a8_unorm,
@@ -475,19 +484,42 @@ fn readFloat4(map: []const u8, state: State) zm.F32x4 {
             c[3] = @as(f32, @floatFromInt(map[3])) / 255.0;
         },
 
-        .r32_uint => {
-            c[0] = std.mem.bytesToValue(f32, map);
-            c[3] = @as(f32, @floatFromInt(0xFFFFFFFF));
-        },
+        .r16_sint,
+        .r16_uint,
+        => c[0] = @floatFromInt(std.mem.bytesToValue(u16, map)),
 
-        else => base.unsupported("Blitter: read from source format {any}", .{state.src_format}),
+        .r32_sint,
+        .r32_uint,
+        => c[0] = @floatFromInt(std.mem.bytesToValue(u32, map)),
+
+        .r32_sfloat => c[0] = std.mem.bytesToValue(f32, map),
+
+        .r32g32b32a32_sfloat => c = std.mem.bytesToValue(F32x4, map),
+
+        else => base.unsupported("Blitter: read float from source format {any}", .{state.src_format}),
     }
 
     return c;
 }
 
-fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
+fn writeFloat4(color: F32x4, map: []u8, state: State) void {
     switch (state.dst_format) {
+        .r8_snorm,
+        .r8_unorm,
+        => map[0] = @intFromFloat(color[0] * 255.0),
+
+        .r16_sint,
+        .r16_uint,
+        => std.mem.bytesAsValue(u16, map).* = @intFromFloat(color[0]),
+
+        .r16_sfloat => std.mem.bytesAsValue(f16, map).* = @floatCast(color[0]),
+
+        .r32_sint,
+        .r32_uint,
+        => std.mem.bytesAsValue(u32, map).* = @intFromFloat(color[0]),
+
+        .r32_sfloat => std.mem.bytesAsValue(f32, map).* = color[0],
+
         .b8g8r8a8_srgb,
         .b8g8r8a8_unorm,
         => {
@@ -496,6 +528,7 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
             map[2] = @intFromFloat(color[0] * 255.0);
             map[3] = @intFromFloat(color[3] * 255.0);
         },
+
         .a8b8g8r8_unorm_pack32,
         .r8g8b8a8_unorm,
         .a8b8g8r8_srgb_pack32,
@@ -510,8 +543,91 @@ fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void {
             map[2] = @intFromFloat(color[2] * 255.0);
             map[3] = @intFromFloat(color[3] * 255.0);
         },
-        .r32_sfloat => std.mem.bytesAsValue(f32, map).* = color[0],
-        .r32_uint => std.mem.bytesAsValue(u32, map).* = @intFromFloat(color[0]),
-        else => base.unsupported("Blitter: write to destination format {any}", .{state.dst_format}),
+
+        .r32g32b32a32_sfloat => std.mem.bytesAsValue(F32x4, map).* = color,
+
+        else => base.unsupported("Blitter: write float to destination format {any}", .{state.dst_format}),
+    }
+}
+
+fn readInt4(map: []const u8, state: State) U32x4 {
+    var c: U32x4 = .{ 0, 0, 0, 1 };
+
+    switch (state.src_format) {
+        .r8_sint,
+        .r8_uint,
+        => c[0] = map[0],
+        .r16_sint,
+        .r16_uint,
+        => c[0] = std.mem.bytesToValue(u16, map),
+        .r32_sint,
+        .r32_uint,
+        => c[0] = std.mem.bytesToValue(u32, map),
+
+        .r8g8b8a8_sint,
+        .r8g8b8a8_uint,
+        => {
+            c[0] = map[0];
+            c[1] = map[1];
+            c[2] = map[2];
+            c[3] = map[3];
+        },
+
+        .r16g16b16a16_sint,
+        .r16g16b16a16_uint,
+        => {
+            c[0] = std.mem.bytesToValue(u16, map[0..2]);
+            c[1] = std.mem.bytesToValue(u16, map[2..4]);
+            c[2] = std.mem.bytesToValue(u16, map[4..6]);
+            c[3] = std.mem.bytesToValue(u16, map[6..8]);
+        },
+
+        .r32g32b32a32_sint,
+        .r32g32b32a32_uint,
+        => c = std.mem.bytesToValue(U32x4, map),
+
+        else => base.unsupported("Blitter: read int from source format {any}", .{state.src_format}),
+    }
+
+    return c;
+}
+
+fn writeInt4(color: U32x4, map: []u8, state: State) void {
+    switch (state.dst_format) {
+        .r8_sint,
+        .r8_uint,
+        => map[0] = @truncate(color[0]),
+
+        .r16_sint,
+        .r16_uint,
+        => std.mem.bytesAsValue(u16, map).* = @truncate(color[0]),
+
+        .r32_sint,
+        .r32_uint,
+        => std.mem.bytesAsValue(u32, map).* = color[0],
+
+        .r8g8b8a8_sint,
+        .r8g8b8a8_uint,
+        => {
+            map[0] = @truncate(color[0]);
+            map[1] = @truncate(color[1]);
+            map[2] = @truncate(color[2]);
+            map[3] = @truncate(color[3]);
+        },
+
+        .r16g16b16a16_sint,
+        .r16g16b16a16_uint,
+        => {
+            std.mem.bytesAsValue(u16, map[0..2]).* = @truncate(color[0]);
+            std.mem.bytesAsValue(u16, map[2..4]).* = @truncate(color[1]);
+            std.mem.bytesAsValue(u16, map[4..6]).* = @truncate(color[2]);
+            std.mem.bytesAsValue(u16, map[6..8]).* = @truncate(color[3]);
+        },
+
+        .r32g32b32a32_sint,
+        .r32g32b32a32_uint,
+        => std.mem.bytesAsValue(U32x4, map).* = color,
+
+        else => base.unsupported("Blitter: write int to destination format {any}", .{state.dst_format}),
     }
 }