improving image clear
Test / build_and_test (push) Successful in 24s
Build / build (push) Successful in 56s

This commit is contained in:
2026-05-26 01:25:05 +02:00
parent 5a91956939
commit ae2bdd03a0
4 changed files with 182 additions and 178 deletions
+128 -120
View File
@@ -1,5 +1,3 @@
//! This software blitter is highly inspired by SwiftShaders one
const std = @import("std");
const vk = @import("vulkan");
const base = @import("base");
@@ -47,10 +45,6 @@ const BlitData = struct {
width_ratio: f32,
};
fn computeOffset2D(x: usize, y: usize, pitch_bytes: usize, texel_bytes: usize) usize {
return y * pitch_bytes + x * texel_bytes;
}
fn computeOffset3D(x: usize, y: usize, z: usize, slice_bytes: usize, pitch_bytes: usize, texel_bytes: usize) usize {
return z * slice_bytes + y * pitch_bytes + x * texel_bytes;
}
@@ -70,7 +64,7 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
};
var clamped_pixel: vk.ClearValue = pixel;
if (base.format.isSint(view_format) or base.format.isUint(view_format)) {
if (base.format.isSnorm(view_format) or base.format.isUnorm(view_format)) {
const min_value: f32 = if (base.format.isSnorm(view_format)) -1.0 else 0.0;
if (range.aspect_mask.color_bit) {
@@ -85,10 +79,6 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
}
}
if (try fastClear(clamped_pixel, format, dst, dst_format, range, render_area)) {
return;
}
const state: State = .{
.src_format = format,
.dst_format = dst_format,
@@ -118,8 +108,6 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
.extent = .{ .width = 0, .height = 0 },
};
const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
while (subresource.mip_level <= last_mip_level) : (subresource.mip_level += 1) {
const extent = dst.getMipLevelExtent(subresource.mip_level);
@@ -132,11 +120,10 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
while (subresource.array_layer <= last_layer) : (subresource.array_layer += 1) {
for (0..@intCast(extent.depth)) |depth| {
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = @intCast(depth) }, subresource);
const dst_size = try dst.interface.getTotalSizeForAspect(subresource.aspect_mask) - dst_texel_offset;
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size];
const dst_map = try dst.mapAsSliceWithAddedOffset(u8, dst_texel_offset, vk.WHOLE_SIZE);
blit(state, .{
.src_map = std.mem.asBytes(&pixel),
.src_map = std.mem.asBytes(&clamped_pixel),
.dst_map = dst_map,
.src_slice_pitch_bytes = base.format.texelSize(format),
@@ -159,112 +146,12 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
}
}
fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool {
if (clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) {
return false;
}
const r, const g, const b, const a = clear_value.color.float_32;
const d = clear_value.depth_stencil.depth;
const s = clear_value.depth_stencil.stencil;
var pack: u32 = 0;
switch (view_format) {
.r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * b + 0.5)) |
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
(@as(u16, @intFromFloat(31.0 * r + 0.5)) << 11),
.b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * r + 0.5)) |
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
(@as(u16, @intFromFloat(31.0 * b + 0.5)) << 11),
.a8b8g8r8_uint_pack32,
.a8b8g8r8_unorm_pack32,
.r8g8b8a8_unorm,
=> pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
(@as(u32, @intFromFloat(255.0 * b + 0.5)) << 16) |
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
(@as(u32, @intFromFloat(255.0 * r + 0.5))),
.b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
(@as(u32, @intFromFloat(255.0 * r + 0.5)) << 16) |
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
(@as(u32, @intFromFloat(255.0 * b + 0.5))),
.d32_sfloat => {
std.debug.assert(clear_format == .d32_sfloat);
pack = @bitCast(d); // f32 reinterpreted as u32
},
.s8_uint => {
std.debug.assert(clear_format == .s8_uint);
pack = @as(u8, @intCast(s));
},
else => return false,
}
var subresource: vk.ImageSubresource = .{
.aspect_mask = range.aspect_mask,
.mip_level = range.base_mip_level,
.array_layer = range.base_array_layer,
};
const last_mip_level = dst.interface.getLastMipLevel(range);
const last_layer = dst.interface.getLastLayerIndex(range);
var area: vk.Rect2D = if (render_area) |ra| ra else .{
.offset = .{ .x = 0, .y = 0 },
.extent = .{ .width = 0, .height = 0 },
};
while (subresource.mip_level <= last_mip_level) : (subresource.mip_level += 1) {
const dst_slice_pitch_bytes = dst.interface.getSliceMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
const dst_row_pitch_bytes = dst.interface.getRowPitchMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
const extent = dst.getMipLevelExtent(subresource.mip_level);
if (render_area == null) {
area.extent.width = extent.width;
area.extent.height = extent.height;
}
subresource.array_layer = range.base_array_layer;
while (subresource.array_layer <= last_layer) : (subresource.array_layer += 1) {
for (0..@intCast(extent.depth)) |depth| {
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = area.offset.x, .y = area.offset.y, .z = @intCast(depth) }, subresource);
const dst_size = try dst.interface.getTotalSizeForAspect(subresource.aspect_mask);
var dst_map = try dst.mapAsSliceWithAddedOffset(u8, dst_texel_offset, dst_size);
for (0..dst.interface.samples.toInt()) |_| {
var dst_pixel = dst_map[0..];
switch (base.format.texelSize(view_format)) {
4 => for (0..@intCast(area.extent.height)) |_| {
var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel);
@memset(dst_pixel_4bytes[0..area.extent.width], pack);
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
},
2 => for (0..@intCast(area.extent.height)) |_| {
var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel);
@memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack)));
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
},
1 => for (0..@intCast(area.extent.height)) |_| {
@memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack)));
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
},
else => unreachable,
}
dst_map = if (dst_map.len < dst_slice_pitch_bytes) break else dst_map[dst_slice_pitch_bytes..];
}
}
}
}
return true;
}
fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) F32x4 {
var color: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
const src_texel_size = base.format.texelSize(state.src_format);
var apply_srgb_convertion = true;
if (state.filter == .nearest or base.format.isUnsignedUnnormalizedInteger(state.src_format)) {
if (state.filter == .nearest or base.format.isUnnormalizedInteger(state.src_format)) {
var x: usize = @intFromFloat(pos[0]);
var y: usize = @intFromFloat(pos[1]);
var z: usize = @intFromFloat(pos[2]);
@@ -866,16 +753,20 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 {
pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
switch (dst_format) {
.r8_snorm,
.r8_unorm,
.r8_srgb,
.s8_uint,
=> map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(u8))),
.r8_snorm,
=> map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8))),
.r16_sint,
.r16_uint,
.d16_unorm,
=> std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0])),
.r16_snorm => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16))),
.r16_unorm => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0] * std.math.maxInt(u16))),
.r16_sfloat => std.mem.bytesAsValue(f16, map).* = @floatCast(color[0]),
@@ -888,7 +779,14 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
.d32_sfloat,
=> std.mem.bytesAsValue(f32, map).* = color[0],
.r8g8_unorm => {
.r8g8_snorm => {
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
},
.r8g8_unorm,
.r8g8_srgb,
=> {
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(u8)));
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(u8)));
},
@@ -916,7 +814,6 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
.r16g16b16a16_sint,
.r16g16b16a16_uint,
.r16g16b16a16_unorm,
.r16g16b16a16_snorm,
=> {
std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(u16)));
std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(u16)));
@@ -924,6 +821,13 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(u16)));
},
.r16g16b16a16_snorm => {
std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16)));
std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(i16)));
std.mem.bytesAsValue(u16, map[4..]).* = @intFromFloat(@round(color[2] * std.math.maxInt(i16)));
std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(i16)));
},
.r16g16b16a16_sfloat => {
std.mem.bytesAsValue(f16, map[0..]).* = @floatCast(color[0]);
std.mem.bytesAsValue(f16, map[2..]).* = @floatCast(color[1]);
@@ -964,6 +868,30 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
(@as(u16, a) << 0);
},
.a4r4g4b4_unorm_pack16 => {
const r: u4 = @intFromFloat(@round(color[0] * std.math.maxInt(u4)));
const g: u4 = @intFromFloat(@round(color[1] * std.math.maxInt(u4)));
const b: u4 = @intFromFloat(@round(color[2] * std.math.maxInt(u4)));
const a: u4 = @intFromFloat(@round(color[3] * std.math.maxInt(u4)));
std.mem.bytesAsValue(u16, map[0..]).* =
(@as(u16, a) << 12) |
(@as(u16, r) << 8) |
(@as(u16, g) << 4) |
(@as(u16, b) << 0);
},
.a4b4g4r4_unorm_pack16 => {
const r: u4 = @intFromFloat(@round(color[0] * std.math.maxInt(u4)));
const g: u4 = @intFromFloat(@round(color[1] * std.math.maxInt(u4)));
const b: u4 = @intFromFloat(@round(color[2] * std.math.maxInt(u4)));
const a: u4 = @intFromFloat(@round(color[3] * std.math.maxInt(u4)));
std.mem.bytesAsValue(u16, map[0..]).* =
(@as(u16, a) << 12) |
(@as(u16, b) << 8) |
(@as(u16, g) << 4) |
(@as(u16, r) << 0);
},
.r8g8b8a8_unorm,
.r8g8b8a8_srgb,
.r8g8b8a8_uint,
@@ -979,6 +907,22 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(u8)));
},
.a8b8g8r8_sint_pack32,
.a8b8g8r8_snorm_pack32,
=> {
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
},
.r8g8b8a8_snorm => {
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
},
.a2r10g10b10_uint_pack32,
.a2r10g10b10_unorm_pack32,
=> {
@@ -1076,6 +1020,8 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
(b << 22);
},
.e5b9g9r9_ufloat_pack32 => std.mem.bytesAsValue(u32, map).* = encodeE5B9G9R9(color),
else => base.unsupported("Blitter: write float to destination format {any}", .{dst_format}),
}
}
@@ -1393,3 +1339,65 @@ fn encodeUFloat(value: f32, mantissa_bits: comptime_int) u32 {
return (exp_bits << mantissa_bits) | mantissa;
}
fn clampE5B9G9R9Component(value: f32) f32 {
const mantissa_bits = 9;
const exponent_bits = 5;
const exponent_bias = 15;
const max_mantissa = (1 << mantissa_bits) - 1;
const max_exponent = (1 << exponent_bits) - 1;
const max_value = @as(f32, @floatFromInt(max_mantissa)) *
std.math.ldexp(@as(f32, 1.0), max_exponent - exponent_bias - mantissa_bits);
if (std.math.isNan(value) or value <= 0.0)
return 0.0;
if (std.math.isInf(value) or value >= max_value)
return max_value;
return value;
}
fn encodeE5B9G9R9Mantissa(value: f32, scale: f32) u32 {
const max_mantissa = 0x1FF;
return @min(@as(u32, @intFromFloat(@round(value / scale))), max_mantissa);
}
fn encodeE5B9G9R9(color: F32x4) u32 {
const mantissa_bits = 9;
const exponent_bits = 5;
const exponent_bias = 15;
const max_mantissa = (1 << mantissa_bits) - 1;
const max_exponent = (1 << exponent_bits) - 1;
const r = clampE5B9G9R9Component(color[0]);
const g = clampE5B9G9R9Component(color[1]);
const b = clampE5B9G9R9Component(color[2]);
const max_component = @max(r, @max(g, b));
if (max_component == 0.0)
return 0;
const parts = std.math.frexp(max_component);
var exponent_i = std.math.clamp(parts.exponent + exponent_bias, 0, max_exponent);
var exponent: u32 = @intCast(exponent_i);
var scale = std.math.ldexp(@as(f32, 1.0), exponent_i - exponent_bias - mantissa_bits);
const rounded_max: u32 = @intFromFloat(@round(max_component / scale));
if (rounded_max > max_mantissa and exponent < max_exponent) {
exponent += 1;
exponent_i += 1;
scale *= 2.0;
}
const r_mantissa = encodeE5B9G9R9Mantissa(r, scale);
const g_mantissa = encodeE5B9G9R9Mantissa(g, scale);
const b_mantissa = encodeE5B9G9R9Mantissa(b, scale);
return (r_mantissa << 0) |
(g_mantissa << 9) |
(b_mantissa << 18) |
(exponent << 27);
}