improving image clear
This commit is contained in:
+128
-120
@@ -1,5 +1,3 @@
|
||||
//! This software blitter is highly inspired by SwiftShaders one
|
||||
|
||||
const std = @import("std");
|
||||
const vk = @import("vulkan");
|
||||
const base = @import("base");
|
||||
@@ -47,10 +45,6 @@ const BlitData = struct {
|
||||
width_ratio: f32,
|
||||
};
|
||||
|
||||
fn computeOffset2D(x: usize, y: usize, pitch_bytes: usize, texel_bytes: usize) usize {
|
||||
return y * pitch_bytes + x * texel_bytes;
|
||||
}
|
||||
|
||||
fn computeOffset3D(x: usize, y: usize, z: usize, slice_bytes: usize, pitch_bytes: usize, texel_bytes: usize) usize {
|
||||
return z * slice_bytes + y * pitch_bytes + x * texel_bytes;
|
||||
}
|
||||
@@ -70,7 +64,7 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
|
||||
};
|
||||
|
||||
var clamped_pixel: vk.ClearValue = pixel;
|
||||
if (base.format.isSint(view_format) or base.format.isUint(view_format)) {
|
||||
if (base.format.isSnorm(view_format) or base.format.isUnorm(view_format)) {
|
||||
const min_value: f32 = if (base.format.isSnorm(view_format)) -1.0 else 0.0;
|
||||
|
||||
if (range.aspect_mask.color_bit) {
|
||||
@@ -85,10 +79,6 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
|
||||
}
|
||||
}
|
||||
|
||||
if (try fastClear(clamped_pixel, format, dst, dst_format, range, render_area)) {
|
||||
return;
|
||||
}
|
||||
|
||||
const state: State = .{
|
||||
.src_format = format,
|
||||
.dst_format = dst_format,
|
||||
@@ -118,8 +108,6 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
|
||||
.extent = .{ .width = 0, .height = 0 },
|
||||
};
|
||||
|
||||
const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv;
|
||||
|
||||
while (subresource.mip_level <= last_mip_level) : (subresource.mip_level += 1) {
|
||||
const extent = dst.getMipLevelExtent(subresource.mip_level);
|
||||
|
||||
@@ -132,11 +120,10 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
|
||||
while (subresource.array_layer <= last_layer) : (subresource.array_layer += 1) {
|
||||
for (0..@intCast(extent.depth)) |depth| {
|
||||
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = @intCast(depth) }, subresource);
|
||||
const dst_size = try dst.interface.getTotalSizeForAspect(subresource.aspect_mask) - dst_texel_offset;
|
||||
const dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size];
|
||||
const dst_map = try dst.mapAsSliceWithAddedOffset(u8, dst_texel_offset, vk.WHOLE_SIZE);
|
||||
|
||||
blit(state, .{
|
||||
.src_map = std.mem.asBytes(&pixel),
|
||||
.src_map = std.mem.asBytes(&clamped_pixel),
|
||||
.dst_map = dst_map,
|
||||
|
||||
.src_slice_pitch_bytes = base.format.texelSize(format),
|
||||
@@ -159,112 +146,12 @@ pub fn clear(pixel: vk.ClearValue, format: vk.Format, dst: *SoftImage, view_form
|
||||
}
|
||||
}
|
||||
|
||||
fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dst: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool {
|
||||
if (clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const r, const g, const b, const a = clear_value.color.float_32;
|
||||
const d = clear_value.depth_stencil.depth;
|
||||
const s = clear_value.depth_stencil.stencil;
|
||||
|
||||
var pack: u32 = 0;
|
||||
switch (view_format) {
|
||||
.r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * b + 0.5)) |
|
||||
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
|
||||
(@as(u16, @intFromFloat(31.0 * r + 0.5)) << 11),
|
||||
.b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * r + 0.5)) |
|
||||
(@as(u16, @intFromFloat(63.0 * g + 0.5)) << 5) |
|
||||
(@as(u16, @intFromFloat(31.0 * b + 0.5)) << 11),
|
||||
|
||||
.a8b8g8r8_uint_pack32,
|
||||
.a8b8g8r8_unorm_pack32,
|
||||
.r8g8b8a8_unorm,
|
||||
=> pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
|
||||
(@as(u32, @intFromFloat(255.0 * b + 0.5)) << 16) |
|
||||
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
|
||||
(@as(u32, @intFromFloat(255.0 * r + 0.5))),
|
||||
|
||||
.b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * a + 0.5)) << 24) |
|
||||
(@as(u32, @intFromFloat(255.0 * r + 0.5)) << 16) |
|
||||
(@as(u32, @intFromFloat(255.0 * g + 0.5)) << 8) |
|
||||
(@as(u32, @intFromFloat(255.0 * b + 0.5))),
|
||||
.d32_sfloat => {
|
||||
std.debug.assert(clear_format == .d32_sfloat);
|
||||
pack = @bitCast(d); // f32 reinterpreted as u32
|
||||
},
|
||||
.s8_uint => {
|
||||
std.debug.assert(clear_format == .s8_uint);
|
||||
pack = @as(u8, @intCast(s));
|
||||
},
|
||||
else => return false,
|
||||
}
|
||||
|
||||
var subresource: vk.ImageSubresource = .{
|
||||
.aspect_mask = range.aspect_mask,
|
||||
.mip_level = range.base_mip_level,
|
||||
.array_layer = range.base_array_layer,
|
||||
};
|
||||
const last_mip_level = dst.interface.getLastMipLevel(range);
|
||||
const last_layer = dst.interface.getLastLayerIndex(range);
|
||||
|
||||
var area: vk.Rect2D = if (render_area) |ra| ra else .{
|
||||
.offset = .{ .x = 0, .y = 0 },
|
||||
.extent = .{ .width = 0, .height = 0 },
|
||||
};
|
||||
|
||||
while (subresource.mip_level <= last_mip_level) : (subresource.mip_level += 1) {
|
||||
const dst_slice_pitch_bytes = dst.interface.getSliceMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
|
||||
const dst_row_pitch_bytes = dst.interface.getRowPitchMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level);
|
||||
const extent = dst.getMipLevelExtent(subresource.mip_level);
|
||||
|
||||
if (render_area == null) {
|
||||
area.extent.width = extent.width;
|
||||
area.extent.height = extent.height;
|
||||
}
|
||||
|
||||
subresource.array_layer = range.base_array_layer;
|
||||
while (subresource.array_layer <= last_layer) : (subresource.array_layer += 1) {
|
||||
for (0..@intCast(extent.depth)) |depth| {
|
||||
const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = area.offset.x, .y = area.offset.y, .z = @intCast(depth) }, subresource);
|
||||
const dst_size = try dst.interface.getTotalSizeForAspect(subresource.aspect_mask);
|
||||
var dst_map = try dst.mapAsSliceWithAddedOffset(u8, dst_texel_offset, dst_size);
|
||||
|
||||
for (0..dst.interface.samples.toInt()) |_| {
|
||||
var dst_pixel = dst_map[0..];
|
||||
switch (base.format.texelSize(view_format)) {
|
||||
4 => for (0..@intCast(area.extent.height)) |_| {
|
||||
var dst_pixel_4bytes = std.mem.bytesAsSlice(u32, dst_pixel);
|
||||
@memset(dst_pixel_4bytes[0..area.extent.width], pack);
|
||||
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
|
||||
},
|
||||
2 => for (0..@intCast(area.extent.height)) |_| {
|
||||
var dst_pixel_2bytes = std.mem.bytesAsSlice(u16, dst_pixel);
|
||||
@memset(dst_pixel_2bytes[0..area.extent.width], @as(u16, @truncate(pack)));
|
||||
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
|
||||
},
|
||||
1 => for (0..@intCast(area.extent.height)) |_| {
|
||||
@memset(dst_pixel[0..area.extent.width], @as(u8, @truncate(pack)));
|
||||
dst_pixel = if (dst_pixel.len < dst_row_pitch_bytes) break else dst_pixel[dst_row_pitch_bytes..];
|
||||
},
|
||||
else => unreachable,
|
||||
}
|
||||
|
||||
dst_map = if (dst_map.len < dst_slice_pitch_bytes) break else dst_map[dst_slice_pitch_bytes..];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) F32x4 {
|
||||
var color: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 };
|
||||
const src_texel_size = base.format.texelSize(state.src_format);
|
||||
var apply_srgb_convertion = true;
|
||||
|
||||
if (state.filter == .nearest or base.format.isUnsignedUnnormalizedInteger(state.src_format)) {
|
||||
if (state.filter == .nearest or base.format.isUnnormalizedInteger(state.src_format)) {
|
||||
var x: usize = @intFromFloat(pos[0]);
|
||||
var y: usize = @intFromFloat(pos[1]);
|
||||
var z: usize = @intFromFloat(pos[2]);
|
||||
@@ -866,16 +753,20 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 {
|
||||
|
||||
pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
switch (dst_format) {
|
||||
.r8_snorm,
|
||||
.r8_unorm,
|
||||
.r8_srgb,
|
||||
.s8_uint,
|
||||
=> map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(u8))),
|
||||
|
||||
.r8_snorm,
|
||||
=> map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8))),
|
||||
|
||||
.r16_sint,
|
||||
.r16_uint,
|
||||
.d16_unorm,
|
||||
=> std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0])),
|
||||
|
||||
.r16_snorm => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16))),
|
||||
.r16_unorm => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0] * std.math.maxInt(u16))),
|
||||
|
||||
.r16_sfloat => std.mem.bytesAsValue(f16, map).* = @floatCast(color[0]),
|
||||
@@ -888,7 +779,14 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
.d32_sfloat,
|
||||
=> std.mem.bytesAsValue(f32, map).* = color[0],
|
||||
|
||||
.r8g8_unorm => {
|
||||
.r8g8_snorm => {
|
||||
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
|
||||
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
|
||||
},
|
||||
|
||||
.r8g8_unorm,
|
||||
.r8g8_srgb,
|
||||
=> {
|
||||
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(u8)));
|
||||
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(u8)));
|
||||
},
|
||||
@@ -916,7 +814,6 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
.r16g16b16a16_sint,
|
||||
.r16g16b16a16_uint,
|
||||
.r16g16b16a16_unorm,
|
||||
.r16g16b16a16_snorm,
|
||||
=> {
|
||||
std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(u16)));
|
||||
std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(u16)));
|
||||
@@ -924,6 +821,13 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(u16)));
|
||||
},
|
||||
|
||||
.r16g16b16a16_snorm => {
|
||||
std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16)));
|
||||
std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(i16)));
|
||||
std.mem.bytesAsValue(u16, map[4..]).* = @intFromFloat(@round(color[2] * std.math.maxInt(i16)));
|
||||
std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(i16)));
|
||||
},
|
||||
|
||||
.r16g16b16a16_sfloat => {
|
||||
std.mem.bytesAsValue(f16, map[0..]).* = @floatCast(color[0]);
|
||||
std.mem.bytesAsValue(f16, map[2..]).* = @floatCast(color[1]);
|
||||
@@ -964,6 +868,30 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
(@as(u16, a) << 0);
|
||||
},
|
||||
|
||||
.a4r4g4b4_unorm_pack16 => {
|
||||
const r: u4 = @intFromFloat(@round(color[0] * std.math.maxInt(u4)));
|
||||
const g: u4 = @intFromFloat(@round(color[1] * std.math.maxInt(u4)));
|
||||
const b: u4 = @intFromFloat(@round(color[2] * std.math.maxInt(u4)));
|
||||
const a: u4 = @intFromFloat(@round(color[3] * std.math.maxInt(u4)));
|
||||
std.mem.bytesAsValue(u16, map[0..]).* =
|
||||
(@as(u16, a) << 12) |
|
||||
(@as(u16, r) << 8) |
|
||||
(@as(u16, g) << 4) |
|
||||
(@as(u16, b) << 0);
|
||||
},
|
||||
|
||||
.a4b4g4r4_unorm_pack16 => {
|
||||
const r: u4 = @intFromFloat(@round(color[0] * std.math.maxInt(u4)));
|
||||
const g: u4 = @intFromFloat(@round(color[1] * std.math.maxInt(u4)));
|
||||
const b: u4 = @intFromFloat(@round(color[2] * std.math.maxInt(u4)));
|
||||
const a: u4 = @intFromFloat(@round(color[3] * std.math.maxInt(u4)));
|
||||
std.mem.bytesAsValue(u16, map[0..]).* =
|
||||
(@as(u16, a) << 12) |
|
||||
(@as(u16, b) << 8) |
|
||||
(@as(u16, g) << 4) |
|
||||
(@as(u16, r) << 0);
|
||||
},
|
||||
|
||||
.r8g8b8a8_unorm,
|
||||
.r8g8b8a8_srgb,
|
||||
.r8g8b8a8_uint,
|
||||
@@ -979,6 +907,22 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(u8)));
|
||||
},
|
||||
|
||||
.a8b8g8r8_sint_pack32,
|
||||
.a8b8g8r8_snorm_pack32,
|
||||
=> {
|
||||
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
|
||||
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
|
||||
map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
|
||||
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
|
||||
},
|
||||
|
||||
.r8g8b8a8_snorm => {
|
||||
map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
|
||||
map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
|
||||
map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
|
||||
map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
|
||||
},
|
||||
|
||||
.a2r10g10b10_uint_pack32,
|
||||
.a2r10g10b10_unorm_pack32,
|
||||
=> {
|
||||
@@ -1076,6 +1020,8 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
|
||||
(b << 22);
|
||||
},
|
||||
|
||||
.e5b9g9r9_ufloat_pack32 => std.mem.bytesAsValue(u32, map).* = encodeE5B9G9R9(color),
|
||||
|
||||
else => base.unsupported("Blitter: write float to destination format {any}", .{dst_format}),
|
||||
}
|
||||
}
|
||||
@@ -1393,3 +1339,65 @@ fn encodeUFloat(value: f32, mantissa_bits: comptime_int) u32 {
|
||||
|
||||
return (exp_bits << mantissa_bits) | mantissa;
|
||||
}
|
||||
|
||||
fn clampE5B9G9R9Component(value: f32) f32 {
|
||||
const mantissa_bits = 9;
|
||||
const exponent_bits = 5;
|
||||
const exponent_bias = 15;
|
||||
const max_mantissa = (1 << mantissa_bits) - 1;
|
||||
const max_exponent = (1 << exponent_bits) - 1;
|
||||
|
||||
const max_value = @as(f32, @floatFromInt(max_mantissa)) *
|
||||
std.math.ldexp(@as(f32, 1.0), max_exponent - exponent_bias - mantissa_bits);
|
||||
|
||||
if (std.math.isNan(value) or value <= 0.0)
|
||||
return 0.0;
|
||||
|
||||
if (std.math.isInf(value) or value >= max_value)
|
||||
return max_value;
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
fn encodeE5B9G9R9Mantissa(value: f32, scale: f32) u32 {
|
||||
const max_mantissa = 0x1FF;
|
||||
return @min(@as(u32, @intFromFloat(@round(value / scale))), max_mantissa);
|
||||
}
|
||||
|
||||
fn encodeE5B9G9R9(color: F32x4) u32 {
|
||||
const mantissa_bits = 9;
|
||||
const exponent_bits = 5;
|
||||
const exponent_bias = 15;
|
||||
const max_mantissa = (1 << mantissa_bits) - 1;
|
||||
const max_exponent = (1 << exponent_bits) - 1;
|
||||
|
||||
const r = clampE5B9G9R9Component(color[0]);
|
||||
const g = clampE5B9G9R9Component(color[1]);
|
||||
const b = clampE5B9G9R9Component(color[2]);
|
||||
|
||||
const max_component = @max(r, @max(g, b));
|
||||
if (max_component == 0.0)
|
||||
return 0;
|
||||
|
||||
const parts = std.math.frexp(max_component);
|
||||
var exponent_i = std.math.clamp(parts.exponent + exponent_bias, 0, max_exponent);
|
||||
var exponent: u32 = @intCast(exponent_i);
|
||||
|
||||
var scale = std.math.ldexp(@as(f32, 1.0), exponent_i - exponent_bias - mantissa_bits);
|
||||
|
||||
const rounded_max: u32 = @intFromFloat(@round(max_component / scale));
|
||||
if (rounded_max > max_mantissa and exponent < max_exponent) {
|
||||
exponent += 1;
|
||||
exponent_i += 1;
|
||||
scale *= 2.0;
|
||||
}
|
||||
|
||||
const r_mantissa = encodeE5B9G9R9Mantissa(r, scale);
|
||||
const g_mantissa = encodeE5B9G9R9Mantissa(g, scale);
|
||||
const b_mantissa = encodeE5B9G9R9Mantissa(b, scale);
|
||||
|
||||
return (r_mantissa << 0) |
|
||||
(g_mantissa << 9) |
|
||||
(b_mantissa << 18) |
|
||||
(exponent << 27);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user