diff --git a/src/soft/device/ComputeDispatcher.zig b/src/soft/device/ComputeDispatcher.zig
index c60ca1d..1ca5aa6 100644
--- a/src/soft/device/ComputeDispatcher.zig
+++ b/src/soft/device/ComputeDispatcher.zig
@@ -58,6 +58,14 @@ pub fn dispatch(self: *Self, group_count_x: u32, group_count_y: u32, group_count
 
     self.invocation_index.store(0, .monotonic);
 
+    const io = self.device.interface.io();
+    const timer = std.Io.Timestamp.now(io, .real);
+    defer if (comptime base.config.logs != .none) {
+        const duration = timer.untilNow(io, .real);
+        const micros: f32 = @floatFromInt(duration.toMicroseconds());
+        std.log.scoped(.ComputeDispatcher).debug("Compute dispatch took {}ms", .{micros / 1000});
+    };
+
     var wg: std.Io.Group = .init;
     for (0..@min(self.batch_size, group_count)) |batch_id| {
         const run_data: RunData = .{
diff --git a/src/soft/device/blitter.zig b/src/soft/device/blitter.zig
index ff46ce1..ac22690 100644
--- a/src/soft/device/blitter.zig
+++ b/src/soft/device/blitter.zig
@@ -804,20 +804,25 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 {
     return c;
 }
 
-pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
+pub fn writeFloat4(c: F32x4, map: []u8, dst_format: vk.Format) void {
+    // Clamp every channel to the element range reported for `dst_format` before converting.
+    const color = std.math.clamp(c, zm.f32x4s(base.format.minElementValue(dst_format)), zm.f32x4s(base.format.maxElementValue(dst_format)));
+
     switch (dst_format) {
         .r8_unorm,
         .r8_srgb,
         .s8_uint,
         => map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(u8))),
 
-        .r8_snorm => map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8))),
+        .r8_snorm => map[0] = @bitCast(@as(i8, @intFromFloat(@round(color[0] * std.math.maxInt(i8))))),
 
-        .r16_sint,
+        // Signed values go through i16: a negative float cannot be converted to u16 directly.
+        .r16_sint => std.mem.bytesAsValue(u16, map).* = @bitCast(@as(i16, @intFromFloat(@round(color[0])))),
+
         .r16_uint,
         => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0])),
 
-        .r16_snorm => std.mem.bytesAsValue(u16, map).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16))),
+        .r16_snorm => std.mem.bytesAsValue(u16, map).* = @bitCast(@as(i16, @intFromFloat(@round(color[0] * std.math.maxInt(i16))))),
 
         .r16_unorm,
         .d16_unorm,
@@ -834,8 +839,8 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
         => std.mem.bytesAsValue(f32, map).* = color[0],
 
         .r8g8_snorm => {
-            map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
-            map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
+            map[0] = @bitCast(@as(i8, @intFromFloat(@round(color[0] * std.math.maxInt(i8)))));
+            map[1] = @bitCast(@as(i8, @intFromFloat(@round(color[1] * std.math.maxInt(i8)))));
         },
 
         .r8g8_unorm,
@@ -846,8 +851,8 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
         },
 
         .r16g16_snorm => {
-            std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16)));
-            std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(i16)));
+            std.mem.bytesAsValue(u16, map[0..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[0] * std.math.maxInt(i16)))));
+            std.mem.bytesAsValue(u16, map[2..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[1] * std.math.maxInt(i16)))));
         },
 
         .r16g16_unorm => {
@@ -855,6 +860,11 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
             std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(u16)));
         },
 
+        .r16g16_uint => {
+            std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0]));
+            std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1]));
+        },
+
         .r16g16_sfloat => {
             std.mem.bytesAsValue(f16, map[0..]).* = @floatCast(color[0]);
             std.mem.bytesAsValue(f16, map[2..]).* = @floatCast(color[1]);
@@ -865,7 +875,6 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
             std.mem.bytesAsValue(f32, map[4..]).* = color[1];
         },
 
-        .r16g16b16a16_sint,
         .r16g16b16a16_uint,
         .r16g16b16a16_unorm,
         => {
@@ -875,11 +884,13 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
             std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(u16)));
         },
 
-        .r16g16b16a16_snorm => {
-            std.mem.bytesAsValue(u16, map[0..]).* = @intFromFloat(@round(color[0] * std.math.maxInt(i16)));
-            std.mem.bytesAsValue(u16, map[2..]).* = @intFromFloat(@round(color[1] * std.math.maxInt(i16)));
-            std.mem.bytesAsValue(u16, map[4..]).* = @intFromFloat(@round(color[2] * std.math.maxInt(i16)));
-            std.mem.bytesAsValue(u16, map[6..]).* = @intFromFloat(@round(color[3] * std.math.maxInt(i16)));
+        .r16g16b16a16_sint,
+        .r16g16b16a16_snorm,
+        => {
+            std.mem.bytesAsValue(u16, map[0..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[0] * std.math.maxInt(i16)))));
+            std.mem.bytesAsValue(u16, map[2..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[1] * std.math.maxInt(i16)))));
+            std.mem.bytesAsValue(u16, map[4..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[2] * std.math.maxInt(i16)))));
+            std.mem.bytesAsValue(u16, map[6..]).* = @bitCast(@as(i16, @intFromFloat(@round(color[3] * std.math.maxInt(i16)))));
         },
 
         .r16g16b16a16_sfloat => {
@@ -964,17 +975,17 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
         .a8b8g8r8_sint_pack32,
         .a8b8g8r8_snorm_pack32,
         => {
-            map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
-            map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
-            map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
-            map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
+            map[0] = @bitCast(@as(i8, @intFromFloat(@round(color[0] * std.math.maxInt(i8)))));
+            map[1] = @bitCast(@as(i8, @intFromFloat(@round(color[1] * std.math.maxInt(i8)))));
+            map[2] = @bitCast(@as(i8, @intFromFloat(@round(color[2] * std.math.maxInt(i8)))));
+            map[3] = @bitCast(@as(i8, @intFromFloat(@round(color[3] * std.math.maxInt(i8)))));
         },
 
         .r8g8b8a8_snorm => {
-            map[0] = @intFromFloat(@round(color[0] * std.math.maxInt(i8)));
-            map[1] = @intFromFloat(@round(color[1] * std.math.maxInt(i8)));
-            map[2] = @intFromFloat(@round(color[2] * std.math.maxInt(i8)));
-            map[3] = @intFromFloat(@round(color[3] * std.math.maxInt(i8)));
+            map[0] = @bitCast(@as(i8, @intFromFloat(@round(color[0] * std.math.maxInt(i8)))));
+            map[1] = @bitCast(@as(i8, @intFromFloat(@round(color[1] * std.math.maxInt(i8)))));
+            map[2] = @bitCast(@as(i8, @intFromFloat(@round(color[2] * std.math.maxInt(i8)))));
+            map[3] = @bitCast(@as(i8, @intFromFloat(@round(color[3] * std.math.maxInt(i8)))));
         },
 
         .a2r10g10b10_uint_pack32,
@@ -1005,6 +1016,10 @@ pub fn writeFloat4(color: F32x4, map: []u8, dst_format: vk.Format) void {
                 (@as(u32, a) << 30);
         },
 
+        .r32g32b32a32_uint => {
+            std.mem.bytesAsValue(U32x4, map).* = @intFromFloat(@round(@as(@Vector(4, f64), color) * @as(@Vector(4, f64), @splat(std.math.maxInt(u32)))));
+        },
+
         .r32g32b32a32_sfloat => std.mem.bytesAsValue(F32x4, map).* = color,
 
         .r5g6b5_unorm_pack16 => {
diff --git a/src/vulkan/format.zig b/src/vulkan/format.zig
index ab70916..e957bb3 100644
--- a/src/vulkan/format.zig
+++ b/src/vulkan/format.zig
@@ -427,3 +427,164 @@ pub inline fn isUnsigned(format: vk.Format) bool {
 pub inline fn isUnnormalizedInteger(format: vk.Format) bool {
     return isSint(format) or isUint(format);
 }
+
+/// Reports whether `format` is a signed scaled format according to `vkuFormatIsSSCALED`.
+pub inline fn isSscaled(format: vk.Format) bool {
+    return lib.c.vkuFormatIsSSCALED(@intCast(@intFromEnum(format)));
+}
+
+/// Reports whether `format` is an unsigned scaled format according to `vkuFormatIsUSCALED`.
+pub inline fn isUscaled(format: vk.Format) bool {
+    return lib.c.vkuFormatIsUSCALED(@intCast(@intFromEnum(format)));
+}
+
+/// Returns the largest component size of `format`, probing known sizes from 64 bits down to 1.
+/// Calls `lib.unsupported` and returns 0 when none of the probed sizes matches.
+fn maxComponentBits(format: vk.Format) u32 {
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 64)) return 64;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 32)) return 32;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 24)) return 24;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 16)) return 16;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 11)) return 11;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 10)) return 10;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 8)) return 8;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 6)) return 6;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 5)) return 5;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 4)) return 4;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 2)) return 2;
+    if (lib.c.vkuFormatHasComponentSize(@intCast(@intFromEnum(format)), 1)) return 1;
+
+    lib.unsupported("format component bits {any}", .{format});
+    return 0;
+}
+
+/// Returns the largest value of an unsigned `bits`-wide component as an `f32`.
+/// Calls `lib.unsupported` and returns 1.0 for widths that are not listed.
+fn maxUnsignedValue(bits: u32) f32 {
+    return switch (bits) {
+        1 => 1.0,
+        2 => 3.0,
+        4 => 15.0,
+        5 => 31.0,
+        6 => 63.0,
+        8 => @as(f32, @floatFromInt(std.math.maxInt(u8))),
+        10 => 1023.0,
+        11 => 2047.0,
+        16 => @as(f32, @floatFromInt(std.math.maxInt(u16))),
+        24 => 0xffffff,
+        // maxInt(u32)/maxInt(u64) round up to 2^32/2^64 in f32; use the largest f32 below them instead.
+        32 => 0xffffff00,
+        64 => 0xffffff0000000000,
+        else => blk: {
+            lib.unsupported("format component bits {d}", .{bits});
+            break :blk 1.0;
+        },
+    };
+}
+
+/// Returns the largest value of a signed `bits`-wide component as an `f32`.
+/// Calls `lib.unsupported` and returns 1.0 for widths that are not listed.
+fn maxSignedValue(bits: u32) f32 {
+    return switch (bits) {
+        2 => 1.0,
+        4 => 7.0,
+        5 => 15.0,
+        6 => 31.0,
+        8 => @as(f32, @floatFromInt(std.math.maxInt(i8))),
+        10 => 511.0,
+        11 => 1023.0,
+        16 => @as(f32, @floatFromInt(std.math.maxInt(i16))),
+        24 => 0x7fffff,
+        // maxInt(i32)/maxInt(i64) round up to 2^31/2^63 in f32; use the largest f32 below them instead.
+        32 => 0x7fffff80,
+        64 => 0x7fffff8000000000,
+        else => blk: {
+            lib.unsupported("format component bits {d}", .{bits});
+            break :blk 1.0;
+        },
+    };
+}
+
+/// Returns the smallest value of a signed `bits`-wide component as an `f32` (-1.0 if unlisted).
+fn minSignedValue(bits: u32) f32 {
+    return switch (bits) {
+        2 => -2.0,
+        4 => -8.0,
+        5 => -16.0,
+        6 => -32.0,
+        8 => @as(f32, @floatFromInt(std.math.minInt(i8))),
+        10 => -512.0,
+        11 => -1024.0,
+        16 => @as(f32, @floatFromInt(std.math.minInt(i16))),
+        24 => -0x800000,
+        32 => @as(f32, @floatFromInt(std.math.minInt(i32))),
+        64 => @as(f32, @floatFromInt(std.math.minInt(i64))),
+        else => blk: {
+            lib.unsupported("format component bits {d}", .{bits});
+            break :blk -1.0;
+        },
+    };
+}
+
+/// Returns the `f16` maximum for the formats listed below and the `f32` maximum for any other.
+fn maxFloatValue(format: vk.Format) f32 {
+    return switch (format) {
+        .r16_sfloat,
+        .r16g16_sfloat,
+        .r16g16b16_sfloat,
+        .r16g16b16a16_sfloat,
+        .b10g11r11_ufloat_pack32,
+        .e5b9g9r9_ufloat_pack32,
+        .bc6h_ufloat_block,
+        .bc6h_sfloat_block,
+        => std.math.floatMax(f16),
+        else => std.math.floatMax(f32),
+    };
+}
+
+/// Returns the upper bound of one element of `format` as an `f32`; checks run in the order below.
+/// Calls `lib.unsupported` and returns 1.0 when no category matches.
+pub fn maxElementValue(format: vk.Format) f32 {
+    if (isDepth(format))
+        return 1.0;
+
+    if (isStencil(format))
+        return maxUnsignedValue(8);
+
+    if (isSnorm(format) or isUnorm(format) or isSrgb(format))
+        return 1.0;
+
+    if (isUscaled(format) or isUint(format))
+        return maxUnsignedValue(maxComponentBits(format));
+
+    if (isSscaled(format) or isSint(format))
+        return maxSignedValue(maxComponentBits(format));
+
+    if (isUfloat(format) or isSfloat(format))
+        return maxFloatValue(format);
+
+    lib.unsupported("format max element value {any}", .{format});
+    return 1.0;
+}
+
+/// Returns the lower bound of one element of `format` as an `f32`; checks run in the order below.
+/// Calls `lib.unsupported` and returns 0.0 when no category matches.
+pub fn minElementValue(format: vk.Format) f32 {
+    if (isDepth(format) or isStencil(format))
+        return 0.0;
+
+    if (isSnorm(format))
+        return -1.0;
+
+    if (isUnorm(format) or isSrgb(format) or isUscaled(format) or isUint(format) or isUfloat(format))
+        return 0.0;
+
+    if (isSscaled(format) or isSint(format))
+        return minSignedValue(maxComponentBits(format));
+
+    if (isSfloat(format))
+        return -maxFloatValue(format);
+
+    lib.unsupported("format min element value {any}", .{format});
+    return 0.0;
+}