diff --git a/build.zig b/build.zig index 2a3d983..d3ea65a 100644 --- a/build.zig +++ b/build.zig @@ -41,11 +41,14 @@ pub fn build(b: *std.Build) !void { .registry = vulkan_headers.path("registry/vk.xml"), }).module("vulkan-zig"); + const zmath = b.dependency("zmath", .{}).module("root"); + const logs_option = b.option(bool, "logs", "Driver logs") orelse false; const options = b.addOptions(); options.addOption(bool, "logs", logs_option); + base_mod.addImport("zmath", zmath); base_mod.addImport("vulkan", vulkan); base_mod.addSystemIncludePath(vulkan_headers.path("include")); base_mod.addSystemIncludePath(vulkan_utility_libraries.path("include")); @@ -145,12 +148,12 @@ fn customSoft(b: *std.Build, lib: *std.Build.Step.Compile, options: *std.Build.S lib.root_module.addSystemIncludePath(cpuinfo.path("include")); lib.root_module.linkLibrary(cpuinfo.artifact("cpuinfo")); - const spv = b.dependency("SPIRV_Interpreter", .{ + const spv = b.lazyDependency("SPIRV_Interpreter", .{ .@"no-example" = true, .@"no-test" = true, .@"use-llvm" = true, - }).module("spv"); - lib.root_module.addImport("spv", spv); + }) orelse return error.UnresolvedDependency; + lib.root_module.addImport("spv", spv.module("spv")); const debug_allocator_option = b.option(bool, "debug-allocator", "Debug device allocator") orelse false; const shaders_simd_option = b.option(bool, "shader-simd", "Shaders SIMD acceleration") orelse true; diff --git a/build.zig.zon b/build.zig.zon index 61bc9fe..31047a9 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -26,9 +26,14 @@ .hash = "cpuinfo-0.0.1-RLgIQYrTMgGqfQMOd1nAa2EuglXOh5gR9bNzwMzQTemt", .lazy = true, }, + .zmath = .{ + .url = "git+https://github.com/zig-gamedev/zmath#3a5955b2b72cd081563fbb084eff05bffd1e3fbb", + .hash = "zmath-0.11.0-dev-wjwivdMsAwD-xaLj76YHUq3t9JDH-X16xuMTmnDzqbu2", + }, .SPIRV_Interpreter = .{ .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#664ea9b92bf84bc97ec4a062c171562bf6628263", .hash = 
"SPIRV_Interpreter-0.0.1-ajmpn1qKBACshq_ncUUF-zXJzpdNLRzIAPcWRQL57W8l", + .lazy = true, }, }, diff --git a/src/soft/SoftCommandBuffer.zig b/src/soft/SoftCommandBuffer.zig index 54654ae..979718a 100644 --- a/src/soft/SoftCommandBuffer.zig +++ b/src/soft/SoftCommandBuffer.zig @@ -14,6 +14,7 @@ const SoftPipeline = @import("SoftPipeline.zig"); const SoftDescriptorSet = @import("SoftDescriptorSet.zig"); const ExecutionDevice = @import("device/Device.zig"); +const blitter = @import("device/blitter.zig"); const Self = @This(); pub const Interface = base.CommandBuffer; @@ -182,10 +183,10 @@ pub fn blitImage(interface: *Interface, src: *base.Image, _: vk.ImageLayout, dst regions: []const vk.ImageBlit, filter: vk.Filter, - pub fn execute(context: *anyopaque, device: *ExecutionDevice) VkError!void { + pub fn execute(context: *anyopaque, _: *ExecutionDevice) VkError!void { const impl: *Impl = @ptrCast(@alignCast(context)); for (impl.regions[0..]) |region| { - try device.blitter.blitRegion(impl.src, impl.dst, region, impl.filter); + try blitter.blitRegion(impl.src, impl.dst, region, impl.filter); } } }; @@ -212,10 +213,10 @@ pub fn clearColorImage(interface: *Interface, image: *base.Image, _: vk.ImageLay clear_color: vk.ClearColorValue, range: vk.ImageSubresourceRange, - pub fn execute(context: *anyopaque, device: *ExecutionDevice) VkError!void { + pub fn execute(context: *anyopaque, _: *ExecutionDevice) VkError!void { const impl: *Impl = @ptrCast(@alignCast(context)); const clear_format = try impl.image.getClearFormat(); - try device.blitter.clear(.{ .color = impl.clear_color }, clear_format, impl.image, impl.image.interface.format, impl.range, null); + try blitter.clear(.{ .color = impl.clear_color }, clear_format, impl.image, impl.image.interface.format, impl.range, null); } }; diff --git a/src/soft/SoftImage.zig b/src/soft/SoftImage.zig index e69703d..a1adaa7 100644 --- a/src/soft/SoftImage.zig +++ b/src/soft/SoftImage.zig @@ -145,14 +145,14 @@ pub fn 
copyToImageSingleAspect(self: *const Self, dst: *Self, region: vk.ImageCo const src_memory = if (self.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; var src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(self.interface.memory_offset + src_texel_offset, src_size)))[0..src_size]; - const dst_texel_offset = try self.getTexelMemoryOffset(region.dst_offset, .{ + const dst_texel_offset = try dst.getTexelMemoryOffset(region.dst_offset, .{ .aspect_mask = region.dst_subresource.aspect_mask, .mip_level = region.dst_subresource.mip_level, .array_layer = region.dst_subresource.base_array_layer, }); const dst_size = try dst.interface.getTotalSizeForAspect(region.dst_subresource.aspect_mask) - dst_texel_offset; const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; - var dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(self.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size]; + var dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size]; for (0..layer_count) |_| { if (is_single_row) { diff --git a/src/soft/device/Blitter.zig b/src/soft/device/Blitter.zig deleted file mode 100644 index f2a6eab..0000000 --- a/src/soft/device/Blitter.zig +++ /dev/null @@ -1,225 +0,0 @@ -//! 
This software blitter is highly inspired by SwiftShaders one - -const std = @import("std"); -const vk = @import("vulkan"); -const base = @import("base"); - -const VkError = base.VkError; - -pub const SoftImage = @import("../SoftImage.zig"); -pub const SoftImageView = @import("../SoftImageView.zig"); - -const Self = @This(); - -pub const init: Self = .{}; - -pub fn clear(self: *Self, pixel: vk.ClearValue, format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, area: ?vk.Rect2D) VkError!void { - const dst_format = base.format.fromAspect(view_format, range.aspect_mask); - if (dst_format == .undefined) { - return; - } - - const view_format_value: c_uint = @intCast(@intFromEnum(view_format)); - - var clamped_pixel: vk.ClearValue = pixel; - if (base.vku.vkuFormatIsSINT(view_format_value) or base.vku.vkuFormatIsUINT(view_format_value)) { - const min_value: f32 = if (base.vku.vkuFormatIsSNORM(view_format_value)) -1.0 else 0.0; - - if (range.aspect_mask.color_bit) { - clamped_pixel.color.float_32[0] = std.math.clamp(pixel.color.float_32[0], min_value, 1.0); - clamped_pixel.color.float_32[1] = std.math.clamp(pixel.color.float_32[1], min_value, 1.0); - clamped_pixel.color.float_32[2] = std.math.clamp(pixel.color.float_32[2], min_value, 1.0); - clamped_pixel.color.float_32[3] = std.math.clamp(pixel.color.float_32[3], min_value, 1.0); - } - - // Stencil never requires clamping, so we can check for Depth only - if (range.aspect_mask.depth_bit) { - clamped_pixel.depth_stencil.depth = std.math.clamp(pixel.depth_stencil.depth, min_value, 1.0); - } - } - - if (try self.fastClear(clamped_pixel, format, dest, dst_format, range, area)) { - return; - } - base.logger.fixme("implement slow clear", .{}); -} - -fn fastClear(self: *Self, clear_value: vk.ClearValue, clear_format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool { - _ = self; - _ = render_area; - _ = range; - - if 
(clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) { - return false; - } - - const ClearValue = union { - rgba: struct { r: f32, g: f32, b: f32, a: f32 }, - rgb: [3]f32, - d: f32, - d_as_u32: u32, - s: u32, - }; - - const c: *const ClearValue = @ptrCast(&clear_value); - - var pack: u32 = 0; - switch (view_format) { - .r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) << 11), - .b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) << 11), - - .a8b8g8r8_uint_pack32, - .a8b8g8r8_unorm_pack32, - .r8g8b8a8_unorm, - => pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)), - - .b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)), - //.b10g11r11_ufloat_pack32 => pack = R11G11B10F(c.rgb), - //.e5b9g9r9_ufloat_pack32 => pack = RGB9E5(c.rgb), - .d32_sfloat => { - std.debug.assert(clear_format == .d32_sfloat); - pack = c.d_as_u32; // float reinterpreted as uint32 - }, - .s8_uint => { - std.debug.assert(clear_format == .s8_uint); - pack = @as(u8, @intCast(c.s)); - }, - else => return false, - } - - if (dest.interface.memory) |memory| { - const image_size = try dest.interface.getTotalSize(); - const memory_map = memory.map(dest.interface.memory_offset, image_size) catch return false; - defer memory.unmap(); - - const memory_map_as_u32: []u32 = @as([*]u32, 
@ptrCast(@alignCast(memory_map)))[0..@divExact(image_size, 4)]; - - @memset(memory_map_as_u32, pack); - - return true; - } - return false; -} - -pub fn blitRegion(_: *Self, src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit, filter: vk.Filter) VkError!void { - var dst_offset_0 = region.dst_offsets[0]; - var dst_offset_1 = region.dst_offsets[1]; - var src_offset_0 = region.src_offsets[0]; - var src_offset_1 = region.src_offsets[1]; - - if (dst_offset_0.x > dst_offset_1.x) { - std.mem.swap(i32, &src_offset_0.x, &src_offset_1.x); - std.mem.swap(i32, &dst_offset_0.x, &dst_offset_1.x); - } - - if (dst_offset_0.y > dst_offset_1.y) { - std.mem.swap(i32, &src_offset_0.y, &src_offset_1.y); - std.mem.swap(i32, &dst_offset_0.y, &dst_offset_1.y); - } - - if (dst_offset_0.z > dst_offset_1.z) { - std.mem.swap(i32, &src_offset_0.z, &src_offset_1.z); - std.mem.swap(i32, &dst_offset_0.z, &dst_offset_1.z); - } - - const src_extent = src.getMipLevelExtent(region.src_subresource.mip_level); - - _ = src_extent; - - const width_ratio = @as(f32, @floatFromInt(src_offset_1.x - src_offset_0.x)) / @as(f32, @floatFromInt(dst_offset_1.x - dst_offset_0.x)); - const height_ratio = @as(f32, @floatFromInt(src_offset_1.y - src_offset_0.y)) / @as(f32, @floatFromInt(dst_offset_1.y - dst_offset_0.y)); - const depth_ratio = @as(f32, @floatFromInt(src_offset_1.z - src_offset_0.z)) / @as(f32, @floatFromInt(dst_offset_1.z - dst_offset_0.z)); - const x0 = @as(f32, @floatFromInt(src_offset_0.x)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.x))) * width_ratio; - const y0 = @as(f32, @floatFromInt(src_offset_0.y)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.y))) * height_ratio; - const z0 = @as(f32, @floatFromInt(src_offset_0.z)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.z))) * depth_ratio; - - _ = x0; - _ = y0; - _ = z0; - - const src_format = base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask); - const dst_format = base.format.fromAspect(dst.interface.format, 
region.dst_subresource.aspect_mask); - - const apply_filter = (filter != .nearest); - const allow_srgb_conversion = apply_filter or base.format.isSrgb(src_format) != base.format.isSrgb(dst_format); - - _ = allow_srgb_conversion; -} - -// State state(srcFormat, dstFormat, src->getSampleCount(), dst->getSampleCount(), -// Options{ doFilter, allowSRGBConversion }); -// state.clampToEdge = (region.srcOffsets[0].x < 0) || -// (region.srcOffsets[0].y < 0) || -// (static_cast(region.srcOffsets[1].x) > srcExtent.width) || -// (static_cast(region.srcOffsets[1].y) > srcExtent.height) || -// (doFilter && ((x0 < 0.5f) || (y0 < 0.5f))); -// state.filter3D = (region.srcOffsets[1].z - region.srcOffsets[0].z) != -// (region.dstOffsets[1].z - region.dstOffsets[0].z); -// -// auto blitRoutine = getBlitRoutine(state); -// if(!blitRoutine) -// { -// return; -// } -// -// BlitData data = { -// nullptr, // source -// nullptr, // dest -// assert_cast(src->rowPitchBytes(srcAspect, region.srcSubresource.mipLevel)), // sPitchB -// assert_cast(dst->rowPitchBytes(dstAspect, region.dstSubresource.mipLevel)), // dPitchB -// assert_cast(src->slicePitchBytes(srcAspect, region.srcSubresource.mipLevel)), // sSliceB -// assert_cast(dst->slicePitchBytes(dstAspect, region.dstSubresource.mipLevel)), // dSliceB -// -// x0, -// y0, -// z0, -// widthRatio, -// heightRatio, -// depthRatio, -// -// region.dstOffsets[0].x, // x0d -// region.dstOffsets[1].x, // x1d -// region.dstOffsets[0].y, // y0d -// region.dstOffsets[1].y, // y1d -// region.dstOffsets[0].z, // z0d -// region.dstOffsets[1].z, // z1d -// -// static_cast(srcExtent.width), // sWidth -// static_cast(srcExtent.height), // sHeight -// static_cast(srcExtent.depth), // sDepth -// -// false, // filter3D -// }; -// -// VkImageSubresource srcSubres = { -// region.srcSubresource.aspectMask, -// region.srcSubresource.mipLevel, -// region.srcSubresource.baseArrayLayer -// }; -// -// VkImageSubresource dstSubres = { -// region.dstSubresource.aspectMask, 
-// region.dstSubresource.mipLevel, -// region.dstSubresource.baseArrayLayer -// }; -// -// VkImageSubresourceRange dstSubresRange = { -// region.dstSubresource.aspectMask, -// region.dstSubresource.mipLevel, -// 1, // levelCount -// region.dstSubresource.baseArrayLayer, -// region.dstSubresource.layerCount -// }; -// -// uint32_t lastLayer = src->getLastLayerIndex(dstSubresRange); -// -// for(; dstSubres.arrayLayer <= lastLayer; srcSubres.arrayLayer++, dstSubres.arrayLayer++) -// { -// data.source = src->getTexelPointer({ 0, 0, 0 }, srcSubres); -// data.dest = dst->getTexelPointer({ 0, 0, 0 }, dstSubres); -// -// ASSERT(data.source < src->end()); -// ASSERT(data.dest < dst->end()); -// -// blitRoutine(&data); -// } diff --git a/src/soft/device/Device.zig b/src/soft/device/Device.zig index 6de90d4..4fbb073 100644 --- a/src/soft/device/Device.zig +++ b/src/soft/device/Device.zig @@ -6,7 +6,6 @@ const SoftDescriptorSet = @import("../SoftDescriptorSet.zig"); const SoftDevice = @import("../SoftDevice.zig"); const SoftPipeline = @import("../SoftPipeline.zig"); -const Blitter = @import("Blitter.zig"); const ComputeRoutines = @import("ComputeRoutines.zig"); const PipelineState = @import("PipelineState.zig"); @@ -14,8 +13,6 @@ const VkError = base.VkError; const Self = @This(); -blitter: Blitter, - compute_routines: ComputeRoutines, /// .graphics = 0 @@ -23,7 +20,6 @@ compute_routines: ComputeRoutines, pipeline_states: [2]PipelineState, pub const init: Self = .{ - .blitter = .init, .compute_routines = undefined, .pipeline_states = undefined, }; diff --git a/src/soft/device/blitter.zig b/src/soft/device/blitter.zig new file mode 100644 index 0000000..4768b17 --- /dev/null +++ b/src/soft/device/blitter.zig @@ -0,0 +1,323 @@ +//! 
This software blitter is highly inspired by SwiftShader's one + +const std = @import("std"); +const vk = @import("vulkan"); +const base = @import("base"); +const zm = base.zm; + +const VkError = base.VkError; + +pub const SoftImage = @import("../SoftImage.zig"); +pub const SoftImageView = @import("../SoftImageView.zig"); + +const State = struct { + src_format: vk.Format, + dst_format: vk.Format, + filter: vk.Filter, + allow_srgb_conversion: bool, + clamp_to_edge: bool, +}; + +fn computeOffset2D(x: usize, y: usize, pitch_bytes: usize, texel_bytes: usize) usize { + return y * pitch_bytes + x * texel_bytes; +} + +fn computeOffset3D(x: usize, y: usize, z: usize, slice_bytes: usize, pitch_bytes: usize, texel_bytes: usize) usize { + return z * slice_bytes + y * pitch_bytes + x * texel_bytes; +} + +pub fn clear(pixel: vk.ClearValue, format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, area: ?vk.Rect2D) VkError!void { + const dst_format = base.format.fromAspect(view_format, range.aspect_mask); + if (dst_format == .undefined) { + return; + } + + const view_format_value: c_uint = @intCast(@intFromEnum(view_format)); + + var clamped_pixel: vk.ClearValue = pixel; + if (base.vku.vkuFormatIsSNORM(view_format_value) or base.vku.vkuFormatIsUNORM(view_format_value)) { + const min_value: f32 = if (base.vku.vkuFormatIsSNORM(view_format_value)) -1.0 else 0.0; + + if (range.aspect_mask.color_bit) { + clamped_pixel.color.float_32[0] = std.math.clamp(pixel.color.float_32[0], min_value, 1.0); + clamped_pixel.color.float_32[1] = std.math.clamp(pixel.color.float_32[1], min_value, 1.0); + clamped_pixel.color.float_32[2] = std.math.clamp(pixel.color.float_32[2], min_value, 1.0); + clamped_pixel.color.float_32[3] = std.math.clamp(pixel.color.float_32[3], min_value, 1.0); + } + + // Stencil never requires clamping, so we can check for Depth only + if (range.aspect_mask.depth_bit) { + clamped_pixel.depth_stencil.depth = 
std.math.clamp(pixel.depth_stencil.depth, min_value, 1.0); + } + } + + if (try fastClear(clamped_pixel, format, dest, dst_format, range, area)) { + return; + } + base.logger.fixme("implement slow clear", .{}); +} + +fn fastClear(clear_value: vk.ClearValue, clear_format: vk.Format, dest: *SoftImage, view_format: vk.Format, range: vk.ImageSubresourceRange, render_area: ?vk.Rect2D) VkError!bool { + _ = render_area; + _ = range; + + if (clear_format != .r32g32b32a32_sfloat and clear_format != .d32_sfloat and clear_format != .s8_uint) { + return false; + } + + const ClearValue = union { + rgba: struct { r: f32, g: f32, b: f32, a: f32 }, + rgb: [3]f32, + d: f32, + d_as_u32: u32, + s: u32, + }; + + const c: *const ClearValue = @ptrCast(&clear_value); + + var pack: u32 = 0; + switch (view_format) { + .r5g6b5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) << 11), + .b5g6r5_unorm_pack16 => pack = @as(u16, @intFromFloat(31.0 * c.rgba.r + 0.5)) | (@as(u16, @intFromFloat(63.0 * c.rgba.g + 0.5)) << 5) | (@as(u16, @intFromFloat(31.0 * c.rgba.b + 0.5)) << 11), + + .a8b8g8r8_uint_pack32, + .a8b8g8r8_unorm_pack32, + .r8g8b8a8_unorm, + => pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)), + + .b8g8r8a8_unorm => pack = (@as(u32, @intFromFloat(255.0 * c.rgba.a + 0.5)) << 24) | (@as(u32, @intFromFloat(255.0 * c.rgba.r + 0.5)) << 16) | (@as(u32, @intFromFloat(255.0 * c.rgba.g + 0.5)) << 8) | @as(u32, @intFromFloat(255.0 * c.rgba.b + 0.5)), + //.b10g11r11_ufloat_pack32 => pack = R11G11B10F(c.rgb), + //.e5b9g9r9_ufloat_pack32 => pack = RGB9E5(c.rgb), + .d32_sfloat => { + std.debug.assert(clear_format == .d32_sfloat); + pack = c.d_as_u32; // float reinterpreted as uint32 + }, + .s8_uint => 
{ + std.debug.assert(clear_format == .s8_uint); + pack = @as(u8, @intCast(c.s)); + }, + else => return false, + } + + if (dest.interface.memory) |memory| { + const image_size = try dest.interface.getTotalSize(); + const memory_map = memory.map(dest.interface.memory_offset, image_size) catch return false; + defer memory.unmap(); + + const memory_map_as_u32: []u32 = @as([*]u32, @ptrCast(@alignCast(memory_map)))[0..@divExact(image_size, 4)]; + + @memset(memory_map_as_u32, pack); + + return true; + } + return false; +} + +fn sample(src: []const u8, pos: zm.F32x4, dims: zm.F32x4, slice_bytes: usize, pitch_bytes: usize, state: State) zm.F32x4 { + var color: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; + const src_texel_size = base.format.texelSize(state.src_format); + + if (state.filter != .linear or base.format.isUint(state.src_format)) { + var x: usize = @intFromFloat(pos[0]); + var y: usize = @intFromFloat(pos[1]); + var z: usize = @intFromFloat(pos[2]); + + if (state.clamp_to_edge) { + x = std.math.clamp(x, 0, @as(usize, @intFromFloat(dims[0])) - 1); + y = std.math.clamp(y, 0, @as(usize, @intFromFloat(dims[1])) - 1); + z = std.math.clamp(z, 0, @as(usize, @intFromFloat(dims[2])) - 1); + } + + const src_map = src[computeOffset3D(x, y, z, slice_bytes, pitch_bytes, src_texel_size)..]; + + color = readFloat4(src_map, state); + } + + return applyScaleAndClamp(color, state); +} + +pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit, filter: vk.Filter) VkError!void { + var dst_offset_0 = region.dst_offsets[0]; + var dst_offset_1 = region.dst_offsets[1]; + var src_offset_0 = region.src_offsets[0]; + var src_offset_1 = region.src_offsets[1]; + + if (dst_offset_0.x > dst_offset_1.x) { + std.mem.swap(i32, &src_offset_0.x, &src_offset_1.x); + std.mem.swap(i32, &dst_offset_0.x, &dst_offset_1.x); + } + + if (dst_offset_0.y > dst_offset_1.y) { + std.mem.swap(i32, &src_offset_0.y, &src_offset_1.y); + std.mem.swap(i32, &dst_offset_0.y, &dst_offset_1.y); + } + + if 
(dst_offset_0.z > dst_offset_1.z) { + std.mem.swap(i32, &src_offset_0.z, &src_offset_1.z); + std.mem.swap(i32, &dst_offset_0.z, &dst_offset_1.z); + } + + const src_extent = src.getMipLevelExtent(region.src_subresource.mip_level); + + const width_ratio = @as(f32, @floatFromInt(src_offset_1.x - src_offset_0.x)) / @as(f32, @floatFromInt(dst_offset_1.x - dst_offset_0.x)); + const height_ratio = @as(f32, @floatFromInt(src_offset_1.y - src_offset_0.y)) / @as(f32, @floatFromInt(dst_offset_1.y - dst_offset_0.y)); + const depth_ratio = @as(f32, @floatFromInt(src_offset_1.z - src_offset_0.z)) / @as(f32, @floatFromInt(dst_offset_1.z - dst_offset_0.z)); + const x0 = @as(f32, @floatFromInt(src_offset_0.x)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.x))) * width_ratio; + const y0 = @as(f32, @floatFromInt(src_offset_0.y)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.y))) * height_ratio; + const z0 = @as(f32, @floatFromInt(src_offset_0.z)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.z))) * depth_ratio; + + const src_slice_pitch_bytes = src.getSliceMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level); + const dst_slice_pitch_bytes = dst.getSliceMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level); + const src_row_pitch_bytes = src.getRowPitchMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level); + const dst_row_pitch_bytes = dst.getRowPitchMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level); + + const src_format = base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask); + const dst_format = base.format.fromAspect(dst.interface.format, region.dst_subresource.aspect_mask); + + const apply_filter = (filter != .nearest); + const allow_srgb_conversion = apply_filter or base.format.isSrgb(src_format) != base.format.isSrgb(dst_format); + + const is_src_int = base.format.isUint(src_format) or 
base.format.isSint(src_format); + const is_dst_int = base.format.isUint(dst_format) or base.format.isSint(dst_format); + const are_both_int = is_src_int and is_dst_int; + + if (are_both_int) { + base.unsupported("Blit of only integer type images are not supported yet", .{}); + return; + } + + var src_subresource = vk.ImageSubresource{ + .aspect_mask = region.src_subresource.aspect_mask, + .mip_level = region.src_subresource.mip_level, + .array_layer = region.src_subresource.base_array_layer, + }; + + var dst_subresource = vk.ImageSubresource{ + .aspect_mask = region.dst_subresource.aspect_mask, + .mip_level = region.dst_subresource.mip_level, + .array_layer = region.dst_subresource.base_array_layer, + }; + + const last_layer = src.interface.getLastLayerIndex(.{ + .aspect_mask = region.dst_subresource.aspect_mask, + .base_mip_level = region.dst_subresource.mip_level, + .level_count = 1, + .base_array_layer = region.dst_subresource.base_array_layer, + .layer_count = region.dst_subresource.layer_count, + }); + + const src_memory = if (src.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; + const dst_memory = if (dst.interface.memory) |memory| memory else return VkError.InvalidDeviceMemoryDrv; + + const state: State = .{ + .src_format = src_format, + .dst_format = dst_format, + .filter = filter, + .allow_srgb_conversion = allow_srgb_conversion, + .clamp_to_edge = false, + }; + + while (dst_subresource.array_layer <= last_layer) : ({ + src_subresource.array_layer += 1; + dst_subresource.array_layer += 1; + }) { + const src_texel_offset = try src.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = 0 }, src_subresource); + const src_size = try src.interface.getTotalSizeForAspect(src_subresource.aspect_mask) - src_texel_offset; + const src_map: []u8 = @as([*]u8, @ptrCast(try src_memory.map(src.interface.memory_offset + src_texel_offset, src_size)))[0..src_size]; + + const dst_texel_offset = try dst.getTexelMemoryOffset(.{ .x = 0, .y = 0, .z = 0 }, 
dst_subresource); + const dst_size = try dst.interface.getTotalSizeForAspect(dst_subresource.aspect_mask) - dst_texel_offset; + var dst_map: []u8 = @as([*]u8, @ptrCast(try dst_memory.map(dst.interface.memory_offset + dst_texel_offset, dst_size)))[0..dst_size]; + + _ = &src_map; + _ = &dst_map; + + for (@intCast(dst_offset_0.z)..@intCast(dst_offset_1.z)) |k| { + const z = z0 + @as(f32, @floatFromInt(k)) * depth_ratio; + var dst_slice = dst_map[(k * dst_slice_pitch_bytes)..]; + + for (@intCast(dst_offset_0.y)..@intCast(dst_offset_1.y)) |j| { + const y = y0 + @as(f32, @floatFromInt(j)) * height_ratio; + var dst_line = dst_slice[(j * dst_row_pitch_bytes)..]; + + for (@intCast(dst_offset_0.x)..@intCast(dst_offset_1.x)) |i| { + const x = x0 + @as(f32, @floatFromInt(i)) * width_ratio; + var dst_pixel = dst_line[(i * base.format.texelSize(dst_format))..]; + + if (are_both_int) { + // TODO + } else { + const color = sample( + src_map, + .{ x, y, z, 0.0 }, + .{ + @floatFromInt(src_extent.width), + @floatFromInt(src_extent.height), + @floatFromInt(src_extent.depth), + 0.0, + }, + src_slice_pitch_bytes, + src_row_pitch_bytes, + state, + ); + for (0..dst.interface.samples.toInt()) |_| { + writeFloat4(color, dst_pixel, state); + if (dst_pixel.len < dst_slice_pitch_bytes) + break; + dst_pixel = dst_pixel[dst_slice_pitch_bytes..]; + } + } + } + } + } + } +} + +fn applyScaleAndClamp(base_color: zm.F32x4, state: State) zm.F32x4 { + var color: zm.F32x4 = base_color; + + const unscale = base.format.getScale(state.src_format); + const scale = base.format.getScale(state.dst_format); + + if (std.simd.firstTrue(unscale != scale) != null) { + color *= zm.f32x4(scale[0] / unscale[0], scale[1] / unscale[1], scale[2] / unscale[2], scale[3] / unscale[3]); + } + + return color; +} + +fn readFloat4(map: []const u8, state: State) zm.F32x4 { + var c: zm.F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; + + switch (state.src_format) { + .r8g8b8a8_sint, + .r8g8b8a8_snorm, + .r8g8b8a8_unorm, + .r8g8b8a8_uint, + 
.r8g8b8a8_srgb, + => { + c[0] = @as(f32, @floatFromInt(map[0])) / 255.0; + c[1] = @as(f32, @floatFromInt(map[1])) / 255.0; + c[2] = @as(f32, @floatFromInt(map[2])) / 255.0; + c[3] = @as(f32, @floatFromInt(map[3])) / 255.0; + }, + + else => base.unsupported("Blitter source format {any}", .{state.src_format}), + } + + return c; +} + +fn writeFloat4(color: zm.F32x4, map: []u8, state: State) void { + switch (state.dst_format) { + .b8g8r8a8_srgb, + .b8g8r8a8_unorm, + => { + map[0] = @intFromFloat(@round(std.math.clamp(color[2], 0.0, 1.0) * 255.0)); + map[1] = @intFromFloat(@round(std.math.clamp(color[1], 0.0, 1.0) * 255.0)); + map[2] = @intFromFloat(@round(std.math.clamp(color[0], 0.0, 1.0) * 255.0)); + map[3] = @intFromFloat(@round(std.math.clamp(color[3], 0.0, 1.0) * 255.0)); + }, + else => base.unsupported("Blitter destination format {any}", .{state.dst_format}), + } +} diff --git a/src/vulkan/Image.zig b/src/vulkan/Image.zig index b0fd212..d667fe1 100644 --- a/src/vulkan/Image.zig +++ b/src/vulkan/Image.zig @@ -91,3 +91,7 @@ pub inline fn formatFromAspect(self: *const Self, aspect_mask: vk.ImageAspectFla pub inline fn formatToAspect(self: *const Self, aspect_mask: vk.ImageAspectFlags) vk.ImageAspectFlags { return lib.format.toAspect(self.format, aspect_mask); } + +pub fn getLastLayerIndex(self: *const Self, range: vk.ImageSubresourceRange) u32 { + return (if (range.layer_count == vk.REMAINING_ARRAY_LAYERS) self.array_layers else range.base_array_layer + range.layer_count) - 1; +} diff --git a/src/vulkan/format.zig b/src/vulkan/format.zig index 351057f..9e9990e 100644 --- a/src/vulkan/format.zig +++ b/src/vulkan/format.zig @@ -1,6 +1,7 @@ const std = @import("std"); const vk = @import("vulkan"); const lib = @import("lib.zig"); +const zm = @import("zmath"); pub fn fromAspect(format: vk.Format, aspect: vk.ImageAspectFlags) vk.Format { if (aspect.color_bit or (aspect.color_bit and aspect.stencil_bit)) { @@ -118,3 +119,128 @@ pub inline fn isUint(format: vk.Format) bool { pub inline fn isUnorm(format: vk.Format) bool { return lib.vku.vkuFormatIsUNORM(@intCast(@intFromEnum(format))); } + +pub fn 
getScale(format: vk.Format) zm.F32x4 { + return switch (format) { + .r4g4_unorm_pack8, + .r4g4b4a4_unorm_pack16, + .b4g4r4a4_unorm_pack16, + .a4r4g4b4_unorm_pack16, + .a4b4g4r4_unorm_pack16, + => zm.f32x4(0xf, 0xf, 0xf, 0xf), + .r8_unorm, + .r8g8_unorm, + .a8b8g8r8_unorm_pack32, + .r8g8b8a8_unorm, + .b8g8r8a8_unorm, + .r8_srgb, + .r8g8_srgb, + .a8b8g8r8_srgb_pack32, + .r8g8b8a8_srgb, + .b8g8r8a8_srgb, + => zm.f32x4(0xff, 0xff, 0xff, 0xff), + .r8_snorm, + .r8g8_snorm, + .a8b8g8r8_snorm_pack32, + .r8g8b8a8_snorm, + .b8g8r8a8_snorm, + => zm.f32x4(0x7f, 0x7f, 0x7f, 0x7f), + .r16_unorm, + .r16g16_unorm, + .r16g16b16_unorm, + .r16g16b16a16_unorm, + => zm.f32x4(0xffff, 0xffff, 0xffff, 0xffff), + .r16_snorm, + .r16g16_snorm, + .r16g16b16_snorm, + .r16g16b16a16_snorm, + => zm.f32x4(0x7fff, 0x7fff, 0x7fff, 0x7fff), + .r8_sint, + .r8_uint, + .r8g8_sint, + .r8g8_uint, + .r8g8b8a8_sint, + .r8g8b8a8_uint, + .a8b8g8r8_sint_pack32, + .a8b8g8r8_uint_pack32, + .b8g8r8a8_sint, + .b8g8r8a8_uint, + .r8_uscaled, + .r8g8_uscaled, + .r8g8b8a8_uscaled, + .b8g8r8a8_uscaled, + .a8b8g8r8_uscaled_pack32, + .r8_sscaled, + .r8g8_sscaled, + .r8g8b8a8_sscaled, + .b8g8r8a8_sscaled, + .a8b8g8r8_sscaled_pack32, + .r16_sint, + .r16_uint, + .r16g16_sint, + .r16g16_uint, + .r16g16b16a16_sint, + .r16g16b16a16_uint, + .r16_sscaled, + .r16g16_sscaled, + .r16g16b16_sscaled, + .r16g16b16a16_sscaled, + .r16_uscaled, + .r16g16_uscaled, + .r16g16b16_uscaled, + .r16g16b16a16_uscaled, + .r32_sint, + .r32_uint, + .r32g32_sint, + .r32g32_uint, + .r32g32b32_sint, + .r32g32b32_uint, + .r32g32b32a32_sint, + .r32g32b32a32_uint, + .r32g32b32a32_sfloat, + .r32g32b32_sfloat, + .r32g32_sfloat, + .r32_sfloat, + .r16g16b16a16_sfloat, + .r16g16b16_sfloat, + .r16g16_sfloat, + .r16_sfloat, + .b10g11r11_ufloat_pack32, + .e5b9g9r9_ufloat_pack32, + .a2r10g10b10_uscaled_pack32, + .a2r10g10b10_sscaled_pack32, + .a2r10g10b10_uint_pack32, + .a2r10g10b10_sint_pack32, + .a2b10g10r10_uscaled_pack32, + .a2b10g10r10_sscaled_pack32, + 
.a2b10g10r10_uint_pack32, + .a2b10g10r10_sint_pack32, + => zm.f32x4(1.0, 1.0, 1.0, 1.0), + .r5g5b5a1_unorm_pack16, + .b5g5r5a1_unorm_pack16, + .a1r5g5b5_unorm_pack16, + => zm.f32x4(0x1f, 0x1f, 0x1f, 0x01), + .r5g6b5_unorm_pack16, + .b5g6r5_unorm_pack16, + => zm.f32x4(0x1f, 0x3f, 0x1f, 1.0), + .a2r10g10b10_unorm_pack32, + .a2b10g10r10_unorm_pack32, + => zm.f32x4(0x3ff, 0x3ff, 0x3ff, 0x03), + .a2r10g10b10_snorm_pack32, + .a2b10g10r10_snorm_pack32, + => zm.f32x4(0x1ff, 0x1ff, 0x1ff, 0x01), + .d16_unorm, + => zm.f32x4(0xffff, 0.0, 0.0, 0.0), + .d24_unorm_s8_uint, + .x8_d24_unorm_pack32, + => zm.f32x4(0xffffff, 0.0, 0.0, 0.0), + .d32_sfloat, + .d32_sfloat_s8_uint, + .s8_uint, + => zm.f32x4(1.0, 1.0, 1.0, 1.0), + else => blk: { + lib.unsupported("format scale {any}", .{format}); + break :blk zm.f32x4s(1.0); + }, + }; +} diff --git a/src/vulkan/lib.zig b/src/vulkan/lib.zig index d3d0982..3488472 100644 --- a/src/vulkan/lib.zig +++ b/src/vulkan/lib.zig @@ -7,6 +7,8 @@ pub const vku = @cImport({ @cInclude("vulkan/utility/vk_format_utils.h"); }); +pub const zm = @import("zmath"); + pub const errors = @import("error_set.zig"); pub const lib_vulkan = @import("lib_vulkan.zig"); pub const logger = @import("logger.zig");