From 6dc82d4a68c5820dd84d85a27bd97f352ce1f60a Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Mon, 1 Jun 2026 22:13:43 +0200 Subject: [PATCH] fixing all image operation tests --- build.zig.zon | 4 +- src/soft/SoftCommandBuffer.zig | 4 +- src/soft/SoftFramebuffer.zig | 38 +-- src/soft/SoftImage.zig | 12 +- src/soft/SoftPipeline.zig | 244 +++++++++++++++++-- src/soft/device/ComputeDispatcher.zig | 88 ++++++- src/soft/device/Device.zig | 2 +- src/soft/device/blitter.zig | 131 +++++++--- src/soft/device/fragment.zig | 12 +- src/soft/device/rasterizer.zig | 65 ++--- src/soft/device/rasterizer/bresenham.zig | 4 +- src/soft/device/rasterizer/common.zig | 27 +- src/soft/device/rasterizer/edge_function.zig | 4 +- src/vulkan/Sampler.zig | 11 +- 14 files changed, 514 insertions(+), 132 deletions(-) diff --git a/build.zig.zon b/build.zig.zon index f56d292..add2050 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -26,8 +26,8 @@ .hash = "N-V-__8AAMpOQxkHCKTw9i-NwmmQ3ks1ndFDXcVLlic4KjK3", }, .SPIRV_Interpreter = .{ - .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#e993162d4eebc8efa3b8a795da8c107a38ba5858", - .hash = "SPIRV_Interpreter-0.0.1-ajmpn-55BQAOM0GSLiRt-IflVvVg5tRyOyHpBjXz5XdZ", + .url = "git+https://git.kbz8.me/kbz_8/SPIRV-Interpreter#9c355fe126d0142ac6d1fae48633993864c90d61", + .hash = "SPIRV_Interpreter-0.0.1-ajmpn867BQA-J2PA4fGQGnb3WcNtqd_3_W7-goaEocC5", .lazy = true, }, //.SPIRV_Interpreter = .{ diff --git a/src/soft/SoftCommandBuffer.zig b/src/soft/SoftCommandBuffer.zig index d4fe1ae..4570ea1 100644 --- a/src/soft/SoftCommandBuffer.zig +++ b/src/soft/SoftCommandBuffer.zig @@ -168,7 +168,7 @@ pub fn beginRenderPass(interface: *Interface, render_pass: *base.RenderPass, fra if (clear_mask.color_bit) { try blitter.clear( (impl.clear_values orelse return VkError.Unknown)[index], - try image.getClearFormat(), + try SoftImage.getClearFormatFor(attachment.format), image, attachment.format, attachment.subresource_range, @@ -401,7 +401,7 @@ pub fn clearAttachment(interface: *Interface, attachment: vk.ClearAttachment, re }; const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.image)); - const clear_format = try image.getClearFormat(); + const clear_format = try SoftImage.getClearFormatFor(image_view.format); const range: vk.ImageSubresourceRange = .{ .aspect_mask = impl.attachment.aspect_mask, diff --git a/src/soft/SoftFramebuffer.zig b/src/soft/SoftFramebuffer.zig index 9682c2d..3e199ce 100644 --- a/src/soft/SoftFramebuffer.zig +++ b/src/soft/SoftFramebuffer.zig @@ -49,23 +49,29 @@ pub fn resolveAttachments(self: *Self, render_pass: *SoftRenderPass, subpass_ind const dst_image_view = self.interface.attachments[resolve.attachment]; const dst_image: *SoftImage = @alignCast(@fieldParentPtr("interface", dst_image_view.image)); - try blitter.resolve(src_image, dst_image, .{ - .src_subresource = .{ - .aspect_mask = src_image_view.subresource_range.aspect_mask, - .base_array_layer = src_image_view.subresource_range.base_array_layer, - .layer_count = src_image_view.subresource_range.layer_count, - .mip_level = src_image_view.subresource_range.base_mip_level, + try blitter.resolveWithFormats( + src_image, + dst_image, + .{ + .src_subresource = .{ + .aspect_mask = src_image_view.subresource_range.aspect_mask, + .base_array_layer = src_image_view.subresource_range.base_array_layer, + .layer_count = src_image_view.subresource_range.layer_count, + .mip_level = src_image_view.subresource_range.base_mip_level, + }, + .src_offset = .{ .x = 0, .y = 0, .z = 0 }, + .dst_subresource = .{ + .aspect_mask = dst_image_view.subresource_range.aspect_mask, + .base_array_layer = dst_image_view.subresource_range.base_array_layer, + .layer_count = dst_image_view.subresource_range.layer_count, + .mip_level = dst_image_view.subresource_range.base_mip_level, + }, + .dst_offset = .{ .x = 0, .y = 0, .z = 0 }, + .extent = src_image.getMipLevelExtent(src_image_view.subresource_range.base_mip_level), }, - .src_offset = .{ .x = 0, .y = 0, .z = 0 }, - .dst_subresource = .{ - .aspect_mask = dst_image_view.subresource_range.aspect_mask, - .base_array_layer = dst_image_view.subresource_range.base_array_layer, - .layer_count = dst_image_view.subresource_range.layer_count, - .mip_level = dst_image_view.subresource_range.base_mip_level, - }, - .dst_offset = .{ .x = 0, .y = 0, .z = 0 }, - .extent = src_image.getMipLevelExtent(src_image_view.subresource_range.base_mip_level), - }); + src_image_view.format, + dst_image_view.format, + ); } } } diff --git a/src/soft/SoftImage.zig b/src/soft/SoftImage.zig index 9746d4e..925a02c 100644 --- a/src/soft/SoftImage.zig +++ b/src/soft/SoftImage.zig @@ -102,9 +102,13 @@ pub fn getMemoryRequirements(_: *Interface, requirements: *vk.MemoryRequirements } pub fn getClearFormat(self: *Self) VkError!vk.Format { - return if (base.c.vkuFormatIsSINT(@intCast(@intFromEnum(self.interface.format)))) + return getClearFormatFor(self.interface.format); +} + +pub fn getClearFormatFor(format: vk.Format) VkError!vk.Format { + return if (base.format.isSint(format)) .r32g32b32a32_sint - else if (base.c.vkuFormatIsUINT(@intCast(@intFromEnum(self.interface.format)))) + else if (base.format.isUint(format)) .r32g32b32a32_uint else .r32g32b32a32_sfloat; @@ -472,8 +476,8 @@ fn getSubresourceLayout(interface: *const Interface, subresource: vk.ImageSubres .offset = try self.getSubresourceOffset(subresource.aspect_mask, subresource.mip_level, subresource.array_layer), .size = self.getMultiSampledLevelSize(subresource.aspect_mask, subresource.mip_level), .row_pitch = self.interface.getRowPitchMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level), - .array_pitch = self.interface.getSliceMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level), - .depth_pitch = self.getLayerSize(subresource.aspect_mask), + .array_pitch = self.getLayerSize(subresource.aspect_mask), + .depth_pitch = self.interface.getSliceMemSizeForMipLevel(subresource.aspect_mask, subresource.mip_level), }; } diff --git a/src/soft/SoftPipeline.zig b/src/soft/SoftPipeline.zig index 53df4a4..5fcc7fe 100644 --- a/src/soft/SoftPipeline.zig +++ b/src/soft/SoftPipeline.zig @@ -102,6 +102,7 @@ pub fn createCompute(device: *base.Device, allocator: std.mem.Allocator, cache: .writeImageFloat4 = writeImageFloat4, .writeImageInt4 = writeImageInt4, .sampleImageFloat4 = sampleImageFloat4, + .queryImageSize = queryImageSize, }, ) catch |err| { std.log.scoped(.SpvRuntimeInit).err("SPIR-V Runtime failed to initialize, {s}", .{@errorName(err)}); @@ -187,6 +188,7 @@ pub fn createGraphics(device: *base.Device, allocator: std.mem.Allocator, cache: .writeImageFloat4 = writeImageFloat4, .writeImageInt4 = writeImageInt4, .sampleImageFloat4 = sampleImageFloat4, + .queryImageSize = queryImageSize, }, ) catch |err| { std.log.scoped(.SpvRuntimeInit).err("SPIR-V Runtime failed to initialize, {s}", .{@errorName(err)}); @@ -264,16 +266,17 @@ fn readImageFloat4(context: *anyopaque, dim: spv.SpvDim, x: i32, y: i32, z: i32) } else { const image_view: *SoftImageView = @ptrCast(@alignCast(context)); const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); + const cube_face: u32 = if (dim == .Cube) @intCast(z) else 0; pixel = image.readFloat4( .{ .x = x, .y = y, - .z = z, + .z = if (dim == .Cube) 0 else z, }, .{ .aspect_mask = image_view.interface.subresource_range.aspect_mask, .mip_level = image_view.interface.subresource_range.base_mip_level, - .array_layer = image_view.interface.subresource_range.base_array_layer, + .array_layer = image_view.interface.subresource_range.base_array_layer + cube_face, }, image_view.interface.format, ) catch return SpvRuntimeError.Unknown; @@ -296,16 +299,17 @@ fn readImageInt4(context: *anyopaque, dim: spv.SpvDim, x: i32, y: i32, z: i32) S } else { const image_view: *SoftImageView = @ptrCast(@alignCast(context)); const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); + const cube_face: u32 = if (dim == .Cube) @intCast(z) else 0; pixel = image.readInt4( .{ .x = x, .y = y, - .z = z, + .z = if (dim == .Cube) 0 else z, }, .{ .aspect_mask = image_view.interface.subresource_range.aspect_mask, .mip_level = image_view.interface.subresource_range.base_mip_level, - .array_layer = image_view.interface.subresource_range.base_array_layer, + .array_layer = image_view.interface.subresource_range.base_array_layer + cube_face, }, image_view.interface.format, ) catch return SpvRuntimeError.Unknown; @@ -328,16 +332,17 @@ fn writeImageFloat4(context: *anyopaque, dim: spv.SpvDim, x: i32, y: i32, z: i32 } else { const image_view: *SoftImageView = @ptrCast(@alignCast(context)); const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); + const cube_face: u32 = if (dim == .Cube) @intCast(z) else 0; image.writeFloat4( .{ .x = x, .y = y, - .z = z, + .z = if (dim == .Cube) 0 else z, }, .{ .aspect_mask = image_view.interface.subresource_range.aspect_mask, .mip_level = image_view.interface.subresource_range.base_mip_level, - .array_layer = image_view.interface.subresource_range.base_array_layer, + .array_layer = image_view.interface.subresource_range.base_array_layer + cube_face, }, image_view.interface.format, vec_pixel, @@ -355,16 +360,17 @@ fn writeImageInt4(context: *anyopaque, dim: spv.SpvDim, x: i32, y: i32, z: i32, } else { const image_view: *SoftImageView = @ptrCast(@alignCast(context)); const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); + const cube_face: u32 = if (dim == .Cube) @intCast(z) else 0; image.writeInt4( .{ .x = x, .y = y, - .z = z, + .z = if (dim == .Cube) 0 else z, }, .{ .aspect_mask = image_view.interface.subresource_range.aspect_mask, .mip_level = image_view.interface.subresource_range.base_mip_level, - .array_layer = image_view.interface.subresource_range.base_array_layer, + .array_layer = image_view.interface.subresource_range.base_array_layer + cube_face, }, image_view.interface.format, vec_pixel, @@ -372,6 +378,157 @@ fn writeImageInt4(context: *anyopaque, dim: spv.SpvDim, x: i32, y: i32, z: i32, } } +const CubeCoordinate = struct { + face: u32, + u: f32, + v: f32, +}; + +fn resolveCubeCoordinate(x: f32, y: f32, z: f32) CubeCoordinate { + const ax = @abs(x); + const ay = @abs(y); + const az = @abs(z); + + var face: u32 = 0; + var sc: f32 = 0.0; + var tc: f32 = 0.0; + var ma: f32 = 1.0; + + if (ax >= ay and ax >= az) { + ma = ax; + if (x >= 0.0) { + face = 0; + sc = -z; + tc = -y; + } else { + face = 1; + sc = z; + tc = -y; + } + } else if (ay >= ax and ay >= az) { + ma = ay; + if (y >= 0.0) { + face = 2; + sc = x; + tc = z; + } else { + face = 3; + sc = x; + tc = -z; + } + } else { + ma = az; + if (z >= 0.0) { + face = 4; + sc = x; + tc = -y; + } else { + face = 5; + sc = -x; + tc = -y; + } + } + + const inv_ma = if (ma == 0.0) 0.0 else 1.0 / ma; + return .{ + .face = face, + .u = (sc * inv_ma + 1.0) * 0.5, + .v = (tc * inv_ma + 1.0) * 0.5, + }; +} + +fn cubeDirection(face: u32, u: f32, v: f32) struct { x: f32, y: f32, z: f32 } { + const sc = u * 2.0 - 1.0; + const tc = v * 2.0 - 1.0; + + return switch (face) { + 0 => .{ .x = 1.0, .y = -tc, .z = -sc }, + 1 => .{ .x = -1.0, .y = -tc, .z = sc }, + 2 => .{ .x = sc, .y = 1.0, .z = tc }, + 3 => .{ .x = sc, .y = -1.0, .z = -tc }, + 4 => .{ .x = sc, .y = -tc, .z = 1.0 }, + 5 => .{ .x = -sc, .y = -tc, .z = -1.0 }, + else => .{ .x = 0.0, .y = 0.0, .z = 0.0 }, + }; +} + +fn readSampledFloat4( + image: *SoftImage, + image_view: *SoftImageView, + dim: spv.SpvDim, + coord: CubeCoordinate, + ix: i32, + iy: i32, +) VkError!zm.F32x4 { + const width_f: f32 = @floatFromInt(image.interface.extent.width); + const height_f: f32 = @floatFromInt(image.interface.extent.height); + + const texel = if (dim == .Cube) blk: { + const dir = cubeDirection( + coord.face, + (@as(f32, @floatFromInt(ix)) + 0.5) / width_f, + (@as(f32, @floatFromInt(iy)) + 0.5) / height_f, + ); + break :blk resolveCubeCoordinate(dir.x, dir.y, dir.z); + } else coord; + + const result = try image.readFloat4( + .{ + .x = if (dim == .Cube) + std.math.clamp(@as(i32, @intFromFloat(texel.u * width_f)), 0, image.interface.extent.width - 1) + else + std.math.clamp(ix, 0, image.interface.extent.width - 1), + .y = if (dim == .Cube) + std.math.clamp(@as(i32, @intFromFloat(texel.v * height_f)), 0, image.interface.extent.height - 1) + else + std.math.clamp(iy, 0, image.interface.extent.height - 1), + .z = 0, + }, + .{ + .aspect_mask = image_view.interface.subresource_range.aspect_mask, + .mip_level = image_view.interface.subresource_range.base_mip_level, + .array_layer = image_view.interface.subresource_range.base_array_layer + texel.face, + }, + image_view.interface.format, + ); + return result; +} + +fn sampleNearestFloat4(image: *SoftImage, image_view: *SoftImageView, dim: spv.SpvDim, coord: CubeCoordinate) VkError!zm.F32x4 { + const width_f: f32 = @floatFromInt(image.interface.extent.width); + const height_f: f32 = @floatFromInt(image.interface.extent.height); + return readSampledFloat4( + image, + image_view, + dim, + coord, + @intFromFloat(coord.u * width_f), + @intFromFloat(coord.v * height_f), + ); +} + +fn sampleLinearFloat4(image: *SoftImage, image_view: *SoftImageView, dim: spv.SpvDim, coord: CubeCoordinate) VkError!zm.F32x4 { + const width_f: f32 = @floatFromInt(image.interface.extent.width); + const height_f: f32 = @floatFromInt(image.interface.extent.height); + const x = coord.u * width_f - 0.5; + const y = coord.v * height_f - 0.5; + const x0: i32 = @intFromFloat(@floor(x)); + const y0: i32 = @intFromFloat(@floor(y)); + const x1 = x0 + 1; + const y1 = y0 + 1; + const wx = x - @as(f32, @floatFromInt(x0)); + const wy = y - @as(f32, @floatFromInt(y0)); + + const p00 = try readSampledFloat4(image, image_view, dim, coord, x0, y0); + const p10 = try readSampledFloat4(image, image_view, dim, coord, x1, y0); + const p01 = try readSampledFloat4(image, image_view, dim, coord, x0, y1); + const p11 = try readSampledFloat4(image, image_view, dim, coord, x1, y1); + + const row0 = p00 * zm.f32x4s(1.0 - wx) + p10 * zm.f32x4s(wx); + const row1 = p01 * zm.f32x4s(1.0 - wx) + p11 * zm.f32x4s(wx); + return row0 * zm.f32x4s(1.0 - wy) + row1 * zm.f32x4s(wy); +} + fn sampleImageFloat4(context: *anyopaque, context2: *anyopaque, dim: spv.SpvDim, x: f32, y: f32, z: f32) SpvRuntimeError!spv.Runtime.Vec4(f32) { var pixel = zm.f32x4s(0.0); @@ -385,21 +542,28 @@ fn sampleImageFloat4(context: *anyopaque, context2: *anyopaque, dim: spv.SpvDim, const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); const sampler: *SoftSampler = @ptrCast(@alignCast(context2)); - _ = sampler; - pixel = image.readFloat4( - .{ - .x = std.math.clamp(@as(i32, @intFromFloat(x * @as(f32, @floatFromInt(image.interface.extent.width)))), 0, image.interface.extent.width - 1), - .y = std.math.clamp(@as(i32, @intFromFloat(y * @as(f32, @floatFromInt(image.interface.extent.height)))), 0, image.interface.extent.height - 1), - .z = std.math.clamp(@as(i32, @intFromFloat(z * @as(f32, @floatFromInt(image.interface.extent.depth)))), 0, image.interface.extent.depth - 1), - }, - .{ - .aspect_mask = image_view.interface.subresource_range.aspect_mask, - .mip_level = image_view.interface.subresource_range.base_mip_level, - .array_layer = image_view.interface.subresource_range.base_array_layer, - }, - image_view.interface.format, - ) catch return SpvRuntimeError.Unknown; + if (dim == .Cube) { + const coord = resolveCubeCoordinate(x, y, z); + pixel = switch (sampler.interface.mag_filter) { + .linear => sampleLinearFloat4(image, image_view, dim, coord), + else => sampleNearestFloat4(image, image_view, dim, coord), + } catch return SpvRuntimeError.Unknown; + } else { + pixel = image.readFloat4( + .{ + .x = std.math.clamp(@as(i32, @intFromFloat(x * @as(f32, @floatFromInt(image.interface.extent.width)))), 0, image.interface.extent.width - 1), + .y = std.math.clamp(@as(i32, @intFromFloat(y * @as(f32, @floatFromInt(image.interface.extent.height)))), 0, image.interface.extent.height - 1), + .z = std.math.clamp(@as(i32, @intFromFloat(z * @as(f32, @floatFromInt(image.interface.extent.depth)))), 0, image.interface.extent.depth - 1), + }, + .{ + .aspect_mask = image_view.interface.subresource_range.aspect_mask, + .mip_level = image_view.interface.subresource_range.base_mip_level, + .array_layer = image_view.interface.subresource_range.base_array_layer, + }, + image_view.interface.format, + ) catch return SpvRuntimeError.Unknown; + } } return .{ @@ -409,3 +573,39 @@ fn sampleImageFloat4(context: *anyopaque, context2: *anyopaque, dim: spv.SpvDim, .w = pixel[3], }; } + +fn queryImageSize(context: *anyopaque, dim: spv.SpvDim, arrayed: bool) SpvRuntimeError!spv.Runtime.Vec4(u32) { + if (dim == .Buffer) { + const buffer_view: *SoftBufferView = @ptrCast(@alignCast(context)); + const range = if (buffer_view.interface.range == vk.WHOLE_SIZE) + buffer_view.interface.buffer.size - buffer_view.interface.offset + else + buffer_view.interface.range; + return .{ + .x = @intCast(@divTrunc(range, base.format.texelSize(buffer_view.interface.format))), + .y = 0, + .z = 0, + .w = 0, + }; + } + + const image_view: *SoftImageView = @ptrCast(@alignCast(context)); + const image: *SoftImage = @alignCast(@fieldParentPtr("interface", image_view.interface.image)); + const extent = image.getMipLevelExtent(image_view.interface.subresource_range.base_mip_level); + const layers = if (image_view.interface.subresource_range.layer_count == vk.REMAINING_ARRAY_LAYERS) + image.interface.array_layers - image_view.interface.subresource_range.base_array_layer + else + image_view.interface.subresource_range.layer_count; + return switch (dim) { + .@"1D" => if (arrayed) + .{ .x = extent.width, .y = layers, .z = 0, .w = 0 } + else + .{ .x = extent.width, .y = 0, .z = 0, .w = 0 }, + .@"2D", .Cube, .Rect => if (arrayed) + .{ .x = extent.width, .y = extent.height, .z = layers, .w = 0 } + else + .{ .x = extent.width, .y = extent.height, .z = 0, .w = 0 }, + .@"3D" => .{ .x = extent.width, .y = extent.height, .z = extent.depth, .w = 0 }, + else => .{ .x = extent.width, .y = extent.height, .z = layers, .w = 0 }, + }; +} diff --git a/src/soft/device/ComputeDispatcher.zig b/src/soft/device/ComputeDispatcher.zig index 1ca5aa6..9dd16d1 100644 --- a/src/soft/device/ComputeDispatcher.zig +++ b/src/soft/device/ComputeDispatcher.zig @@ -106,8 +106,28 @@ inline fn run(data: RunData) !void { const rt = &shader.runtimes[data.batch_id].rt; const entry = try rt.getEntryPointByName(shader.entry); + const uses_control_barrier = hasControlBarrier(rt.mod.code); - try ExecutionDevice.writeDescriptorSets(data.self.state, rt); + var barrier_runtimes: []spv.Runtime = &.{}; + var barrier_statuses: []spv.Runtime.EntryPointStatus = &.{}; + if (uses_control_barrier) { + barrier_runtimes = try allocator.alloc(spv.Runtime, data.invocations_per_workgroup); + barrier_statuses = try allocator.alloc(spv.Runtime.EntryPointStatus, data.invocations_per_workgroup); + for (barrier_runtimes) |*barrier_rt| { + barrier_rt.* = try spv.Runtime.init(allocator, rt.mod, rt.image_api); + try barrier_rt.copySpecializationConstantsFrom(allocator, rt); + } + } + defer { + for (barrier_runtimes) |*barrier_rt| { + barrier_rt.deinit(allocator); + } + allocator.free(barrier_runtimes); + allocator.free(barrier_statuses); + } + + if (!uses_control_barrier) + try ExecutionDevice.writeDescriptorSets(data.self.state, rt); var group_index: usize = data.batch_id; while (group_index < data.group_count) : (group_index += data.self.batch_size) { @@ -121,15 +141,23 @@ inline fn run(data: RunData) !void { modulo -= group_y * data.group_count_x; const group_x = modulo; - try setupWorkgroupBuiltins(data.self, rt, .{ + const group_count_vec = @Vector(3, u32){ @as(u32, @intCast(data.group_count_x)), @as(u32, @intCast(data.group_count_y)), @as(u32, @intCast(data.group_count_z)), - }, .{ + }; + const group_id_vec = @Vector(3, u32){ @as(u32, @intCast(group_x)), @as(u32, @intCast(group_y)), @as(u32, @intCast(group_z)), - }); + }; + + if (uses_control_barrier) { + try runBarrierWorkgroup(data, barrier_runtimes, barrier_statuses, entry, group_count_vec, group_id_vec); + continue; + } + + try setupWorkgroupBuiltins(data.self, rt, group_count_vec, group_id_vec); for (0..data.invocations_per_workgroup) |i| { const invocation_index = data.self.invocation_index.fetchAdd(1, .monotonic); @@ -163,6 +191,58 @@ inline fn run(data: RunData) !void { } } +fn runBarrierWorkgroup( + data: RunData, + runtimes: []spv.Runtime, + statuses: []spv.Runtime.EntryPointStatus, + entry: spv.SpvWord, + group_count: @Vector(3, u32), + group_id: @Vector(3, u32), +) !void { + const allocator = data.self.device.device_allocator.allocator(); + + for (runtimes, 0..) |*rt, i| { + try ExecutionDevice.writeDescriptorSets(data.self.state, rt); + try setupWorkgroupBuiltins(data.self, rt, group_count, group_id); + try setupSubgroupBuiltins(data.self, rt, group_id, i); + statuses[i] = try rt.beginEntryPoint(allocator, entry); + try rt.flushDescriptorSets(allocator); + } + + while (true) { + var pending = false; + for (statuses) |status| { + if (status == .barrier) { + pending = true; + break; + } + } + if (!pending) + break; + + for (runtimes, 0..) |*rt, i| { + if (statuses[i] == .completed) + continue; + statuses[i] = try rt.continueEntryPoint(allocator); + try rt.flushDescriptorSets(allocator); + } + } +} + +/// TODO: Move this in the SPIR-V Interpreter +fn hasControlBarrier(code: []const spv.SpvWord) bool { + var i: usize = 5; + while (i < code.len) { + const opcode_data = code[i]; + const word_count = (opcode_data & (~spv.spv.SpvOpCodeMask)) >> spv.spv.SpvWordCountShift; + const opcode: spv.spv.SpvOp = @enumFromInt(opcode_data & spv.spv.SpvOpCodeMask); + if (opcode == .ControlBarrier) + return true; + i += @max(word_count, 1); + } + return false; +} + inline fn dumpResultsTable(allocator: std.mem.Allocator, io: std.Io, rt: *spv.Runtime, is_early: bool) !void { @branchHint(.cold); const file = try std.Io.Dir.cwd().createFile( diff --git a/src/soft/device/Device.zig b/src/soft/device/Device.zig index 97f6583..b3fc2ea 100644 --- a/src/soft/device/Device.zig +++ b/src/soft/device/Device.zig @@ -71,7 +71,7 @@ pub fn writeDescriptorSets(state: *PipelineState, rt: *spv.Runtime) !void { switch (binding) { .buffer => |buffer_data_array| for (buffer_data_array, 0..) |buffer_data, descriptor_index| { if (buffer_data.object) |buffer| { - const map = buffer.mapAsSliceWithOffset(u8, buffer_data.offset, buffer_data.size) catch continue :bindings; + const map = buffer.mapAsSliceWithAddedOffset(u8, buffer_data.offset, buffer_data.size) catch continue :bindings; try rt.writeDescriptorSet( map, @as(u32, @intCast(set_index)), diff --git a/src/soft/device/blitter.zig b/src/soft/device/blitter.zig index 4b28bab..df47eab 100644 --- a/src/soft/device/blitter.zig +++ b/src/soft/device/blitter.zig @@ -286,6 +286,17 @@ fn sample(src: []const u8, pos: F32x4, dim: F32x4, slice_bytes: usize, pitch_byt } pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit, filter: vk.Filter) VkError!void { + try blitRegionWithFormats( + src, + dst, + region, + filter, + base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask), + base.format.fromAspect(dst.interface.format, region.dst_subresource.aspect_mask), + ); +} + +pub fn blitRegionWithFormats(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit, filter: vk.Filter, src_format: vk.Format, dst_format: vk.Format) VkError!void { const io = dst.interface.owner.io(); const timer = std.Io.Timestamp.now(io, .real); defer if (comptime base.config.logs != .none) { @@ -323,13 +334,10 @@ pub fn blitRegion(src: *const SoftImage, dst: *SoftImage, region: vk.ImageBlit, const y0 = @as(f32, @floatFromInt(src_offset_0.y)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.y))) * height_ratio; const z0 = @as(f32, @floatFromInt(src_offset_0.z)) + (0.5 - @as(f32, @floatFromInt(dst_offset_0.z))) * depth_ratio; - const src_slice_pitch_bytes = src.interface.getSliceMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level); - const src_row_pitch_bytes = src.interface.getRowPitchMemSizeForMipLevel(region.src_subresource.aspect_mask, region.src_subresource.mip_level); - const dst_slice_pitch_bytes = dst.interface.getSliceMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level); - const dst_row_pitch_bytes = dst.interface.getRowPitchMemSizeForMipLevel(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level); - - const src_format = base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask); - const dst_format = base.format.fromAspect(dst.interface.format, region.dst_subresource.aspect_mask); + const src_slice_pitch_bytes = src.getSliceMemSizeForMipLevelWithFormat(region.src_subresource.aspect_mask, region.src_subresource.mip_level, src_format); + const src_row_pitch_bytes = src.getRowPitchMemSizeForMipLevelWithFormat(region.src_subresource.aspect_mask, region.src_subresource.mip_level, src_format); + const dst_slice_pitch_bytes = dst.getSliceMemSizeForMipLevelWithFormat(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level, dst_format); + const dst_row_pitch_bytes = dst.getRowPitchMemSizeForMipLevelWithFormat(region.dst_subresource.aspect_mask, region.dst_subresource.mip_level, dst_format); const apply_filter = (filter != .nearest); const allow_srgb_conversion = apply_filter or base.format.isSrgb(src_format) != base.format.isSrgb(dst_format); @@ -485,6 +493,16 @@ fn blit(state: State, data: BlitData) void { /// Using image blitting to resolve pub inline fn resolve(src: *const SoftImage, dst: *SoftImage, region: vk.ImageResolve) VkError!void { + try resolveWithFormats( + src, + dst, + region, + base.format.fromAspect(src.interface.format, region.src_subresource.aspect_mask), + base.format.fromAspect(dst.interface.format, region.dst_subresource.aspect_mask), + ); +} + +pub inline fn resolveWithFormats(src: *const SoftImage, dst: *SoftImage, region: vk.ImageResolve, src_format: vk.Format, dst_format: vk.Format) VkError!void { var blit_region: vk.ImageBlit = .{ .src_offsets = .{ region.src_offset, region.src_offset }, .src_subresource = region.src_subresource, @@ -500,7 +518,7 @@ pub inline fn resolve(src: *const SoftImage, dst: *SoftImage, region: vk.ImageRe blit_region.dst_offsets[1].y += @intCast(region.extent.height); blit_region.dst_offsets[1].z += @intCast(region.extent.depth); - try blitRegion(src, dst, blit_region, .nearest); + try blitRegionWithFormats(src, dst, blit_region, .nearest, src_format, dst_format); } fn applyScaleAndClamp(base_color: F32x4, state: State, apply_srgb_convertion: bool) F32x4 { @@ -532,6 +550,16 @@ fn applyScaleAndClamp(base_color: F32x4, state: State, apply_srgb_convertion: bo return color; } +inline fn normalizedI8(value: u8) f32 { + const signed: i8 = @bitCast(value); + return @max(@as(f32, @floatFromInt(signed)) / @as(f32, @floatFromInt(std.math.maxInt(i8))), -1.0); +} + +inline fn normalizedI16(value: u16) f32 { + const signed: i16 = @bitCast(value); + return @max(@as(f32, @floatFromInt(signed)) / @as(f32, @floatFromInt(std.math.maxInt(i16))), -1.0); +} + pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { var c: F32x4 = .{ 0.0, 0.0, 0.0, 1.0 }; @@ -543,9 +571,9 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { .r8_sint, .r8_snorm, - => c[0] = @as(f32, @floatFromInt(map[0])) / std.math.maxInt(i8), + => c[0] = normalizedI8(map[0]), - .r16_snorm => c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map))) / std.math.maxInt(i16), + .r16_snorm => c[0] = normalizedI16(std.mem.bytesToValue(u16, map)), .r16_unorm => c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map))) / std.math.maxInt(u16), .r8g8b8a8_sint, @@ -570,15 +598,15 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { .r8g8_sint, .r8g8_snorm, => { - c[0] = @as(f32, @floatFromInt(map[0])) / std.math.maxInt(i8); - c[1] = @as(f32, @floatFromInt(map[1])) / std.math.maxInt(i8); + c[0] = normalizedI8(map[0]); + c[1] = normalizedI8(map[1]); }, .r8g8b8a8_snorm => { - c[0] = @as(f32, @floatFromInt(map[0])) / std.math.maxInt(i8); - c[1] = @as(f32, @floatFromInt(map[1])) / std.math.maxInt(i8); - c[2] = @as(f32, @floatFromInt(map[2])) / std.math.maxInt(i8); - c[3] = @as(f32, @floatFromInt(map[3])) / std.math.maxInt(i8); + c[0] = normalizedI8(map[0]); + c[1] = normalizedI8(map[1]); + c[2] = normalizedI8(map[2]); + c[3] = normalizedI8(map[3]); }, .r4g4b4a4_unorm_pack16 => { @@ -627,8 +655,8 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { }, .r16g16_snorm => { - c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[0..]))) / std.math.maxInt(i16); - c[1] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[2..]))) / std.math.maxInt(i16); + c[0] = normalizedI16(std.mem.bytesToValue(u16, map[0..])); + c[1] = normalizedI16(std.mem.bytesToValue(u16, map[2..])); }, .r16g16_unorm => { @@ -666,10 +694,10 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { .r16g16b16a16_sint, .r16g16b16a16_snorm, => { - c[0] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[0..]))) / std.math.maxInt(i16); - c[1] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[2..]))) / std.math.maxInt(i16); - c[2] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[4..]))) / std.math.maxInt(i16); - c[3] = @as(f32, @floatFromInt(std.mem.bytesToValue(u16, map[6..]))) / std.math.maxInt(i16); + c[0] = normalizedI16(std.mem.bytesToValue(u16, map[0..])); + c[1] = normalizedI16(std.mem.bytesToValue(u16, map[2..])); + c[2] = normalizedI16(std.mem.bytesToValue(u16, map[4..])); + c[3] = normalizedI16(std.mem.bytesToValue(u16, map[6..])); }, .r16g16b16a16_sfloat => c = std.mem.bytesToValue(@Vector(4, f16), map), @@ -704,10 +732,10 @@ pub fn readFloat4(map: []const u8, src_format: vk.Format) F32x4 { .a8b8g8r8_snorm_pack32, => { const pack = std.mem.bytesToValue(@Vector(4, u8), map); - c[0] = @as(f32, @floatFromInt(pack[0])) / std.math.maxInt(i8); - c[1] = @as(f32, @floatFromInt(pack[1])) / std.math.maxInt(i8); - c[2] = @as(f32, @floatFromInt(pack[2])) / std.math.maxInt(i8); - c[3] = @as(f32, @floatFromInt(pack[3])) / std.math.maxInt(i8); + c[0] = normalizedI8(pack[0]); + c[1] = normalizedI8(pack[1]); + c[2] = normalizedI8(pack[2]); + c[3] = normalizedI8(pack[3]); }, .a2b10g10r10_uint_pack32, @@ -1093,37 +1121,55 @@ pub fn writeFloat4(c: F32x4, map: []u8, dst_format: vk.Format) void { } } +inline fn signExtendI8(value: u8) u32 { + return @bitCast(@as(i32, @as(i8, @bitCast(value)))); +} + +inline fn signExtendI16(value: u16) u32 { + return @bitCast(@as(i32, @as(i16, @bitCast(value)))); +} + pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { var c: U32x4 = .{ 0, 0, 0, 1 }; switch (src_format) { - .r8_sint, .r8_uint, .s8_uint, => c[0] = map[0], - .r16_sint, + .r8_sint => c[0] = signExtendI8(map[0]), + .r16_uint, => c[0] = std.mem.bytesToValue(u16, map), + .r16_sint => c[0] = signExtendI16(std.mem.bytesToValue(u16, map)), + .r32_sint, .r32_uint, => c[0] = std.mem.bytesToValue(u32, map), - .r8g8_sint, .r8g8_uint, => { c[0] = map[0]; c[1] = map[1]; }, - .r16g16_sint, + .r8g8_sint => { + c[0] = signExtendI8(map[0]); + c[1] = signExtendI8(map[1]); + }, + .r16g16_uint, => { c[0] = std.mem.bytesToValue(u16, map[0..]); c[1] = std.mem.bytesToValue(u16, map[2..]); }, + .r16g16_sint => { + c[0] = signExtendI16(std.mem.bytesToValue(u16, map[0..])); + c[1] = signExtendI16(std.mem.bytesToValue(u16, map[2..])); + }, + .r32g32_sint, .r32g32_uint, => { @@ -1131,7 +1177,6 @@ pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { c[1] = std.mem.bytesToValue(u32, map[4..]); }, - .r8g8b8a8_sint, .r8g8b8a8_uint, => { c[0] = map[0]; @@ -1140,7 +1185,13 @@ pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { c[3] = map[3]; }, - .r16g16b16a16_sint, + .r8g8b8a8_sint => { + c[0] = signExtendI8(map[0]); + c[1] = signExtendI8(map[1]); + c[2] = signExtendI8(map[2]); + c[3] = signExtendI8(map[3]); + }, + .r16g16b16a16_uint, => { c[0] = std.mem.bytesToValue(u16, map[0..2]); @@ -1149,12 +1200,18 @@ pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { c[3] = std.mem.bytesToValue(u16, map[6..8]); }, + .r16g16b16a16_sint => { + c[0] = signExtendI16(std.mem.bytesToValue(u16, map[0..2])); + c[1] = signExtendI16(std.mem.bytesToValue(u16, map[2..4])); + c[2] = signExtendI16(std.mem.bytesToValue(u16, map[4..6])); + c[3] = signExtendI16(std.mem.bytesToValue(u16, map[6..8])); + }, + .r32g32b32a32_sint, .r32g32b32a32_uint, => c = std.mem.bytesToValue(U32x4, map), .a8b8g8r8_uint_pack32, - .a8b8g8r8_sint_pack32, .a8b8g8r8_unorm_pack32, .a8b8g8r8_snorm_pack32, => { @@ -1165,6 +1222,14 @@ pub fn readInt4(map: []const u8, src_format: vk.Format) U32x4 { c[3] = pack[3]; }, + .a8b8g8r8_sint_pack32 => { + const pack = std.mem.bytesToValue(@Vector(4, u8), map); + c[0] = signExtendI8(pack[0]); + c[1] = signExtendI8(pack[1]); + c[2] = signExtendI8(pack[2]); + c[3] = signExtendI8(pack[3]); + }, + .a2b10g10r10_unorm_pack32, .a2b10g10r10_uint_pack32, => { diff --git a/src/soft/device/fragment.zig b/src/soft/device/fragment.zig index 6057c07..aeefd47 100644 --- a/src/soft/device/fragment.zig +++ b/src/soft/device/fragment.zig @@ -32,8 +32,9 @@ pub fn shaderInvocation( mutex.lock(io) catch return SpvRuntimeError.Unknown; defer mutex.unlock(io); + try rt.populatePushConstants(draw_call.renderer.state.push_constant_blob[0..]); + const entry = try rt.getEntryPointByName(shader.entry); - const output_result = try rt.getResultByLocation(0, .output); for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { const result_word = rt.getResultByLocation(@intCast(location), .input) catch |err| switch (err) { @@ -54,8 +55,15 @@ pub fn shaderInvocation( }; var outputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][@sizeOf(zm.F32x4)]u8 = undefined; + @memset(std.mem.asBytes(&outputs), 0); - try rt.readOutput(std.mem.asBytes(&outputs), output_result); + for (0..spv.SPIRV_MAX_OUTPUT_LOCATIONS) |location| { + const result_word = rt.getResultByLocation(@intCast(location), .output) catch |err| switch (err) { + SpvRuntimeError.NotFound => continue, + else => return err, + }; + try rt.readOutput(&outputs[location], result_word); + } try rt.flushDescriptorSets(allocator); diff --git a/src/soft/device/rasterizer.zig b/src/soft/device/rasterizer.zig index 45ee636..2369739 100644 --- a/src/soft/device/rasterizer.zig +++ b/src/soft/device/rasterizer.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const vk = @import("vulkan"); const base = @import("base"); const clip = @import("clip.zig"); @@ -20,26 +21,34 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato const pipeline_data = (renderer.state.pipeline orelse return VkError.InvalidHandleDrv).interface.mode.graphics; const topology = pipeline_data.input_assembly.topology; - const color_attachment = if (draw_call.render_pass.interface.subpasses[renderer.subpass_index].color_attachments) |attachments| attachments[0].attachment else return VkError.InvalidAttachmentDrv; - const render_target_view: *base.ImageView = draw_call.color_attachments[color_attachment]; - const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); + const color_attachments = draw_call.render_pass.interface.subpasses[renderer.subpass_index].color_attachments orelse return VkError.InvalidAttachmentDrv; + const color_attachment_access = allocator.alloc(?common.RenderTargetAccess, color_attachments.len) catch return VkError.OutOfDeviceMemory; + @memset(color_attachment_access, null); - const color_range = render_target_view.subresource_range; - const color_format = render_target_view.format; + for (color_attachments, color_attachment_access) |attachment_ref, *access| { + if (attachment_ref.attachment == vk.ATTACHMENT_UNUSED) + continue; - const color_attachment_subresource_offset = try render_target.getSubresourceOffset( - color_range.aspect_mask, - color_range.base_mip_level, - color_range.base_array_layer, - ); - const color_attachment_subresource_size = render_target.getLayerSize(color_range.aspect_mask); - const color_attachment_access: common.RenderTargetAccess = .{ - .mutex = undefined, - .base = try render_target.mapAsSliceWithAddedOffset(u8, color_attachment_subresource_offset, color_attachment_subresource_size), - .row_pitch = render_target.getRowPitchMemSizeForMipLevelWithFormat(color_range.aspect_mask, color_range.base_mip_level, color_format), - .texel_size = base.format.texelSize(color_format), - .format = color_format, - }; + const render_target_view: *base.ImageView = draw_call.color_attachments[attachment_ref.attachment]; + const render_target: *SoftImage = @alignCast(@fieldParentPtr("interface", render_target_view.image)); + + const color_range = render_target_view.subresource_range; + const color_format = render_target_view.format; + + const color_attachment_subresource_offset = try render_target.getSubresourceOffset( + color_range.aspect_mask, + color_range.base_mip_level, + color_range.base_array_layer, + ); + const color_attachment_subresource_size = render_target.getLayerSize(color_range.aspect_mask); + access.* = .{ + .mutex = undefined, + .base = try render_target.mapAsSliceWithAddedOffset(u8, color_attachment_subresource_offset, color_attachment_subresource_size), + .row_pitch = render_target.getRowPitchMemSizeForMipLevelWithFormat(color_range.aspect_mask, color_range.base_mip_level, color_format), + .texel_size = base.format.texelSize(color_format), + .format = color_format, + }; + } const depth_attachment_view: ?*base.ImageView = if (draw_call.depth_attachment) |view| view else null; const depth_attachment: ?*SoftImage = if (depth_attachment_view) |view| @alignCast(@fieldParentPtr("interface", view.image)) else null; @@ -60,7 +69,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato break :blk .{ .mutex = .init, .base = try depth_attachment.?.mapAsSliceWithAddedOffset(u8, attachment_subresource_offset, attachment_subresource_size), - .row_pitch = render_target.getRowPitchMemSizeForMipLevelWithFormat(depth_range.aspect_mask, depth_range.base_mip_level, depth_format), + .row_pitch = depth_attachment.?.getRowPitchMemSizeForMipLevelWithFormat(depth_range.aspect_mask, depth_range.base_mip_level, depth_format), .texel_size = base.format.texelSize(depth_format), .format = depth_format, }; @@ -80,7 +89,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato v0, v1, v2, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); }, @@ -97,7 +106,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato v0, v1, v2, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); } @@ -116,7 +125,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato v0, v1, v2, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); } else { @@ -127,7 +136,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato v1, v0, v2, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); } @@ -143,7 +152,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato draw_call, v0, v1, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); }, @@ -157,7 +166,7 @@ pub fn processThenFragmentStage(renderer: *Renderer, allocator: std.mem.Allocato draw_call, v0, v1, - &color_attachment_access, + color_attachment_access, if (depth_attachment_access) |*access| access else null, ); } @@ -173,7 +182,7 @@ fn clipTransformAndRasterizeLine( draw_call: *DrawCall, v0: *Vertex, v1: *Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, ) VkError!void { const clipped_line = (try clip.clipLine(allocator, v0, v1)) orelse return; @@ -201,7 +210,7 @@ fn clipTransformAndRasterizeTriangle( v0: *Vertex, v1: *Vertex, v2: *Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, ) VkError!void { const clipped_polygon = try clip.clipTriangle(allocator, v0, v1, v2); @@ -238,7 +247,7 @@ fn rasterizeTriangle( v0: *Vertex, v1: *Vertex, v2: *Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, ) VkError!void { if (try triangleIsCulled(renderer, v0, v1, v2)) diff --git a/src/soft/device/rasterizer/bresenham.zig b/src/soft/device/rasterizer/bresenham.zig index 31ce1f2..550060f 100644 --- a/src/soft/device/rasterizer/bresenham.zig +++ b/src/soft/device/rasterizer/bresenham.zig @@ -28,7 +28,7 @@ const RunData = struct { end_vertex: *Renderer.Vertex, start_step: usize, end_step: usize, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, }; @@ -37,7 +37,7 @@ pub fn drawLine( draw_call: *Renderer.DrawCall, v0: *Renderer.Vertex, v1: *Renderer.Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, ) VkError!void { const io = draw_call.renderer.device.interface.io(); diff --git a/src/soft/device/rasterizer/common.zig b/src/soft/device/rasterizer/common.zig index f573340..26a0a33 100644 --- a/src/soft/device/rasterizer/common.zig +++ b/src/soft/device/rasterizer/common.zig @@ -101,10 +101,15 @@ inline fn interpolateF32x4(value0: F32x4, value1: F32x4, value2: F32x4, b0: f32, return (value0 * zm.f32x4s(b0)) + (value1 * zm.f32x4s(b1)) + (value2 * zm.f32x4s(b2)); } +inline fn fragmentOutputFloat4(output: [@sizeOf(F32x4)]u8, format: vk.Format) F32x4 { + const color = std.mem.bytesToValue(F32x4, &output); + return if (base.format.isSrgb(format)) zm.rgbToSrgb(color) else color; +} + pub fn writeToTargets( outputs: [spv.SPIRV_MAX_OUTPUT_LOCATIONS][@sizeOf(F32x4)]u8, draw_call: *Renderer.DrawCall, - color_attachment_access: *const RenderTargetAccess, + color_attachment_access: []const ?RenderTargetAccess, depth_attachment_access: ?*RenderTargetAccess, x: usize, y: usize, @@ -112,8 +117,6 @@ pub fn writeToTargets( ) VkError!void { const io = draw_call.renderer.device.interface.io(); - const color_offset = @as(usize, @intCast(x)) * color_attachment_access.texel_size + @as(usize, @intCast(y)) * color_attachment_access.row_pitch; - // After work depth test to avoid overwritten depth pixels during fragment invocations if (depth_attachment_access) |depth| { const depth_offset = @as(usize, @intCast(x)) * depth.texel_size + @as(usize, @intCast(y)) * depth.row_pitch; @@ -125,18 +128,16 @@ pub fn writeToTargets( if (z >= depth_value[0]) return; blitter.writeFloat4(zm.f32x4s(z), depth.base[depth_offset..], depth.format); + } - // Doubled line to stay inside the critical section - if (base.format.isUnnormalizedInteger(color_attachment_access.format)) { - blitter.writeInt4(std.mem.bytesToValue(U32x4, &outputs[0]), color_attachment_access.base[color_offset..], color_attachment_access.format); + for (color_attachment_access, 0..) |maybe_color, location| { + const color = maybe_color orelse continue; + const color_offset = @as(usize, @intCast(x)) * color.texel_size + @as(usize, @intCast(y)) * color.row_pitch; + + if (base.format.isUnnormalizedInteger(color.format)) { + blitter.writeInt4(std.mem.bytesToValue(U32x4, &outputs[location]), color.base[color_offset..], color.format); } else { - blitter.writeFloat4(std.mem.bytesToValue(F32x4, &outputs[0]), color_attachment_access.base[color_offset..], color_attachment_access.format); - } - } else { - if (base.format.isUnnormalizedInteger(color_attachment_access.format)) { - blitter.writeInt4(std.mem.bytesToValue(U32x4, &outputs[0]), color_attachment_access.base[color_offset..], color_attachment_access.format); - } else { - blitter.writeFloat4(std.mem.bytesToValue(F32x4, &outputs[0]), color_attachment_access.base[color_offset..], color_attachment_access.format); + blitter.writeFloat4(fragmentOutputFloat4(outputs[location], color.format), color.base[color_offset..], color.format); } } } diff --git a/src/soft/device/rasterizer/edge_function.zig b/src/soft/device/rasterizer/edge_function.zig index 549a1e2..e4faf50 100644 --- a/src/soft/device/rasterizer/edge_function.zig +++ b/src/soft/device/rasterizer/edge_function.zig @@ -26,7 +26,7 @@ const RunData = struct { v0: Renderer.Vertex, v1: Renderer.Vertex, v2: Renderer.Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, }; @@ -36,7 +36,7 @@ pub fn drawTriangle( v0: *Renderer.Vertex, v1: *Renderer.Vertex, v2: *Renderer.Vertex, - color_attachment_access: *const common.RenderTargetAccess, + color_attachment_access: []const ?common.RenderTargetAccess, depth_attachment_access: ?*common.RenderTargetAccess, ) VkError!void { const io = draw_call.renderer.device.interface.io(); diff --git a/src/vulkan/Sampler.zig b/src/vulkan/Sampler.zig index a3c160e..a7b51cc 100644 --- a/src/vulkan/Sampler.zig +++ b/src/vulkan/Sampler.zig @@ -11,6 +11,11 @@ const Self = @This(); pub const ObjectType: vk.ObjectType = .sampler; owner: *Device, +mag_filter: vk.Filter, +min_filter: vk.Filter, +address_mode_u: vk.SamplerAddressMode, +address_mode_v: vk.SamplerAddressMode, +address_mode_w: vk.SamplerAddressMode, vtable: *const VTable, @@ -20,9 +25,13 @@ pub const VTable = struct { pub fn init(device: *Device, allocator: std.mem.Allocator, info: *const vk.SamplerCreateInfo) VkError!Self { _ = allocator; - _ = info; return .{ .owner = device, + .mag_filter = info.mag_filter, + .min_filter = info.min_filter, + .address_mode_u = info.address_mode_u, + .address_mode_v = info.address_mode_v, + .address_mode_w = info.address_mode_w, .vtable = undefined, }; }