From dbf963a3c998f814f6063fb32c1843a7345d6183 Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Sat, 24 Jan 2026 15:18:57 +0100 Subject: [PATCH] fixing shader execution --- README.md | 2 +- build.zig | 3 + build.zig.zon | 4 ++ example/main.zig | 22 +++---- example/mangohud.conf | 6 ++ example/shader.nzsl | 8 +-- example/shader.spv | Bin 4480 -> 4480 bytes example/shader.spv.txt | 4 +- sandbox/shader.nzsl | 29 ++++----- src/GLSL_std_450/opcodes.zig | 114 ++++++++++++++++++++--------------- src/lib.zig | 2 +- src/opcodes.zig | 15 ++--- 12 files changed, 118 insertions(+), 91 deletions(-) create mode 100644 example/mangohud.conf diff --git a/README.md b/README.md index 7d03b29..3745c05 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # SPIR-V Interpreter -A small footprint SPIR-V interpreter with zero dependencies to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently. +A small footprint SPIR-V interpreter to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently. ```zig const std = @import("std"); diff --git a/build.zig b/build.zig index b5ea803..f96a128 100644 --- a/build.zig +++ b/build.zig @@ -12,6 +12,9 @@ pub fn build(b: *std.Build) void { .optimize = optimize, }); + const zmath = b.dependency("zmath", .{}); + mod.addImport("zmath", zmath.module("root")); + const pretty = b.dependency("pretty", .{ .target = target, .optimize = optimize }); mod.addImport("pretty", pretty.module("pretty")); diff --git a/build.zig.zon b/build.zig.zon index ccc0d95..134bdf4 100644 --- a/build.zig.zon +++ b/build.zig.zon @@ -2,6 +2,10 @@ .name = .SPIRV_Interpreter, .version = "0.0.1", .dependencies = .{ + .zmath = .{ + .url = "git+https://github.com/zig-gamedev/zmath.git#3a5955b2b72cd081563fbb084eff05bffd1e3fbb", + .hash = "zmath-0.11.0-dev-wjwivdMsAwD-xaLj76YHUq3t9JDH-X16xuMTmnDzqbu2", + }, .pretty = .{ // For debugging purposes .url = "git+https://github.com/Kbz-8/pretty#117674465efd4d07d5ae9d9d8ca59c2c323a65ba", .hash = "pretty-0.10.6-Tm65r99UAQDEJMgZysD10qE8dinBHr064fPM6YkxVPfB", diff --git a/example/main.zig b/example/main.zig index b30b8bb..886a6d1 100644 --- a/example/main.zig +++ b/example/main.zig @@ -4,8 +4,8 @@ const spv = @import("spv"); const shader_source = @embedFile("shader.spv"); -const screen_width = 200; -const screen_height = 200; +const screen_width = 480; +const screen_height = 240; pub fn main() !void { { @@ -50,9 +50,7 @@ pub fn main() !void { } var thread_pool: std.Thread.Pool = undefined; - try thread_pool.init(.{ - .allocator = allocator, - }); + try thread_pool.init(.{ .allocator = allocator }); var timer = try std.time.Timer.start(); @@ -73,15 +71,15 @@ pub fn main() !void { const pixel_map: [*]u32 = @as([*]u32, @ptrCast(@alignCast((surface.getPixels() orelse return).ptr))); + const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s; + var frame_timer = try std.time.Timer.start(); defer { const ns = frame_timer.lap(); const ms = @as(f32, @floatFromInt(ns)) / std.time.ns_per_s; - std.log.info("Took {d:.3}s - {d:.3}fps to render", .{ ms, 1.0 / ms }); + std.log.info("Took {d:.3}s - {d:.3}fps to render {d:.2}", .{ ms, 1.0 / ms, delta }); } - const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s; - var wait_group: std.Thread.WaitGroup = .{}; for (0..screen_height) |y| { const runner = &runner_cache.items[y]; @@ -131,10 +129,10 @@ const Runner = struct { try rt.readOutput(f32, output[0..], self.color); const rgba = self.surface.mapRgba( - @truncate(@as(u32, @intFromFloat(output[0] * 255.0))), - @truncate(@as(u32, @intFromFloat(output[1] * 255.0))), - @truncate(@as(u32, @intFromFloat(output[2] * 255.0))), - @truncate(@as(u32, @intFromFloat(output[3] * 255.0))), + @intCast(@max(@min(@as(i32, @intFromFloat(output[0] * 255.0)), 255), 0)), + @intCast(@max(@min(@as(i32, @intFromFloat(output[1] * 255.0)), 255), 0)), + @intCast(@max(@min(@as(i32, @intFromFloat(output[2] * 255.0)), 255), 0)), + @intCast(@max(@min(@as(i32, @intFromFloat(output[3] * 255.0)), 255), 0)), ); pixel_map[(y * self.surface.getWidth()) + x] = rgba.value; diff --git a/example/mangohud.conf b/example/mangohud.conf new file mode 100644 index 0000000..569e46f --- /dev/null +++ b/example/mangohud.conf @@ -0,0 +1,6 @@ +gpu_stats=0 +font_size=16 +resolution +hud_compact +background_alpha=0 +width=140 diff --git a/example/shader.nzsl b/example/shader.nzsl index ba00b3f..62955c1 100644 --- a/example/shader.nzsl +++ b/example/shader.nzsl @@ -16,16 +16,16 @@ struct FragOut [entry(frag)] fn main(input: FragIn) -> FragOut { - const I: i32 = 32; + const I: i32 = 128; const A: f32 = 7.5; - const MA: f32 = 20.0; + const MA: f32 = 100.0; const MI: f32 = 0.001; let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0); let uv = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y); - let col = vec3[f32](0.0, 0.0, 0.0); - let ro = vec3[f32](0.0, 0.0, -2.0); + let col = vec3[f32](0.0, 0.0, 0.0); + let ro = vec3[f32](0.0, 0.0, -2.0); let rd = vec3[f32](uv.x, uv.y, 1.0); let dt = 0.0; let ds = 0.0; diff --git a/example/shader.spv b/example/shader.spv index 984e29b064b6083f65fb259c75aa8b09c6b3c126..3801e1e5ca00e4af97e29471291855a5c125940e 100644 GIT binary patch delta 21 ccmZorZcyG(!_3$)xt7_I>4ekf9n2?q0Z{D+k^lez delta 21 ccmZorZcyG(!_25Kxt7_IX@TSB9n2?q0ZQ)&2><{9 diff --git a/example/shader.spv.txt b/example/shader.spv.txt index 10493e2..9eecca9 100644 --- a/example/shader.spv.txt +++ b/example/shader.spv.txt @@ -55,11 +55,11 @@ Schema: 0 %29 = OpConstant %3 f32(0.2) %30 = OpConstant %3 f32(4) %31 = OpTypePointer StorageClass(Function) %6 - %32 = OpConstant %6 i32(32) + %32 = OpConstant %6 i32(128) %33 = OpTypeBool %34 = OpConstant %3 f32(0.001) %35 = OpConstant %3 f32(0.35) - %36 = OpConstant %3 f32(20) + %36 = OpConstant %3 f32(100) %37 = OpConstant %3 f32(0.15) %38 = OpConstant %3 f32(0.05) %39 = OpConstant %3 f32(1.15) diff --git a/sandbox/shader.nzsl b/sandbox/shader.nzsl index ec4e798..59a606d 100644 --- a/sandbox/shader.nzsl +++ b/sandbox/shader.nzsl @@ -16,24 +16,24 @@ struct FragOut [entry(frag)] fn main(input: FragIn) -> FragOut { - const I: i32 = 128; + const I: i32 = 32; const A: f32 = 7.5; - const MA: f32 = 100.0; + const MA: f32 = 2.0; const MI: f32 = 0.001; let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0); let uv = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y); - let col = vec3[f32](0.0, 0.0, 0.0); - let ro = vec3[f32](0.0, 0.0, -2.0); - let rd = vec3[f32](uv.x, uv.y, 1.0); + let col = vec4[f32](0.0, 0.0, 0.0, 0.0); + let ro = vec4[f32](0.0, 0.0, -2.0, 0.0); + let rd = vec4[f32](uv.x, uv.y, 1.0, 0.0); let dt = 0.0; let ds = 0.0; let dm = -1.0; let p = ro; - let c = vec3[f32](0.0, 0.0, 0.0); + let c = vec4[f32](0.0, 0.0, 0.0, 0.0); - let l = vec3[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0); + let l = vec4[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0, 0.0); for i in 0 -> I { @@ -46,26 +46,23 @@ fn main(input: FragIn) -> FragOut if (ds <= MI) { - let value = max(dot(normalize(c - p), normalize(p - l)) - 0.35, 0.0); - col = vec3[f32](value, value, value); + let value = max(dot(normalize(c - p), normalize(p - l)), 0.0); + col = vec4[f32](value, value, value, 1.0); break; } if (ds >= MA) { - if (dot(normalize(rd), normalize(l - ro)) <= 1.0) + if (dot(normalize(rd), normalize(l - ro)) < 1.0) { - let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.05)/ 1.15 * (1.0 - dm * A); - col = vec3[f32](value, value, value); + let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.0) / 1.15 * max(1.0 - dm * A, 0.0); + col = vec4[f32](value, value, value, 1.0); } break; } } - //if (col == vec3[f32](0.0, 0.0, 0.0)) - // discard; - let output: FragOut; - output.color = vec4[f32](col.x, col.y, col.z, 1.0); + output.color = col; return output; } diff --git a/src/GLSL_std_450/opcodes.zig b/src/GLSL_std_450/opcodes.zig index c48b821..4e597e9 100644 --- a/src/GLSL_std_450/opcodes.zig +++ b/src/GLSL_std_450/opcodes.zig @@ -2,6 +2,7 @@ const std = @import("std"); const spv = @import("../spv.zig"); const ext = @import("GLSL_std_450.zig"); const opc = @import("../opcodes.zig"); +const zm = @import("zmath"); const Module = @import("../Module.zig"); const Runtime = @import("../Runtime.zig"); @@ -72,11 +73,23 @@ pub var runtime_dispatcher = [_]?OpCodeExtFunc{null} ** ext.GLSLOpMaxValue; pub fn initRuntimeDispatcher() void { // zig fmt: off + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Ceil)] = MathEngine(.Float, .Ceil).opSingleOperator; runtime_dispatcher[@intFromEnum(ext.GLSLOp.Cos)] = MathEngine(.Float, .Cos).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Exp)] = MathEngine(.Float, .Exp).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Exp2)] = MathEngine(.Float, .Exp2).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.FAbs)] = MathEngine(.Float, .FAbs).opSingleOperator; runtime_dispatcher[@intFromEnum(ext.GLSLOp.FMax)] = MathEngine(.Float, .FMax).opDoubleOperators; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Floor)] = MathEngine(.Float, .Floor).opSingleOperator; runtime_dispatcher[@intFromEnum(ext.GLSLOp.Length)] = opLength; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Log)] = MathEngine(.Float, .Log).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Log2)] = MathEngine(.Float, .Log2).opSingleOperator; runtime_dispatcher[@intFromEnum(ext.GLSLOp.Normalize)] = opNormalize; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Round)] = MathEngine(.Float, .Round).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.SAbs)] = MathEngine(.SInt, .SAbs).opSingleOperator; runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sin)] = MathEngine(.Float, .Sin).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sqrt)] = MathEngine(.Float, .Sqrt).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Tan)] = MathEngine(.Float, .Tan).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Trunc)] = MathEngine(.Float, .Trunc).opSingleOperator; // zig fmt: on } @@ -92,8 +105,20 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { const operator = struct { fn operation(comptime TT: type, x: TT) RuntimeError!TT { return switch (Op) { - .Sin => @sin(x), + .Ceil => @ceil(x), .Cos => @cos(x), + .Exp => @exp(x), + .Exp2 => @exp2(x), + .FAbs => @abs(x), + .Floor => @floor(x), + .Log => @log(x), + .Log2 => @log2(x), + .Round => @round(x), + .SAbs => if (comptime @typeInfo(TT) == .int) @intCast(@abs(x)) else return RuntimeError.InvalidSpirV, + .Sin => @sin(x), + .Sqrt => @sqrt(x), + .Tan => @tan(x), + .Trunc => @trunc(x), else => RuntimeError.InvalidSpirV, }; } @@ -166,12 +191,6 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { else => return RuntimeError.InvalidSpirV, } } - - inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void { - inline for (0..N) |i| { - d[i] = try operation(ElemT, l[i], r[i]); - } - } }; switch (dst.*) { @@ -181,17 +200,17 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { try operator.applyScalar(lane_bits, d_lane, &l_lane, &r_lane); }, - .Vector4f32 => |*d| try operator.applySIMDVector(f32, 4, d, &lhs.Vector4f32, &rhs.Vector4f32), - .Vector3f32 => |*d| try operator.applySIMDVector(f32, 3, d, &lhs.Vector3f32, &rhs.Vector3f32), - .Vector2f32 => |*d| try operator.applySIMDVector(f32, 2, d, &lhs.Vector2f32, &rhs.Vector2f32), + .Vector4f32 => |*d| d.* = try operator.operation(@Vector(4, f32), lhs.Vector4f32, rhs.Vector4f32), + .Vector3f32 => |*d| d.* = try operator.operation(@Vector(3, f32), lhs.Vector3f32, rhs.Vector3f32), + .Vector2f32 => |*d| d.* = try operator.operation(@Vector(2, f32), lhs.Vector2f32, rhs.Vector2f32), - .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32), - .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32), - .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32), + .Vector4i32 => |*d| d.* = try operator.operation(@Vector(4, i32), lhs.Vector4i32, rhs.Vector4i32), + .Vector3i32 => |*d| d.* = try operator.operation(@Vector(3, i32), lhs.Vector3i32, rhs.Vector3i32), + .Vector2i32 => |*d| d.* = try operator.operation(@Vector(2, i32), lhs.Vector2i32, rhs.Vector2i32), - .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32), - .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32), - .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32), + .Vector4u32 => |*d| d.* = try operator.operation(@Vector(4, u32), lhs.Vector4u32, rhs.Vector4u32), + .Vector3u32 => |*d| d.* = try operator.operation(@Vector(3, u32), lhs.Vector3u32, rhs.Vector3u32), + .Vector2u32 => |*d| d.* = try operator.operation(@Vector(2, u32), lhs.Vector2u32, rhs.Vector2u32), else => return RuntimeError.InvalidSpirV, } @@ -199,12 +218,6 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { }; } -inline fn sumSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void { - inline for (0..N) |i| { - d.* += v[i]; - } -} - fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { const target_type = (try rt.results[target_type_id].getVariant()).Type; const dst = try rt.results[id].getValue(); @@ -219,9 +232,18 @@ fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWo if (bits == 32) { // More likely to be SIMD if f32 switch (src.*) { - .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), - .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), - .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + .Vector4f32 => |src_vec| { + d_field.* = zm.length4(src_vec)[0]; + return; + }, + .Vector3f32 => |src_vec| { + d_field.* = zm.length3(zm.f32x4(src_vec[0], src_vec[1], src_vec[2], 0.0))[0]; + return; + }, + .Vector2f32 => |src_vec| { + d_field.* = zm.length2(zm.f32x4(src_vec[0], src_vec[1], 0.0, 0.0))[0]; + return; + }, else => {}, } } @@ -237,7 +259,6 @@ fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWo const s_field = try getValuePrimitiveField(.Float, bits, s_lane); sum += s_field.*; }, - .Vector4f32, .Vector3f32, .Vector2f32 => {}, else => return RuntimeError.InvalidSpirV, } @@ -256,17 +277,31 @@ fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: Sp switch (lane_bits) { inline 16, 32, 64 => |bits| { - var sum: std.meta.Float(bits) = 0.0; - if (bits == 32) { // More likely to be SIMD if f32 switch (src.*) { - .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), - .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), - .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + .Vector4f32 => |src_vec| { + dst.Vector4f32 = zm.normalize4(src_vec); + return; + }, + .Vector3f32 => |src_vec| { + const normed = zm.normalize3(zm.f32x4(src_vec[0], src_vec[1], src_vec[2], 0.0)); + dst.Vector3f32[0] = normed[0]; + dst.Vector3f32[1] = normed[1]; + dst.Vector3f32[2] = normed[2]; + return; + }, + .Vector2f32 => |src_vec| { + const normed = zm.normalize2(zm.f32x4(src_vec[0], src_vec[1], 0.0, 0.0)); + dst.Vector2f32[0] = normed[0]; + dst.Vector2f32[1] = normed[1]; + return; + }, else => {}, } } + var sum: std.meta.Float(bits) = 0.0; + switch (src.*) { .Float => { const s_field = try getValuePrimitiveField(.Float, bits, src); @@ -276,34 +311,17 @@ fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: Sp const s_field = try getValuePrimitiveField(.Float, bits, s_lane); sum += s_field.*; }, - .Vector4f32, .Vector3f32, .Vector2f32 => {}, else => return RuntimeError.InvalidSpirV, } sum = @sqrt(sum); - if (bits == 32) { - switch (dst.*) { - .Vector4f32 => |*dst_vec| inline for (0..4) |i| { - dst_vec[i] = src.Vector4f32[i] / sum; - }, - .Vector3f32 => |*dst_vec| inline for (0..3) |i| { - dst_vec[i] = src.Vector3f32[i] / sum; - }, - .Vector2f32 => |*dst_vec| inline for (0..2) |i| { - dst_vec[i] = src.Vector2f32[i] / sum; - }, - else => {}, - } - } - switch (dst.*) { .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, *s_lane| { const d_field = try getValuePrimitiveField(.Float, bits, d_lane); const s_field = try getValuePrimitiveField(.Float, bits, s_lane); d_field.* = s_field.* / sum; }, - .Vector4f32, .Vector3f32, .Vector2f32 => {}, else => return RuntimeError.InvalidSpirV, } }, diff --git a/src/lib.zig b/src/lib.zig index c5c96cd..1d54d92 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -1,4 +1,4 @@ -//! A small footprint SPIR-V interpreter with zero dependencies to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently. +//! A small footprint SPIR-V interpreter to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently. //! //! ```zig //! const std = @import("std"); diff --git a/src/opcodes.zig b/src/opcodes.zig index 0009286..cb0c19a 100644 --- a/src/opcodes.zig +++ b/src/opcodes.zig @@ -1,5 +1,6 @@ const std = @import("std"); const spv = @import("spv.zig"); +const zm = @import("zmath"); const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig"); @@ -1141,14 +1142,14 @@ fn opDot(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { else => return RuntimeError.InvalidSpirV, } }, - .Vector4f32 => |*vec| inline for (0..4) |i| { - value.Float.float32 += vec[i] * op2_value.Vector4f32[i]; + .Vector4f32 => |vec| value.Float.float32 = zm.dot4(vec, op2_value.Vector4f32)[0], + .Vector3f32 => |vec| { + const op2_vec = op2_value.Vector3f32; + value.Float.float32 = zm.dot3(zm.f32x4(vec[0], vec[1], vec[2], 0.0), zm.f32x4(op2_vec[0], op2_vec[1], op2_vec[2], 0.0))[0]; }, - .Vector3f32 => |*vec| inline for (0..3) |i| { - value.Float.float32 += vec[i] * op2_value.Vector3f32[i]; - }, - .Vector2f32 => |*vec| inline for (0..2) |i| { - value.Float.float32 += vec[i] * op2_value.Vector2f32[i]; + .Vector2f32 => |vec| { + const op2_vec = op2_value.Vector2f32; + value.Float.float32 = zm.dot2(zm.f32x4(vec[0], vec[1], 0.0, 0.0), zm.f32x4(op2_vec[0], op2_vec[1], 0.0, 0.0))[0]; }, else => return RuntimeError.InvalidSpirV, }