From 96ad7f12f94086cba8d36e86f60ebfc0397320de Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Sat, 24 Jan 2026 02:46:02 +0100 Subject: [PATCH] adding GLSL std 450 base --- example/main.zig | 56 ++- example/shader.nzsl | 65 ++- example/shader.spv | Bin 1344 -> 4480 bytes example/shader.spv.txt | 360 ++++++++++--- sandbox/shader.nzsl | 4 +- sandbox/shader.spv | Bin 4620 -> 4480 bytes sandbox/shader.spv.txt | 397 +++++++-------- src/GLSL_std_450/GLSL_std_450.zig | 91 ++++ src/GLSL_std_450/opcodes.zig | 312 ++++++++++++ src/Result.zig | 139 +++--- src/Runtime.zig | 5 +- src/ext/GLSL_std_450.zig | 0 src/lib.zig | 7 + src/opcodes.zig | 805 +++++++++++++++++------------- 14 files changed, 1501 insertions(+), 740 deletions(-) create mode 100644 src/GLSL_std_450/GLSL_std_450.zig create mode 100644 src/GLSL_std_450/opcodes.zig delete mode 100644 src/ext/GLSL_std_450.zig diff --git a/example/main.zig b/example/main.zig index 0cc17dd..b30b8bb 100644 --- a/example/main.zig +++ b/example/main.zig @@ -4,8 +4,8 @@ const spv = @import("spv"); const shader_source = @embedFile("shader.spv"); -const screen_width = 1250; -const screen_height = 720; +const screen_width = 200; +const screen_height = 200; pub fn main() !void { { @@ -36,10 +36,16 @@ pub fn main() !void { } for (0..screen_height) |_| { + var rt = try spv.Runtime.init(allocator, &module); (try runner_cache.addOne(allocator)).* = .{ .allocator = allocator, .surface = surface, - .rt = try spv.Runtime.init(allocator, &module), + .rt = rt, + .entry = try rt.getEntryPointByName("main"), + .color = try rt.getResultByName("color"), + .time = try rt.getResultByName("time"), + .pos = try rt.getResultByName("pos"), + .res = try rt.getResultByName("res"), }; } @@ -48,9 +54,11 @@ pub fn main() !void { .allocator = allocator, }); + var timer = try std.time.Timer.start(); + var quit = false; while (!quit) { - try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 0.0 }); + try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 1.0 }); while (sdl3.events.poll()) |event| switch (event) { @@ -65,17 +73,19 @@ pub fn main() !void { const pixel_map: [*]u32 = @as([*]u32, @ptrCast(@alignCast((surface.getPixels() orelse return).ptr))); - var timer = try std.time.Timer.start(); + var frame_timer = try std.time.Timer.start(); defer { - const ns = timer.lap(); + const ns = frame_timer.lap(); const ms = @as(f32, @floatFromInt(ns)) / std.time.ns_per_s; std.log.info("Took {d:.3}s - {d:.3}fps to render", .{ ms, 1.0 / ms }); } + const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s; + var wait_group: std.Thread.WaitGroup = .{}; for (0..screen_height) |y| { const runner = &runner_cache.items[y]; - thread_pool.spawnWg(&wait_group, Runner.run, .{ runner, y, pixel_map }); + thread_pool.spawnWg(&wait_group, Runner.runWrapper, .{ runner, y, pixel_map, delta }); } thread_pool.waitAndWork(&wait_group); } @@ -92,23 +102,33 @@ const Runner = struct { allocator: std.mem.Allocator, surface: sdl3.surface.Surface, rt: spv.Runtime, + entry: spv.SpvWord, + color: spv.SpvWord, + time: spv.SpvWord, + pos: spv.SpvWord, + res: spv.SpvWord, - fn run(self: *Self, y: usize, pixel_map: [*]u32) void { + fn runWrapper(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) void { + @call(.always_inline, Self.run, .{ self, y, pixel_map, timer }) catch |err| { + std.log.err("{s}", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); + } + std.process.abort(); + }; + } + + fn run(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) !void { var rt = self.rt; // Copy to avoid pointer access of `self` at runtime. Okay as Runtime contains only pointers and trivially copyable fields - const entry = rt.getEntryPointByName("main") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const color = rt.getResultByName("color") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const time = rt.getResultByName("time") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const pos = rt.getResultByName("pos") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const res = rt.getResultByName("res") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); var output: [4]f32 = undefined; for (0..screen_width) |x| { - rt.writeInput(f32, &.{@as(f32, @floatFromInt(std.time.milliTimestamp()))}, time) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, res) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, pos) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.callEntryPoint(self.allocator, entry) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.readOutput(f32, output[0..], color) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); + try rt.writeInput(f32, &.{timer}, self.time); + try rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, self.res); + try rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, self.pos); + try rt.callEntryPoint(self.allocator, self.entry); + try rt.readOutput(f32, output[0..], self.color); const rgba = self.surface.mapRgba( @truncate(@as(u32, @intFromFloat(output[0] * 255.0))), diff --git a/example/shader.nzsl b/example/shader.nzsl index 064ce5e..ba00b3f 100644 --- a/example/shader.nzsl +++ b/example/shader.nzsl @@ -3,25 +3,66 @@ module; struct FragIn { - [location(0)] time: f32, - [location(1)] res: vec2[f32], - [location(2)] pos: vec2[f32], + [location(0)] time: f32, + [location(1)] res: vec2[f32], + [location(2)] pos: vec2[f32], } struct FragOut { - [location(0)] color: vec4[f32] + [location(0)] color: vec4[f32] } [entry(frag)] fn main(input: FragIn) -> FragOut { - let output: FragOut; - output.color = vec4[f32]( - input.pos.x / input.res.x, - input.pos.y / input.res.y, - 1.0, - 1.0 - ); - return output; + const I: i32 = 32; + const A: f32 = 7.5; + const MA: f32 = 20.0; + const MI: f32 = 0.001; + + let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0); + let uv = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y); + + let col = vec3[f32](0.0, 0.0, 0.0); + let ro = vec3[f32](0.0, 0.0, -2.0); + let rd = vec3[f32](uv.x, uv.y, 1.0); + let dt = 0.0; + let ds = 0.0; + let dm = -1.0; + let p = ro; + let c = vec3[f32](0.0, 0.0, 0.0); + + let l = vec3[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0); + + for i in 0 -> I + { + p = ro + rd * dt; + ds = length(c - p) - 1.0; + dt += ds; + + if (dm == -1.0 || ds < dm) + dm = ds; + + if (ds <= MI) + { + let value = max(dot(normalize(c - p), normalize(p - l)) - 0.35, 0.0); + col = vec3[f32](value, value, value); + break; + } + + if (ds >= MA) + { + if (dot(normalize(rd), normalize(l - ro)) <= 1.0) + { + let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.05)/ 1.15 * (1.0 - dm * A); + col = vec3[f32](value, value, value); + } + break; + } + } + + let output: FragOut; + output.color = vec4[f32](col.x, col.y, col.z, 1.0); + return output; } diff --git a/example/shader.spv b/example/shader.spv index 599620a2d64b3698d2839dde4f6c19e601c6bc0c..984e29b064b6083f65fb259c75aa8b09c6b3c126 100644 GIT binary patch literal 4480 zcmZ9O_j6TM6ooGd2`Zs?l^6(3&;W)Kfr zq9S0&_P_X(ZN?eD@8#@x=iQmjS!?Zb_GyolmQ?r3vL4x%{^PS>vaWAHR)Ugcy|eOc zBIBaQ<&8D%9jj}mO`l>!->lT35YsO^G3yDQgsorMvO&EJ>y7or24nq+*UvG*|YV;_8zrdLq0FR)&Wd-ZY{a} z$*mXhir$n)zxc=vm3=4jJ3v4a4BO|!Ftj`m##JhR$kD% zv`LA3R(@4*=kGeZEE@**{LHn!{*xG;M?LZ@lBT~8*!;Y|AKd)Pw0`U9*UTBfXpZZ2 z&Vh`^*t`BgiK|!QaUaCaZ`@FfuOaxb0_~g^e0YI()gA#hN3-_HjLxkd@uz_Kjs-up zKs%2H9|`~M*T%WQyUw?>!Fs3Rv!45`KlZx{KA2q?m2~GHjkONkS|4?);kC5~y6cXC zXW8EQ;U5dzvwK9he;k}X=7fJdytZ~{_qr#*v+VamU#ElY%e}MDzF~Nx^KnLv;K>J&tSB+cj8RU`GeQwxMwiAh)*fv{$NDDcl4|x zUR%Vc<+$~y7x5WId}fY&hIK`JR*pM=eG#9Xp@Iqv=c^leMp#bA5W7rieB=X<{du4b*+lS{$o>T~_x zd9@~f75u$ff%)4MW{mGZJz_4)`Tboo#bI+( ztKsJA^BkSacc(7)&DpEi_ZHZ<=5M5Pxvpz4do<2>Cy(`93pOvz7~i70XXo3v4)g4M z@7DBOuE*5lOjagsBx42pY0M4SM63^{KVnyb^;Kdq$7*o#9L;Fzk#{3lEkDN^xO4Qy z^hfMXV13RJYi$9$PUqL}T4yq0x+~w|TJLcK@{ypCg%oz2}N!tXr zjy~7pxvNFZTfv@l@Y}$~_*-Gk+c9I*<2>&GyVw5y=r`7W?!@%P9rZ4%MZb4}?Kk+{ zU}Nmn{ChBC)a}(+wTRyg_D&xp-o4#|sl^?)##YRlk#j%ToI@#R8`v22eMx%&Y%P6% zGVi^Nex8rM`>;;T^AShw2f@x|ug)R&o|tcJXAxJA+(*FX9!@ar7_aq)~{CT)J z#_6->WJYz*DBhcUDtEu#TgRB=Ww2*-B%RS-u-X{RGkSs1&okP~_#*ZaW*u?to<2`V`|M%fBr@n~$0G#jtLwLUbkKo3rNB{f5QA>aH|1sEk^vCbR0kHa5 zEbh-iu=x`({o}Cso8c3%{_&VSdfz_9reX2_!)Hk|Kkn%vuzRF0;ywpkPoH;bHlte9 z{Q|5O{L3PKIPtijU%|~WPG8(lb?>8R<2iU9d9I$nzuq^wchPu9dnji-g4JQ2++X?8 zq*X9FpFTD7{XfZkNBP>seJ9o%!FUYw4aMIxUnh;BJLVg3KIU7vT8~`JcSSMZ!_`W# zCVm$NGX8*B-+cX^hkqj+XZ#U68|#I+4!QC6IfU^iOuv0v>*u5~6ygUb?O(d8!~O$K Cj8}00 literal 1344 zcmZ9MOK(#_424b7gqBB}zIld}00jz#vOov~g$>XZuwh@J2t}d|O3EIw;wQ5~;`{C$ zqpObOjL-4-*q*scyEQx;aVCECw&O|EYdKml5sT5T2>qp4h&lF4*wNwm*gyw6k1b+r zSda6@5yWegA^DoTPJA9sXXDB7^TFQko^%Jc$NqMD`1Js_E{xc9GCmr)cgq>;Pe-%= zxCQ<`nN=F=_-cIkaZ+iBU0){OCR29n*k@{X=*ynf)3^83a)xGJbJll7r_ULeSRJwb ziPaL@pIE&-|DI|&-w@uFocVI|4NeU?b3B)GW$tQuqb+9etQs5mZT3NBb97Mia_gXO z#n;?U!M>Gi0JeVpb`hIk-_Kn2jh|z;j(p}{D4g+&_~tj`OJMW+Wq$RHYvwGoo8z3; zTw(W&y&GRGSiVohd(bzw!EUac{g}6jMf}`1{s_;w=U;%;^jmNSvGe$bP3#*cwzhAm z?o%@5{U%(_^oQTe>}uJ|70h~K=ek<(8*;tdYYlE6Rk8gT_wKHjxOZw?Y=0T|TT;_I zH7>T_jNib|_)B`I?>IvK6E&P;3magr%)5zio^>;Ch%cXcxA5g$zl+vweD#c_)*bxQ zJ#53tr`BD3Iqx&`?%|*A;Xa&v=I!9inU_60z*o=MM{eJ@eu&wdF~0}RI(`qkSiaLM eZba;P=k_~#T;kSL%RehQm*@Q+ FragOut } } - if (col == vec3[f32](0.0, 0.0, 0.0)) - discard; + //if (col == vec3[f32](0.0, 0.0, 0.0)) + // discard; let output: FragOut; output.color = vec4[f32](col.x, col.y, col.z, 1.0); diff --git a/sandbox/shader.spv b/sandbox/shader.spv index 4306992f01fc8fff6973674ae84841881afed1e1..3801e1e5ca00e4af97e29471291855a5c125940e 100644 GIT binary patch literal 4480 zcmZ9O_j6TM6ooGd2`Zs?6%2$XXaGYAp#_wtQGzJ;f)FtX1e0Jd0YtH30Snl$VJ`?O zDgt(F|BFA_W}NZ+Ue1np-ksT;wbm|YpY}*;NmY+5>y~ZpJ1+Yr>-_p*^eAl8?7{Y(TZB}1{E zys~U%Q)^pu%f`CODb-Vy%TW6=E^1x1b}7}$QRIH@%^R8=%TtU!wl=kO@e=r^mbOBT zL11ev+uUBLA@^I|vc9F2(Ru7MYL?NLJzGy~?@`M&1e3HFNqgn&Ud1 zvp=IT_O5?G;_4&txDR6IH*PS-S08*xfp*LbKD59)YYzjPqgne@M(0+K_|w3A$Ah0< zpdH784~PHuYs1{&o#)$8Z@n|{S!RCb-<6Bhs?0g&7VV<4u z-I|`u^_Y5`$*QCcXRKg9jky7vfc3)kN9<~_z7bf=u?Ac`M-!TQV`C*~X5QN-0F_YttUM^X=ug4IT0_OOG|&mQzW zgguN|S8N^6=P@jQemmj$^LsqSsC$0qJb`(B3o-ps>q)S=!Jh*AF5)ii0vn?qwRVG} zmj2zW$8&fZa~}P1Kb}c?6=MZ2%(!PU^T%WQhhpw$9izV3ujjyO!S@vLy@|≥#g* zar&$|iBa7%iudN8%H41G)-mRI8SEJyO=q+ZtTr0+j9y^$^NjW}zKFeqSw|fE_&V5k z8+-N!IQC3m#O(*W-@)Gm`~GA9-vS$>9<|;EM=kxa|L=gEM}OS0cfsmIF#CUn(a-+% zy^6ht*}pjYcn_TK|9yDOsW0L_0O$Mv5T5V>lZhxX;1X)8}29#i$l_ zzW}QR|FVc5Nj&c7S8#KT(--$s-TUa-cn+RNo~!5YulG&vT{Pa&9?BVyVzpQY_g8)_ zX%&plr%%m%|4%aCQNAv5---2xF&@W!L-F^_*GXgOiunedkNFm^)-4zFT~W;UaJ3Sw zk>7>>j6Y!3H($T!;ok@+7=OfOV?8j}AvfMW2QmJH>9& BSNZ?| literal 4620 zcmZ9O_j6TM6ooGYf(p{B1p}dqMlf^?1eB&xf+%(cA!4KilVC3aM6qE33)ma>f}o-z zU`GW}{}O+)jn4Re@132w^X|;fS!?Zb_C4k1m36G_l4TvT4LwI@zh%XzSJn|F%erRe z*%-#Tb&Kk%T3VM^jh`^qi0)aLLm{R|c5K!Od>XcTY2zC8&RAEhJJuKLNxXh00+o@W zSW8~#Y;i+#OJmcTsTJd@#wmA3?a4T&dFhJzR4Yf3`?NN$Zg4D5G4|Np(9+I3!q+yn z6l(MXTVuhx)Wy@;R3Z1H}T=suyDuXapMvxb^E#>IO@ZlAPwLwE;ct4$8`i}>jpZE8Gn?E?czxDKM=JaAT z$MgwI? zb%(*T?9i<64~Om8KB(P40!|+_;U5XFuHM@I-lO1I_Is^sIvV8M@!qF`?bEfi@1u#k zuG8SIy?3TV;_ka=JQD&doJ+&!IA!l&lA^G_?`XXUtSU6bRU_vwkxWUb7f zk<(_digMRqzw1?tnd4p4FLyt}KNGCqyQW|6{)B%PIQ%v2fp_n0Og&=GNioh5F|)zy zo9V-xIhc9sTaq?6>E6Toq|F0+54DX+n-4Zt-*kHP45-Ci=YrK167M-W4^#7r9_zrd z4v}|0ntJ4209NyfyanLWym~bC$Xf_j^X&S*_HZF)J$;*!wg_x*`l5%6!1>-UhO1dC z)?_i*Tz$U3XI`zIuY}*9OEAAxVa9k5)FbB7oZs)1G2RXJ=;bo7wd1`m2dnu+%oX5# z%$0DpV!y?Hu7aDZ&wX?*@145XJ7=$A?^|H+n%_+4^1ZIc?9n*yojl&>8nAg`#!P~% zyLaA=C765X{kNw3vJ_i_#hzT7G{4Q=SWjb?VPi1wkN${V4%Rmqi#Zy=rE^?|rXG1K zz-sw9u7^8EcT9i8HiGpzN4)C|VBgdE_4}@q8P&y87=1^v-)MLB_uhZ^ zcO7Po`ue2Z1h$So-^YDdi<&os-RI!9fQ|86Va;1HW7K0mZv(s5ezWu&Yd^PR`UYUm z<{N8Ho~uXhLtu0Fr5+vz zs}05M;eJLRd(ig)_8?|mv31;^Em(g49)aig@6i;a?*5tc80P-X#`H(6$HC?X-wO6# z#97z|Hby;aJpqnd`nT~u?!%Lq^XQNB@l?_)8M|@AjC&e0e-x&FAm(~bW7HSxwH>S$ zd`Ag?Ch@rA&%(_yPMsC?g59J2X^&n4s|~~4qvsfX+#`L@ zV=rLV5yv{d2KL^@n(YL~n(2$U*TJrL@HfET|5*Pw!N#aZt+&8YOMk5Y+hFI>A7|_x zu)6ot{$FPFv44H9V6S5KFOELm1?T&J4<2*si@5i}`TjqE=lkCUH%2}B{}3Ft^hf`@ z!Oo*Uz7KoA>cg=(KOce3_x$J|fyHlzkHPv!V)p2H`vjYS#s7!BNi#pr>8D`VNMFQ# z2DY9)&(c|pYEk!duv+kaCH#xT<9zOin`4~5IG^gCNB72ka6j@~J%4^ZZ*tF~@s9RT z&UgTug0*pe*@X<2RUhDBj^)@W7n;T^iH)1W;}vf-+cX^1%IC$W&9bd!Mb3sq1 @sin(x), + .Cos => @cos(x), + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(bit_count: SpvWord, d: *Result.Value, s: *const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const s_field = try getValuePrimitiveField(T, bits, @constCast(s)); + d_field.* = try operation(ScalarT, s_field.*); + }, + else => return RuntimeError.InvalidSpirV, + } + } + }; + + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, src), + + .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, s_lane| { + try operator.applyScalar(lane_bits, d_lane, &s_lane); + }, + + .Vector4f32 => |*d| d.* = try operator.operation(@Vector(4, f32), src.Vector4f32), + .Vector3f32 => |*d| d.* = try operator.operation(@Vector(3, f32), src.Vector3f32), + .Vector2f32 => |*d| d.* = try operator.operation(@Vector(2, f32), src.Vector2f32), + + //.Vector4i32 => |*d| d.* = try operator.operation(@Vector(4, i32), src.Vector4i32), + //.Vector3i32 => |*d| d.* = try operator.operation(@Vector(3, i32), src.Vector3i32), + //.Vector2i32 => |*d| d.* = try operator.operation(@Vector(2, i32), src.Vector2i32), + + //.Vector4u32 => |*d| d.* = try operator.operation(@Vector(4, u32), src.Vector4u32), + //.Vector3u32 => |*d| d.* = try operator.operation(@Vector(3, u32), src.Vector3u32), + //.Vector2u32 => |*d| d.* = try operator.operation(@Vector(2, u32), src.Vector2u32), + + else => return RuntimeError.InvalidSpirV, + } + } + + fn opDoubleOperators(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const lhs = try rt.results[try rt.it.next()].getValue(); + const rhs = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + const operator = struct { + fn operation(comptime TT: type, l: TT, r: TT) RuntimeError!TT { + return switch (Op) { + .FMax => @max(l, r), + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *const Result.Value, r: *const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const l_field = try getValuePrimitiveField(T, bits, @constCast(l)); + const r_field = try getValuePrimitiveField(T, bits, @constCast(r)); + d_field.* = try operation(ScalarT, l_field.*, r_field.*); + }, + else => return RuntimeError.InvalidSpirV, + } + } + + inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void { + inline for (0..N) |i| { + d[i] = try operation(ElemT, l[i], r[i]); + } + } + }; + + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs), + + .Vector => |dst_vec| for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, l_lane, r_lane| { + try operator.applyScalar(lane_bits, d_lane, &l_lane, &r_lane); + }, + + .Vector4f32 => |*d| try operator.applySIMDVector(f32, 4, d, &lhs.Vector4f32, &rhs.Vector4f32), + .Vector3f32 => |*d| try operator.applySIMDVector(f32, 3, d, &lhs.Vector3f32, &rhs.Vector3f32), + .Vector2f32 => |*d| try operator.applySIMDVector(f32, 2, d, &lhs.Vector2f32, &rhs.Vector2f32), + + .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32), + .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32), + .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32), + + .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32), + .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32), + .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32), + + else => return RuntimeError.InvalidSpirV, + } + } + }; +} + +inline fn sumSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void { + inline for (0..N) |i| { + d.* += v[i]; + } +} + +fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const src = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (lane_bits) { + inline 16, 32, 64 => |bits| { + var sum: std.meta.Float(bits) = 0.0; + const d_field = try getValuePrimitiveField(.Float, bits, dst); + + if (bits == 32) { // More likely to be SIMD if f32 + switch (src.*) { + .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), + .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), + .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + else => {}, + } + } + + switch (src.*) { + .Float => { + // Fast path + const s_field = try getValuePrimitiveField(.Float, bits, src); + d_field.* = s_field.*; + return; + }, + .Vector => |src_vec| for (src_vec) |*s_lane| { + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + sum += s_field.*; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + + d_field.* = @sqrt(sum); + }, + else => return RuntimeError.InvalidSpirV, + } +} + +fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const src = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (lane_bits) { + inline 16, 32, 64 => |bits| { + var sum: std.meta.Float(bits) = 0.0; + + if (bits == 32) { // More likely to be SIMD if f32 + switch (src.*) { + .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), + .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), + .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + else => {}, + } + } + + switch (src.*) { + .Float => { + const s_field = try getValuePrimitiveField(.Float, bits, src); + sum = s_field.*; + }, + .Vector => |src_vec| for (src_vec) |*s_lane| { + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + sum += s_field.*; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + + sum = @sqrt(sum); + + if (bits == 32) { + switch (dst.*) { + .Vector4f32 => |*dst_vec| inline for (0..4) |i| { + dst_vec[i] = src.Vector4f32[i] / sum; + }, + .Vector3f32 => |*dst_vec| inline for (0..3) |i| { + dst_vec[i] = src.Vector3f32[i] / sum; + }, + .Vector2f32 => |*dst_vec| inline for (0..2) |i| { + dst_vec[i] = src.Vector2f32[i] / sum; + }, + else => {}, + } + } + + switch (dst.*) { + .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, *s_lane| { + const d_field = try getValuePrimitiveField(.Float, bits, d_lane); + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + d_field.* = s_field.* / sum; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + }, + else => return RuntimeError.InvalidSpirV, + } +} diff --git a/src/Result.zig b/src/Result.zig index c7859ea..2ec4385 100644 --- a/src/Result.zig +++ b/src/Result.zig @@ -2,7 +2,8 @@ const std = @import("std"); const spv = @import("spv.zig"); const op = @import("opcodes.zig"); -const RuntimeError = @import("Runtime.zig").RuntimeError; +const Runtime = @import("Runtime.zig"); +const RuntimeError = Runtime.RuntimeError; const SpvVoid = spv.SpvVoid; const SpvByte = spv.SpvByte; @@ -227,65 +228,67 @@ pub const Value = union(Type) { } }; +pub const TypeData = union(Type) { + Void: struct {}, + Bool: struct {}, + Int: struct { + bit_length: SpvWord, + is_signed: bool, + }, + Float: struct { + bit_length: SpvWord, + }, + Vector: struct { + components_type_word: SpvWord, + components_type: Type, + member_count: SpvWord, + }, + Vector4f32: struct {}, + Vector3f32: struct {}, + Vector2f32: struct {}, + Vector4i32: struct {}, + Vector3i32: struct {}, + Vector2i32: struct {}, + Vector4u32: struct {}, + Vector3u32: struct {}, + Vector2u32: struct {}, + Matrix: struct { + column_type_word: SpvWord, + column_type: Type, + member_count: SpvWord, + }, + Array: struct { + components_type_word: SpvWord, + components_type: Type, + member_count: SpvWord, + }, + RuntimeArray: struct {}, + Structure: struct { + members_type_word: []const SpvWord, + members: []Type, + member_names: std.ArrayList([]const u8), + }, + Function: struct { + source_location: usize, + return_type: SpvWord, + params: []const SpvWord, + }, + Image: struct {}, + Sampler: struct {}, + SampledImage: struct {}, + Pointer: struct { + storage_class: spv.SpvStorageClass, + target: SpvWord, + }, +}; + pub const VariantData = union(Variant) { String: []const u8, Extension: struct { /// Should not be allocated but rather a pointer to a static array - dispatcher: []op.OpCodeExtFunc, - }, - Type: union(Type) { - Void: struct {}, - Bool: struct {}, - Int: struct { - bit_length: SpvWord, - is_signed: bool, - }, - Float: struct { - bit_length: SpvWord, - }, - Vector: struct { - components_type_word: SpvWord, - components_type: Type, - member_count: SpvWord, - }, - Vector4f32: struct {}, - Vector3f32: struct {}, - Vector2f32: struct {}, - Vector4i32: struct {}, - Vector3i32: struct {}, - Vector2i32: struct {}, - Vector4u32: struct {}, - Vector3u32: struct {}, - Vector2u32: struct {}, - Matrix: struct { - column_type_word: SpvWord, - column_type: Type, - member_count: SpvWord, - }, - Array: struct { - components_type_word: SpvWord, - components_type: Type, - member_count: SpvWord, - }, - RuntimeArray: struct {}, - Structure: struct { - members_type_word: []const SpvWord, - members: []Type, - member_names: std.ArrayList([]const u8), - }, - Function: struct { - source_location: usize, - return_type: SpvWord, - params: []const SpvWord, - }, - Image: struct {}, - Sampler: struct {}, - SampledImage: struct {}, - Pointer: struct { - storage_class: spv.SpvStorageClass, - target: SpvWord, - }, + dispatcher: []?op.OpCodeExtFunc, }, + Type: TypeData, Variable: struct { storage_class: spv.SpvStorageClass, type_word: SpvWord, @@ -364,7 +367,7 @@ pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { self.decorations.deinit(allocator); } -pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { +pub inline fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { return switch ((try self.getVariant()).*) { .Variable => |v| v.type_word, .Constant => |c| c.type_word, @@ -374,7 +377,7 @@ pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { }; } -pub fn getValueType(self: *Self) RuntimeError!Type { +pub inline fn getValueType(self: *Self) RuntimeError!Type { return switch ((try self.getVariant()).*) { .Variable => |v| v.type, .Constant => |c| c.type, @@ -383,7 +386,7 @@ pub fn getValueType(self: *Self) RuntimeError!Type { }; } -pub fn getValue(self: *Self) RuntimeError!*Value { +pub inline fn getValue(self: *Self) RuntimeError!*Value { return switch ((try self.getVariant()).*) { .Variable => |*v| &v.value, .Constant => |*c| &c.value, @@ -471,6 +474,26 @@ pub fn dupe(self: *const Self, allocator: std.mem.Allocator) RuntimeError!Self { }; } +pub fn resolveLaneBitWidth(target_type: TypeData, rt: *const Runtime) RuntimeError!SpvWord { + return sw: switch (target_type) { + .Bool => 8, + .Float => |f| f.bit_length, + .Int => |i| i.bit_length, + .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => return 32, + else => return RuntimeError.InvalidSpirV, + }; +} + pub fn resolveType(self: *const Self, results: []const Self) *const Self { return if (self.variant) |variant| switch (variant) { diff --git a/src/Runtime.zig b/src/Runtime.zig index 0440c63..494daf9 100644 --- a/src/Runtime.zig +++ b/src/Runtime.zig @@ -131,10 +131,7 @@ pub fn callEntryPoint(self: *Self, allocator: std.mem.Allocator, entry_point_ind var it_tmp = self.it; // Save because operations may iter on this iterator if (op.runtime_dispatcher[opcode]) |pfn| { - pfn(allocator, word_count, self) catch |err| switch (err) { - RuntimeError.Killed => return, - else => return err, - }; + try pfn(allocator, word_count, self); } if (!self.it.did_jump) { _ = it_tmp.skipN(word_count); diff --git a/src/ext/GLSL_std_450.zig b/src/ext/GLSL_std_450.zig deleted file mode 100644 index e69de29..0000000 diff --git a/src/lib.zig b/src/lib.zig index edef74f..c5c96cd 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -36,3 +36,10 @@ pub const Runtime = @import("Runtime.zig"); const opcodes = @import("opcodes.zig"); const spv = @import("spv.zig"); + +pub const SpvVoid = spv.SpvVoid; +pub const SpvByte = spv.SpvByte; +pub const SpvWord = spv.SpvWord; +pub const SpvBool = spv.SpvBool; + +pub const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig"); diff --git a/src/opcodes.zig b/src/opcodes.zig index fb7e097..0009286 100644 --- a/src/opcodes.zig +++ b/src/opcodes.zig @@ -1,6 +1,8 @@ const std = @import("std"); const spv = @import("spv.zig"); +const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig"); + const Module = @import("Module.zig"); const Runtime = @import("Runtime.zig"); const Result = @import("Result.zig"); @@ -13,13 +15,7 @@ const SpvByte = spv.SpvByte; const SpvWord = spv.SpvWord; const SpvBool = spv.SpvBool; -// OpExtInst Sin -// OpExtInst Cos -// OpExtInst Length -// OpExtInst Normalize -// OpExtInst FMax - -const ValueType = enum { +pub const ValueType = enum { Bool, Float, SInt, @@ -230,11 +226,11 @@ pub fn initRuntimeDispatcher() void { runtime_dispatcher[@intFromEnum(spv.SpvOp.ISub)] = MathEngine(.SInt, .Sub).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.Kill)] = opKill; runtime_dispatcher[@intFromEnum(spv.SpvOp.Load)] = opLoad; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)] = CondEngine(.Float, .LogicalAnd).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)] = CondEngine(.Float, .LogicalEqual).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)] = CondEngine(.Float, .LogicalNot).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)] = CondEngine(.Float, .LogicalNotEqual).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)] = CondEngine(.Float, .LogicalOr).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)] = CondEngine(.Bool, .LogicalAnd).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)] = CondEngine(.Bool, .LogicalEqual).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)] = CondEngine(.Bool, .LogicalNot).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)] = CondEngine(.Bool, .LogicalNotEqual).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)] = CondEngine(.Bool, .LogicalOr).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesMatrix)] = MathEngine(.Float, .MatrixTimesMatrix).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesScalar)] = MathEngine(.Float, .MatrixTimesScalar).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesVector)] = MathEngine(.Float, .MatrixTimesVector).op; // TODO @@ -261,130 +257,271 @@ pub fn initRuntimeDispatcher() void { runtime_dispatcher[@intFromEnum(spv.SpvOp.UMod)] = MathEngine(.UInt, .Mod).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesMatrix)] = MathEngine(.Float, .VectorTimesMatrix).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesScalar)] = MathEngine(.Float, .VectorTimesScalar).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.ExtInst)] = opExtInst; // zig fmt: on + + // Extensions init + GLSL_std_450.initRuntimeDispatcher(); } -fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { - if (T == .Float) @compileError("Invalid value type"); +fn extEqlName(a: []const u8, b: []const u8) bool { + for (0..@min(a.len, b.len)) |i| { + if (a[i] != b[i]) return false; + } + return true; +} + +const extensions_map = std.StaticStringMapWithEql([]?OpCodeExtFunc, extEqlName).initComptime(.{ + .{ "GLSL.std.450", GLSL_std_450.runtime_dispatcher[0..] }, +}); + +fn BitOperator(comptime T: ValueType, comptime Op: BitOp) type { return struct { - fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { - const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op1_value = try rt.results[try rt.it.next()].getValue(); - const op2_value: ?*Result.Value = switch (Op) { - .Not, .BitCount, .BitReverse => null, - else => try rt.results[try rt.it.next()].getValue(), + comptime { + if (T == .Float) @compileError("Invalid value type"); + } + + inline fn isUnaryOp() bool { + return comptime switch (Op) { + .Not, .BitCount, .BitReverse => true, + else => false, }; + } - const size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Int => |i| i.bit_length, - else => return RuntimeError.InvalidSpirV, + inline fn bitMask(bits: u64) u64 { + return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1; + } + + inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT { + const mask: TT = @intCast(bitMask(count) << @intCast(offset)); + return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask))); + } + + inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT { + return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count))); + } + + fn operationUnary(comptime TT: type, op1: TT) RuntimeError!TT { + return switch (Op) { + .BitCount => @as(TT, @intCast(@bitSizeOf(TT))), // keep return type TT + .BitReverse => @bitReverse(op1), + .Not => ~op1, + else => RuntimeError.InvalidSpirV, }; + } - const operator = struct { - inline fn bitMask(bits: u64) u64 { - return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1; - } + fn operationBinary(comptime TT: type, rt: *Runtime, op1: TT, op2: TT) RuntimeError!TT { + return switch (Op) { + .BitFieldInsert => blk: { + const offset = try rt.results[try rt.it.next()].getValue(); + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitInsert(TT, op1, op2, offset.Int.uint64, count.Int.uint64); + }, + .BitFieldSExtract => blk: { + if (T == .UInt) return RuntimeError.InvalidSpirV; + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitExtract(TT, op1, op2, count.Int.uint64); + }, + .BitFieldUExtract => blk: { + if (T == .SInt) return RuntimeError.InvalidSpirV; + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitExtract(TT, op1, op2, count.Int.uint64); + }, - inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT { - const mask: TT = @intCast(bitMask(count) << @intCast(offset)); - return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask))); - } + .BitwiseAnd => op1 & op2, + .BitwiseOr => op1 | op2, + .BitwiseXor => op1 ^ op2, + .ShiftLeft => op1 << @intCast(op2), + .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(op2), - inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT { - return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count))); - } - - fn operation(comptime TT: type, rt2: *Runtime, op1: TT, op2: ?TT) RuntimeError!TT { - switch (Op) { - .BitCount => return @bitSizeOf(TT), - .BitReverse => return @bitReverse(op1), - .Not => return ~op1, - else => {}, - } - return if (op2) |v2| - switch (Op) { - .BitFieldInsert => blk: { - const offset = try rt2.results[try rt2.it.next()].getValue(); - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitInsert(TT, op1, v2, offset.Int.uint64, count.Int.uint64); - }, - .BitFieldSExtract => blk: { - if (T == .UInt) return RuntimeError.InvalidSpirV; - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitExtract(TT, op1, v2, count.Int.uint64); - }, - .BitFieldUExtract => blk: { - if (T == .SInt) return RuntimeError.InvalidSpirV; - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitExtract(TT, op1, v2, count.Int.uint64); - }, - .BitwiseAnd => op1 & v2, - .BitwiseOr => op1 | v2, - .BitwiseXor => op1 ^ v2, - .ShiftLeft => op1 << @intCast(v2), - .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(v2), - else => return RuntimeError.InvalidSpirV, - } - else - RuntimeError.InvalidSpirV; - } - - fn process(rt2: *Runtime, bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { - switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - (try getValuePrimitiveField(T, i, v)).* = try operation( - getValuePrimitiveFieldType(T, i), - rt2, - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - if (op2_v) |v2| - (try getValuePrimitiveField(T, i, @constCast(v2))).* - else - null, - ); - }, - else => return RuntimeError.InvalidSpirV, - } - } + else => RuntimeError.InvalidSpirV, }; + } - switch (value.*) { - .Int => try operator.process(rt, size, value, op1_value, op2_value), - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| - try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null), - // No bit manipulation on VectorXf32 - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null); - }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null); - }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null); - }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null); - }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null); - }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null); + fn applyScalarBits(rt: *Runtime, bit_count: SpvWord, dst: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + const TT = getValuePrimitiveFieldType(T, bits); + const a = (try getValuePrimitiveField(T, bits, @constCast(op1_v))).*; + + const out = if (comptime isUnaryOp()) blk: { + break :blk try operationUnary(TT, a); + } else blk: { + const b_ptr = op2_v orelse return RuntimeError.InvalidSpirV; + const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*; + break :blk try operationBinary(TT, rt, a, b); + }; + + (try getValuePrimitiveField(T, bits, dst)).* = out; }, else => return RuntimeError.InvalidSpirV, } } + + fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value { + if (comptime isUnaryOp()) return null; + const v = op2_value orelse return null; + return &v.Vector[index]; + } + + fn applyFixedVector(comptime ElemT: type, comptime N: usize, dst: *[N]ElemT, op1: *[N]ElemT, op2_value: ?*Result.Value) RuntimeError!void { + if (comptime isUnaryOp()) { + inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]); + } else { + const op2 = op2_value orelse return RuntimeError.InvalidSpirV; + const b: *const [N]ElemT = switch (N) { + 2 => &op2.*.Vector2u32, // will be overridden by call sites per ElemT/tag + 3 => &op2.*.Vector3u32, + 4 => &op2.*.Vector4u32, + else => unreachable, + }; + // NOTE: the above dummy mapping isn’t type-correct for i32; call sites below pass correct rhs pointer. + _ = b; + return RuntimeError.InvalidSpirV; + } + } + + fn applyFixedVectorBinary( + comptime ElemT: type, + comptime N: usize, + rt: *Runtime, + dst: *[N]ElemT, + op1: *[N]ElemT, + op2: *[N]ElemT, + ) RuntimeError!void { + inline for (0..N) |i| dst[i] = try operationBinary(ElemT, rt, op1[i], op2[i]); + } + + fn applyFixedVectorUnary( + comptime ElemT: type, + comptime N: usize, + dst: *[N]ElemT, + op1: *[N]ElemT, + ) RuntimeError!void { + inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]); + } + }; +} + +fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { + return struct { + fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; + const dst = try rt.results[try rt.it.next()].getValue(); + const op1 = try rt.results[try rt.it.next()].getValue(); + + const operator = BitOperator(T, Op); + + const op2_value: ?*Result.Value = if (comptime operator.isUnaryOp()) null else try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (dst.*) { + .Int => try operator.applyScalarBits(rt, lane_bits, dst, op1, if (comptime operator.isUnaryOp()) null else op2_value), + + .Vector => |dst_vec| { + const op1_vec = op1.Vector; + if (dst_vec.len != op1_vec.len) return RuntimeError.InvalidSpirV; + + for (dst_vec, op1_vec, 0..) |*d_lane, a_lane, i| { + var tmp_a = a_lane; + const b_ptr = operator.laneRhsPtr(op2_value, i); + try operator.applyScalarBits(rt, lane_bits, d_lane, &tmp_a, b_ptr); + } + }, + + .Vector4i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 4, d, &op1.Vector4i32) + else + try operator.applyFixedVectorBinary(i32, 4, rt, d, &op1.Vector4i32, &op2_value.?.Vector4i32); + }, + .Vector3i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 3, d, &op1.Vector3i32) + else + try operator.applyFixedVectorBinary(i32, 3, rt, d, &op1.Vector3i32, &op2_value.?.Vector3i32); + }, + .Vector2i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 2, d, &op1.Vector2i32) + else + try operator.applyFixedVectorBinary(i32, 2, rt, d, &op1.Vector2i32, &op2_value.?.Vector2i32); + }, + + .Vector4u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 4, d, &op1.Vector4u32) + else + try operator.applyFixedVectorBinary(u32, 4, rt, d, &op1.Vector4u32, &op2_value.?.Vector4u32); + }, + .Vector3u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 3, d, &op1.Vector3u32) + else + try operator.applyFixedVectorBinary(u32, 3, rt, d, &op1.Vector3u32, &op2_value.?.Vector3u32); + }, + .Vector2u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 2, d, &op1.Vector2u32) + else + try operator.applyFixedVectorBinary(u32, 2, rt, d, &op1.Vector2u32, &op2_value.?.Vector2u32); + }, + + else => return RuntimeError.InvalidSpirV, + } + } + }; +} + +fn CondOperator(comptime T: ValueType, comptime Op: CondOp) type { + return struct { + fn operation(comptime TT: type, a: TT, b: TT) RuntimeError!bool { + return switch (Op) { + .Equal, .LogicalEqual => a == b, + .NotEqual, .LogicalNotEqual => a != b, + .Greater => a > b, + .GreaterEqual => a >= b, + .Less => a < b, + .LessEqual => a <= b, + .LogicalAnd => a and b, + .LogicalOr => a or b, + else => RuntimeError.InvalidSpirV, + }; + } + + fn operationUnary(comptime TT: type, a: TT) RuntimeError!bool { + return switch (Op) { + .LogicalNot => !a, + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyLane(bit_count: SpvWord, dst_bool: *Result.Value, a_v: *const Result.Value, b_v: ?*const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const TT = getValuePrimitiveFieldType(T, bits); + const a = (try getValuePrimitiveField(T, bits, @constCast(a_v))).*; + + if (comptime Op == .LogicalNot) { + dst_bool.Bool = try operationUnary(TT, a); + } else { + const b_ptr = b_v orelse return RuntimeError.InvalidSpirV; + const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*; + dst_bool.Bool = try operation(TT, a, b); + } + }, + else => return RuntimeError.InvalidSpirV, + } + } + + fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value { + if (comptime Op == .LogicalNot) return null; + const v = op2_value orelse return null; + return &v.Vector[index]; + } }; } @@ -397,199 +534,151 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type { else => return RuntimeError.InvalidSpirV, } - const value = try rt.results[try rt.it.next()].getValue(); + const dst = try rt.results[try rt.it.next()].getValue(); + const op1_result = &rt.results[try rt.it.next()]; const op1_type = try op1_result.getValueTypeWord(); const op1_value = try op1_result.getValue(); - const op2_value: ?*Result.Value = switch (Op) { - .LogicalNot => null, - else => try rt.results[try rt.it.next()].getValue(), - }; - const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const op2_value: ?*Result.Value = if (comptime Op == .LogicalNot) null else try rt.results[try rt.it.next()].getValue(); - const operator = struct { - fn operation(comptime TT: type, op1: TT, op2: ?TT) RuntimeError!bool { - return switch (Op) { - .Equal, .LogicalEqual => op1 == op2 orelse return RuntimeError.InvalidSpirV, - .NotEqual, .LogicalNotEqual => op1 != op2 orelse return RuntimeError.InvalidSpirV, - .Greater => op1 > op2 orelse return RuntimeError.InvalidSpirV, - .GreaterEqual => op1 >= op2 orelse return RuntimeError.InvalidSpirV, - .Less => op1 < op2 orelse return RuntimeError.InvalidSpirV, - .LessEqual => op1 <= op2 orelse return RuntimeError.InvalidSpirV, - .LogicalAnd => (op1 != @as(TT, 0)) and ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)), - .LogicalOr => (op1 != @as(TT, 0)) or ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)), - .LogicalNot => (op1 == @as(TT, 0)), - }; - } + const lane_bits = try Result.resolveLaneBitWidth((try rt.results[op1_type].getVariant()).Type, rt); - fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { - switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and T == .Float) { // No f8 - return RuntimeError.InvalidSpirV; - } - v.Bool = try operation( - getValuePrimitiveFieldType(T, i), - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - if (op2_v) |val| (try getValuePrimitiveField(T, i, @constCast(val))).* else null, - ); - }, - else => return RuntimeError.InvalidSpirV, - } - } - }; + const operator = CondOperator(T, Op); - switch (value.*) { - .Bool => try operator.process(size, value, op1_value, op2_value), - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| { - try operator.process(size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null); + switch (dst.*) { + .Bool => try operator.applyLane(lane_bits, dst, op1_value, op2_value), + + .Vector => |dst_vec| for (dst_vec, op1_value.Vector, 0..) |*d_lane, a_lane, i| { + const b_ptr = operator.laneRhsPtr(op2_value, i); + try operator.applyLane(lane_bits, d_lane, &a_lane, b_ptr); }, - // No Vector specializations for booleans + else => return RuntimeError.InvalidSpirV, } } }; } -fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type { +fn ConversionEngine(comptime from_kind: ValueType, comptime to_kind: ValueType) type { return struct { fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op_result = &rt.results[try rt.it.next()]; - const op_type = try op_result.getValueTypeWord(); - const op_value = try op_result.getValue(); + const dst_value = try rt.results[try rt.it.next()].getValue(); - const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const src_result = &rt.results[try rt.it.next()]; + const src_type_word = try src_result.getValueTypeWord(); + const src_value = try src_result.getValue(); - const to_size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const from_bits = try Result.resolveLaneBitWidth((try rt.results[src_type_word].getVariant()).Type, rt); + const to_bits = try Result.resolveLaneBitWidth(target_type, rt); - const operator = struct { - fn process(from_bit_count: SpvWord, to_bit_count: SpvWord, to: *Result.Value, from: *Result.Value) RuntimeError!void { + const caster = struct { + fn castLane(comptime ToT: type, from_bit_count: SpvWord, from: *Result.Value) RuntimeError!ToT { + return switch (from_bit_count) { + inline 8, 16, 32, 64 => |bits| blk: { + if (bits == 8 and from_kind == .Float) return RuntimeError.InvalidSpirV; // No f8 + const v = (try getValuePrimitiveField(from_kind, bits, from)).*; + break :blk std.math.lossyCast(ToT, v); + }, + else => return RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(from_bit_count: SpvWord, to_bit_count: SpvWord, dst: *Result.Value, from: *Result.Value) RuntimeError!void { switch (to_bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and To == .Float) { - return RuntimeError.InvalidSpirV; // No f8 - } - - const ToType = getValuePrimitiveFieldType(To, i); - (try getValuePrimitiveField(To, i, to)).* = std.math.lossyCast( - ToType, - switch (from_bit_count) { - inline 8, 16, 32, 64 => |j| blk: { - if (j == 8 and From == .Float) { - return RuntimeError.InvalidSpirV; // Same - } - break :blk (try getValuePrimitiveField(From, j, from)).*; - }, - else => return RuntimeError.InvalidSpirV, - }, - ); + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and to_kind == .Float) return RuntimeError.InvalidSpirV; // No f8 + const ToT = getValuePrimitiveFieldType(to_kind, bits); + (try getValuePrimitiveField(to_kind, bits, dst)).* = try castLane(ToT, from_bit_count, from); }, else => return RuntimeError.InvalidSpirV, } } - fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T { - return switch (from.*) { - .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]), - inline else => switch (from_bit_count) { - inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: { - if (i == 8 and From == .Float) { - return RuntimeError.InvalidSpirV; - } - break :blk (try getValuePrimitiveField(From, i, from)).*; - }), - else => return RuntimeError.InvalidSpirV, - }, - }; + fn castSIMDVector(comptime ToT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]ToT) void { + inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]); + } + + fn castSIMDVectorFromOther(comptime ToT: type, comptime FromT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]FromT) void { + inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]); } }; - switch (value.*) { - .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, - .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, - .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v), - .Vector4f32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + switch (dst_value.*) { + .Float => { + if (to_kind != .Float) return RuntimeError.InvalidSpirV; + try caster.applyScalar(from_bits, to_bits, dst_value, src_value); }, - .Vector3f32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + .Int => { + if (to_kind != .SInt and to_kind != .UInt) return RuntimeError.InvalidSpirV; + try caster.applyScalar(from_bits, to_bits, dst_value, src_value); }, - .Vector2f32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + .Vector => |dst_vec| { + const src_vec = src_value.Vector; + if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV; + for (dst_vec, src_vec) |*d_lane, *s_lane| { + try caster.applyScalar(from_bits, to_bits, d_lane, s_lane); + } }, - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + + .Vector4f32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVector(f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVectorFromOther(f32, i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVectorFromOther(f32, u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + .Vector3f32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVector(f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVectorFromOther(f32, i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVectorFromOther(f32, u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + .Vector2f32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVector(f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVectorFromOther(f32, i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVectorFromOther(f32, u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + + .Vector4i32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVectorFromOther(i32, f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVector(i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVectorFromOther(i32, u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + .Vector3i32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVectorFromOther(i32, f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVector(i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVectorFromOther(i32, u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + .Vector2i32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVectorFromOther(i32, f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVector(i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVectorFromOther(i32, u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, }, + + .Vector4u32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVectorFromOther(u32, f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVectorFromOther(u32, i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVector(u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, + }, + .Vector3u32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVectorFromOther(u32, f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVectorFromOther(u32, i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVector(u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, + }, + .Vector2u32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVectorFromOther(u32, f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVectorFromOther(u32, i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVector(u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, + }, + else => return RuntimeError.InvalidSpirV, } } @@ -600,26 +689,11 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { return struct { fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op1_value = try rt.results[try rt.it.next()].getValue(); - const op2_value = try rt.results[try rt.it.next()].getValue(); + const dst = try rt.results[try rt.it.next()].getValue(); + const lhs = try rt.results[try rt.it.next()].getValue(); + const rhs = try rt.results[try rt.it.next()].getValue(); - const size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); const operator = struct { fn operation(comptime TT: type, op1: TT, op2: TT) RuntimeError!TT { @@ -637,68 +711,77 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { }; } - fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: *const Result.Value) RuntimeError!void { + fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *Result.Value, r: *Result.Value) RuntimeError!void { switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and T == .Float) { // No f8 - return RuntimeError.InvalidSpirV; - } - (try getValuePrimitiveField(T, i, v)).* = try operation( - getValuePrimitiveFieldType(T, i), - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - (try getValuePrimitiveField(T, i, @constCast(op2_v))).*, - ); + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const l_field = try getValuePrimitiveField(T, bits, l); + const r_field = try getValuePrimitiveField(T, bits, r); + d_field.* = try operation(ScalarT, l_field.*, r_field.*); }, else => return RuntimeError.InvalidSpirV, } } + + inline fn applyVectorTimesScalarF32(d: []Result.Value, l: []const Result.Value, r: f32) void { + for (d, l) |*d_v, l_v| { + d_v.Float.float32 = l_v.Float.float32 * r; + } + } + + inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void { + inline for (0..N) |i| { + d[i] = try operation(ElemT, l[i], r[i]); + } + } + + inline fn applyVectorSIMDTimesScalarF32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: f32) void { + inline for (0..N) |i| { + d[i] = l[i] * r; + } + } + + inline fn applySIMDVectorf32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: *const Result.Value) RuntimeError!void { + switch (Op) { + .VectorTimesScalar => applyVectorSIMDTimesScalarF32(N, d, l, r.Float.float32), + else => { + const rh: *const @Vector(N, f32) = switch (N) { + 2 => &r.Vector2f32, + 3 => &r.Vector3f32, + 4 => &r.Vector4f32, + else => unreachable, + }; + try applySIMDVector(f32, N, d, l, rh); + }, + } + } }; - switch (value.*) { - .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, - .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| { - switch (Op) { - .VectorTimesScalar => try operator.process(size, val, &op1_v, op2_value), - else => try operator.process(size, val, &op1_v, &op2_value.Vector[i]), - } - }, - .Vector4f32 => |*vec| inline for (0..4) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector4f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]), - } - }, - .Vector3f32 => |*vec| inline for (0..3) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector3f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]), - } - }, - .Vector2f32 => |*vec| inline for (0..2) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector2f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]), - } - }, - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]); - }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]); - }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]); - }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]); - }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]); - }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]); + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs), + + .Vector => |dst_vec| switch (Op) { + .VectorTimesScalar => operator.applyVectorTimesScalarF32(dst_vec, lhs.Vector, rhs.Float.float32), + else => for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, *l_lane, *r_lane| { + try operator.applyScalar(lane_bits, d_lane, l_lane, r_lane); + }, }, + + .Vector4f32 => |*d| try operator.applySIMDVectorf32(4, d, &lhs.Vector4f32, rhs), + .Vector3f32 => |*d| try operator.applySIMDVectorf32(3, d, &lhs.Vector3f32, rhs), + .Vector2f32 => |*d| try operator.applySIMDVectorf32(2, d, &lhs.Vector2f32, rhs), + + .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32), + .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32), + .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32), + + .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32), + .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32), + .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32), + else => return RuntimeError.InvalidSpirV, } } @@ -784,20 +867,21 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { } fn copyValue(dst: *Result.Value, src: *const Result.Value) void { - if (src.getCompositeDataOrNull()) |src_slice| { - if (dst.getCompositeDataOrNull()) |dst_slice| { + switch (src.*) { + .Vector, .Matrix, .Array, .Structure => |src_slice| { + const dst_slice = switch (dst.*) { + .Vector, .Matrix, .Array, .Structure => |d| d, + else => unreachable, + }; for (0..@min(dst_slice.len, src_slice.len)) |i| { copyValue(&dst_slice[i], &src_slice[i]); } - } else { - unreachable; - } - } else { - dst.* = src.*; + }, + else => dst.* = src.*, } } -fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) { +pub fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) { return switch (T) { .Bool => &v.Bool, .Float => switch (BitCount) { @@ -815,7 +899,7 @@ fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: }; } -fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type { +pub fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type { return switch (T) { .Bool => bool, .Float => std.meta.Float(BitCount), @@ -1112,7 +1196,7 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru const set = try rt.it.next(); const inst = try rt.it.next(); - switch (try rt.results[set].getVariant()) { + switch ((try rt.results[set].getVariant()).*) { .Extension => |ext| if (ext.dispatcher[inst]) |pfn| { try pfn(allocator, target_type, id, word_count, rt); }, @@ -1122,10 +1206,11 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru fn opExtInstImport(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) RuntimeError!void { const id = try rt.it.next(); - rt.mod.results[id].name = try readStringN(allocator, &rt.it, word_count - 1); + const name = try readStringN(allocator, &rt.it, word_count - 1); + rt.mod.results[id].name = name; rt.mod.results[id].variant = .{ .Extension = .{ - .dispatcher = undefined, + .dispatcher = if (extensions_map.get(name)) |map| map else return RuntimeError.UnsupportedExtension, }, }; }