adding GLSL std 450 base

2026-01-24 02:46:02 +01:00
parent 37da19ed43
commit 96ad7f12f9
14 changed files with 1501 additions and 740 deletions
@@ -4,8 +4,8 @@ const spv = @import("spv");

 const shader_source = @embedFile("shader.spv");

-const screen_width = 1250;
-const screen_height = 720;
+const screen_width = 200;
+const screen_height = 200;

 pub fn main() !void {
    {
@@ -36,10 +36,16 @@ pub fn main() !void {
        }

        for (0..screen_height) |_| {
+            var rt = try spv.Runtime.init(allocator, &module);
            (try runner_cache.addOne(allocator)).* = .{
                .allocator = allocator,
                .surface = surface,
-                .rt = try spv.Runtime.init(allocator, &module),
+                .rt = rt,
+                .entry = try rt.getEntryPointByName("main"),
+                .color = try rt.getResultByName("color"),
+                .time = try rt.getResultByName("time"),
+                .pos = try rt.getResultByName("pos"),
+                .res = try rt.getResultByName("res"),
            };
        }

@@ -48,9 +54,11 @@ pub fn main() !void {
            .allocator = allocator,
        });

+        var timer = try std.time.Timer.start();
+
        var quit = false;
        while (!quit) {
-            try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 0.0 });
+            try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 1.0 });

            while (sdl3.events.poll()) |event|
                switch (event) {
@@ -65,17 +73,19 @@ pub fn main() !void {

                const pixel_map: [*]u32 = @as([*]u32, @ptrCast(@alignCast((surface.getPixels() orelse return).ptr)));

-                var timer = try std.time.Timer.start();
+                var frame_timer = try std.time.Timer.start();
                defer {
-                    const ns = timer.lap();
+                    const ns = frame_timer.lap();
                    const ms = @as(f32, @floatFromInt(ns)) / std.time.ns_per_s;
                    std.log.info("Took {d:.3}s - {d:.3}fps to render", .{ ms, 1.0 / ms });
                }

+                const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s;
+
                var wait_group: std.Thread.WaitGroup = .{};
                for (0..screen_height) |y| {
                    const runner = &runner_cache.items[y];
-                    thread_pool.spawnWg(&wait_group, Runner.run, .{ runner, y, pixel_map });
+                    thread_pool.spawnWg(&wait_group, Runner.runWrapper, .{ runner, y, pixel_map, delta });
                }
                thread_pool.waitAndWork(&wait_group);
            }
@@ -92,23 +102,33 @@ const Runner = struct {
    allocator: std.mem.Allocator,
    surface: sdl3.surface.Surface,
    rt: spv.Runtime,
+    entry: spv.SpvWord,
+    color: spv.SpvWord,
+    time: spv.SpvWord,
+    pos: spv.SpvWord,
+    res: spv.SpvWord,

-    fn run(self: *Self, y: usize, pixel_map: [*]u32) void {
+    fn runWrapper(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) void {
+        @call(.always_inline, Self.run, .{ self, y, pixel_map, timer }) catch |err| {
+            std.log.err("{s}", .{@errorName(err)});
+            if (@errorReturnTrace()) |trace| {
+                std.debug.dumpStackTrace(trace.*);
+            }
+            std.process.abort();
+        };
+    }
+
+    fn run(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) !void {
        var rt = self.rt; // Copy to avoid pointer access of `self` at runtime. Okay as Runtime contains only pointers and trivially copyable fields

-        const entry = rt.getEntryPointByName("main") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-        const color = rt.getResultByName("color") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-        const time = rt.getResultByName("time") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-        const pos = rt.getResultByName("pos") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-        const res = rt.getResultByName("res") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        var output: [4]f32 = undefined;

        for (0..screen_width) |x| {
-            rt.writeInput(f32, &.{@as(f32, @floatFromInt(std.time.milliTimestamp()))}, time) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-            rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, res) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-            rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, pos) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-            rt.callEntryPoint(self.allocator, entry) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
-            rt.readOutput(f32, output[0..], color) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.writeInput(f32, &.{timer}, self.time);
+            try rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, self.res);
+            try rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, self.pos);
+            try rt.callEntryPoint(self.allocator, self.entry);
+            try rt.readOutput(f32, output[0..], self.color);

            const rgba = self.surface.mapRgba(
                @truncate(@as(u32, @intFromFloat(output[0] * 255.0))),
@@ -16,12 +16,53 @@ struct FragOut
 [entry(frag)]
 fn main(input: FragIn) -> FragOut
 {
+    const I: i32 = 32;
+    const A: f32 = 7.5;
+    const MA: f32 = 20.0;
+    const MI: f32 = 0.001;
+
+    let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0);
+    let uv  = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y);
+
+    let col = vec3[f32](0.0, 0.0, 0.0);
+    let ro  = vec3[f32](0.0, 0.0, -2.0);
+    let rd  = vec3[f32](uv.x, uv.y, 1.0);
+    let dt  = 0.0;
+    let ds  = 0.0;
+    let dm  = -1.0;
+    let p   = ro;
+    let c   = vec3[f32](0.0, 0.0, 0.0);
+
+    let l = vec3[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0);
+
+    for i in 0 -> I
+    {
+        p = ro + rd * dt;
+        ds = length(c - p) - 1.0;
+        dt += ds;
+
+        if (dm == -1.0 || ds < dm)
+            dm = ds;
+
+        if (ds <= MI)
+        {
+            let value = max(dot(normalize(c - p), normalize(p - l)) - 0.35, 0.0);
+            col = vec3[f32](value, value, value);
+            break;
+        }
+
+        if (ds >= MA)
+        {
+            if (dot(normalize(rd), normalize(l - ro)) <= 1.0)
+            {
+                let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.05)/ 1.15 * (1.0 - dm * A);
+                col = vec3[f32](value, value, value);
+            }
+            break;
+        }
+    }
+
    let output: FragOut;
-	output.color = vec4[f32](
-		input.pos.x / input.res.x,
-		input.pos.y / input.res.y,
-		1.0,
-		1.0
-	);
+    output.color = vec4[f32](col.x, col.y, col.z, 1.0);
    return output;
 }
@@ -1,11 +1,12 @@
 Version 1.0
 Generator: 2560130
-Bound: 50
+Bound: 203
 Schema: 0
       OpCapability Capability(Shader)
+ %42 = OpExtInstImport "GLSL.std.450"
       OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450)
-      OpEntryPoint ExecutionModel(Fragment) %24 "main" %5 %11 %14 %20
-      OpExecutionMode %24 ExecutionMode(OriginUpperLeft)
+       OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20
+       OpExecutionMode %43 ExecutionMode(OriginUpperLeft)
       OpSource SourceLanguage(NZSL) 4198400
       OpSourceExtension "Version: 1.1"
       OpName %16 "FragIn"
@@ -18,7 +19,7 @@ Schema: 0
       OpName %11 "res"
       OpName %14 "pos"
       OpName %20 "color"
-      OpName %24 "main"
+       OpName %43 "main"
       OpDecorate %5 Decoration(Location) 0
       OpDecorate %11 Decoration(Location) 1
       OpDecorate %14 Decoration(Location) 2
@@ -35,51 +36,246 @@ Schema: 0
  %7 = OpConstant %6 i32(0)
  %8 = OpTypePointer StorageClass(Function) %3
  %9 = OpTypeVector %3 2
-%10 = OpTypePointer StorageClass(Input) %9
-%12 = OpConstant %6 i32(1)
-%13 = OpTypePointer StorageClass(Function) %9
-%15 = OpConstant %6 i32(2)
-%16 = OpTypeStruct %3 %9 %9
-%17 = OpTypePointer StorageClass(Function) %16
-%18 = OpTypeVector %3 4
-%19 = OpTypePointer StorageClass(Output) %18
-%21 = OpTypeStruct %18
-%22 = OpTypePointer StorageClass(Function) %21
-%23 = OpConstant %3 f32(1)
-%47 = OpTypePointer StorageClass(Function) %18
+ %10 = OpTypePointer StorageClass(Input) %9
+ %12 = OpConstant %6 i32(1)
+ %13 = OpTypePointer StorageClass(Function) %9
+ %15 = OpConstant %6 i32(2)
+ %16 = OpTypeStruct %3 %9 %9
+ %17 = OpTypePointer StorageClass(Function) %16
+ %18 = OpTypeVector %3 4
+ %19 = OpTypePointer StorageClass(Output) %18
+ %21 = OpTypeStruct %18
+ %22 = OpConstant %3 f32(2)
+ %23 = OpConstant %3 f32(1)
+ %24 = OpConstant %3 f32(0)
+ %25 = OpTypeVector %3 3
+ %26 = OpTypePointer StorageClass(Function) %25
+ %27 = OpConstant %3 f32(-2)
+ %28 = OpConstant %3 f32(-1)
+ %29 = OpConstant %3 f32(0.2)
+ %30 = OpConstant %3 f32(4)
+ %31 = OpTypePointer StorageClass(Function) %6
+ %32 = OpConstant %6 i32(32)
+ %33 = OpTypeBool
+ %34 = OpConstant %3 f32(0.001)
+ %35 = OpConstant %3 f32(0.35)
+ %36 = OpConstant %3 f32(20)
+ %37 = OpConstant %3 f32(0.15)
+ %38 = OpConstant %3 f32(0.05)
+ %39 = OpConstant %3 f32(1.15)
+ %40 = OpConstant %3 f32(7.5)
+ %41 = OpTypePointer StorageClass(Function) %21
+%200 = OpTypePointer StorageClass(Function) %18
  %5 = OpVariable %4 StorageClass(Input)
-%11 = OpVariable %10 StorageClass(Input)
-%14 = OpVariable %10 StorageClass(Input)
-%20 = OpVariable %19 StorageClass(Output)
-%24 = OpFunction %1 FunctionControl(0) %2
-%25 = OpLabel
-%26 = OpVariable %22 StorageClass(Function)
-%27 = OpVariable %17 StorageClass(Function)
-%28 = OpAccessChain %8 %27 %7
-      OpCopyMemory %28 %5
-%29 = OpAccessChain %13 %27 %12
-      OpCopyMemory %29 %11
-%30 = OpAccessChain %13 %27 %15
-      OpCopyMemory %30 %14
-%31 = OpAccessChain %13 %27 %15
-%32 = OpLoad %9 %31
-%33 = OpCompositeExtract %3 %32 0
-%34 = OpAccessChain %13 %27 %12
-%35 = OpLoad %9 %34
-%36 = OpCompositeExtract %3 %35 0
-%37 = OpFDiv %3 %33 %36
-%38 = OpAccessChain %13 %27 %15
-%39 = OpLoad %9 %38
-%40 = OpCompositeExtract %3 %39 1
-%41 = OpAccessChain %13 %27 %12
-%42 = OpLoad %9 %41
-%43 = OpCompositeExtract %3 %42 1
-%44 = OpFDiv %3 %40 %43
-%45 = OpCompositeConstruct %18 %37 %44 %23 %23
-%46 = OpAccessChain %47 %26 %7
-      OpStore %46 %45
-%48 = OpLoad %21 %26
-%49 = OpCompositeExtract %18 %48 0
-      OpStore %20 %49
+ %11 = OpVariable %10 StorageClass(Input)
+ %14 = OpVariable %10 StorageClass(Input)
+ %20 = OpVariable %19 StorageClass(Output)
+ %43 = OpFunction %1 FunctionControl(0) %2
+ %44 = OpLabel
+ %45 = OpVariable %13 StorageClass(Function)
+ %46 = OpVariable %13 StorageClass(Function)
+ %47 = OpVariable %26 StorageClass(Function)
+ %48 = OpVariable %26 StorageClass(Function)
+ %49 = OpVariable %26 StorageClass(Function)
+ %50 = OpVariable %8 StorageClass(Function)
+ %51 = OpVariable %8 StorageClass(Function)
+ %52 = OpVariable %8 StorageClass(Function)
+ %53 = OpVariable %26 StorageClass(Function)
+ %54 = OpVariable %26 StorageClass(Function)
+ %55 = OpVariable %26 StorageClass(Function)
+ %56 = OpVariable %31 StorageClass(Function)
+ %57 = OpVariable %31 StorageClass(Function)
+ %58 = OpVariable %8 StorageClass(Function)
+ %59 = OpVariable %8 StorageClass(Function)
+ %60 = OpVariable %41 StorageClass(Function)
+ %61 = OpVariable %17 StorageClass(Function)
+ %62 = OpAccessChain %8 %61 %7
+       OpCopyMemory %62 %5
+ %63 = OpAccessChain %13 %61 %12
+       OpCopyMemory %63 %11
+ %64 = OpAccessChain %13 %61 %15
+       OpCopyMemory %64 %14
+ %65 = OpAccessChain %13 %61 %15
+ %66 = OpLoad %9 %65
+ %67 = OpAccessChain %13 %61 %12
+ %68 = OpLoad %9 %67
+ %69 = OpFDiv %9 %66 %68
+ %70 = OpVectorTimesScalar %9 %69 %22
+ %71 = OpCompositeConstruct %9 %23 %23
+ %72 = OpFSub %9 %70 %71
+       OpStore %45 %72
+ %73 = OpLoad %9 %45
+ %74 = OpCompositeExtract %3 %73 0
+ %75 = OpAccessChain %13 %61 %12
+ %76 = OpLoad %9 %75
+ %77 = OpCompositeExtract %3 %76 0
+ %78 = OpAccessChain %13 %61 %12
+ %79 = OpLoad %9 %78
+ %80 = OpCompositeExtract %3 %79 1
+ %81 = OpFDiv %3 %77 %80
+ %82 = OpFMul %3 %74 %81
+ %83 = OpLoad %9 %45
+ %84 = OpCompositeExtract %3 %83 1
+ %85 = OpCompositeConstruct %9 %82 %84
+       OpStore %46 %85
+ %86 = OpCompositeConstruct %25 %24 %24 %24
+       OpStore %47 %86
+ %87 = OpCompositeConstruct %25 %24 %24 %27
+       OpStore %48 %87
+ %88 = OpLoad %9 %46
+ %89 = OpCompositeExtract %3 %88 0
+ %90 = OpLoad %9 %46
+ %91 = OpCompositeExtract %3 %90 1
+ %92 = OpCompositeConstruct %25 %89 %91 %23
+       OpStore %49 %92
+       OpStore %50 %24
+       OpStore %51 %24
+       OpStore %52 %28
+ %93 = OpLoad %25 %48
+       OpStore %53 %93
+ %94 = OpCompositeConstruct %25 %24 %24 %24
+       OpStore %54 %94
+ %95 = OpAccessChain %8 %61 %7
+ %96 = OpLoad %3 %95
+ %97 = OpFMul %3 %96 %29
+ %98 = OpExtInst %3 GLSLstd450 Sin %97
+ %99 = OpFMul %3 %98 %30
+%100 = OpAccessChain %8 %61 %7
+%101 = OpLoad %3 %100
+%102 = OpFMul %3 %101 %29
+%103 = OpExtInst %3 GLSLstd450 Cos %102
+%104 = OpFMul %3 %103 %30
+%105 = OpCompositeConstruct %25 %24 %99 %104
+       OpStore %55 %105
+       OpStore %56 %7
+       OpStore %57 %32
+       OpBranch %106
+%106 = OpLabel
+%110 = OpLoad %6 %56
+%111 = OpLoad %6 %57
+%112 = OpSLessThan %33 %110 %111
+       OpLoopMerge %108 %109 LoopControl(0)
+       OpBranchConditional %112 %107 %108
+%107 = OpLabel
+%113 = OpLoad %25 %48
+%114 = OpLoad %25 %49
+%115 = OpLoad %3 %50
+%116 = OpVectorTimesScalar %25 %114 %115
+%117 = OpFAdd %25 %113 %116
+       OpStore %53 %117
+%118 = OpLoad %25 %54
+%119 = OpLoad %25 %53
+%120 = OpFSub %25 %118 %119
+%121 = OpExtInst %3 GLSLstd450 Length %120
+%122 = OpFSub %3 %121 %23
+       OpStore %51 %122
+%123 = OpLoad %3 %50
+%124 = OpLoad %3 %51
+%125 = OpFAdd %3 %123 %124
+       OpStore %50 %125
+%129 = OpLoad %3 %52
+%130 = OpFOrdEqual %33 %129 %28
+%131 = OpLoad %3 %51
+%132 = OpLoad %3 %52
+%133 = OpFOrdLessThan %33 %131 %132
+%134 = OpLogicalOr %33 %130 %133
+       OpSelectionMerge %126 SelectionControl(0)
+       OpBranchConditional %134 %127 %128
+%127 = OpLabel
+%135 = OpLoad %3 %51
+       OpStore %52 %135
+       OpBranch %126
+%128 = OpLabel
+       OpBranch %126
+%126 = OpLabel
+%139 = OpLoad %3 %51
+%140 = OpFOrdLessThanEqual %33 %139 %34
+       OpSelectionMerge %136 SelectionControl(0)
+       OpBranchConditional %140 %137 %138
+%137 = OpLabel
+%141 = OpLoad %25 %54
+%142 = OpLoad %25 %53
+%143 = OpFSub %25 %141 %142
+%144 = OpExtInst %25 GLSLstd450 Normalize %143
+%145 = OpLoad %25 %53
+%146 = OpLoad %25 %55
+%147 = OpFSub %25 %145 %146
+%148 = OpExtInst %25 GLSLstd450 Normalize %147
+%149 = OpDot %3 %144 %148
+%150 = OpFSub %3 %149 %35
+%151 = OpExtInst %3 GLSLstd450 FMax %150 %24
+       OpStore %58 %151
+%152 = OpLoad %3 %58
+%153 = OpLoad %3 %58
+%154 = OpLoad %3 %58
+%155 = OpCompositeConstruct %25 %152 %153 %154
+       OpStore %47 %155
+       OpBranch %108
+%138 = OpLabel
+       OpBranch %136
+%136 = OpLabel
+%159 = OpLoad %3 %51
+%160 = OpFOrdGreaterThanEqual %33 %159 %36
+       OpSelectionMerge %156 SelectionControl(0)
+       OpBranchConditional %160 %157 %158
+%157 = OpLabel
+%164 = OpLoad %25 %49
+%165 = OpExtInst %25 GLSLstd450 Normalize %164
+%166 = OpLoad %25 %55
+%167 = OpLoad %25 %48
+%168 = OpFSub %25 %166 %167
+%169 = OpExtInst %25 GLSLstd450 Normalize %168
+%170 = OpDot %3 %165 %169
+%171 = OpFOrdLessThanEqual %33 %170 %23
+       OpSelectionMerge %161 SelectionControl(0)
+       OpBranchConditional %171 %162 %163
+%162 = OpLabel
+%172 = OpLoad %25 %49
+%173 = OpExtInst %25 GLSLstd450 Normalize %172
+%174 = OpLoad %25 %55
+%175 = OpLoad %25 %48
+%176 = OpFSub %25 %174 %175
+%177 = OpExtInst %25 GLSLstd450 Normalize %176
+%178 = OpDot %3 %173 %177
+%179 = OpFAdd %3 %178 %37
+%180 = OpExtInst %3 GLSLstd450 FMax %179 %38
+%181 = OpFDiv %3 %180 %39
+%182 = OpLoad %3 %52
+%183 = OpFMul %3 %182 %40
+%184 = OpFSub %3 %23 %183
+%185 = OpFMul %3 %181 %184
+       OpStore %59 %185
+%186 = OpLoad %3 %59
+%187 = OpLoad %3 %59
+%188 = OpLoad %3 %59
+%189 = OpCompositeConstruct %25 %186 %187 %188
+       OpStore %47 %189
+       OpBranch %161
+%163 = OpLabel
+       OpBranch %161
+%161 = OpLabel
+       OpBranch %108
+%158 = OpLabel
+       OpBranch %156
+%156 = OpLabel
+%190 = OpLoad %6 %56
+%191 = OpIAdd %6 %190 %12
+       OpStore %56 %191
+       OpBranch %109
+%109 = OpLabel
+       OpBranch %106
+%108 = OpLabel
+%192 = OpLoad %25 %47
+%193 = OpCompositeExtract %3 %192 0
+%194 = OpLoad %25 %47
+%195 = OpCompositeExtract %3 %194 1
+%196 = OpLoad %25 %47
+%197 = OpCompositeExtract %3 %196 2
+%198 = OpCompositeConstruct %18 %193 %195 %197 %23
+%199 = OpAccessChain %200 %60 %7
+       OpStore %199 %198
+%201 = OpLoad %21 %60
+%202 = OpCompositeExtract %18 %201 0
+       OpStore %20 %202
       OpReturn
       OpFunctionEnd
@@ -62,8 +62,8 @@ fn main(input: FragIn) -> FragOut
        }
    }

-    if (col == vec3[f32](0.0, 0.0, 0.0))
-        discard;
+   //if (col == vec3[f32](0.0, 0.0, 0.0))
+   //    discard;

    let output: FragOut;
    output.color = vec4[f32](col.x, col.y, col.z, 1.0);
@@ -1,12 +1,12 @@
 Version 1.0
 Generator: 2560130
-Bound: 210
+Bound: 203
 Schema: 0
       OpCapability Capability(Shader)
- %43 = OpExtInstImport "GLSL.std.450"
+ %42 = OpExtInstImport "GLSL.std.450"
       OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450)
-       OpEntryPoint ExecutionModel(Fragment) %44 "main" %5 %11 %14 %20
-       OpExecutionMode %44 ExecutionMode(OriginUpperLeft)
+       OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20
+       OpExecutionMode %43 ExecutionMode(OriginUpperLeft)
       OpSource SourceLanguage(NZSL) 4198400
       OpSourceExtension "Version: 1.1"
       OpName %16 "FragIn"
@@ -19,7 +19,7 @@ Schema: 0
       OpName %11 "res"
       OpName %14 "pos"
       OpName %20 "color"
-       OpName %44 "main"
+       OpName %43 "main"
       OpDecorate %5 Decoration(Location) 0
       OpDecorate %11 Decoration(Location) 1
       OpDecorate %14 Decoration(Location) 2
@@ -64,229 +64,218 @@ Schema: 0
 %38 = OpConstant %3 f32(0.05)
 %39 = OpConstant %3 f32(1.15)
 %40 = OpConstant %3 f32(7.5)
- %41 = OpTypeVector %33 3
- %42 = OpTypePointer StorageClass(Function) %21
-%207 = OpTypePointer StorageClass(Function) %18
+ %41 = OpTypePointer StorageClass(Function) %21
+%200 = OpTypePointer StorageClass(Function) %18
  %5 = OpVariable %4 StorageClass(Input)
 %11 = OpVariable %10 StorageClass(Input)
 %14 = OpVariable %10 StorageClass(Input)
 %20 = OpVariable %19 StorageClass(Output)
- %44 = OpFunction %1 FunctionControl(0) %2
- %45 = OpLabel
+ %43 = OpFunction %1 FunctionControl(0) %2
+ %44 = OpLabel
+ %45 = OpVariable %13 StorageClass(Function)
 %46 = OpVariable %13 StorageClass(Function)
- %47 = OpVariable %13 StorageClass(Function)
+ %47 = OpVariable %26 StorageClass(Function)
 %48 = OpVariable %26 StorageClass(Function)
 %49 = OpVariable %26 StorageClass(Function)
- %50 = OpVariable %26 StorageClass(Function)
+ %50 = OpVariable %8 StorageClass(Function)
 %51 = OpVariable %8 StorageClass(Function)
 %52 = OpVariable %8 StorageClass(Function)
- %53 = OpVariable %8 StorageClass(Function)
+ %53 = OpVariable %26 StorageClass(Function)
 %54 = OpVariable %26 StorageClass(Function)
 %55 = OpVariable %26 StorageClass(Function)
- %56 = OpVariable %26 StorageClass(Function)
+ %56 = OpVariable %31 StorageClass(Function)
 %57 = OpVariable %31 StorageClass(Function)
- %58 = OpVariable %31 StorageClass(Function)
+ %58 = OpVariable %8 StorageClass(Function)
 %59 = OpVariable %8 StorageClass(Function)
- %60 = OpVariable %8 StorageClass(Function)
- %61 = OpVariable %42 StorageClass(Function)
- %62 = OpVariable %17 StorageClass(Function)
- %63 = OpAccessChain %8 %62 %7
-       OpCopyMemory %63 %5
- %64 = OpAccessChain %13 %62 %12
-       OpCopyMemory %64 %11
- %65 = OpAccessChain %13 %62 %15
-       OpCopyMemory %65 %14
- %66 = OpAccessChain %13 %62 %15
- %67 = OpLoad %9 %66
- %68 = OpAccessChain %13 %62 %12
- %69 = OpLoad %9 %68
- %70 = OpFDiv %9 %67 %69
- %71 = OpVectorTimesScalar %9 %70 %22
- %72 = OpCompositeConstruct %9 %23 %23
- %73 = OpFSub %9 %71 %72
-       OpStore %46 %73
- %74 = OpLoad %9 %46
- %75 = OpCompositeExtract %3 %74 0
- %76 = OpAccessChain %13 %62 %12
- %77 = OpLoad %9 %76
- %78 = OpCompositeExtract %3 %77 0
- %79 = OpAccessChain %13 %62 %12
- %80 = OpLoad %9 %79
- %81 = OpCompositeExtract %3 %80 1
- %82 = OpFDiv %3 %78 %81
- %83 = OpFMul %3 %75 %82
- %84 = OpLoad %9 %46
- %85 = OpCompositeExtract %3 %84 1
- %86 = OpCompositeConstruct %9 %83 %85
+ %60 = OpVariable %41 StorageClass(Function)
+ %61 = OpVariable %17 StorageClass(Function)
+ %62 = OpAccessChain %8 %61 %7
+       OpCopyMemory %62 %5
+ %63 = OpAccessChain %13 %61 %12
+       OpCopyMemory %63 %11
+ %64 = OpAccessChain %13 %61 %15
+       OpCopyMemory %64 %14
+ %65 = OpAccessChain %13 %61 %15
+ %66 = OpLoad %9 %65
+ %67 = OpAccessChain %13 %61 %12
+ %68 = OpLoad %9 %67
+ %69 = OpFDiv %9 %66 %68
+ %70 = OpVectorTimesScalar %9 %69 %22
+ %71 = OpCompositeConstruct %9 %23 %23
+ %72 = OpFSub %9 %70 %71
+       OpStore %45 %72
+ %73 = OpLoad %9 %45
+ %74 = OpCompositeExtract %3 %73 0
+ %75 = OpAccessChain %13 %61 %12
+ %76 = OpLoad %9 %75
+ %77 = OpCompositeExtract %3 %76 0
+ %78 = OpAccessChain %13 %61 %12
+ %79 = OpLoad %9 %78
+ %80 = OpCompositeExtract %3 %79 1
+ %81 = OpFDiv %3 %77 %80
+ %82 = OpFMul %3 %74 %81
+ %83 = OpLoad %9 %45
+ %84 = OpCompositeExtract %3 %83 1
+ %85 = OpCompositeConstruct %9 %82 %84
+       OpStore %46 %85
+ %86 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %47 %86
- %87 = OpCompositeConstruct %25 %24 %24 %24
+ %87 = OpCompositeConstruct %25 %24 %24 %27
       OpStore %48 %87
- %88 = OpCompositeConstruct %25 %24 %24 %27
-       OpStore %49 %88
- %89 = OpLoad %9 %47
- %90 = OpCompositeExtract %3 %89 0
- %91 = OpLoad %9 %47
- %92 = OpCompositeExtract %3 %91 1
- %93 = OpCompositeConstruct %25 %90 %92 %23
-       OpStore %50 %93
+ %88 = OpLoad %9 %46
+ %89 = OpCompositeExtract %3 %88 0
+ %90 = OpLoad %9 %46
+ %91 = OpCompositeExtract %3 %90 1
+ %92 = OpCompositeConstruct %25 %89 %91 %23
+       OpStore %49 %92
+       OpStore %50 %24
       OpStore %51 %24
-       OpStore %52 %24
-       OpStore %53 %28
- %94 = OpLoad %25 %49
+       OpStore %52 %28
+ %93 = OpLoad %25 %48
+       OpStore %53 %93
+ %94 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %54 %94
- %95 = OpCompositeConstruct %25 %24 %24 %24
-       OpStore %55 %95
- %96 = OpAccessChain %8 %62 %7
- %97 = OpLoad %3 %96
- %98 = OpFMul %3 %97 %29
- %99 = OpExtInst %3 GLSLstd450 Sin %98
-%100 = OpFMul %3 %99 %30
-%101 = OpAccessChain %8 %62 %7
-%102 = OpLoad %3 %101
-%103 = OpFMul %3 %102 %29
-%104 = OpExtInst %3 GLSLstd450 Cos %103
-%105 = OpFMul %3 %104 %30
-%106 = OpCompositeConstruct %25 %24 %100 %105
-       OpStore %56 %106
-       OpStore %57 %7
-       OpStore %58 %32
-       OpBranch %107
-%107 = OpLabel
+ %95 = OpAccessChain %8 %61 %7
+ %96 = OpLoad %3 %95
+ %97 = OpFMul %3 %96 %29
+ %98 = OpExtInst %3 GLSLstd450 Sin %97
+ %99 = OpFMul %3 %98 %30
+%100 = OpAccessChain %8 %61 %7
+%101 = OpLoad %3 %100
+%102 = OpFMul %3 %101 %29
+%103 = OpExtInst %3 GLSLstd450 Cos %102
+%104 = OpFMul %3 %103 %30
+%105 = OpCompositeConstruct %25 %24 %99 %104
+       OpStore %55 %105
+       OpStore %56 %7
+       OpStore %57 %32
+       OpBranch %106
+%106 = OpLabel
+%110 = OpLoad %6 %56
 %111 = OpLoad %6 %57
-%112 = OpLoad %6 %58
-%113 = OpSLessThan %33 %111 %112
-       OpLoopMerge %109 %110 LoopControl(0)
-       OpBranchConditional %113 %108 %109
-%108 = OpLabel
+%112 = OpSLessThan %33 %110 %111
+       OpLoopMerge %108 %109 LoopControl(0)
+       OpBranchConditional %112 %107 %108
+%107 = OpLabel
+%113 = OpLoad %25 %48
 %114 = OpLoad %25 %49
-%115 = OpLoad %25 %50
-%116 = OpLoad %3 %51
-%117 = OpVectorTimesScalar %25 %115 %116
-%118 = OpFAdd %25 %114 %117
-       OpStore %54 %118
-%119 = OpLoad %25 %55
-%120 = OpLoad %25 %54
-%121 = OpFSub %25 %119 %120
-%122 = OpExtInst %3 GLSLstd450 Length %121
-%123 = OpFSub %3 %122 %23
-       OpStore %52 %123
+%115 = OpLoad %3 %50
+%116 = OpVectorTimesScalar %25 %114 %115
+%117 = OpFAdd %25 %113 %116
+       OpStore %53 %117
+%118 = OpLoad %25 %54
+%119 = OpLoad %25 %53
+%120 = OpFSub %25 %118 %119
+%121 = OpExtInst %3 GLSLstd450 Length %120
+%122 = OpFSub %3 %121 %23
+       OpStore %51 %122
+%123 = OpLoad %3 %50
 %124 = OpLoad %3 %51
-%125 = OpLoad %3 %52
-%126 = OpFAdd %3 %124 %125
-       OpStore %51 %126
-%130 = OpLoad %3 %53
-%131 = OpFOrdEqual %33 %130 %28
+%125 = OpFAdd %3 %123 %124
+       OpStore %50 %125
+%129 = OpLoad %3 %52
+%130 = OpFOrdEqual %33 %129 %28
+%131 = OpLoad %3 %51
 %132 = OpLoad %3 %52
-%133 = OpLoad %3 %53
-%134 = OpFOrdLessThan %33 %132 %133
-%135 = OpLogicalOr %33 %131 %134
-       OpSelectionMerge %127 SelectionControl(0)
-       OpBranchConditional %135 %128 %129
-%128 = OpLabel
-%136 = OpLoad %3 %52
-       OpStore %53 %136
-       OpBranch %127
-%129 = OpLabel
-       OpBranch %127
+%133 = OpFOrdLessThan %33 %131 %132
+%134 = OpLogicalOr %33 %130 %133
+       OpSelectionMerge %126 SelectionControl(0)
+       OpBranchConditional %134 %127 %128
 %127 = OpLabel
-%140 = OpLoad %3 %52
-%141 = OpFOrdLessThanEqual %33 %140 %34
-       OpSelectionMerge %137 SelectionControl(0)
-       OpBranchConditional %141 %138 %139
-%138 = OpLabel
-%142 = OpLoad %25 %55
-%143 = OpLoad %25 %54
-%144 = OpFSub %25 %142 %143
-%145 = OpExtInst %25 GLSLstd450 Normalize %144
-%146 = OpLoad %25 %54
-%147 = OpLoad %25 %56
-%148 = OpFSub %25 %146 %147
-%149 = OpExtInst %25 GLSLstd450 Normalize %148
-%150 = OpDot %3 %145 %149
-%151 = OpFSub %3 %150 %35
-%152 = OpExtInst %3 GLSLstd450 FMax %151 %24
-       OpStore %59 %152
-%153 = OpLoad %3 %59
-%154 = OpLoad %3 %59
-%155 = OpLoad %3 %59
-%156 = OpCompositeConstruct %25 %153 %154 %155
-       OpStore %48 %156
-       OpBranch %109
-%139 = OpLabel
-       OpBranch %137
+%135 = OpLoad %3 %51
+       OpStore %52 %135
+       OpBranch %126
+%128 = OpLabel
+       OpBranch %126
+%126 = OpLabel
+%139 = OpLoad %3 %51
+%140 = OpFOrdLessThanEqual %33 %139 %34
+       OpSelectionMerge %136 SelectionControl(0)
+       OpBranchConditional %140 %137 %138
 %137 = OpLabel
-%160 = OpLoad %3 %52
-%161 = OpFOrdGreaterThanEqual %33 %160 %36
-       OpSelectionMerge %157 SelectionControl(0)
-       OpBranchConditional %161 %158 %159
-%158 = OpLabel
-%165 = OpLoad %25 %50
-%166 = OpExtInst %25 GLSLstd450 Normalize %165
-%167 = OpLoad %25 %56
-%168 = OpLoad %25 %49
-%169 = OpFSub %25 %167 %168
-%170 = OpExtInst %25 GLSLstd450 Normalize %169
-%171 = OpDot %3 %166 %170
-%172 = OpFOrdLessThanEqual %33 %171 %23
-       OpSelectionMerge %162 SelectionControl(0)
-       OpBranchConditional %172 %163 %164
-%163 = OpLabel
-%173 = OpLoad %25 %50
-%174 = OpExtInst %25 GLSLstd450 Normalize %173
-%175 = OpLoad %25 %56
-%176 = OpLoad %25 %49
-%177 = OpFSub %25 %175 %176
-%178 = OpExtInst %25 GLSLstd450 Normalize %177
-%179 = OpDot %3 %174 %178
-%180 = OpFAdd %3 %179 %37
-%181 = OpExtInst %3 GLSLstd450 FMax %180 %38
-%182 = OpFDiv %3 %181 %39
-%183 = OpLoad %3 %53
-%184 = OpFMul %3 %183 %40
-%185 = OpFSub %3 %23 %184
-%186 = OpFMul %3 %182 %185
-       OpStore %60 %186
-%187 = OpLoad %3 %60
-%188 = OpLoad %3 %60
-%189 = OpLoad %3 %60
-%190 = OpCompositeConstruct %25 %187 %188 %189
-       OpStore %48 %190
-       OpBranch %162
-%164 = OpLabel
-       OpBranch %162
-%162 = OpLabel
-       OpBranch %109
-%159 = OpLabel
-       OpBranch %157
+%141 = OpLoad %25 %54
+%142 = OpLoad %25 %53
+%143 = OpFSub %25 %141 %142
+%144 = OpExtInst %25 GLSLstd450 Normalize %143
+%145 = OpLoad %25 %53
+%146 = OpLoad %25 %55
+%147 = OpFSub %25 %145 %146
+%148 = OpExtInst %25 GLSLstd450 Normalize %147
+%149 = OpDot %3 %144 %148
+%150 = OpFSub %3 %149 %35
+%151 = OpExtInst %3 GLSLstd450 FMax %150 %24
+       OpStore %58 %151
+%152 = OpLoad %3 %58
+%153 = OpLoad %3 %58
+%154 = OpLoad %3 %58
+%155 = OpCompositeConstruct %25 %152 %153 %154
+       OpStore %47 %155
+       OpBranch %108
+%138 = OpLabel
+       OpBranch %136
+%136 = OpLabel
+%159 = OpLoad %3 %51
+%160 = OpFOrdGreaterThanEqual %33 %159 %36
+       OpSelectionMerge %156 SelectionControl(0)
+       OpBranchConditional %160 %157 %158
 %157 = OpLabel
-%191 = OpLoad %6 %57
-%192 = OpIAdd %6 %191 %12
-       OpStore %57 %192
-       OpBranch %110
-%110 = OpLabel
-       OpBranch %107
+%164 = OpLoad %25 %49
+%165 = OpExtInst %25 GLSLstd450 Normalize %164
+%166 = OpLoad %25 %55
+%167 = OpLoad %25 %48
+%168 = OpFSub %25 %166 %167
+%169 = OpExtInst %25 GLSLstd450 Normalize %168
+%170 = OpDot %3 %165 %169
+%171 = OpFOrdLessThanEqual %33 %170 %23
+       OpSelectionMerge %161 SelectionControl(0)
+       OpBranchConditional %171 %162 %163
+%162 = OpLabel
+%172 = OpLoad %25 %49
+%173 = OpExtInst %25 GLSLstd450 Normalize %172
+%174 = OpLoad %25 %55
+%175 = OpLoad %25 %48
+%176 = OpFSub %25 %174 %175
+%177 = OpExtInst %25 GLSLstd450 Normalize %176
+%178 = OpDot %3 %173 %177
+%179 = OpFAdd %3 %178 %37
+%180 = OpExtInst %3 GLSLstd450 FMax %179 %38
+%181 = OpFDiv %3 %180 %39
+%182 = OpLoad %3 %52
+%183 = OpFMul %3 %182 %40
+%184 = OpFSub %3 %23 %183
+%185 = OpFMul %3 %181 %184
+       OpStore %59 %185
+%186 = OpLoad %3 %59
+%187 = OpLoad %3 %59
+%188 = OpLoad %3 %59
+%189 = OpCompositeConstruct %25 %186 %187 %188
+       OpStore %47 %189
+       OpBranch %161
+%163 = OpLabel
+       OpBranch %161
+%161 = OpLabel
+       OpBranch %108
+%158 = OpLabel
+       OpBranch %156
+%156 = OpLabel
+%190 = OpLoad %6 %56
+%191 = OpIAdd %6 %190 %12
+       OpStore %56 %191
+       OpBranch %109
 %109 = OpLabel
-%196 = OpLoad %25 %48
-%197 = OpCompositeConstruct %25 %24 %24 %24
-%198 = OpFOrdEqual %41 %196 %197
-       OpSelectionMerge %193 SelectionControl(0)
-       OpBranchConditional %198 %194 %195
-%194 = OpLabel
-       OpKill
-%195 = OpLabel
-       OpBranch %193
-%193 = OpLabel
-%199 = OpLoad %25 %48
-%200 = OpCompositeExtract %3 %199 0
-%201 = OpLoad %25 %48
-%202 = OpCompositeExtract %3 %201 1
-%203 = OpLoad %25 %48
-%204 = OpCompositeExtract %3 %203 2
-%205 = OpCompositeConstruct %18 %200 %202 %204 %23
-%206 = OpAccessChain %207 %61 %7
-       OpStore %206 %205
-%208 = OpLoad %21 %61
-%209 = OpCompositeExtract %18 %208 0
-       OpStore %20 %209
+       OpBranch %106
+%108 = OpLabel
+%192 = OpLoad %25 %47
+%193 = OpCompositeExtract %3 %192 0
+%194 = OpLoad %25 %47
+%195 = OpCompositeExtract %3 %194 1
+%196 = OpLoad %25 %47
+%197 = OpCompositeExtract %3 %196 2
+%198 = OpCompositeConstruct %18 %193 %195 %197 %23
+%199 = OpAccessChain %200 %60 %7
+       OpStore %199 %198
+%201 = OpLoad %21 %60
+%202 = OpCompositeExtract %18 %201 0
+       OpStore %20 %202
       OpReturn
       OpFunctionEnd
@@ -0,0 +1,91 @@
+//! Zig translation of the enums and helper constants from the GLSL.std.450 extended instruction set header, gathered in one file.
+
+pub const GLSLstd450Version: u32 = 100;
+pub const GLSLstd450Revision: u32 = 3;
+
+pub const GLSLOp = enum(u32) {
+    Bad = 0,
+    Round = 1,
+    RoundEven = 2,
+    Trunc = 3,
+    FAbs = 4,
+    SAbs = 5,
+    FSign = 6,
+    SSign = 7,
+    Floor = 8,
+    Ceil = 9,
+    Fract = 10,
+    Radians = 11,
+    Degrees = 12,
+    Sin = 13,
+    Cos = 14,
+    Tan = 15,
+    Asin = 16,
+    Acos = 17,
+    Atan = 18,
+    Sinh = 19,
+    Cosh = 20,
+    Tanh = 21,
+    Asinh = 22,
+    Acosh = 23,
+    Atanh = 24,
+    Atan2 = 25,
+    Pow = 26,
+    Exp = 27,
+    Log = 28,
+    Exp2 = 29,
+    Log2 = 30,
+    Sqrt = 31,
+    InverseSqrt = 32,
+    Determinant = 33,
+    MatrixInverse = 34,
+    Modf = 35,
+    ModfStruct = 36,
+    FMin = 37,
+    UMin = 38,
+    SMin = 39,
+    FMax = 40,
+    UMax = 41,
+    SMax = 42,
+    FClamp = 43,
+    UClamp = 44,
+    SClamp = 45,
+    FMix = 46,
+    IMix = 47,
+    Step = 48,
+    SmoothStep = 49,
+    Fma = 50,
+    Frexp = 51,
+    FrexpStruct = 52,
+    Ldexp = 53,
+    PackSnorm4x8 = 54,
+    PackUnorm4x8 = 55,
+    PackSnorm2x16 = 56,
+    PackUnorm2x16 = 57,
+    PackHalf2x16 = 58,
+    PackDouble2x32 = 59,
+    UnpackSnorm2x16 = 60,
+    UnpackUnorm2x16 = 61,
+    UnpackHalf2x16 = 62,
+    UnpackSnorm4x8 = 63,
+    UnpackUnorm4x8 = 64,
+    UnpackDouble2x32 = 65,
+    Length = 66,
+    Distance = 67,
+    Cross = 68,
+    Normalize = 69,
+    FaceForward = 70,
+    Reflect = 71,
+    Refract = 72,
+    FindILsb = 73,
+    FindSMsb = 74,
+    FindUMsb = 75,
+    InterpolateAtCentroid = 76,
+    InterpolateAtSample = 77,
+    InterpolateAtOffset = 78,
+    NMin = 79,
+    NMax = 80,
+    NClamp = 81,
+};
+
+pub const GLSLOpMaxValue: usize = 82;
@@ -0,0 +1,312 @@
+const std = @import("std");
+const spv = @import("../spv.zig");
+const ext = @import("GLSL_std_450.zig");
+const opc = @import("../opcodes.zig");
+
+const Module = @import("../Module.zig");
+const Runtime = @import("../Runtime.zig");
+const Result = @import("../Result.zig");
+const WordIterator = @import("../WordIterator.zig");
+
+const RuntimeError = Runtime.RuntimeError;
+const ValueType = opc.ValueType;
+
+const getValuePrimitiveField = opc.getValuePrimitiveField;
+const getValuePrimitiveFieldType = opc.getValuePrimitiveFieldType;
+
+const SpvVoid = spv.SpvVoid;
+const SpvByte = spv.SpvByte;
+const SpvWord = spv.SpvWord;
+const SpvBool = spv.SpvBool;
+
+/// Operations a `MathEngine` can be specialised for.
+/// The `MathEngine` handlers in this file only implement a subset of these members;
+/// a specialisation on any other member makes the handler return `InvalidSpirV`.
+const MathOp = enum {
+    Acos,
+    Acosh,
+    Asin,
+    Asinh,
+    Atan,
+    Atan2,
+    Atanh,
+    Ceil,
+    Cos,
+    Cosh,
+    Determinant,
+    Exp,
+    Exp2,
+    FAbs,
+    FClamp,
+    FMax,
+    FMin,
+    FMix,
+    FSign,
+    Floor,
+    Fract,
+    IMix,
+    InverseSqrt,
+    Log,
+    Log2,
+    Modf,
+    Pow,
+    Round,
+    RoundEven,
+    SAbs,
+    SClamp,
+    SMax,
+    SMin,
+    SSign,
+    Sin,
+    Sinh,
+    Sqrt,
+    Tan,
+    Tanh,
+    Trunc,
+    UClamp,
+    UMax,
+    UMin,
+};
+
+pub const OpCodeExtFunc = opc.OpCodeExtFunc;
+
+/// Handler table indexed by `ext.GLSLOp` instruction number; a slot stays `null` until
+/// `initRuntimeDispatcher` installs a handler in it.
+/// Not an EnumMap as it is way too slow for this purpose.
+pub var runtime_dispatcher = [_]?OpCodeExtFunc{null} ** ext.GLSLOpMaxValue;
+
+/// Installs the handlers of the extended instructions implemented in this file.
+pub fn initRuntimeDispatcher() void {
+    const FloatCos = MathEngine(.Float, .Cos);
+    const FloatSin = MathEngine(.Float, .Sin);
+    const FloatMax = MathEngine(.Float, .FMax);
+
+    // Element-wise math instructions
+    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sin)] = FloatSin.opSingleOperator;
+    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Cos)] = FloatCos.opSingleOperator;
+    runtime_dispatcher[@intFromEnum(ext.GLSLOp.FMax)] = FloatMax.opDoubleOperators;
+
+    // Geometric instructions
+    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Length)] = opLength;
+    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Normalize)] = opNormalize;
+}
+
+/// Generates the handlers of the element-wise GLSL.std.450 math instructions.
+///
+/// `T` selects which primitive field of a `Result.Value` is read and written, `Op` the operation
+/// applied to every lane. Both are comptime, so each `MathEngine(T, Op)` is its own specialisation.
+fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
+    return struct {
+        /// Handler of the instructions taking a single operand (`Sin` and `Cos` so far).
+        ///
+        /// The operand id is read from `rt.it`; `Op` is applied to each of its lanes and the outcome
+        /// is written to the value of result `id`.
+        /// Returns `InvalidSpirV` when the operand is not stored in the same variant as the result,
+        /// when their lane counts differ, when the lane width is unsupported or when `Op` is not a
+        /// single operand operation.
+        fn opSingleOperator(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
+            const target_type = (try rt.results[target_type_id].getVariant()).Type;
+            const dst = try rt.results[id].getValue();
+            const src = try rt.results[try rt.it.next()].getValue();
+
+            // Every branch below reads the operand through the variant of the result: reject a
+            // mismatch here instead of accessing an inactive union field.
+            if (std.meta.activeTag(dst.*) != std.meta.activeTag(src.*)) return RuntimeError.InvalidSpirV;
+
+            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
+
+            const operator = struct {
+                /// Applies `Op` to `x`, which is either a scalar or a SIMD vector.
+                fn operation(comptime TT: type, x: TT) RuntimeError!TT {
+                    return switch (Op) {
+                        .Sin => @sin(x),
+                        .Cos => @cos(x),
+                        else => RuntimeError.InvalidSpirV,
+                    };
+                }
+
+                /// Applies `Op` to the `bit_count` wide primitive field of `s` and stores it in `d`.
+                fn applyScalar(bit_count: SpvWord, d: *Result.Value, s: *Result.Value) RuntimeError!void {
+                    switch (bit_count) {
+                        inline 8, 16, 32, 64 => |bits| {
+                            // There is no 8 bits wide float
+                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
+
+                            const ScalarT = getValuePrimitiveFieldType(T, bits);
+                            const d_field = try getValuePrimitiveField(T, bits, d);
+                            const s_field = try getValuePrimitiveField(T, bits, s);
+                            d_field.* = try operation(ScalarT, s_field.*);
+                        },
+                        else => return RuntimeError.InvalidSpirV,
+                    }
+                }
+            };
+
+            switch (dst.*) {
+                .Int, .Float => try operator.applyScalar(lane_bits, dst, src),
+
+                .Vector => |dst_vec| {
+                    const src_vec = src.Vector;
+                    if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV;
+
+                    // Lanes are visited by pointer to avoid copying a whole `Result.Value` per lane
+                    for (dst_vec, src_vec) |*d_lane, *s_lane| {
+                        try operator.applyScalar(lane_bits, d_lane, s_lane);
+                    }
+                },
+
+                .Vector4f32 => |*d| d.* = try operator.operation(@Vector(4, f32), src.Vector4f32),
+                .Vector3f32 => |*d| d.* = try operator.operation(@Vector(3, f32), src.Vector3f32),
+                .Vector2f32 => |*d| d.* = try operator.operation(@Vector(2, f32), src.Vector2f32),
+
+                // Integer SIMD variants end up here too: no single operand operation handles them yet
+                else => return RuntimeError.InvalidSpirV,
+            }
+        }
+
+        /// Handler of the instructions taking two operands (`FMax` so far).
+        ///
+        /// Both operand ids are read from `rt.it`; `Op` is applied lane by lane and the outcome is
+        /// written to the value of result `id`.
+        /// Returns `InvalidSpirV` when an operand is not stored in the same variant as the result,
+        /// when lane counts differ, when the lane width is unsupported or when `Op` is not a two
+        /// operands operation.
+        fn opDoubleOperators(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
+            const target_type = (try rt.results[target_type_id].getVariant()).Type;
+            const dst = try rt.results[id].getValue();
+            const lhs = try rt.results[try rt.it.next()].getValue();
+            const rhs = try rt.results[try rt.it.next()].getValue();
+
+            // Same guard as the single operand handler, for both operands
+            const dst_tag = std.meta.activeTag(dst.*);
+            if (std.meta.activeTag(lhs.*) != dst_tag or std.meta.activeTag(rhs.*) != dst_tag) return RuntimeError.InvalidSpirV;
+
+            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
+
+            const operator = struct {
+                /// Applies `Op` to `l` and `r`, which are either scalars or SIMD vectors.
+                fn operation(comptime TT: type, l: TT, r: TT) RuntimeError!TT {
+                    return switch (Op) {
+                        .FMax => @max(l, r),
+                        else => RuntimeError.InvalidSpirV,
+                    };
+                }
+
+                /// Applies `Op` to the `bit_count` wide primitive fields of `l` and `r` and stores it in `d`.
+                fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *Result.Value, r: *Result.Value) RuntimeError!void {
+                    switch (bit_count) {
+                        inline 8, 16, 32, 64 => |bits| {
+                            // There is no 8 bits wide float
+                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
+
+                            const ScalarT = getValuePrimitiveFieldType(T, bits);
+                            const d_field = try getValuePrimitiveField(T, bits, d);
+                            const l_field = try getValuePrimitiveField(T, bits, l);
+                            const r_field = try getValuePrimitiveField(T, bits, r);
+                            d_field.* = try operation(ScalarT, l_field.*, r_field.*);
+                        },
+                        else => return RuntimeError.InvalidSpirV,
+                    }
+                }
+
+                /// Applies `Op` to whole SIMD vectors at once, as the single operand handler does.
+                inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void {
+                    d.* = try operation(@Vector(N, ElemT), l.*, r.*);
+                }
+            };
+
+            switch (dst.*) {
+                .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs),
+
+                .Vector => |dst_vec| {
+                    const lhs_vec = lhs.Vector;
+                    const rhs_vec = rhs.Vector;
+                    if (dst_vec.len != lhs_vec.len or dst_vec.len != rhs_vec.len) return RuntimeError.InvalidSpirV;
+
+                    for (dst_vec, lhs_vec, rhs_vec) |*d_lane, *l_lane, *r_lane| {
+                        try operator.applyScalar(lane_bits, d_lane, l_lane, r_lane);
+                    }
+                },
+
+                .Vector4f32 => |*d| try operator.applySIMDVector(f32, 4, d, &lhs.Vector4f32, &rhs.Vector4f32),
+                .Vector3f32 => |*d| try operator.applySIMDVector(f32, 3, d, &lhs.Vector3f32, &rhs.Vector3f32),
+                .Vector2f32 => |*d| try operator.applySIMDVector(f32, 2, d, &lhs.Vector2f32, &rhs.Vector2f32),
+
+                .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32),
+                .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32),
+                .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32),
+
+                .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32),
+                .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32),
+                .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32),
+
+                else => return RuntimeError.InvalidSpirV,
+            }
+        }
+    };
+}
+
+/// Adds the square of every lane of `v` to `d.*`, i.e. accumulates the squared euclidean length.
+inline fn sumSquaresSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void {
+    inline for (0..N) |i| {
+        d.* += v[i] * v[i];
+    }
+}
+
+/// Writes `s` divided by its euclidean length into `d`.
+/// A zero length operand is not special-cased: the division by zero is left to float semantics.
+inline fn normalizeSIMDVector(comptime N: usize, d: *@Vector(N, f32), s: *const @Vector(N, f32)) void {
+    var squared_length: f32 = 0.0;
+    sumSquaresSIMDVector(f32, N, &squared_length, s);
+
+    const length = @sqrt(squared_length);
+    inline for (0..N) |i| {
+        d[i] = s[i] / length;
+    }
+}
+
+/// GLSL.std.450 `Length`: stores the euclidean length of the operand, sqrt(x[0]^2 + x[1]^2 + ...),
+/// in the scalar float value of result `id`.
+///
+/// The operand id is read from `rt.it`. A scalar operand yields its absolute value.
+/// Returns `InvalidSpirV` when the lane width is not 16, 32 or 64 bits, when the operand is
+/// neither a float nor a vector, or when an f32 SIMD operand is paired with another lane width.
+fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
+    const target_type = (try rt.results[target_type_id].getVariant()).Type;
+    const dst = try rt.results[id].getValue();
+    const src = try rt.results[try rt.it.next()].getValue();
+
+    const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
+
+    switch (lane_bits) {
+        inline 16, 32, 64 => |bits| {
+            const d_field = try getValuePrimitiveField(.Float, bits, dst);
+            var squared_length: std.meta.Float(bits) = 0.0;
+
+            switch (src.*) {
+                .Float => {
+                    // Fast path: the length of a scalar is its absolute value
+                    const s_field = try getValuePrimitiveField(.Float, bits, src);
+                    d_field.* = @abs(s_field.*);
+                    return;
+                },
+                .Vector => |src_vec| for (src_vec) |*s_lane| {
+                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
+                    squared_length += s_field.* * s_field.*;
+                },
+                // The SIMD variants only hold f32 lanes, `bits` being comptime the other widths
+                // compile down to the error
+                .Vector4f32 => |*src_vec| {
+                    if (bits == 32) sumSquaresSIMDVector(f32, 4, &squared_length, src_vec) else return RuntimeError.InvalidSpirV;
+                },
+                .Vector3f32 => |*src_vec| {
+                    if (bits == 32) sumSquaresSIMDVector(f32, 3, &squared_length, src_vec) else return RuntimeError.InvalidSpirV;
+                },
+                .Vector2f32 => |*src_vec| {
+                    if (bits == 32) sumSquaresSIMDVector(f32, 2, &squared_length, src_vec) else return RuntimeError.InvalidSpirV;
+                },
+                else => return RuntimeError.InvalidSpirV,
+            }
+
+            d_field.* = @sqrt(squared_length);
+        },
+        else => return RuntimeError.InvalidSpirV,
+    }
+}
+
+/// GLSL.std.450 `Normalize`: stores the operand divided by its euclidean length in the value of
+/// result `id`, so the outcome points in the same direction with a length of 1.
+///
+/// The operand id is read from `rt.it`. A scalar operand is divided by its absolute value.
+/// A zero length operand is not special-cased: the division by zero is left to float semantics.
+/// Returns `InvalidSpirV` when the operand is not stored in the same variant as the result, when
+/// lane counts differ, when the lane width is not 16, 32 or 64 bits, or when the result is neither
+/// a float nor a float vector.
+fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
+    const target_type = (try rt.results[target_type_id].getVariant()).Type;
+    const dst = try rt.results[id].getValue();
+    const src = try rt.results[try rt.it.next()].getValue();
+
+    // The operand is read through the variant of the result below: reject a mismatch here
+    // instead of accessing an inactive union field.
+    if (std.meta.activeTag(dst.*) != std.meta.activeTag(src.*)) return RuntimeError.InvalidSpirV;
+
+    const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
+
+    switch (lane_bits) {
+        inline 16, 32, 64 => |bits| switch (dst.*) {
+            .Float => {
+                const d_field = try getValuePrimitiveField(.Float, bits, dst);
+                const s_field = try getValuePrimitiveField(.Float, bits, src);
+                d_field.* = s_field.* / @abs(s_field.*);
+            },
+            .Vector => |dst_vec| {
+                const src_vec = src.Vector;
+                if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV;
+
+                var squared_length: std.meta.Float(bits) = 0.0;
+                for (src_vec) |*s_lane| {
+                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
+                    squared_length += s_field.* * s_field.*;
+                }
+
+                const length = @sqrt(squared_length);
+                for (dst_vec, src_vec) |*d_lane, *s_lane| {
+                    const d_field = try getValuePrimitiveField(.Float, bits, d_lane);
+                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
+                    d_field.* = s_field.* / length;
+                }
+            },
+            // The SIMD variants only hold f32 lanes, `bits` being comptime the other widths
+            // compile down to the error
+            .Vector4f32 => |*dst_vec| {
+                if (bits == 32) normalizeSIMDVector(4, dst_vec, &src.Vector4f32) else return RuntimeError.InvalidSpirV;
+            },
+            .Vector3f32 => |*dst_vec| {
+                if (bits == 32) normalizeSIMDVector(3, dst_vec, &src.Vector3f32) else return RuntimeError.InvalidSpirV;
+            },
+            .Vector2f32 => |*dst_vec| {
+                if (bits == 32) normalizeSIMDVector(2, dst_vec, &src.Vector2f32) else return RuntimeError.InvalidSpirV;
+            },
+            else => return RuntimeError.InvalidSpirV,
+        },
+        else => return RuntimeError.InvalidSpirV,
+    }
+}
@@ -2,7 +2,8 @@ const std = @import("std");
 const spv = @import("spv.zig");
 const op = @import("opcodes.zig");

-const RuntimeError = @import("Runtime.zig").RuntimeError;
+const Runtime = @import("Runtime.zig");
+const RuntimeError = Runtime.RuntimeError;

 const SpvVoid = spv.SpvVoid;
 const SpvByte = spv.SpvByte;
@@ -227,13 +228,7 @@ pub const Value = union(Type) {
    }
 };

-pub const VariantData = union(Variant) {
-    String: []const u8,
-    Extension: struct {
-        /// Should not be allocated but rather a pointer to a static array
-        dispatcher: []op.OpCodeExtFunc,
-    },
-    Type: union(Type) {
+pub const TypeData = union(Type) {
    Void: struct {},
    Bool: struct {},
    Int: struct {
@@ -285,7 +280,15 @@ pub const VariantData = union(Variant) {
        storage_class: spv.SpvStorageClass,
        target: SpvWord,
    },
+};
+
+pub const VariantData = union(Variant) {
+    String: []const u8,
+    Extension: struct {
+        /// Should not be allocated but rather a pointer to a static array
+        dispatcher: []?op.OpCodeExtFunc,
    },
+    Type: TypeData,
    Variable: struct {
        storage_class: spv.SpvStorageClass,
        type_word: SpvWord,
@@ -364,7 +367,7 @@ pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.decorations.deinit(allocator);
 }

-pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
+pub inline fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
    return switch ((try self.getVariant()).*) {
        .Variable => |v| v.type_word,
        .Constant => |c| c.type_word,
@@ -374,7 +377,7 @@ pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
    };
 }

-pub fn getValueType(self: *Self) RuntimeError!Type {
+pub inline fn getValueType(self: *Self) RuntimeError!Type {
    return switch ((try self.getVariant()).*) {
        .Variable => |v| v.type,
        .Constant => |c| c.type,
@@ -383,7 +386,7 @@ pub fn getValueType(self: *Self) RuntimeError!Type {
    };
 }

-pub fn getValue(self: *Self) RuntimeError!*Value {
+pub inline fn getValue(self: *Self) RuntimeError!*Value {
    return switch ((try self.getVariant()).*) {
        .Variable => |*v| &v.value,
        .Constant => |*c| &c.value,
@@ -471,6 +474,26 @@ pub fn dupe(self: *const Self, allocator: std.mem.Allocator) RuntimeError!Self {
    };
 }

+/// Returns the bit width of one lane of `target_type`.
+///
+/// Bools count as 8 bits, floats and ints report their own `bit_length`, the fixed SIMD vector
+/// types are 32 bits per lane and a generic vector is resolved through its component type, looked
+/// up in `rt.results`. Every other type yields `InvalidSpirV`.
+pub fn resolveLaneBitWidth(target_type: TypeData, rt: *const Runtime) RuntimeError!SpvWord {
+    var current = target_type;
+    while (true) {
+        switch (current) {
+            .Bool => return 8,
+            .Float => |float| return float.bit_length,
+            .Int => |int| return int.bit_length,
+            .Vector => |vector| {
+                // Restart the resolution on the type of the components
+                const component = try rt.results[vector.components_type_word].getVariant();
+                current = component.Type;
+            },
+            .Vector4f32,
+            .Vector3f32,
+            .Vector2f32,
+            .Vector4i32,
+            .Vector3i32,
+            .Vector2i32,
+            .Vector4u32,
+            .Vector3u32,
+            .Vector2u32,
+            => return 32,
+            else => return RuntimeError.InvalidSpirV,
+        }
+    }
+}
+
 pub fn resolveType(self: *const Self, results: []const Self) *const Self {
    return if (self.variant) |variant|
        switch (variant) {
@@ -131,10 +131,7 @@ pub fn callEntryPoint(self: *Self, allocator: std.mem.Allocator, entry_point_ind

        var it_tmp = self.it; // Save because operations may iter on this iterator
        if (op.runtime_dispatcher[opcode]) |pfn| {
-            pfn(allocator, word_count, self) catch |err| switch (err) {
-                RuntimeError.Killed => return,
-                else => return err,
-            };
+            try pfn(allocator, word_count, self);
        }
        if (!self.it.did_jump) {
            _ = it_tmp.skipN(word_count);
@@ -36,3 +36,10 @@ pub const Runtime = @import("Runtime.zig");

 const opcodes = @import("opcodes.zig");
 const spv = @import("spv.zig");
+
+pub const SpvVoid = spv.SpvVoid;
+pub const SpvByte = spv.SpvByte;
+pub const SpvWord = spv.SpvWord;
+pub const SpvBool = spv.SpvBool;
+
+pub const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig");
@@ -1,6 +1,8 @@
 const std = @import("std");
 const spv = @import("spv.zig");

+const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig");
+
 const Module = @import("Module.zig");
 const Runtime = @import("Runtime.zig");
 const Result = @import("Result.zig");
@@ -13,13 +15,7 @@ const SpvByte = spv.SpvByte;
 const SpvWord = spv.SpvWord;
 const SpvBool = spv.SpvBool;

-// OpExtInst Sin
-// OpExtInst Cos
-// OpExtInst Length
-// OpExtInst Normalize
-// OpExtInst FMax
-
-const ValueType = enum {
+pub const ValueType = enum {
    Bool,
    Float,
    SInt,
@@ -230,11 +226,11 @@ pub fn initRuntimeDispatcher() void {
    runtime_dispatcher[@intFromEnum(spv.SpvOp.ISub)]                   = MathEngine(.SInt, .Sub).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.Kill)]                   = opKill;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.Load)]                   = opLoad;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)]             = CondEngine(.Float, .LogicalAnd).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)]           = CondEngine(.Float, .LogicalEqual).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)]             = CondEngine(.Float, .LogicalNot).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)]        = CondEngine(.Float, .LogicalNotEqual).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)]              = CondEngine(.Float, .LogicalOr).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)]             = CondEngine(.Bool, .LogicalAnd).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)]           = CondEngine(.Bool, .LogicalEqual).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)]             = CondEngine(.Bool, .LogicalNot).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)]        = CondEngine(.Bool, .LogicalNotEqual).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)]              = CondEngine(.Bool, .LogicalOr).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesMatrix)]      = MathEngine(.Float, .MatrixTimesMatrix).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesScalar)]      = MathEngine(.Float, .MatrixTimesScalar).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesVector)]      = MathEngine(.Float, .MatrixTimesVector).op; // TODO
@@ -261,38 +257,37 @@ pub fn initRuntimeDispatcher() void {
    runtime_dispatcher[@intFromEnum(spv.SpvOp.UMod)]                   = MathEngine(.UInt, .Mod).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesMatrix)]      = MathEngine(.Float, .VectorTimesMatrix).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesScalar)]      = MathEngine(.Float, .VectorTimesScalar).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.ExtInst)]                = opExtInst;
    // zig fmt: on
+
+    // Extensions init
+    GLSL_std_450.initRuntimeDispatcher();
 }

-fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
-    if (T == .Float) @compileError("Invalid value type");
+/// Tells whether `a` and `b` agree on their first `@min(a.len, b.len)` bytes.
+/// Lengths are not compared: a string matches any string it is a prefix of, the empty string
+/// matches everything.
+fn extEqlName(a: []const u8, b: []const u8) bool {
+    const shared_len = @min(a.len, b.len);
+    return std.mem.eql(u8, a[0..shared_len], b[0..shared_len]);
+}
+
+/// Comptime map from an extended instruction set name to its handler table.
+/// Only "GLSL.std.450" is registered; it maps to a slice over `GLSL_std_450.runtime_dispatcher`.
+/// Keys are compared with `extEqlName`.
+/// NOTE(review): the std map presumably only hands equal length strings to the comparison
+/// function, in which case the prefix tolerance of `extEqlName` never applies here - confirm.
+const extensions_map = std.StaticStringMapWithEql([]?OpCodeExtFunc, extEqlName).initComptime(.{
+    .{ "GLSL.std.450", GLSL_std_450.runtime_dispatcher[0..] },
+});
+
+fn BitOperator(comptime T: ValueType, comptime Op: BitOp) type {
    return struct {
-        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
-            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
-            const value = try rt.results[try rt.it.next()].getValue();
-            const op1_value = try rt.results[try rt.it.next()].getValue();
-            const op2_value: ?*Result.Value = switch (Op) {
-                .Not, .BitCount, .BitReverse => null,
-                else => try rt.results[try rt.it.next()].getValue(),
-            };
+        comptime {
+            if (T == .Float) @compileError("Invalid value type");
+        }

-            const size = sw: switch (target_type) {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
-                .Vector4f32,
-                .Vector3f32,
-                .Vector2f32,
-                .Vector4i32,
-                .Vector3i32,
-                .Vector2i32,
-                .Vector4u32,
-                .Vector3u32,
-                .Vector2u32,
-                => 32,
-                .Int => |i| i.bit_length,
-                else => return RuntimeError.InvalidSpirV,
+        inline fn isUnaryOp() bool {
+            return comptime switch (Op) {
+                .Not, .BitCount, .BitReverse => true,
+                else => false,
            };
+        }

-            const operator = struct {
        inline fn bitMask(bits: u64) u64 {
            return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1;
        }
@@ -306,85 +301,227 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
            return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count)));
        }

-                fn operation(comptime TT: type, rt2: *Runtime, op1: TT, op2: ?TT) RuntimeError!TT {
-                    switch (Op) {
-                        .BitCount => return @bitSizeOf(TT),
-                        .BitReverse => return @bitReverse(op1),
-                        .Not => return ~op1,
-                        else => {},
+        /// Applies the single operand bit operation selected by `Op` to `op1`.
+        /// Returns `InvalidSpirV` when `Op` is not one of `BitCount`, `BitReverse` or `Not`.
+        fn operationUnary(comptime TT: type, op1: TT) RuntimeError!TT {
+            return switch (Op) {
+                // Counts the bits set in the operand (the width of its type is not the answer);
+                // the count always fits in TT, the cast keeps the return type TT
+                .BitCount => @as(TT, @intCast(@popCount(op1))),
+                .BitReverse => @bitReverse(op1),
+                .Not => ~op1,
+                else => RuntimeError.InvalidSpirV,
+            };
         }
-                    return if (op2) |v2|
-                        switch (Op) {
+
+        fn operationBinary(comptime TT: type, rt: *Runtime, op1: TT, op2: TT) RuntimeError!TT {
+            return switch (Op) {
                .BitFieldInsert => blk: {
-                                const offset = try rt2.results[try rt2.it.next()].getValue();
-                                const count = try rt2.results[try rt2.it.next()].getValue();
-                                break :blk bitInsert(TT, op1, v2, offset.Int.uint64, count.Int.uint64);
+                    const offset = try rt.results[try rt.it.next()].getValue();
+                    const count = try rt.results[try rt.it.next()].getValue();
+                    break :blk bitInsert(TT, op1, op2, offset.Int.uint64, count.Int.uint64);
                },
                .BitFieldSExtract => blk: {
                    if (T == .UInt) return RuntimeError.InvalidSpirV;
-                                const count = try rt2.results[try rt2.it.next()].getValue();
-                                break :blk bitExtract(TT, op1, v2, count.Int.uint64);
+                    const count = try rt.results[try rt.it.next()].getValue();
+                    break :blk bitExtract(TT, op1, op2, count.Int.uint64);
                },
                .BitFieldUExtract => blk: {
                    if (T == .SInt) return RuntimeError.InvalidSpirV;
-                                const count = try rt2.results[try rt2.it.next()].getValue();
-                                break :blk bitExtract(TT, op1, v2, count.Int.uint64);
+                    const count = try rt.results[try rt.it.next()].getValue();
+                    break :blk bitExtract(TT, op1, op2, count.Int.uint64);
                },
-                            .BitwiseAnd => op1 & v2,
-                            .BitwiseOr => op1 | v2,
-                            .BitwiseXor => op1 ^ v2,
-                            .ShiftLeft => op1 << @intCast(v2),
-                            .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(v2),
-                            else => return RuntimeError.InvalidSpirV,
-                        }
-                    else
-                        RuntimeError.InvalidSpirV;
+
+                .BitwiseAnd => op1 & op2,
+                .BitwiseOr => op1 | op2,
+                .BitwiseXor => op1 ^ op2,
+                .ShiftLeft => op1 << @intCast(op2),
+                .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(op2),
+
+                else => RuntimeError.InvalidSpirV,
+            };
        }

-                fn process(rt2: *Runtime, bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
+        fn applyScalarBits(rt: *Runtime, bit_count: SpvWord, dst: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
            switch (bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
-                            (try getValuePrimitiveField(T, i, v)).* = try operation(
-                                getValuePrimitiveFieldType(T, i),
-                                rt2,
-                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
-                                if (op2_v) |v2|
-                                    (try getValuePrimitiveField(T, i, @constCast(v2))).*
-                                else
-                                    null,
-                            );
+                inline 8, 16, 32, 64 => |bits| {
+                    const TT = getValuePrimitiveFieldType(T, bits);
+                    const a = (try getValuePrimitiveField(T, bits, @constCast(op1_v))).*;
+
+                    const out = if (comptime isUnaryOp()) blk: {
+                        break :blk try operationUnary(TT, a);
+                    } else blk: {
+                        const b_ptr = op2_v orelse return RuntimeError.InvalidSpirV;
+                        const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*;
+                        break :blk try operationBinary(TT, rt, a, b);
+                    };
+
+                    (try getValuePrimitiveField(T, bits, dst)).* = out;
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
+
+        /// Returns a pointer to lane `index` of the generic vector held by the second operand.
+        /// Yields `null` for single operand operations or when there is no second operand.
+        fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value {
+            if (comptime isUnaryOp()) return null;
+            return if (op2_value) |rhs| &rhs.Vector[index] else null;
+        }
+
+        /// Applies `Op` to every lane of a fixed size vector.
+        ///
+        /// Only the single operand branch does real work. For two operands operations the function
+        /// checks that a second operand exists and then always returns `InvalidSpirV`.
+        /// NOTE(review): the callers visible in `BitEngine` use `applyFixedVectorUnary` and
+        /// `applyFixedVectorBinary` instead; this function looks like a leftover - confirm before
+        /// relying on it or remove it.
+        fn applyFixedVector(comptime ElemT: type, comptime N: usize, dst: *[N]ElemT, op1: *[N]ElemT, op2_value: ?*Result.Value) RuntimeError!void {
+            if (comptime isUnaryOp()) {
+                inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]);
+            } else {
+                const op2 = op2_value orelse return RuntimeError.InvalidSpirV;
+                const b: *const [N]ElemT = switch (N) {
+                    2 => &op2.*.Vector2u32, // placeholder: always picks the u32 variant, whatever ElemT is
+                    3 => &op2.*.Vector3u32,
+                    4 => &op2.*.Vector4u32,
+                    else => unreachable,
+                };
+                // NOTE: the mapping above is a placeholder that is not type-correct for i32; the value is discarded and the branch reports an error.
+                _ = b;
+                return RuntimeError.InvalidSpirV;
+            }
+        }
+
+        /// Stores `operationBinary(op1[i], op2[i])` in `dst[i]` for each of the `N` lanes.
+        /// The first lane that fails aborts the loop and its error is returned.
+        fn applyFixedVectorBinary(
+            comptime ElemT: type,
+            comptime N: usize,
+            rt: *Runtime,
+            dst: *[N]ElemT,
+            op1: *[N]ElemT,
+            op2: *[N]ElemT,
+        ) RuntimeError!void {
+            inline for (0..N) |lane| {
+                const lhs = op1[lane];
+                const rhs = op2[lane];
+                dst[lane] = try operationBinary(ElemT, rt, lhs, rhs);
+            }
+        }
+
+        /// Stores `operationUnary(op1[i])` in `dst[i]` for each of the `N` lanes.
+        /// The first lane that fails aborts the loop and its error is returned.
+        fn applyFixedVectorUnary(
+            comptime ElemT: type,
+            comptime N: usize,
+            dst: *[N]ElemT,
+            op1: *[N]ElemT,
+        ) RuntimeError!void {
+            inline for (0..N) |lane| {
+                const operand = op1[lane];
+                dst[lane] = try operationUnary(ElemT, operand);
+            }
+        }
+    };
+}
+
+/// Generates the runtime handler of a bit instruction, specialised at comptime on the value
+/// type `T` and on the operation `Op`. The lane level work is delegated to `BitOperator(T, Op)`.
+fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
+    return struct {
+        /// Reads the result type id, the result id and the first operand id from `rt.it` (plus a
+        /// second operand id unless the operation is unary), then applies the operation to every
+        /// lane of the result value.
+        /// Returns `InvalidSpirV` when the result is neither an int nor an int vector, or when a
+        /// generic vector result and its first operand differ in lane count.
+        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
+            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
+            const dst = try rt.results[try rt.it.next()].getValue();
+            const op1 = try rt.results[try rt.it.next()].getValue();
+
+            const operator = BitOperator(T, Op);
+
+            // Unary operations carry no second operand word, so nothing more is read for them
+            const op2_value: ?*Result.Value = if (comptime operator.isUnaryOp()) null else try rt.results[try rt.it.next()].getValue();
+
+            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
+
+            // The variant of the result drives how the operands are accessed
+            switch (dst.*) {
+                .Int => try operator.applyScalarBits(rt, lane_bits, dst, op1, if (comptime operator.isUnaryOp()) null else op2_value),
+
+                .Vector => |dst_vec| {
+                    const op1_vec = op1.Vector;
+                    if (dst_vec.len != op1_vec.len) return RuntimeError.InvalidSpirV;
+
+                    for (dst_vec, op1_vec, 0..) |*d_lane, a_lane, i| {
+                        // Local copy of the lane so that its address can be handed over
+                        var tmp_a = a_lane;
+                        const b_ptr = operator.laneRhsPtr(op2_value, i);
+                        try operator.applyScalarBits(rt, lane_bits, d_lane, &tmp_a, b_ptr);
+                    }
+                },
+
+                // Fixed size integer vectors; there is no branch for the f32 vector variants
+                .Vector4i32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(i32, 4, d, &op1.Vector4i32)
+                    else
+                        try operator.applyFixedVectorBinary(i32, 4, rt, d, &op1.Vector4i32, &op2_value.?.Vector4i32);
+                },
+                .Vector3i32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(i32, 3, d, &op1.Vector3i32)
+                    else
+                        try operator.applyFixedVectorBinary(i32, 3, rt, d, &op1.Vector3i32, &op2_value.?.Vector3i32);
+                },
+                .Vector2i32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(i32, 2, d, &op1.Vector2i32)
+                    else
+                        try operator.applyFixedVectorBinary(i32, 2, rt, d, &op1.Vector2i32, &op2_value.?.Vector2i32);
+                },
+
+                .Vector4u32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(u32, 4, d, &op1.Vector4u32)
+                    else
+                        try operator.applyFixedVectorBinary(u32, 4, rt, d, &op1.Vector4u32, &op2_value.?.Vector4u32);
+                },
+                .Vector3u32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(u32, 3, d, &op1.Vector3u32)
+                    else
+                        try operator.applyFixedVectorBinary(u32, 3, rt, d, &op1.Vector3u32, &op2_value.?.Vector3u32);
+                },
+                .Vector2u32 => |*d| {
+                    if (comptime operator.isUnaryOp())
+                        try operator.applyFixedVectorUnary(u32, 2, d, &op1.Vector2u32)
+                    else
+                        try operator.applyFixedVectorBinary(u32, 2, rt, d, &op1.Vector2u32, &op2_value.?.Vector2u32);
+                },
+
+                else => return RuntimeError.InvalidSpirV,
+            }
+        }
     };
+}

-            switch (value.*) {
-                .Int => try operator.process(rt, size, value, op1_value, op2_value),
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i|
-                    try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null),
-                // No bit manipulation on VectorXf32
-                .Vector4i32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null);
-                },
-                .Vector3i32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null);
-                },
-                .Vector2i32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null);
-                },
-                .Vector4u32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null);
-                },
-                .Vector3u32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null);
-                },
-                .Vector2u32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null);
+fn CondOperator(comptime T: ValueType, comptime Op: CondOp) type { // Comparison/logical helpers specialized at comptime for one (T, Op) pair
+    return struct {
+        fn operation(comptime TT: type, a: TT, b: TT) RuntimeError!bool { // Binary predicate for Op; ops not listed below yield InvalidSpirV
+            return switch (Op) {
+                .Equal, .LogicalEqual => a == b,
+                .NotEqual, .LogicalNotEqual => a != b,
+                .Greater => a > b,
+                .GreaterEqual => a >= b,
+                .Less => a < b,
+                .LessEqual => a <= b,
+                .LogicalAnd => a and b,
+                .LogicalOr => a or b,
+                else => RuntimeError.InvalidSpirV,
+            };
+        }
+
+        fn operationUnary(comptime TT: type, a: TT) RuntimeError!bool { // Unary predicate; only LogicalNot is handled
+            return switch (Op) {
+                .LogicalNot => !a,
+                else => RuntimeError.InvalidSpirV,
+            };
+        }
+
+        fn applyLane(bit_count: SpvWord, dst_bool: *Result.Value, a_v: *const Result.Value, b_v: ?*const Result.Value) RuntimeError!void { // Evaluates Op on one scalar lane and stores the result in dst_bool.Bool
+            switch (bit_count) {
+                inline 8, 16, 32, 64 => |bits| {
+                    if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; // 8-bit floats are rejected
+
+                    const TT = getValuePrimitiveFieldType(T, bits);
+                    const a = (try getValuePrimitiveField(T, bits, @constCast(a_v))).*; // Accessor takes a mutable pointer; the lane is only read here
+
+                    if (comptime Op == .LogicalNot) {
+                        dst_bool.Bool = try operationUnary(TT, a);
+                    } else {
+                        const b_ptr = b_v orelse return RuntimeError.InvalidSpirV; // Binary ops require a right-hand operand
+                        const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*;
+                        dst_bool.Bool = try operation(TT, a, b);
+                    }
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
+
+        fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value { // Right-hand lane at index, or null when there is none to use
+            if (comptime Op == .LogicalNot) return null;
+            const v = op2_value orelse return null;
+            return if (v.* == .Vector and index < v.Vector.len) &v.Vector[index] else null; // Non-vector or too-short rhs yields null (applyLane reports InvalidSpirV) instead of a safety panic
+        }
    };
 }

@@ -397,199 +534,151 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type {
                else => return RuntimeError.InvalidSpirV,
            }

-            const value = try rt.results[try rt.it.next()].getValue();
+            const dst = try rt.results[try rt.it.next()].getValue();
+
            const op1_result = &rt.results[try rt.it.next()];
            const op1_type = try op1_result.getValueTypeWord();
            const op1_value = try op1_result.getValue();
-            const op2_value: ?*Result.Value = switch (Op) {
-                .LogicalNot => null,
-                else => try rt.results[try rt.it.next()].getValue(),
-            };

-            const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
-                .Vector4f32,
-                .Vector3f32,
-                .Vector2f32,
-                .Vector4i32,
-                .Vector3i32,
-                .Vector2i32,
-                .Vector4u32,
-                .Vector3u32,
-                .Vector2u32,
-                => 32,
-                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
-                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
-                else => return RuntimeError.InvalidSpirV,
-            };
+            const op2_value: ?*Result.Value = if (comptime Op == .LogicalNot) null else try rt.results[try rt.it.next()].getValue();

-            const operator = struct {
-                fn operation(comptime TT: type, op1: TT, op2: ?TT) RuntimeError!bool {
-                    return switch (Op) {
-                        .Equal, .LogicalEqual => op1 == op2 orelse return RuntimeError.InvalidSpirV,
-                        .NotEqual, .LogicalNotEqual => op1 != op2 orelse return RuntimeError.InvalidSpirV,
-                        .Greater => op1 > op2 orelse return RuntimeError.InvalidSpirV,
-                        .GreaterEqual => op1 >= op2 orelse return RuntimeError.InvalidSpirV,
-                        .Less => op1 < op2 orelse return RuntimeError.InvalidSpirV,
-                        .LessEqual => op1 <= op2 orelse return RuntimeError.InvalidSpirV,
-                        .LogicalAnd => (op1 != @as(TT, 0)) and ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)),
-                        .LogicalOr => (op1 != @as(TT, 0)) or ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)),
-                        .LogicalNot => (op1 == @as(TT, 0)),
-                    };
-                }
+            const lane_bits = try Result.resolveLaneBitWidth((try rt.results[op1_type].getVariant()).Type, rt);

-                fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
-                    switch (bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
-                            if (i == 8 and T == .Float) { // No f8
-                                return RuntimeError.InvalidSpirV;
-                            }
-                            v.Bool = try operation(
-                                getValuePrimitiveFieldType(T, i),
-                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
-                                if (op2_v) |val| (try getValuePrimitiveField(T, i, @constCast(val))).* else null,
-                            );
+            const operator = CondOperator(T, Op);
+
+            switch (dst.*) {
+                .Bool => try operator.applyLane(lane_bits, dst, op1_value, op2_value),
+
+                .Vector => |dst_vec| for (dst_vec, op1_value.Vector, 0..) |*d_lane, a_lane, i| {
+                    const b_ptr = operator.laneRhsPtr(op2_value, i);
+                    try operator.applyLane(lane_bits, d_lane, &a_lane, b_ptr);
                },
-                        else => return RuntimeError.InvalidSpirV,
-                    }
-                }
-            };

-            switch (value.*) {
-                .Bool => try operator.process(size, value, op1_value, op2_value),
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| {
-                    try operator.process(size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null);
-                },
-                // No Vector specializations for booleans
                else => return RuntimeError.InvalidSpirV,
            }
        }
    };
 }

-fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type {
+fn ConversionEngine(comptime from_kind: ValueType, comptime to_kind: ValueType) type {
    return struct {
        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
-            const value = try rt.results[try rt.it.next()].getValue();
-            const op_result = &rt.results[try rt.it.next()];
-            const op_type = try op_result.getValueTypeWord();
-            const op_value = try op_result.getValue();
+            const dst_value = try rt.results[try rt.it.next()].getValue(); // Value that receives the converted result

-            const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
-                .Vector4f32,
-                .Vector3f32,
-                .Vector2f32,
-                .Vector4i32,
-                .Vector3i32,
-                .Vector2i32,
-                .Vector4u32,
-                .Vector3u32,
-                .Vector2u32,
-                => 32,
-                .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
-                .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
+            const src_result = &rt.results[try rt.it.next()]; // Source operand of the conversion
+            const src_type_word = try src_result.getValueTypeWord();
+            const src_value = try src_result.getValue();
+
+            const from_bits = try Result.resolveLaneBitWidth((try rt.results[src_type_word].getVariant()).Type, rt); // Bit width used to pick the source lane's field
+            const to_bits = try Result.resolveLaneBitWidth(target_type, rt); // Bit width used to pick the destination lane's field
+
+            const caster = struct { // Helpers for scalar-lane and fixed-size vector casts
+                fn castLane(comptime ToT: type, from_bit_count: SpvWord, from: *Result.Value) RuntimeError!ToT { // Reads one source lane at its bit width and lossy-casts it to ToT
+                    return switch (from_bit_count) {
+                        inline 8, 16, 32, 64 => |bits| blk: {
+                            if (bits == 8 and from_kind == .Float) return RuntimeError.InvalidSpirV; // No f8
+                            const v = (try getValuePrimitiveField(from_kind, bits, from)).*;
+                            break :blk std.math.lossyCast(ToT, v);
+                        },
                        else => return RuntimeError.InvalidSpirV,
                    };
+                }

-            const to_size = sw: switch (target_type) {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
-                .Vector4f32,
-                .Vector3f32,
-                .Vector2f32,
-                .Vector4i32,
-                .Vector3i32,
-                .Vector2i32,
-                .Vector4u32,
-                .Vector3u32,
-                .Vector2u32,
-                => 32,
-                .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
-                .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
-                else => return RuntimeError.InvalidSpirV,
-            };
-
-            const operator = struct {
-                fn process(from_bit_count: SpvWord, to_bit_count: SpvWord, to: *Result.Value, from: *Result.Value) RuntimeError!void {
+                fn applyScalar(from_bit_count: SpvWord, to_bit_count: SpvWord, dst: *Result.Value, from: *Result.Value) RuntimeError!void { // Selects the destination field from to_bit_count, then stores the cast source lane
                    switch (to_bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
-                            if (i == 8 and To == .Float) {
-                                return RuntimeError.InvalidSpirV; // No f8
-                            }
-
-                            const ToType = getValuePrimitiveFieldType(To, i);
-                            (try getValuePrimitiveField(To, i, to)).* = std.math.lossyCast(
-                                ToType,
-                                switch (from_bit_count) {
-                                    inline 8, 16, 32, 64 => |j| blk: {
-                                        if (j == 8 and From == .Float) {
-                                            return RuntimeError.InvalidSpirV; // Same
-                                        }
-                                        break :blk (try getValuePrimitiveField(From, j, from)).*;
-                                    },
-                                    else => return RuntimeError.InvalidSpirV,
-                                },
-                            );
+                        inline 8, 16, 32, 64 => |bits| {
+                            if (bits == 8 and to_kind == .Float) return RuntimeError.InvalidSpirV; // No f8
+                            const ToT = getValuePrimitiveFieldType(to_kind, bits);
+                            (try getValuePrimitiveField(to_kind, bits, dst)).* = try castLane(ToT, from_bit_count, from);
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }

-                fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T {
-                    return switch (from.*) {
-                        .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]),
-                        .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]),
-                        inline else => switch (from_bit_count) {
-                            inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: {
-                                if (i == 8 and From == .Float) {
-                                    return RuntimeError.InvalidSpirV;
+                fn castSIMDVector(comptime ToT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]ToT) void { // Source and destination share element type ToT
+                    inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]);
                }
-                                break :blk (try getValuePrimitiveField(From, i, from)).*;
-                            }),
-                            else => return RuntimeError.InvalidSpirV,
-                        },
-                    };
+
+                fn castSIMDVectorFromOther(comptime ToT: type, comptime FromT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]FromT) void { // Element-wise lossy cast from FromT to ToT
+                    inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]);
                }
            };

-            switch (value.*) {
-                .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
-                .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
-                .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v),
-                .Vector4f32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
+            switch (dst_value.*) { // Dispatch on the destination's active tag
+                .Float => {
+                    if (to_kind != .Float) return RuntimeError.InvalidSpirV;
+                    try caster.applyScalar(from_bits, to_bits, dst_value, src_value);
                },
-                .Vector3f32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
+                .Int => {
+                    if (to_kind != .SInt and to_kind != .UInt) return RuntimeError.InvalidSpirV;
+                    try caster.applyScalar(from_bits, to_bits, dst_value, src_value);
                },
-                .Vector2f32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
+                .Vector => |dst_vec| {
+                    const src_vec = src_value.Vector;
+                    if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV; // Lane counts must agree
+                    for (dst_vec, src_vec) |*d_lane, *s_lane| {
+                        try caster.applyScalar(from_bits, to_bits, d_lane, s_lane);
+                    }
                },
-                .Vector4i32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+
+                .Vector4f32 => |*dst| switch (src_value.*) { // Fixed-size vectors accept only fixed-size sources with the same lane count
+                    .Vector4f32 => caster.castSIMDVector(f32, 4, dst, &src_value.Vector4f32),
+                    .Vector4i32 => caster.castSIMDVectorFromOther(f32, i32, 4, dst, &src_value.Vector4i32),
+                    .Vector4u32 => caster.castSIMDVectorFromOther(f32, u32, 4, dst, &src_value.Vector4u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector3i32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+                .Vector3f32 => |*dst| switch (src_value.*) {
+                    .Vector3f32 => caster.castSIMDVector(f32, 3, dst, &src_value.Vector3f32),
+                    .Vector3i32 => caster.castSIMDVectorFromOther(f32, i32, 3, dst, &src_value.Vector3i32),
+                    .Vector3u32 => caster.castSIMDVectorFromOther(f32, u32, 3, dst, &src_value.Vector3u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector2i32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+                .Vector2f32 => |*dst| switch (src_value.*) {
+                    .Vector2f32 => caster.castSIMDVector(f32, 2, dst, &src_value.Vector2f32),
+                    .Vector2i32 => caster.castSIMDVectorFromOther(f32, i32, 2, dst, &src_value.Vector2i32),
+                    .Vector2u32 => caster.castSIMDVectorFromOther(f32, u32, 2, dst, &src_value.Vector2u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector4u32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+
+                .Vector4i32 => |*dst| switch (src_value.*) {
+                    .Vector4f32 => caster.castSIMDVectorFromOther(i32, f32, 4, dst, &src_value.Vector4f32),
+                    .Vector4i32 => caster.castSIMDVector(i32, 4, dst, &src_value.Vector4i32),
+                    .Vector4u32 => caster.castSIMDVectorFromOther(i32, u32, 4, dst, &src_value.Vector4u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector3u32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+                .Vector3i32 => |*dst| switch (src_value.*) {
+                    .Vector3f32 => caster.castSIMDVectorFromOther(i32, f32, 3, dst, &src_value.Vector3f32),
+                    .Vector3i32 => caster.castSIMDVector(i32, 3, dst, &src_value.Vector3i32),
+                    .Vector3u32 => caster.castSIMDVectorFromOther(i32, u32, 3, dst, &src_value.Vector3u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector2u32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+                .Vector2i32 => |*dst| switch (src_value.*) {
+                    .Vector2f32 => caster.castSIMDVectorFromOther(i32, f32, 2, dst, &src_value.Vector2f32),
+                    .Vector2i32 => caster.castSIMDVector(i32, 2, dst, &src_value.Vector2i32),
+                    .Vector2u32 => caster.castSIMDVectorFromOther(i32, u32, 2, dst, &src_value.Vector2u32),
+                    else => return RuntimeError.InvalidSpirV,
                },
+
+                .Vector4u32 => |*dst| switch (src_value.*) {
+                    .Vector4f32 => caster.castSIMDVectorFromOther(u32, f32, 4, dst, &src_value.Vector4f32),
+                    .Vector4i32 => caster.castSIMDVectorFromOther(u32, i32, 4, dst, &src_value.Vector4i32),
+                    .Vector4u32 => caster.castSIMDVector(u32, 4, dst, &src_value.Vector4u32),
+                    else => return RuntimeError.InvalidSpirV,
+                },
+                .Vector3u32 => |*dst| switch (src_value.*) {
+                    .Vector3f32 => caster.castSIMDVectorFromOther(u32, f32, 3, dst, &src_value.Vector3f32),
+                    .Vector3i32 => caster.castSIMDVectorFromOther(u32, i32, 3, dst, &src_value.Vector3i32),
+                    .Vector3u32 => caster.castSIMDVector(u32, 3, dst, &src_value.Vector3u32),
+                    else => return RuntimeError.InvalidSpirV,
+                },
+                .Vector2u32 => |*dst| switch (src_value.*) {
+                    .Vector2f32 => caster.castSIMDVectorFromOther(u32, f32, 2, dst, &src_value.Vector2f32),
+                    .Vector2i32 => caster.castSIMDVectorFromOther(u32, i32, 2, dst, &src_value.Vector2i32),
+                    .Vector2u32 => caster.castSIMDVector(u32, 2, dst, &src_value.Vector2u32),
+                    else => return RuntimeError.InvalidSpirV,
+                },
+
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -600,26 +689,11 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
    return struct {
        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
-            const value = try rt.results[try rt.it.next()].getValue();
-            const op1_value = try rt.results[try rt.it.next()].getValue();
-            const op2_value = try rt.results[try rt.it.next()].getValue();
+            const dst = try rt.results[try rt.it.next()].getValue();
+            const lhs = try rt.results[try rt.it.next()].getValue();
+            const rhs = try rt.results[try rt.it.next()].getValue();

-            const size = sw: switch (target_type) {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
-                .Vector4f32,
-                .Vector3f32,
-                .Vector2f32,
-                .Vector4i32,
-                .Vector3i32,
-                .Vector2i32,
-                .Vector4u32,
-                .Vector3u32,
-                .Vector2u32,
-                => 32,
-                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
-                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
-                else => return RuntimeError.InvalidSpirV,
-            };
+            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);

            const operator = struct {
                fn operation(comptime TT: type, op1: TT, op2: TT) RuntimeError!TT {
@@ -637,68 +711,77 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
                    };
                }

-                fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: *const Result.Value) RuntimeError!void {
+                fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *Result.Value, r: *Result.Value) RuntimeError!void {
                    switch (bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
-                            if (i == 8 and T == .Float) { // No f8
-                                return RuntimeError.InvalidSpirV;
-                            }
-                            (try getValuePrimitiveField(T, i, v)).* = try operation(
-                                getValuePrimitiveFieldType(T, i),
-                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
-                                (try getValuePrimitiveField(T, i, @constCast(op2_v))).*,
-                            );
+                        inline 8, 16, 32, 64 => |bits| {
+                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
+
+                            const ScalarT = getValuePrimitiveFieldType(T, bits);
+                            const d_field = try getValuePrimitiveField(T, bits, d);
+                            const l_field = try getValuePrimitiveField(T, bits, l);
+                            const r_field = try getValuePrimitiveField(T, bits, r);
+                            d_field.* = try operation(ScalarT, l_field.*, r_field.*);
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
+
+                inline fn applyVectorTimesScalarF32(d: []Result.Value, l: []const Result.Value, r: f32) void {
+                    for (d, l) |*d_v, l_v| {
+                        d_v.Float.float32 = l_v.Float.float32 * r;
+                    }
+                }
+
+                inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void {
+                    inline for (0..N) |i| {
+                        d[i] = try operation(ElemT, l[i], r[i]);
+                    }
+                }
+
+                inline fn applyVectorSIMDTimesScalarF32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: f32) void {
+                    inline for (0..N) |i| {
+                        d[i] = l[i] * r;
+                    }
+                }
+
+                inline fn applySIMDVectorf32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: *const Result.Value) RuntimeError!void {
+                    switch (Op) {
+                        .VectorTimesScalar => applyVectorSIMDTimesScalarF32(N, d, l, r.Float.float32),
+                        else => {
+                            const rh: *const @Vector(N, f32) = switch (N) {
+                                2 => &r.Vector2f32,
+                                3 => &r.Vector3f32,
+                                4 => &r.Vector4f32,
+                                else => unreachable,
+                            };
+                            try applySIMDVector(f32, N, d, l, rh);
+                        },
+                    }
+                }
            };

-            switch (value.*) {
-                .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
-                .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| {
-                    switch (Op) {
-                        .VectorTimesScalar => try operator.process(size, val, &op1_v, op2_value),
-                        else => try operator.process(size, val, &op1_v, &op2_value.Vector[i]),
-                    }
+            switch (dst.*) {
+                .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs),
+
+                .Vector => |dst_vec| switch (Op) {
+                    .VectorTimesScalar => operator.applyVectorTimesScalarF32(dst_vec, lhs.Vector, rhs.Float.float32),
+                    else => for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, *l_lane, *r_lane| {
+                        try operator.applyScalar(lane_bits, d_lane, l_lane, r_lane);
                    },
-                .Vector4f32 => |*vec| inline for (0..4) |i| {
-                    switch (Op) {
-                        .VectorTimesScalar => vec[i] = op1_value.Vector4f32[i] * op2_value.Float.float32,
-                        else => vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]),
-                    }
-                },
-                .Vector3f32 => |*vec| inline for (0..3) |i| {
-                    switch (Op) {
-                        .VectorTimesScalar => vec[i] = op1_value.Vector3f32[i] * op2_value.Float.float32,
-                        else => vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]),
-                    }
-                },
-                .Vector2f32 => |*vec| inline for (0..2) |i| {
-                    switch (Op) {
-                        .VectorTimesScalar => vec[i] = op1_value.Vector2f32[i] * op2_value.Float.float32,
-                        else => vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]),
-                    }
-                },
-                .Vector4i32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]);
-                },
-                .Vector3i32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]);
-                },
-                .Vector2i32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]);
-                },
-                .Vector4u32 => |*vec| inline for (0..4) |i| {
-                    vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]);
-                },
-                .Vector3u32 => |*vec| inline for (0..3) |i| {
-                    vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]);
-                },
-                .Vector2u32 => |*vec| inline for (0..2) |i| {
-                    vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]);
                },
+
+                .Vector4f32 => |*d| try operator.applySIMDVectorf32(4, d, &lhs.Vector4f32, rhs),
+                .Vector3f32 => |*d| try operator.applySIMDVectorf32(3, d, &lhs.Vector3f32, rhs),
+                .Vector2f32 => |*d| try operator.applySIMDVectorf32(2, d, &lhs.Vector2f32, rhs),
+
+                .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32),
+                .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32),
+                .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32),
+
+                .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32),
+                .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32),
+                .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32),
+
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -784,20 +867,21 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
 }

 fn copyValue(dst: *Result.Value, src: *const Result.Value) void {
-    if (src.getCompositeDataOrNull()) |src_slice| {
-        if (dst.getCompositeDataOrNull()) |dst_slice| {
+    switch (src.*) { // branch on the active tag of the source value
+        .Vector, .Matrix, .Array, .Structure => |src_slice| { // composite source: copy element by element, recursing into nested composites
+            const dst_slice = switch (dst.*) { // dst has to hold one of these four composite tags as well
+                .Vector, .Matrix, .Array, .Structure => |d| d,
+                else => unreachable, // a composite src paired with a non-composite dst is treated as impossible
+            };
            for (0..@min(dst_slice.len, src_slice.len)) |i| {
                copyValue(&dst_slice[i], &src_slice[i]);
            }
-        } else {
-            unreachable;
-        }
-    } else {
-        dst.* = src.*;
+        },
+        else => dst.* = src.*, // every other tag is copied by plain assignment
    }
 }

-fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) {
+pub fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) {
    return switch (T) {
        .Bool => &v.Bool,
        .Float => switch (BitCount) {
@@ -815,7 +899,7 @@ fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v:
    };
 }

-fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type {
+pub fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type {
    return switch (T) {
        .Bool => bool,
        .Float => std.meta.Float(BitCount),
@@ -1112,7 +1196,7 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru
    const set = try rt.it.next();
    const inst = try rt.it.next();

-    switch (try rt.results[set].getVariant()) {
+    switch ((try rt.results[set].getVariant()).*) {
        .Extension => |ext| if (ext.dispatcher[inst]) |pfn| {
            try pfn(allocator, target_type, id, word_count, rt);
        },
@@ -1122,10 +1206,11 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru

 fn opExtInstImport(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) RuntimeError!void {
    const id = try rt.it.next();
-    rt.mod.results[id].name = try readStringN(allocator, &rt.it, word_count - 1);
+    const name = try readStringN(allocator, &rt.it, word_count - 1); // import name, read from the remaining word_count - 1 words
+    rt.mod.results[id].name = name; // stored before the lookup below, so the name stays set even when the lookup fails
    rt.mod.results[id].variant = .{
        .Extension = .{
-            .dispatcher = undefined,
+            .dispatcher = extensions_map.get(name) orelse return RuntimeError.UnsupportedExtension, // no entry in extensions_map for this name
        },
    };
 }