adding GLSL std 450 base

2026-01-24 02:46:02 +01:00
parent 37da19ed43
commit 96ad7f12f9
14 changed files with 1501 additions and 740 deletions
@@ -4,8 +4,8 @@ const spv = @import("spv");
 const shader_source = @embedFile("shader.spv");
-const screen_width = 1250;
+const screen_width = 200;
-const screen_height = 720;
+const screen_height = 200;
 pub fn main() !void {
    {
@@ -36,10 +36,16 @@ pub fn main() !void {
        }
        for (0..screen_height) |_| {
            var rt = try spv.Runtime.init(allocator, &module);
            (try runner_cache.addOne(allocator)).* = .{
                .allocator = allocator,
                .surface = surface,
-                .rt = try spv.Runtime.init(allocator, &module),
+                .rt = rt,
                .entry = try rt.getEntryPointByName("main"),
                .color = try rt.getResultByName("color"),
                .time = try rt.getResultByName("time"),
                .pos = try rt.getResultByName("pos"),
                .res = try rt.getResultByName("res"),
            };
        }
@@ -48,9 +54,11 @@ pub fn main() !void {
            .allocator = allocator,
        });
        var timer = try std.time.Timer.start();
        var quit = false;
        while (!quit) {
-            try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 0.0 });
+            try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 1.0 });
            while (sdl3.events.poll()) |event|
                switch (event) {
@@ -65,17 +73,19 @@ pub fn main() !void {
                const pixel_map: [*]u32 = @as([*]u32, @ptrCast(@alignCast((surface.getPixels() orelse return).ptr)));
-                var timer = try std.time.Timer.start();
+                var frame_timer = try std.time.Timer.start();
                defer {
-                    const ns = timer.lap();
+                    const ns = frame_timer.lap();
                    const ms = @as(f32, @floatFromInt(ns)) / std.time.ns_per_s;
                    std.log.info("Took {d:.3}s - {d:.3}fps to render", .{ ms, 1.0 / ms });
                }
                const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s;
                var wait_group: std.Thread.WaitGroup = .{};
                for (0..screen_height) |y| {
                    const runner = &runner_cache.items[y];
-                    thread_pool.spawnWg(&wait_group, Runner.run, .{ runner, y, pixel_map });
+                    thread_pool.spawnWg(&wait_group, Runner.runWrapper, .{ runner, y, pixel_map, delta });
                }
                thread_pool.waitAndWork(&wait_group);
            }
@@ -92,23 +102,33 @@ const Runner = struct {
    allocator: std.mem.Allocator,
    surface: sdl3.surface.Surface,
    rt: spv.Runtime,
    entry: spv.SpvWord,
    color: spv.SpvWord,
    time: spv.SpvWord,
    pos: spv.SpvWord,
    res: spv.SpvWord,
-    fn run(self: *Self, y: usize, pixel_map: [*]u32) void {
+    fn runWrapper(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) void {
        // Non-failing entry point around `Self.run`: all arguments are forwarded
        // unchanged and any error `run` returns is handled right here, which is
        // why this function itself returns `void`.
        // NOTE(review): presumably needed because the thread pool's `spawnWg`
        // wants a task function that cannot fail — confirm against std.Thread.Pool.
        //
        // `timer` is an f32 passed straight through to `run`; the call site
        // visible in this diff hands in the elapsed-seconds `delta` value.
        //
        // `.always_inline` forces `run` to be inlined into this wrapper.
        @call(.always_inline, Self.run, .{ self, y, pixel_map, timer }) catch |err| {
            // Report the error name, then the error return trace when one is available.
            std.log.err("{s}", .{@errorName(err)});
            if (@errorReturnTrace()) |trace| {
                std.debug.dumpStackTrace(trace.*);
            }
            // A failure in `run` is treated as fatal: the whole process is aborted.
            std.process.abort();
        };
    }
    fn run(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) !void {
        var rt = self.rt; // Copy to avoid pointer access of `self` at runtime. Okay as Runtime contains only pointers and trivially copyable fields
        const entry = rt.getEntryPointByName("main") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        const color = rt.getResultByName("color") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        const time = rt.getResultByName("time") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        const pos = rt.getResultByName("pos") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        const res = rt.getResultByName("res") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
        var output: [4]f32 = undefined;
        for (0..screen_width) |x| {
-            rt.writeInput(f32, &.{@as(f32, @floatFromInt(std.time.milliTimestamp()))}, time) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.writeInput(f32, &.{timer}, self.time);
-            rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, res) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, self.res);
-            rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, pos) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, self.pos);
-            rt.callEntryPoint(self.allocator, entry) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.callEntryPoint(self.allocator, self.entry);
-            rt.readOutput(f32, output[0..], color) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)});
+            try rt.readOutput(f32, output[0..], self.color);
            const rgba = self.surface.mapRgba(
                @truncate(@as(u32, @intFromFloat(output[0] * 255.0))),
@@ -3,25 +3,66 @@ module;
 struct FragIn
 {
-	[location(0)] time: f32,
+    [location(0)] time: f32,
-	[location(1)] res: vec2[f32],
+    [location(1)] res: vec2[f32],
-	[location(2)] pos: vec2[f32],
+    [location(2)] pos: vec2[f32],
 }
 struct FragOut
 {
-	[location(0)] color: vec4[f32]
+    [location(0)] color: vec4[f32]
 }
 [entry(frag)]
 fn main(input: FragIn) -> FragOut
 {
-	let output: FragOut;
+    const I: i32 = 32;
-	output.color = vec4[f32](
+    const A: f32 = 7.5;
-		input.pos.x / input.res.x,
+    const MA: f32 = 20.0;
-		input.pos.y / input.res.y,
+    const MI: f32 = 0.001;
-		1.0,
+
-		1.0
+    let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0);
-	);
+    let uv  = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y);
-	return output;
+
    let col = vec3[f32](0.0, 0.0, 0.0);
    let ro  = vec3[f32](0.0, 0.0, -2.0);
    let rd  = vec3[f32](uv.x, uv.y, 1.0);
    let dt  = 0.0;
    let ds  = 0.0;
    let dm  = -1.0;
    let p   = ro;
    let c   = vec3[f32](0.0, 0.0, 0.0);
    let l = vec3[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0);
    for i in 0 -> I
    {
        p = ro + rd * dt;
        ds = length(c - p) - 1.0;
        dt += ds;
        if (dm == -1.0 || ds < dm)
            dm = ds;
        if (ds <= MI)
        {
            let value = max(dot(normalize(c - p), normalize(p - l)) - 0.35, 0.0);
            col = vec3[f32](value, value, value);
            break;
        }
        if (ds >= MA)
        {
            if (dot(normalize(rd), normalize(l - ro)) <= 1.0)
            {
                let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.05)/ 1.15 * (1.0 - dm * A);
                col = vec3[f32](value, value, value);
            }
            break;
        }
    }
    let output: FragOut;
    output.color = vec4[f32](col.x, col.y, col.z, 1.0);
    return output;
 }
@@ -1,85 +1,281 @@
 Version 1.0
 Generator: 2560130
-Bound: 50
+Bound: 203
 Schema: 0
-      OpCapability Capability(Shader)
+       OpCapability Capability(Shader)
-      OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450)
+ %42 = OpExtInstImport "GLSL.std.450"
-      OpEntryPoint ExecutionModel(Fragment) %24 "main" %5 %11 %14 %20
+       OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450)
-      OpExecutionMode %24 ExecutionMode(OriginUpperLeft)
+       OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20
-      OpSource SourceLanguage(NZSL) 4198400
+       OpExecutionMode %43 ExecutionMode(OriginUpperLeft)
-      OpSourceExtension "Version: 1.1"
+       OpSource SourceLanguage(NZSL) 4198400
-      OpName %16 "FragIn"
+       OpSourceExtension "Version: 1.1"
-      OpMemberName %16 0 "time"
+       OpName %16 "FragIn"
-      OpMemberName %16 1 "res"
+       OpMemberName %16 0 "time"
-      OpMemberName %16 2 "pos"
+       OpMemberName %16 1 "res"
-      OpName %21 "FragOut"
+       OpMemberName %16 2 "pos"
-      OpMemberName %21 0 "color"
+       OpName %21 "FragOut"
-      OpName %5 "time"
+       OpMemberName %21 0 "color"
-      OpName %11 "res"
+       OpName %5 "time"
-      OpName %14 "pos"
+       OpName %11 "res"
-      OpName %20 "color"
+       OpName %14 "pos"
-      OpName %24 "main"
+       OpName %20 "color"
-      OpDecorate %5 Decoration(Location) 0
+       OpName %43 "main"
-      OpDecorate %11 Decoration(Location) 1
+       OpDecorate %5 Decoration(Location) 0
-      OpDecorate %14 Decoration(Location) 2
+       OpDecorate %11 Decoration(Location) 1
-      OpDecorate %20 Decoration(Location) 0
+       OpDecorate %14 Decoration(Location) 2
-      OpMemberDecorate %16 0 Decoration(Offset) 0
+       OpDecorate %20 Decoration(Location) 0
-      OpMemberDecorate %16 1 Decoration(Offset) 8
+       OpMemberDecorate %16 0 Decoration(Offset) 0
-      OpMemberDecorate %16 2 Decoration(Offset) 16
+       OpMemberDecorate %16 1 Decoration(Offset) 8
-      OpMemberDecorate %21 0 Decoration(Offset) 0
+       OpMemberDecorate %16 2 Decoration(Offset) 16
- %1 = OpTypeVoid
+       OpMemberDecorate %21 0 Decoration(Offset) 0
- %2 = OpTypeFunction %1
+  %1 = OpTypeVoid
- %3 = OpTypeFloat 32
+  %2 = OpTypeFunction %1
- %4 = OpTypePointer StorageClass(Input) %3
+  %3 = OpTypeFloat 32
- %6 = OpTypeInt 32 1
+  %4 = OpTypePointer StorageClass(Input) %3
- %7 = OpConstant %6 i32(0)
+  %6 = OpTypeInt 32 1
- %8 = OpTypePointer StorageClass(Function) %3
+  %7 = OpConstant %6 i32(0)
- %9 = OpTypeVector %3 2
+  %8 = OpTypePointer StorageClass(Function) %3
-%10 = OpTypePointer StorageClass(Input) %9
+  %9 = OpTypeVector %3 2
-%12 = OpConstant %6 i32(1)
+ %10 = OpTypePointer StorageClass(Input) %9
-%13 = OpTypePointer StorageClass(Function) %9
+ %12 = OpConstant %6 i32(1)
-%15 = OpConstant %6 i32(2)
+ %13 = OpTypePointer StorageClass(Function) %9
-%16 = OpTypeStruct %3 %9 %9
+ %15 = OpConstant %6 i32(2)
-%17 = OpTypePointer StorageClass(Function) %16
+ %16 = OpTypeStruct %3 %9 %9
-%18 = OpTypeVector %3 4
+ %17 = OpTypePointer StorageClass(Function) %16
-%19 = OpTypePointer StorageClass(Output) %18
+ %18 = OpTypeVector %3 4
-%21 = OpTypeStruct %18
+ %19 = OpTypePointer StorageClass(Output) %18
-%22 = OpTypePointer StorageClass(Function) %21
+ %21 = OpTypeStruct %18
-%23 = OpConstant %3 f32(1)
+ %22 = OpConstant %3 f32(2)
-%47 = OpTypePointer StorageClass(Function) %18
+ %23 = OpConstant %3 f32(1)
- %5 = OpVariable %4 StorageClass(Input)
+ %24 = OpConstant %3 f32(0)
-%11 = OpVariable %10 StorageClass(Input)
+ %25 = OpTypeVector %3 3
-%14 = OpVariable %10 StorageClass(Input)
+ %26 = OpTypePointer StorageClass(Function) %25
-%20 = OpVariable %19 StorageClass(Output)
+ %27 = OpConstant %3 f32(-2)
-%24 = OpFunction %1 FunctionControl(0) %2
+ %28 = OpConstant %3 f32(-1)
-%25 = OpLabel
+ %29 = OpConstant %3 f32(0.2)
-%26 = OpVariable %22 StorageClass(Function)
+ %30 = OpConstant %3 f32(4)
-%27 = OpVariable %17 StorageClass(Function)
+ %31 = OpTypePointer StorageClass(Function) %6
-%28 = OpAccessChain %8 %27 %7
+ %32 = OpConstant %6 i32(32)
-      OpCopyMemory %28 %5
+ %33 = OpTypeBool
-%29 = OpAccessChain %13 %27 %12
+ %34 = OpConstant %3 f32(0.001)
-      OpCopyMemory %29 %11
+ %35 = OpConstant %3 f32(0.35)
-%30 = OpAccessChain %13 %27 %15
+ %36 = OpConstant %3 f32(20)
-      OpCopyMemory %30 %14
+ %37 = OpConstant %3 f32(0.15)
-%31 = OpAccessChain %13 %27 %15
+ %38 = OpConstant %3 f32(0.05)
-%32 = OpLoad %9 %31
+ %39 = OpConstant %3 f32(1.15)
-%33 = OpCompositeExtract %3 %32 0
+ %40 = OpConstant %3 f32(7.5)
-%34 = OpAccessChain %13 %27 %12
+ %41 = OpTypePointer StorageClass(Function) %21
-%35 = OpLoad %9 %34
+%200 = OpTypePointer StorageClass(Function) %18
-%36 = OpCompositeExtract %3 %35 0
+  %5 = OpVariable %4 StorageClass(Input)
-%37 = OpFDiv %3 %33 %36
+ %11 = OpVariable %10 StorageClass(Input)
-%38 = OpAccessChain %13 %27 %15
+ %14 = OpVariable %10 StorageClass(Input)
-%39 = OpLoad %9 %38
+ %20 = OpVariable %19 StorageClass(Output)
-%40 = OpCompositeExtract %3 %39 1
+ %43 = OpFunction %1 FunctionControl(0) %2
-%41 = OpAccessChain %13 %27 %12
+ %44 = OpLabel
-%42 = OpLoad %9 %41
+ %45 = OpVariable %13 StorageClass(Function)
-%43 = OpCompositeExtract %3 %42 1
+ %46 = OpVariable %13 StorageClass(Function)
-%44 = OpFDiv %3 %40 %43
+ %47 = OpVariable %26 StorageClass(Function)
-%45 = OpCompositeConstruct %18 %37 %44 %23 %23
+ %48 = OpVariable %26 StorageClass(Function)
-%46 = OpAccessChain %47 %26 %7
+ %49 = OpVariable %26 StorageClass(Function)
-      OpStore %46 %45
+ %50 = OpVariable %8 StorageClass(Function)
-%48 = OpLoad %21 %26
+ %51 = OpVariable %8 StorageClass(Function)
-%49 = OpCompositeExtract %18 %48 0
+ %52 = OpVariable %8 StorageClass(Function)
-      OpStore %20 %49
+ %53 = OpVariable %26 StorageClass(Function)
-      OpReturn
+ %54 = OpVariable %26 StorageClass(Function)
-      OpFunctionEnd
+ %55 = OpVariable %26 StorageClass(Function)
 %56 = OpVariable %31 StorageClass(Function)
 %57 = OpVariable %31 StorageClass(Function)
 %58 = OpVariable %8 StorageClass(Function)
 %59 = OpVariable %8 StorageClass(Function)
 %60 = OpVariable %41 StorageClass(Function)
 %61 = OpVariable %17 StorageClass(Function)
 %62 = OpAccessChain %8 %61 %7
       OpCopyMemory %62 %5
 %63 = OpAccessChain %13 %61 %12
       OpCopyMemory %63 %11
 %64 = OpAccessChain %13 %61 %15
       OpCopyMemory %64 %14
 %65 = OpAccessChain %13 %61 %15
 %66 = OpLoad %9 %65
 %67 = OpAccessChain %13 %61 %12
 %68 = OpLoad %9 %67
 %69 = OpFDiv %9 %66 %68
 %70 = OpVectorTimesScalar %9 %69 %22
 %71 = OpCompositeConstruct %9 %23 %23
 %72 = OpFSub %9 %70 %71
       OpStore %45 %72
 %73 = OpLoad %9 %45
 %74 = OpCompositeExtract %3 %73 0
 %75 = OpAccessChain %13 %61 %12
 %76 = OpLoad %9 %75
 %77 = OpCompositeExtract %3 %76 0
 %78 = OpAccessChain %13 %61 %12
 %79 = OpLoad %9 %78
 %80 = OpCompositeExtract %3 %79 1
 %81 = OpFDiv %3 %77 %80
 %82 = OpFMul %3 %74 %81
 %83 = OpLoad %9 %45
 %84 = OpCompositeExtract %3 %83 1
 %85 = OpCompositeConstruct %9 %82 %84
       OpStore %46 %85
 %86 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %47 %86
 %87 = OpCompositeConstruct %25 %24 %24 %27
       OpStore %48 %87
 %88 = OpLoad %9 %46
 %89 = OpCompositeExtract %3 %88 0
 %90 = OpLoad %9 %46
 %91 = OpCompositeExtract %3 %90 1
 %92 = OpCompositeConstruct %25 %89 %91 %23
       OpStore %49 %92
       OpStore %50 %24
       OpStore %51 %24
       OpStore %52 %28
 %93 = OpLoad %25 %48
       OpStore %53 %93
 %94 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %54 %94
 %95 = OpAccessChain %8 %61 %7
 %96 = OpLoad %3 %95
 %97 = OpFMul %3 %96 %29
 %98 = OpExtInst %3 GLSLstd450 Sin %97
 %99 = OpFMul %3 %98 %30
 %100 = OpAccessChain %8 %61 %7
 %101 = OpLoad %3 %100
 %102 = OpFMul %3 %101 %29
 %103 = OpExtInst %3 GLSLstd450 Cos %102
 %104 = OpFMul %3 %103 %30
 %105 = OpCompositeConstruct %25 %24 %99 %104
       OpStore %55 %105
       OpStore %56 %7
       OpStore %57 %32
       OpBranch %106
 %106 = OpLabel
 %110 = OpLoad %6 %56
 %111 = OpLoad %6 %57
 %112 = OpSLessThan %33 %110 %111
       OpLoopMerge %108 %109 LoopControl(0)
       OpBranchConditional %112 %107 %108
 %107 = OpLabel
 %113 = OpLoad %25 %48
 %114 = OpLoad %25 %49
 %115 = OpLoad %3 %50
 %116 = OpVectorTimesScalar %25 %114 %115
 %117 = OpFAdd %25 %113 %116
       OpStore %53 %117
 %118 = OpLoad %25 %54
 %119 = OpLoad %25 %53
 %120 = OpFSub %25 %118 %119
 %121 = OpExtInst %3 GLSLstd450 Length %120
 %122 = OpFSub %3 %121 %23
       OpStore %51 %122
 %123 = OpLoad %3 %50
 %124 = OpLoad %3 %51
 %125 = OpFAdd %3 %123 %124
       OpStore %50 %125
 %129 = OpLoad %3 %52
 %130 = OpFOrdEqual %33 %129 %28
 %131 = OpLoad %3 %51
 %132 = OpLoad %3 %52
 %133 = OpFOrdLessThan %33 %131 %132
 %134 = OpLogicalOr %33 %130 %133
       OpSelectionMerge %126 SelectionControl(0)
       OpBranchConditional %134 %127 %128
 %127 = OpLabel
 %135 = OpLoad %3 %51
       OpStore %52 %135
       OpBranch %126
 %128 = OpLabel
       OpBranch %126
 %126 = OpLabel
 %139 = OpLoad %3 %51
 %140 = OpFOrdLessThanEqual %33 %139 %34
       OpSelectionMerge %136 SelectionControl(0)
       OpBranchConditional %140 %137 %138
 %137 = OpLabel
 %141 = OpLoad %25 %54
 %142 = OpLoad %25 %53
 %143 = OpFSub %25 %141 %142
 %144 = OpExtInst %25 GLSLstd450 Normalize %143
 %145 = OpLoad %25 %53
 %146 = OpLoad %25 %55
 %147 = OpFSub %25 %145 %146
 %148 = OpExtInst %25 GLSLstd450 Normalize %147
 %149 = OpDot %3 %144 %148
 %150 = OpFSub %3 %149 %35
 %151 = OpExtInst %3 GLSLstd450 FMax %150 %24
       OpStore %58 %151
 %152 = OpLoad %3 %58
 %153 = OpLoad %3 %58
 %154 = OpLoad %3 %58
 %155 = OpCompositeConstruct %25 %152 %153 %154
       OpStore %47 %155
       OpBranch %108
 %138 = OpLabel
       OpBranch %136
 %136 = OpLabel
 %159 = OpLoad %3 %51
 %160 = OpFOrdGreaterThanEqual %33 %159 %36
       OpSelectionMerge %156 SelectionControl(0)
       OpBranchConditional %160 %157 %158
 %157 = OpLabel
 %164 = OpLoad %25 %49
 %165 = OpExtInst %25 GLSLstd450 Normalize %164
 %166 = OpLoad %25 %55
 %167 = OpLoad %25 %48
 %168 = OpFSub %25 %166 %167
 %169 = OpExtInst %25 GLSLstd450 Normalize %168
 %170 = OpDot %3 %165 %169
 %171 = OpFOrdLessThanEqual %33 %170 %23
       OpSelectionMerge %161 SelectionControl(0)
       OpBranchConditional %171 %162 %163
 %162 = OpLabel
 %172 = OpLoad %25 %49
 %173 = OpExtInst %25 GLSLstd450 Normalize %172
 %174 = OpLoad %25 %55
 %175 = OpLoad %25 %48
 %176 = OpFSub %25 %174 %175
 %177 = OpExtInst %25 GLSLstd450 Normalize %176
 %178 = OpDot %3 %173 %177
 %179 = OpFAdd %3 %178 %37
 %180 = OpExtInst %3 GLSLstd450 FMax %179 %38
 %181 = OpFDiv %3 %180 %39
 %182 = OpLoad %3 %52
 %183 = OpFMul %3 %182 %40
 %184 = OpFSub %3 %23 %183
 %185 = OpFMul %3 %181 %184
       OpStore %59 %185
 %186 = OpLoad %3 %59
 %187 = OpLoad %3 %59
 %188 = OpLoad %3 %59
 %189 = OpCompositeConstruct %25 %186 %187 %188
       OpStore %47 %189
       OpBranch %161
 %163 = OpLabel
       OpBranch %161
 %161 = OpLabel
       OpBranch %108
 %158 = OpLabel
       OpBranch %156
 %156 = OpLabel
 %190 = OpLoad %6 %56
 %191 = OpIAdd %6 %190 %12
       OpStore %56 %191
       OpBranch %109
 %109 = OpLabel
       OpBranch %106
 %108 = OpLabel
 %192 = OpLoad %25 %47
 %193 = OpCompositeExtract %3 %192 0
 %194 = OpLoad %25 %47
 %195 = OpCompositeExtract %3 %194 1
 %196 = OpLoad %25 %47
 %197 = OpCompositeExtract %3 %196 2
 %198 = OpCompositeConstruct %18 %193 %195 %197 %23
 %199 = OpAccessChain %200 %60 %7
       OpStore %199 %198
 %201 = OpLoad %21 %60
 %202 = OpCompositeExtract %18 %201 0
       OpStore %20 %202
       OpReturn
       OpFunctionEnd
@@ -62,8 +62,8 @@ fn main(input: FragIn) -> FragOut
        }
    }
-    if (col == vec3[f32](0.0, 0.0, 0.0))
+   //if (col == vec3[f32](0.0, 0.0, 0.0))
-        discard;
+   //    discard;
    let output: FragOut;
    output.color = vec4[f32](col.x, col.y, col.z, 1.0);
@@ -1,12 +1,12 @@
 Version 1.0
 Generator: 2560130
-Bound: 210
+Bound: 203
 Schema: 0
       OpCapability Capability(Shader)
- %43 = OpExtInstImport "GLSL.std.450"
+ %42 = OpExtInstImport "GLSL.std.450"
       OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450)
-       OpEntryPoint ExecutionModel(Fragment) %44 "main" %5 %11 %14 %20
+       OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20
-       OpExecutionMode %44 ExecutionMode(OriginUpperLeft)
+       OpExecutionMode %43 ExecutionMode(OriginUpperLeft)
       OpSource SourceLanguage(NZSL) 4198400
       OpSourceExtension "Version: 1.1"
       OpName %16 "FragIn"
@@ -19,7 +19,7 @@ Schema: 0
       OpName %11 "res"
       OpName %14 "pos"
       OpName %20 "color"
-       OpName %44 "main"
+       OpName %43 "main"
       OpDecorate %5 Decoration(Location) 0
       OpDecorate %11 Decoration(Location) 1
       OpDecorate %14 Decoration(Location) 2
@@ -64,229 +64,218 @@ Schema: 0
 %38 = OpConstant %3 f32(0.05)
 %39 = OpConstant %3 f32(1.15)
 %40 = OpConstant %3 f32(7.5)
- %41 = OpTypeVector %33 3
+ %41 = OpTypePointer StorageClass(Function) %21
- %42 = OpTypePointer StorageClass(Function) %21
+%200 = OpTypePointer StorageClass(Function) %18
 %207 = OpTypePointer StorageClass(Function) %18
  %5 = OpVariable %4 StorageClass(Input)
 %11 = OpVariable %10 StorageClass(Input)
 %14 = OpVariable %10 StorageClass(Input)
 %20 = OpVariable %19 StorageClass(Output)
- %44 = OpFunction %1 FunctionControl(0) %2
+ %43 = OpFunction %1 FunctionControl(0) %2
- %45 = OpLabel
+ %44 = OpLabel
 %45 = OpVariable %13 StorageClass(Function)
 %46 = OpVariable %13 StorageClass(Function)
- %47 = OpVariable %13 StorageClass(Function)
+ %47 = OpVariable %26 StorageClass(Function)
 %48 = OpVariable %26 StorageClass(Function)
 %49 = OpVariable %26 StorageClass(Function)
- %50 = OpVariable %26 StorageClass(Function)
+ %50 = OpVariable %8 StorageClass(Function)
 %51 = OpVariable %8 StorageClass(Function)
 %52 = OpVariable %8 StorageClass(Function)
- %53 = OpVariable %8 StorageClass(Function)
+ %53 = OpVariable %26 StorageClass(Function)
 %54 = OpVariable %26 StorageClass(Function)
 %55 = OpVariable %26 StorageClass(Function)
- %56 = OpVariable %26 StorageClass(Function)
+ %56 = OpVariable %31 StorageClass(Function)
 %57 = OpVariable %31 StorageClass(Function)
- %58 = OpVariable %31 StorageClass(Function)
+ %58 = OpVariable %8 StorageClass(Function)
 %59 = OpVariable %8 StorageClass(Function)
- %60 = OpVariable %8 StorageClass(Function)
+ %60 = OpVariable %41 StorageClass(Function)
- %61 = OpVariable %42 StorageClass(Function)
+ %61 = OpVariable %17 StorageClass(Function)
- %62 = OpVariable %17 StorageClass(Function)
+ %62 = OpAccessChain %8 %61 %7
- %63 = OpAccessChain %8 %62 %7
+       OpCopyMemory %62 %5
-       OpCopyMemory %63 %5
+ %63 = OpAccessChain %13 %61 %12
- %64 = OpAccessChain %13 %62 %12
+       OpCopyMemory %63 %11
-       OpCopyMemory %64 %11
+ %64 = OpAccessChain %13 %61 %15
- %65 = OpAccessChain %13 %62 %15
+       OpCopyMemory %64 %14
-       OpCopyMemory %65 %14
+ %65 = OpAccessChain %13 %61 %15
- %66 = OpAccessChain %13 %62 %15
+ %66 = OpLoad %9 %65
- %67 = OpLoad %9 %66
+ %67 = OpAccessChain %13 %61 %12
- %68 = OpAccessChain %13 %62 %12
+ %68 = OpLoad %9 %67
- %69 = OpLoad %9 %68
+ %69 = OpFDiv %9 %66 %68
- %70 = OpFDiv %9 %67 %69
+ %70 = OpVectorTimesScalar %9 %69 %22
- %71 = OpVectorTimesScalar %9 %70 %22
+ %71 = OpCompositeConstruct %9 %23 %23
- %72 = OpCompositeConstruct %9 %23 %23
+ %72 = OpFSub %9 %70 %71
- %73 = OpFSub %9 %71 %72
+       OpStore %45 %72
-       OpStore %46 %73
+ %73 = OpLoad %9 %45
- %74 = OpLoad %9 %46
+ %74 = OpCompositeExtract %3 %73 0
- %75 = OpCompositeExtract %3 %74 0
+ %75 = OpAccessChain %13 %61 %12
- %76 = OpAccessChain %13 %62 %12
+ %76 = OpLoad %9 %75
- %77 = OpLoad %9 %76
+ %77 = OpCompositeExtract %3 %76 0
- %78 = OpCompositeExtract %3 %77 0
+ %78 = OpAccessChain %13 %61 %12
- %79 = OpAccessChain %13 %62 %12
+ %79 = OpLoad %9 %78
- %80 = OpLoad %9 %79
+ %80 = OpCompositeExtract %3 %79 1
- %81 = OpCompositeExtract %3 %80 1
+ %81 = OpFDiv %3 %77 %80
- %82 = OpFDiv %3 %78 %81
+ %82 = OpFMul %3 %74 %81
- %83 = OpFMul %3 %75 %82
+ %83 = OpLoad %9 %45
- %84 = OpLoad %9 %46
+ %84 = OpCompositeExtract %3 %83 1
- %85 = OpCompositeExtract %3 %84 1
+ %85 = OpCompositeConstruct %9 %82 %84
- %86 = OpCompositeConstruct %9 %83 %85
+       OpStore %46 %85
 %86 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %47 %86
- %87 = OpCompositeConstruct %25 %24 %24 %24
+ %87 = OpCompositeConstruct %25 %24 %24 %27
       OpStore %48 %87
- %88 = OpCompositeConstruct %25 %24 %24 %27
+ %88 = OpLoad %9 %46
-       OpStore %49 %88
+ %89 = OpCompositeExtract %3 %88 0
- %89 = OpLoad %9 %47
+ %90 = OpLoad %9 %46
- %90 = OpCompositeExtract %3 %89 0
+ %91 = OpCompositeExtract %3 %90 1
- %91 = OpLoad %9 %47
+ %92 = OpCompositeConstruct %25 %89 %91 %23
- %92 = OpCompositeExtract %3 %91 1
+       OpStore %49 %92
- %93 = OpCompositeConstruct %25 %90 %92 %23
+       OpStore %50 %24
       OpStore %50 %93
       OpStore %51 %24
-       OpStore %52 %24
+       OpStore %52 %28
-       OpStore %53 %28
+ %93 = OpLoad %25 %48
- %94 = OpLoad %25 %49
+       OpStore %53 %93
 %94 = OpCompositeConstruct %25 %24 %24 %24
       OpStore %54 %94
- %95 = OpCompositeConstruct %25 %24 %24 %24
+ %95 = OpAccessChain %8 %61 %7
-       OpStore %55 %95
+ %96 = OpLoad %3 %95
- %96 = OpAccessChain %8 %62 %7
+ %97 = OpFMul %3 %96 %29
- %97 = OpLoad %3 %96
+ %98 = OpExtInst %3 GLSLstd450 Sin %97
- %98 = OpFMul %3 %97 %29
+ %99 = OpFMul %3 %98 %30
- %99 = OpExtInst %3 GLSLstd450 Sin %98
+%100 = OpAccessChain %8 %61 %7
-%100 = OpFMul %3 %99 %30
+%101 = OpLoad %3 %100
-%101 = OpAccessChain %8 %62 %7
+%102 = OpFMul %3 %101 %29
-%102 = OpLoad %3 %101
+%103 = OpExtInst %3 GLSLstd450 Cos %102
-%103 = OpFMul %3 %102 %29
+%104 = OpFMul %3 %103 %30
-%104 = OpExtInst %3 GLSLstd450 Cos %103
+%105 = OpCompositeConstruct %25 %24 %99 %104
-%105 = OpFMul %3 %104 %30
+       OpStore %55 %105
-%106 = OpCompositeConstruct %25 %24 %100 %105
+       OpStore %56 %7
-       OpStore %56 %106
+       OpStore %57 %32
-       OpStore %57 %7
+       OpBranch %106
-       OpStore %58 %32
+%106 = OpLabel
-       OpBranch %107
+%110 = OpLoad %6 %56
 %107 = OpLabel
 %111 = OpLoad %6 %57
-%112 = OpLoad %6 %58
+%112 = OpSLessThan %33 %110 %111
-%113 = OpSLessThan %33 %111 %112
+       OpLoopMerge %108 %109 LoopControl(0)
-       OpLoopMerge %109 %110 LoopControl(0)
+       OpBranchConditional %112 %107 %108
-       OpBranchConditional %113 %108 %109
+%107 = OpLabel
-%108 = OpLabel
+%113 = OpLoad %25 %48
 %114 = OpLoad %25 %49
-%115 = OpLoad %25 %50
+%115 = OpLoad %3 %50
-%116 = OpLoad %3 %51
+%116 = OpVectorTimesScalar %25 %114 %115
-%117 = OpVectorTimesScalar %25 %115 %116
+%117 = OpFAdd %25 %113 %116
-%118 = OpFAdd %25 %114 %117
+       OpStore %53 %117
-       OpStore %54 %118
+%118 = OpLoad %25 %54
-%119 = OpLoad %25 %55
+%119 = OpLoad %25 %53
-%120 = OpLoad %25 %54
+%120 = OpFSub %25 %118 %119
-%121 = OpFSub %25 %119 %120
+%121 = OpExtInst %3 GLSLstd450 Length %120
-%122 = OpExtInst %3 GLSLstd450 Length %121
+%122 = OpFSub %3 %121 %23
-%123 = OpFSub %3 %122 %23
+       OpStore %51 %122
-       OpStore %52 %123
+%123 = OpLoad %3 %50
 %124 = OpLoad %3 %51
-%125 = OpLoad %3 %52
+%125 = OpFAdd %3 %123 %124
-%126 = OpFAdd %3 %124 %125
+       OpStore %50 %125
-       OpStore %51 %126
+%129 = OpLoad %3 %52
-%130 = OpLoad %3 %53
+%130 = OpFOrdEqual %33 %129 %28
-%131 = OpFOrdEqual %33 %130 %28
+%131 = OpLoad %3 %51
 %132 = OpLoad %3 %52
-%133 = OpLoad %3 %53
+%133 = OpFOrdLessThan %33 %131 %132
-%134 = OpFOrdLessThan %33 %132 %133
+%134 = OpLogicalOr %33 %130 %133
-%135 = OpLogicalOr %33 %131 %134
+       OpSelectionMerge %126 SelectionControl(0)
-       OpSelectionMerge %127 SelectionControl(0)
+       OpBranchConditional %134 %127 %128
       OpBranchConditional %135 %128 %129
 %128 = OpLabel
 %136 = OpLoad %3 %52
       OpStore %53 %136
       OpBranch %127
 %129 = OpLabel
       OpBranch %127
 %127 = OpLabel
-%140 = OpLoad %3 %52
+%135 = OpLoad %3 %51
-%141 = OpFOrdLessThanEqual %33 %140 %34
+       OpStore %52 %135
-       OpSelectionMerge %137 SelectionControl(0)
+       OpBranch %126
-       OpBranchConditional %141 %138 %139
+%128 = OpLabel
-%138 = OpLabel
+       OpBranch %126
-%142 = OpLoad %25 %55
+%126 = OpLabel
-%143 = OpLoad %25 %54
+%139 = OpLoad %3 %51
-%144 = OpFSub %25 %142 %143
+%140 = OpFOrdLessThanEqual %33 %139 %34
-%145 = OpExtInst %25 GLSLstd450 Normalize %144
+       OpSelectionMerge %136 SelectionControl(0)
-%146 = OpLoad %25 %54
+       OpBranchConditional %140 %137 %138
 %147 = OpLoad %25 %56
 %148 = OpFSub %25 %146 %147
 %149 = OpExtInst %25 GLSLstd450 Normalize %148
 %150 = OpDot %3 %145 %149
 %151 = OpFSub %3 %150 %35
 %152 = OpExtInst %3 GLSLstd450 FMax %151 %24
       OpStore %59 %152
 %153 = OpLoad %3 %59
 %154 = OpLoad %3 %59
 %155 = OpLoad %3 %59
 %156 = OpCompositeConstruct %25 %153 %154 %155
       OpStore %48 %156
       OpBranch %109
 %139 = OpLabel
       OpBranch %137
 %137 = OpLabel
-%160 = OpLoad %3 %52
+%141 = OpLoad %25 %54
-%161 = OpFOrdGreaterThanEqual %33 %160 %36
+%142 = OpLoad %25 %53
-       OpSelectionMerge %157 SelectionControl(0)
+%143 = OpFSub %25 %141 %142
-       OpBranchConditional %161 %158 %159
+%144 = OpExtInst %25 GLSLstd450 Normalize %143
-%158 = OpLabel
+%145 = OpLoad %25 %53
-%165 = OpLoad %25 %50
+%146 = OpLoad %25 %55
-%166 = OpExtInst %25 GLSLstd450 Normalize %165
+%147 = OpFSub %25 %145 %146
-%167 = OpLoad %25 %56
+%148 = OpExtInst %25 GLSLstd450 Normalize %147
-%168 = OpLoad %25 %49
+%149 = OpDot %3 %144 %148
-%169 = OpFSub %25 %167 %168
+%150 = OpFSub %3 %149 %35
-%170 = OpExtInst %25 GLSLstd450 Normalize %169
+%151 = OpExtInst %3 GLSLstd450 FMax %150 %24
-%171 = OpDot %3 %166 %170
+       OpStore %58 %151
-%172 = OpFOrdLessThanEqual %33 %171 %23
+%152 = OpLoad %3 %58
-       OpSelectionMerge %162 SelectionControl(0)
+%153 = OpLoad %3 %58
-       OpBranchConditional %172 %163 %164
+%154 = OpLoad %3 %58
-%163 = OpLabel
+%155 = OpCompositeConstruct %25 %152 %153 %154
-%173 = OpLoad %25 %50
+       OpStore %47 %155
-%174 = OpExtInst %25 GLSLstd450 Normalize %173
+       OpBranch %108
-%175 = OpLoad %25 %56
+%138 = OpLabel
-%176 = OpLoad %25 %49
+       OpBranch %136
-%177 = OpFSub %25 %175 %176
+%136 = OpLabel
-%178 = OpExtInst %25 GLSLstd450 Normalize %177
+%159 = OpLoad %3 %51
-%179 = OpDot %3 %174 %178
+%160 = OpFOrdGreaterThanEqual %33 %159 %36
-%180 = OpFAdd %3 %179 %37
+       OpSelectionMerge %156 SelectionControl(0)
-%181 = OpExtInst %3 GLSLstd450 FMax %180 %38
+       OpBranchConditional %160 %157 %158
 %182 = OpFDiv %3 %181 %39
 %183 = OpLoad %3 %53
 %184 = OpFMul %3 %183 %40
 %185 = OpFSub %3 %23 %184
 %186 = OpFMul %3 %182 %185
       OpStore %60 %186
 %187 = OpLoad %3 %60
 %188 = OpLoad %3 %60
 %189 = OpLoad %3 %60
 %190 = OpCompositeConstruct %25 %187 %188 %189
       OpStore %48 %190
       OpBranch %162
 %164 = OpLabel
       OpBranch %162
 %162 = OpLabel
       OpBranch %109
 %159 = OpLabel
       OpBranch %157
 %157 = OpLabel
-%191 = OpLoad %6 %57
+%164 = OpLoad %25 %49
-%192 = OpIAdd %6 %191 %12
+%165 = OpExtInst %25 GLSLstd450 Normalize %164
-       OpStore %57 %192
+%166 = OpLoad %25 %55
-       OpBranch %110
+%167 = OpLoad %25 %48
-%110 = OpLabel
+%168 = OpFSub %25 %166 %167
-       OpBranch %107
+%169 = OpExtInst %25 GLSLstd450 Normalize %168
 %170 = OpDot %3 %165 %169
 %171 = OpFOrdLessThanEqual %33 %170 %23
       OpSelectionMerge %161 SelectionControl(0)
       OpBranchConditional %171 %162 %163
 %162 = OpLabel
 %172 = OpLoad %25 %49
 %173 = OpExtInst %25 GLSLstd450 Normalize %172
 %174 = OpLoad %25 %55
 %175 = OpLoad %25 %48
 %176 = OpFSub %25 %174 %175
 %177 = OpExtInst %25 GLSLstd450 Normalize %176
 %178 = OpDot %3 %173 %177
 %179 = OpFAdd %3 %178 %37
 %180 = OpExtInst %3 GLSLstd450 FMax %179 %38
 %181 = OpFDiv %3 %180 %39
 %182 = OpLoad %3 %52
 %183 = OpFMul %3 %182 %40
 %184 = OpFSub %3 %23 %183
 %185 = OpFMul %3 %181 %184
       OpStore %59 %185
 %186 = OpLoad %3 %59
 %187 = OpLoad %3 %59
 %188 = OpLoad %3 %59
 %189 = OpCompositeConstruct %25 %186 %187 %188
       OpStore %47 %189
       OpBranch %161
 %163 = OpLabel
       OpBranch %161
 %161 = OpLabel
       OpBranch %108
 %158 = OpLabel
       OpBranch %156
 %156 = OpLabel
 %190 = OpLoad %6 %56
 %191 = OpIAdd %6 %190 %12
       OpStore %56 %191
       OpBranch %109
 %109 = OpLabel
-%196 = OpLoad %25 %48
+       OpBranch %106
-%197 = OpCompositeConstruct %25 %24 %24 %24
+%108 = OpLabel
-%198 = OpFOrdEqual %41 %196 %197
+%192 = OpLoad %25 %47
-       OpSelectionMerge %193 SelectionControl(0)
+%193 = OpCompositeExtract %3 %192 0
-       OpBranchConditional %198 %194 %195
+%194 = OpLoad %25 %47
-%194 = OpLabel
+%195 = OpCompositeExtract %3 %194 1
-       OpKill
+%196 = OpLoad %25 %47
-%195 = OpLabel
+%197 = OpCompositeExtract %3 %196 2
-       OpBranch %193
+%198 = OpCompositeConstruct %18 %193 %195 %197 %23
-%193 = OpLabel
+%199 = OpAccessChain %200 %60 %7
-%199 = OpLoad %25 %48
+       OpStore %199 %198
-%200 = OpCompositeExtract %3 %199 0
+%201 = OpLoad %21 %60
-%201 = OpLoad %25 %48
+%202 = OpCompositeExtract %18 %201 0
-%202 = OpCompositeExtract %3 %201 1
+       OpStore %20 %202
 %203 = OpLoad %25 %48
 %204 = OpCompositeExtract %3 %203 2
 %205 = OpCompositeConstruct %18 %200 %202 %204 %23
 %206 = OpAccessChain %207 %61 %7
       OpStore %206 %205
 %208 = OpLoad %21 %61
 %209 = OpCompositeExtract %18 %208 0
       OpStore %20 %209
       OpReturn
       OpFunctionEnd
@@ -0,0 +1,91 @@
 //! A jam file of translated GLSL std450 header's enums and utils
 /// Version number carried over from the translated GLSL std450 header.
 pub const GLSLstd450Version: u32 = 100;
 /// Revision number carried over from the translated GLSL std450 header.
 pub const GLSLstd450Revision: u32 = 3;
 /// Instruction numbers of the GLSL std450 extended instruction set.
 /// The explicit `u32` values are contiguous from 0 (`Bad`) to 81 (`NClamp`),
 /// which lets an instruction number be used directly as a table index.
 pub const GLSLOp = enum(u32) {
    Bad = 0,
    // Rounding, absolute value and sign.
    Round = 1,
    RoundEven = 2,
    Trunc = 3,
    FAbs = 4,
    SAbs = 5,
    FSign = 6,
    SSign = 7,
    Floor = 8,
    Ceil = 9,
    Fract = 10,
    // Angle conversion and trigonometry.
    Radians = 11,
    Degrees = 12,
    Sin = 13,
    Cos = 14,
    Tan = 15,
    Asin = 16,
    Acos = 17,
    Atan = 18,
    Sinh = 19,
    Cosh = 20,
    Tanh = 21,
    Asinh = 22,
    Acosh = 23,
    Atanh = 24,
    Atan2 = 25,
    // Exponentials, logarithms and roots.
    Pow = 26,
    Exp = 27,
    Log = 28,
    Exp2 = 29,
    Log2 = 30,
    Sqrt = 31,
    InverseSqrt = 32,
    // Matrix helpers.
    Determinant = 33,
    MatrixInverse = 34,
    Modf = 35,
    ModfStruct = 36,
    // Min / max / clamp; the F, U and S prefixes are separate enumerants.
    FMin = 37,
    UMin = 38,
    SMin = 39,
    FMax = 40,
    UMax = 41,
    SMax = 42,
    FClamp = 43,
    UClamp = 44,
    SClamp = 45,
    FMix = 46,
    IMix = 47,
    Step = 48,
    SmoothStep = 49,
    Fma = 50,
    Frexp = 51,
    FrexpStruct = 52,
    Ldexp = 53,
    // Packing and unpacking.
    PackSnorm4x8 = 54,
    PackUnorm4x8 = 55,
    PackSnorm2x16 = 56,
    PackUnorm2x16 = 57,
    PackHalf2x16 = 58,
    PackDouble2x32 = 59,
    UnpackSnorm2x16 = 60,
    UnpackUnorm2x16 = 61,
    UnpackHalf2x16 = 62,
    UnpackSnorm4x8 = 63,
    UnpackUnorm4x8 = 64,
    UnpackDouble2x32 = 65,
    // Geometric helpers.
    Length = 66,
    Distance = 67,
    Cross = 68,
    Normalize = 69,
    FaceForward = 70,
    Reflect = 71,
    Refract = 72,
    // Bit scanning.
    FindILsb = 73,
    FindSMsb = 74,
    FindUMsb = 75,
    // Interpolation.
    InterpolateAtCentroid = 76,
    InterpolateAtSample = 77,
    InterpolateAtOffset = 78,
    NMin = 79,
    NMax = 80,
    NClamp = 81,
 };
 /// One past the largest `GLSLOp` value (81), i.e. the number of slots a table
 /// indexed by `@intFromEnum(GLSLOp)` needs. Despite the name this is a count,
 /// not the maximum enumerant itself.
 pub const GLSLOpMaxValue: usize = 82;
@@ -0,0 +1,312 @@
 const std = @import("std");
 const spv = @import("../spv.zig");
 const ext = @import("GLSL_std_450.zig");
 const opc = @import("../opcodes.zig");
 const Module = @import("../Module.zig");
 const Runtime = @import("../Runtime.zig");
 const Result = @import("../Result.zig");
 const WordIterator = @import("../WordIterator.zig");
 const RuntimeError = Runtime.RuntimeError;
 const ValueType = opc.ValueType;
 const getValuePrimitiveField = opc.getValuePrimitiveField;
 const getValuePrimitiveFieldType = opc.getValuePrimitiveFieldType;
 const SpvVoid = spv.SpvVoid;
 const SpvByte = spv.SpvByte;
 const SpvWord = spv.SpvWord;
 const SpvBool = spv.SpvBool;
 /// Comptime selector naming the operation a `MathEngine` instantiation performs.
 /// In this file `MathEngine` only implements `.Sin` and `.Cos` (single operand)
 /// and `.FMax` (two operands); every other tag makes the handler return
 /// `RuntimeError.InvalidSpirV`.
 const MathOp = enum {
    Acos,
    Acosh,
    Asin,
    Asinh,
    Atan,
    Atan2,
    Atanh,
    Ceil,
    Cos,
    Cosh,
    Determinant,
    Exp,
    Exp2,
    FAbs,
    FClamp,
    FMax,
    FMin,
    FMix,
    FSign,
    Floor,
    Fract,
    IMix,
    InverseSqrt,
    Log,
    Log2,
    Modf,
    Pow,
    Round,
    RoundEven,
    SAbs,
    SClamp,
    SMax,
    SMin,
    SSign,
    Sin,
    Sinh,
    Sqrt,
    Tan,
    Tanh,
    Trunc,
    UClamp,
    UMax,
    UMin,
 };
 /// Handler type for extended instructions, re-exported from `opcodes.zig`.
 pub const OpCodeExtFunc = opc.OpCodeExtFunc;
 /// Handler table indexed by `@intFromEnum(ext.GLSLOp)`, sized by `ext.GLSLOpMaxValue`.
 /// Every slot starts as `null`; `initRuntimeDispatcher` fills in the implemented ones.
 /// Not an EnumMap as it is way too slow for this purpose
 pub var runtime_dispatcher = [_]?OpCodeExtFunc{null} ** ext.GLSLOpMaxValue;
 /// Installs the GLSL.std.450 handlers implemented in this file into
 /// `runtime_dispatcher`. Slots of instructions without a handler are left as-is.
 pub fn initRuntimeDispatcher() void {
    const Binding = struct {
        op: ext.GLSLOp,
        handler: OpCodeExtFunc,
    };

    // zig fmt: off
    const bindings = [_]Binding{
        .{ .op = .Cos,       .handler = MathEngine(.Float, .Cos).opSingleOperator },
        .{ .op = .FMax,      .handler = MathEngine(.Float, .FMax).opDoubleOperators },
        .{ .op = .Length,    .handler = opLength },
        .{ .op = .Normalize, .handler = opNormalize },
        .{ .op = .Sin,       .handler = MathEngine(.Float, .Sin).opSingleOperator },
    };
    // zig fmt: on

    for (bindings) |binding| {
        runtime_dispatcher[@intFromEnum(binding.op)] = binding.handler;
    }
 }
 /// Builds the handler namespace for one GLSL.std.450 math instruction.
 /// `T` selects which primitive field of a `Result.Value` is read and written;
 /// `Op` selects the operation and is resolved at compile time.
 fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
    return struct {
        /// Handler for one-operand instructions: reads the next word of `rt.it`
        /// as the operand id and writes `Op(operand)` into result `id`.
        /// Returns `InvalidSpirV` for an unsupported `Op`, lane width or value layout.
        fn opSingleOperator(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const result_type = (try rt.results[target_type_id].getVariant()).Type;
            const result = try rt.results[id].getValue();
            const operand = try rt.results[try rt.it.next()].getValue();
            const width = try Result.resolveLaneBitWidth(result_type, rt);

            const unary = struct {
                // `Op` is comptime, so only the selected arm is ever compiled in.
                fn eval(comptime V: type, x: V) RuntimeError!V {
                    return switch (Op) {
                        .Cos => @cos(x),
                        .Sin => @sin(x),
                        else => RuntimeError.InvalidSpirV,
                    };
                }

                // Applies `eval` to the primitive field matching (`T`, `bit_count`).
                fn scalar(bit_count: SpvWord, out: *Result.Value, in_value: *const Result.Value) RuntimeError!void {
                    switch (bit_count) {
                        inline 8, 16, 32, 64 => |bits| {
                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
                            const Scalar = getValuePrimitiveFieldType(T, bits);
                            const out_ptr = try getValuePrimitiveField(T, bits, out);
                            const in_ptr = try getValuePrimitiveField(T, bits, @constCast(in_value));
                            out_ptr.* = try eval(Scalar, in_ptr.*);
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
            };

            switch (result.*) {
                .Int, .Float => try unary.scalar(width, result, operand),
                .Vector => |lanes| for (lanes, operand.Vector) |*out_lane, *in_lane| {
                    try unary.scalar(width, out_lane, in_lane);
                },
                // f32 SIMD layouts are evaluated in a single vector operation.
                .Vector4f32 => |*vec| vec.* = try unary.eval(@Vector(4, f32), operand.Vector4f32),
                .Vector3f32 => |*vec| vec.* = try unary.eval(@Vector(3, f32), operand.Vector3f32),
                .Vector2f32 => |*vec| vec.* = try unary.eval(@Vector(2, f32), operand.Vector2f32),
                // Integer SIMD layouts (Vector{2,3,4}{i,u}32) fall through to the
                // error below; the unary builtins used here only accept floats.
                else => return RuntimeError.InvalidSpirV,
            }
        }

        /// Handler for two-operand instructions: reads the next two words of
        /// `rt.it` as operand ids and writes `Op(lhs, rhs)` into result `id`.
        /// Returns `InvalidSpirV` for an unsupported `Op`, lane width or value layout.
        fn opDoubleOperators(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const result_type = (try rt.results[target_type_id].getVariant()).Type;
            const result = try rt.results[id].getValue();
            const left = try rt.results[try rt.it.next()].getValue();
            const right = try rt.results[try rt.it.next()].getValue();
            const width = try Result.resolveLaneBitWidth(result_type, rt);

            const binary = struct {
                // `Op` is comptime, so only the selected arm is ever compiled in.
                fn eval(comptime V: type, a: V, b: V) RuntimeError!V {
                    return switch (Op) {
                        .FMax => @max(a, b),
                        else => RuntimeError.InvalidSpirV,
                    };
                }

                // Applies `eval` to the primitive field matching (`T`, `bit_count`).
                fn scalar(bit_count: SpvWord, out: *Result.Value, a_value: *const Result.Value, b_value: *const Result.Value) RuntimeError!void {
                    switch (bit_count) {
                        inline 8, 16, 32, 64 => |bits| {
                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
                            const Scalar = getValuePrimitiveFieldType(T, bits);
                            const out_ptr = try getValuePrimitiveField(T, bits, out);
                            const a_ptr = try getValuePrimitiveField(T, bits, @constCast(a_value));
                            const b_ptr = try getValuePrimitiveField(T, bits, @constCast(b_value));
                            out_ptr.* = try eval(Scalar, a_ptr.*, b_ptr.*);
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }

                // Lane-by-lane evaluation over a fixed-size SIMD vector.
                inline fn simd(comptime Elem: type, comptime N: usize, out: *@Vector(N, Elem), a_vec: *const @Vector(N, Elem), b_vec: *const @Vector(N, Elem)) RuntimeError!void {
                    inline for (0..N) |lane| {
                        out[lane] = try eval(Elem, a_vec[lane], b_vec[lane]);
                    }
                }
            };

            switch (result.*) {
                .Int, .Float => try binary.scalar(width, result, left, right),
                .Vector => |lanes| for (lanes, left.Vector, right.Vector) |*out_lane, *l_lane, *r_lane| {
                    try binary.scalar(width, out_lane, l_lane, r_lane);
                },
                .Vector4f32 => |*vec| try binary.simd(f32, 4, vec, &left.Vector4f32, &right.Vector4f32),
                .Vector3f32 => |*vec| try binary.simd(f32, 3, vec, &left.Vector3f32, &right.Vector3f32),
                .Vector2f32 => |*vec| try binary.simd(f32, 2, vec, &left.Vector2f32, &right.Vector2f32),
                .Vector4i32 => |*vec| try binary.simd(i32, 4, vec, &left.Vector4i32, &right.Vector4i32),
                .Vector3i32 => |*vec| try binary.simd(i32, 3, vec, &left.Vector3i32, &right.Vector3i32),
                .Vector2i32 => |*vec| try binary.simd(i32, 2, vec, &left.Vector2i32, &right.Vector2i32),
                .Vector4u32 => |*vec| try binary.simd(u32, 4, vec, &left.Vector4u32, &right.Vector4u32),
                .Vector3u32 => |*vec| try binary.simd(u32, 3, vec, &left.Vector3u32, &right.Vector3u32),
                .Vector2u32 => |*vec| try binary.simd(u32, 2, vec, &left.Vector2u32, &right.Vector2u32),
                else => return RuntimeError.InvalidSpirV,
            }
        }
    };
 }
 /// Adds every component of `v` to `d.*`, first lane to last.
 /// `d.*` is accumulated into rather than reset, so the caller picks the starting value.
 inline fn sumSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void {
    comptime var lane: usize = 0;
    inline while (lane < N) : (lane += 1) {
        d.* += v[lane];
    }
 }
 /// GLSL.std.450 `Length`: writes the Euclidean length of the operand,
 /// sqrt(x[0]^2 + x[1]^2 + ...), into result `id`.
 ///
 /// The operand id is read from the next word of `rt.it`. The lane width is
 /// resolved from the result type (`target_type_id`).
 /// Returns `InvalidSpirV` when the lane width is not 16/32/64 or when the
 /// operand layout is not a float scalar/vector of that width.
 fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
    const target_type = (try rt.results[target_type_id].getVariant()).Type;
    const dst = try rt.results[id].getValue();
    const src = try rt.results[try rt.it.next()].getValue();
    const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);

    switch (lane_bits) {
        inline 16, 32, 64 => |bits| {
            var sum: std.meta.Float(bits) = 0.0;
            const d_field = try getValuePrimitiveField(.Float, bits, dst);

            if (bits == 32) { // More likely to be SIMD if f32
                // The length needs the sum of the *squared* components, so
                // square lane-wise before accumulating.
                switch (src.*) {
                    .Vector4f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 4, &sum, &squared);
                    },
                    .Vector3f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 3, &sum, &squared);
                    },
                    .Vector2f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 2, &sum, &squared);
                    },
                    else => {},
                }
            }

            switch (src.*) {
                .Float => {
                    // Fast path: the length of a scalar is its magnitude.
                    const s_field = try getValuePrimitiveField(.Float, bits, src);
                    d_field.* = @abs(s_field.*);
                    return;
                },
                .Vector => |src_vec| for (src_vec) |*s_lane| {
                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
                    sum += s_field.* * s_field.*;
                },
                // Already accumulated above when the lanes are 32-bit. An f32
                // SIMD operand with any other result width is a type mismatch.
                .Vector4f32, .Vector3f32, .Vector2f32 => if (bits != 32) return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            }

            d_field.* = @sqrt(sum);
        },
        else => return RuntimeError.InvalidSpirV,
    }
 }
 /// GLSL.std.450 `Normalize`: writes the operand divided by its Euclidean
 /// length into result `id`, component by component.
 ///
 /// The operand id is read from the next word of `rt.it`. The lane width is
 /// resolved from the result type (`target_type_id`). A scalar operand yields
 /// `x / |x|`. A zero-length operand is not special-cased: the division is
 /// performed as-is.
 /// Returns `InvalidSpirV` when the lane width is not 16/32/64, when operand
 /// and result do not share the same value layout, or when that layout is not
 /// a float scalar/vector.
 fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
    const target_type = (try rt.results[target_type_id].getVariant()).Type;
    const dst = try rt.results[id].getValue();
    const src = try rt.results[try rt.it.next()].getValue();
    const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);

    // Result and operand must use the same layout; the per-layout field
    // accesses on `src` below rely on it.
    if (std.meta.activeTag(dst.*) != std.meta.activeTag(src.*)) return RuntimeError.InvalidSpirV;

    switch (lane_bits) {
        inline 16, 32, 64 => |bits| {
            var sum: std.meta.Float(bits) = 0.0;

            if (bits == 32) { // More likely to be SIMD if f32
                // The length needs the sum of the *squared* components, so
                // square lane-wise before accumulating.
                switch (src.*) {
                    .Vector4f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 4, &sum, &squared);
                    },
                    .Vector3f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 3, &sum, &squared);
                    },
                    .Vector2f32 => |src_vec| {
                        const squared = src_vec * src_vec;
                        sumSIMDVector(f32, 2, &sum, &squared);
                    },
                    else => {},
                }
            }

            switch (src.*) {
                .Float => {
                    const s_field = try getValuePrimitiveField(.Float, bits, src);
                    sum = s_field.* * s_field.*;
                },
                .Vector => |src_vec| for (src_vec) |*s_lane| {
                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
                    sum += s_field.* * s_field.*;
                },
                // Already accumulated above when the lanes are 32-bit. An f32
                // SIMD operand with any other result width is a type mismatch.
                .Vector4f32, .Vector3f32, .Vector2f32 => if (bits != 32) return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            }

            const length = @sqrt(sum);

            if (bits == 32) {
                switch (dst.*) {
                    .Vector4f32 => |*dst_vec| inline for (0..4) |i| {
                        dst_vec[i] = src.Vector4f32[i] / length;
                    },
                    .Vector3f32 => |*dst_vec| inline for (0..3) |i| {
                        dst_vec[i] = src.Vector3f32[i] / length;
                    },
                    .Vector2f32 => |*dst_vec| inline for (0..2) |i| {
                        dst_vec[i] = src.Vector2f32[i] / length;
                    },
                    else => {},
                }
            }

            switch (dst.*) {
                .Float => {
                    const d_field = try getValuePrimitiveField(.Float, bits, dst);
                    const s_field = try getValuePrimitiveField(.Float, bits, src);
                    d_field.* = s_field.* / length;
                },
                .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, *s_lane| {
                    const d_field = try getValuePrimitiveField(.Float, bits, d_lane);
                    const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
                    d_field.* = s_field.* / length;
                },
                // Written by the 32-bit SIMD block above.
                .Vector4f32, .Vector3f32, .Vector2f32 => {},
                else => return RuntimeError.InvalidSpirV,
            }
        },
        else => return RuntimeError.InvalidSpirV,
    }
 }
@@ -2,7 +2,8 @@ const std = @import("std");
 const spv = @import("spv.zig");
 const op = @import("opcodes.zig");
-const RuntimeError = @import("Runtime.zig").RuntimeError;
+const Runtime = @import("Runtime.zig");
 const RuntimeError = Runtime.RuntimeError;
 const SpvVoid = spv.SpvVoid;
 const SpvByte = spv.SpvByte;
@@ -227,65 +228,67 @@ pub const Value = union(Type) {
    }
 };
 /// Per-type payload, tagged by `Type`: one variant per `Type` tag, carrying the
 /// data specific to that kind of type. Variants declared as `struct {}` carry
 /// nothing beyond the tag itself.
 pub const TypeData = union(Type) {
    Void: struct {},
    Bool: struct {},
    Int: struct {
        // Width of the integer in bits.
        bit_length: SpvWord,
        is_signed: bool,
    },
    Float: struct {
        // Width of the float in bits.
        bit_length: SpvWord,
    },
    // Generic vector: component type given both as a result id (`*_word`,
    // an index into the runtime's results) and as a `Type` tag.
    Vector: struct {
        components_type_word: SpvWord,
        components_type: Type,
        member_count: SpvWord,
    },
    // Fixed-layout vectors; component type and count are encoded in the tag.
    Vector4f32: struct {},
    Vector3f32: struct {},
    Vector2f32: struct {},
    Vector4i32: struct {},
    Vector3i32: struct {},
    Vector2i32: struct {},
    Vector4u32: struct {},
    Vector3u32: struct {},
    Vector2u32: struct {},
    Matrix: struct {
        column_type_word: SpvWord,
        column_type: Type,
        member_count: SpvWord,
    },
    Array: struct {
        components_type_word: SpvWord,
        components_type: Type,
        member_count: SpvWord,
    },
    RuntimeArray: struct {},
    Structure: struct {
        members_type_word: []const SpvWord,
        members: []Type,
        member_names: std.ArrayList([]const u8),
    },
    Function: struct {
        // NOTE(review): presumably the position of the function body in the
        // instruction stream; confirm against the code that fills it in.
        source_location: usize,
        return_type: SpvWord,
        params: []const SpvWord,
    },
    Image: struct {},
    Sampler: struct {},
    SampledImage: struct {},
    Pointer: struct {
        storage_class: spv.SpvStorageClass,
        // NOTE(review): presumably the result id of the pointed-to type; confirm.
        target: SpvWord,
    },
 };
 pub const VariantData = union(Variant) {
    String: []const u8,
    Extension: struct {
        /// Should not be allocated but rather a pointer to a static array
-        dispatcher: []op.OpCodeExtFunc,
+        dispatcher: []?op.OpCodeExtFunc,
    },
    Type: union(Type) {
        Void: struct {},
        Bool: struct {},
        Int: struct {
            bit_length: SpvWord,
            is_signed: bool,
        },
        Float: struct {
            bit_length: SpvWord,
        },
        Vector: struct {
            components_type_word: SpvWord,
            components_type: Type,
            member_count: SpvWord,
        },
        Vector4f32: struct {},
        Vector3f32: struct {},
        Vector2f32: struct {},
        Vector4i32: struct {},
        Vector3i32: struct {},
        Vector2i32: struct {},
        Vector4u32: struct {},
        Vector3u32: struct {},
        Vector2u32: struct {},
        Matrix: struct {
            column_type_word: SpvWord,
            column_type: Type,
            member_count: SpvWord,
        },
        Array: struct {
            components_type_word: SpvWord,
            components_type: Type,
            member_count: SpvWord,
        },
        RuntimeArray: struct {},
        Structure: struct {
            members_type_word: []const SpvWord,
            members: []Type,
            member_names: std.ArrayList([]const u8),
        },
        Function: struct {
            source_location: usize,
            return_type: SpvWord,
            params: []const SpvWord,
        },
        Image: struct {},
        Sampler: struct {},
        SampledImage: struct {},
        Pointer: struct {
            storage_class: spv.SpvStorageClass,
            target: SpvWord,
        },
    },
    Type: TypeData,
    Variable: struct {
        storage_class: spv.SpvStorageClass,
        type_word: SpvWord,
@@ -364,7 +367,7 @@ pub fn deinit(self: *Self, allocator: std.mem.Allocator) void {
    self.decorations.deinit(allocator);
 }
-pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
+pub inline fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
    return switch ((try self.getVariant()).*) {
        .Variable => |v| v.type_word,
        .Constant => |c| c.type_word,
@@ -374,7 +377,7 @@ pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord {
    };
 }
-pub fn getValueType(self: *Self) RuntimeError!Type {
+pub inline fn getValueType(self: *Self) RuntimeError!Type {
    return switch ((try self.getVariant()).*) {
        .Variable => |v| v.type,
        .Constant => |c| c.type,
@@ -383,7 +386,7 @@ pub fn getValueType(self: *Self) RuntimeError!Type {
    };
 }
-pub fn getValue(self: *Self) RuntimeError!*Value {
+pub inline fn getValue(self: *Self) RuntimeError!*Value {
    return switch ((try self.getVariant()).*) {
        .Variable => |*v| &v.value,
        .Constant => |*c| &c.value,
@@ -471,6 +474,26 @@ pub fn dupe(self: *const Self, allocator: std.mem.Allocator) RuntimeError!Self {
    };
 }
 /// Returns the bit width of one scalar lane of `target_type`.
 ///
 /// Scalars report their own width (`Bool` counts as 8 bits), the fixed
 /// f32/i32/u32 vector layouts report 32, and a generic `Vector` is resolved
 /// through its component type in `rt.results`. Any other type yields
 /// `RuntimeError.InvalidSpirV`.
 pub fn resolveLaneBitWidth(target_type: TypeData, rt: *const Runtime) RuntimeError!SpvWord {
    var current = target_type;
    while (true) {
        switch (current) {
            .Bool => return 8,
            .Int => |int| return int.bit_length,
            .Float => |float| return float.bit_length,
            // Step down to the component type and look again.
            .Vector => |vector| current = (try rt.results[vector.components_type_word].getVariant()).Type,
            .Vector4f32,
            .Vector3f32,
            .Vector2f32,
            .Vector4i32,
            .Vector3i32,
            .Vector2i32,
            .Vector4u32,
            .Vector3u32,
            .Vector2u32,
            => return 32,
            else => return RuntimeError.InvalidSpirV,
        }
    }
 }
 pub fn resolveType(self: *const Self, results: []const Self) *const Self {
    return if (self.variant) |variant|
        switch (variant) {
@@ -131,10 +131,7 @@ pub fn callEntryPoint(self: *Self, allocator: std.mem.Allocator, entry_point_ind
        var it_tmp = self.it; // Save because operations may iter on this iterator
        if (op.runtime_dispatcher[opcode]) |pfn| {
-            pfn(allocator, word_count, self) catch |err| switch (err) {
+            try pfn(allocator, word_count, self);
                RuntimeError.Killed => return,
                else => return err,
            };
        }
        if (!self.it.did_jump) {
            _ = it_tmp.skipN(word_count);
@@ -36,3 +36,10 @@ pub const Runtime = @import("Runtime.zig");
 const opcodes = @import("opcodes.zig");
 const spv = @import("spv.zig");
 pub const SpvVoid = spv.SpvVoid;
 pub const SpvByte = spv.SpvByte;
 pub const SpvWord = spv.SpvWord;
 pub const SpvBool = spv.SpvBool;
 pub const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig");
@@ -1,6 +1,8 @@
 const std = @import("std");
 const spv = @import("spv.zig");
 const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig");
 const Module = @import("Module.zig");
 const Runtime = @import("Runtime.zig");
 const Result = @import("Result.zig");
@@ -13,13 +15,7 @@ const SpvByte = spv.SpvByte;
 const SpvWord = spv.SpvWord;
 const SpvBool = spv.SpvBool;
-// OpExtInst Sin
+pub const ValueType = enum {
 // OpExtInst Cos
 // OpExtInst Length
 // OpExtInst Normalize
 // OpExtInst FMax
 const ValueType = enum {
    Bool,
    Float,
    SInt,
@@ -230,11 +226,11 @@ pub fn initRuntimeDispatcher() void {
    runtime_dispatcher[@intFromEnum(spv.SpvOp.ISub)]                   = MathEngine(.SInt, .Sub).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.Kill)]                   = opKill;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.Load)]                   = opLoad;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)]             = CondEngine(.Float, .LogicalAnd).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)]             = CondEngine(.Bool, .LogicalAnd).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)]           = CondEngine(.Float, .LogicalEqual).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)]           = CondEngine(.Bool, .LogicalEqual).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)]             = CondEngine(.Float, .LogicalNot).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)]             = CondEngine(.Bool, .LogicalNot).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)]        = CondEngine(.Float, .LogicalNotEqual).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)]        = CondEngine(.Bool, .LogicalNotEqual).op;
-    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)]              = CondEngine(.Float, .LogicalOr).op;
+    runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)]              = CondEngine(.Bool, .LogicalOr).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesMatrix)]      = MathEngine(.Float, .MatrixTimesMatrix).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesScalar)]      = MathEngine(.Float, .MatrixTimesScalar).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesVector)]      = MathEngine(.Float, .MatrixTimesVector).op; // TODO
@@ -261,130 +257,271 @@ pub fn initRuntimeDispatcher() void {
    runtime_dispatcher[@intFromEnum(spv.SpvOp.UMod)]                   = MathEngine(.UInt, .Mod).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesMatrix)]      = MathEngine(.Float, .VectorTimesMatrix).op; // TODO
    runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesScalar)]      = MathEngine(.Float, .VectorTimesScalar).op;
    runtime_dispatcher[@intFromEnum(spv.SpvOp.ExtInst)]                = opExtInst;
    // zig fmt: on
    // Extensions init
    GLSL_std_450.initRuntimeDispatcher();
 }
-fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
+fn extEqlName(a: []const u8, b: []const u8) bool {
-    if (T == .Float) @compileError("Invalid value type");
+    for (0..@min(a.len, b.len)) |i| {
        if (a[i] != b[i]) return false;
    }
    return true;
 }
 const extensions_map = std.StaticStringMapWithEql([]?OpCodeExtFunc, extEqlName).initComptime(.{
    .{ "GLSL.std.450", GLSL_std_450.runtime_dispatcher[0..] },
 });
 fn BitOperator(comptime T: ValueType, comptime Op: BitOp) type {
    return struct {
-        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
+        comptime {
-            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
+            if (T == .Float) @compileError("Invalid value type");
-            const value = try rt.results[try rt.it.next()].getValue();
+        }
-            const op1_value = try rt.results[try rt.it.next()].getValue();
+
-            const op2_value: ?*Result.Value = switch (Op) {
+        inline fn isUnaryOp() bool {
-                .Not, .BitCount, .BitReverse => null,
+            return comptime switch (Op) {
-                else => try rt.results[try rt.it.next()].getValue(),
+                .Not, .BitCount, .BitReverse => true,
                else => false,
            };
        }
-            const size = sw: switch (target_type) {
+        inline fn bitMask(bits: u64) u64 {
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
+            return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1;
-                .Vector4f32,
+        }
-                .Vector3f32,
+
-                .Vector2f32,
+        inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT {
-                .Vector4i32,
+            const mask: TT = @intCast(bitMask(count) << @intCast(offset));
-                .Vector3i32,
+            return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask)));
-                .Vector2i32,
+        }
-                .Vector4u32,
+
-                .Vector3u32,
+        inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT {
-                .Vector2u32,
+            return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count)));
-                => 32,
+        }
-                .Int => |i| i.bit_length,
+
-                else => return RuntimeError.InvalidSpirV,
+        fn operationUnary(comptime TT: type, op1: TT) RuntimeError!TT {
            return switch (Op) {
                .BitCount => @as(TT, @intCast(@bitSizeOf(TT))), // keep return type TT
                .BitReverse => @bitReverse(op1),
                .Not => ~op1,
                else => RuntimeError.InvalidSpirV,
            };
        }
-            const operator = struct {
+        fn operationBinary(comptime TT: type, rt: *Runtime, op1: TT, op2: TT) RuntimeError!TT {
-                inline fn bitMask(bits: u64) u64 {
+            return switch (Op) {
-                    return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1;
+                .BitFieldInsert => blk: {
-                }
+                    const offset = try rt.results[try rt.it.next()].getValue();
                    const count = try rt.results[try rt.it.next()].getValue();
                    break :blk bitInsert(TT, op1, op2, offset.Int.uint64, count.Int.uint64);
                },
                .BitFieldSExtract => blk: {
                    if (T == .UInt) return RuntimeError.InvalidSpirV;
                    const count = try rt.results[try rt.it.next()].getValue();
                    break :blk bitExtract(TT, op1, op2, count.Int.uint64);
                },
                .BitFieldUExtract => blk: {
                    if (T == .SInt) return RuntimeError.InvalidSpirV;
                    const count = try rt.results[try rt.it.next()].getValue();
                    break :blk bitExtract(TT, op1, op2, count.Int.uint64);
                },
-                inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT {
+                .BitwiseAnd => op1 & op2,
-                    const mask: TT = @intCast(bitMask(count) << @intCast(offset));
+                .BitwiseOr => op1 | op2,
-                    return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask)));
+                .BitwiseXor => op1 ^ op2,
-                }
+                .ShiftLeft => op1 << @intCast(op2),
                .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(op2),
-                inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT {
+                else => RuntimeError.InvalidSpirV,
                    return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count)));
                }
                fn operation(comptime TT: type, rt2: *Runtime, op1: TT, op2: ?TT) RuntimeError!TT {
                    switch (Op) {
                        .BitCount => return @bitSizeOf(TT),
                        .BitReverse => return @bitReverse(op1),
                        .Not => return ~op1,
                        else => {},
                    }
                    return if (op2) |v2|
                        switch (Op) {
                            .BitFieldInsert => blk: {
                                const offset = try rt2.results[try rt2.it.next()].getValue();
                                const count = try rt2.results[try rt2.it.next()].getValue();
                                break :blk bitInsert(TT, op1, v2, offset.Int.uint64, count.Int.uint64);
                            },
                            .BitFieldSExtract => blk: {
                                if (T == .UInt) return RuntimeError.InvalidSpirV;
                                const count = try rt2.results[try rt2.it.next()].getValue();
                                break :blk bitExtract(TT, op1, v2, count.Int.uint64);
                            },
                            .BitFieldUExtract => blk: {
                                if (T == .SInt) return RuntimeError.InvalidSpirV;
                                const count = try rt2.results[try rt2.it.next()].getValue();
                                break :blk bitExtract(TT, op1, v2, count.Int.uint64);
                            },
                            .BitwiseAnd => op1 & v2,
                            .BitwiseOr => op1 | v2,
                            .BitwiseXor => op1 ^ v2,
                            .ShiftLeft => op1 << @intCast(v2),
                            .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(v2),
                            else => return RuntimeError.InvalidSpirV,
                        }
                    else
                        RuntimeError.InvalidSpirV;
                }
                fn process(rt2: *Runtime, bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
                    switch (bit_count) {
                        inline 8, 16, 32, 64 => |i| {
                            (try getValuePrimitiveField(T, i, v)).* = try operation(
                                getValuePrimitiveFieldType(T, i),
                                rt2,
                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
                                if (op2_v) |v2|
                                    (try getValuePrimitiveField(T, i, @constCast(v2))).*
                                else
                                    null,
                            );
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
            };
        }
-            switch (value.*) {
+        fn applyScalarBits(rt: *Runtime, bit_count: SpvWord, dst: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
-                .Int => try operator.process(rt, size, value, op1_value, op2_value),
+            switch (bit_count) {
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i|
+                inline 8, 16, 32, 64 => |bits| {
-                    try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null),
+                    const TT = getValuePrimitiveFieldType(T, bits);
-                // No bit manipulation on VectorXf32
+                    const a = (try getValuePrimitiveField(T, bits, @constCast(op1_v))).*;
-                .Vector4i32 => |*vec| inline for (0..4) |i| {
+
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null);
+                    const out = if (comptime isUnaryOp()) blk: {
-                },
+                        break :blk try operationUnary(TT, a);
-                .Vector3i32 => |*vec| inline for (0..3) |i| {
+                    } else blk: {
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null);
+                        const b_ptr = op2_v orelse return RuntimeError.InvalidSpirV;
-                },
+                        const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*;
-                .Vector2i32 => |*vec| inline for (0..2) |i| {
+                        break :blk try operationBinary(TT, rt, a, b);
-                    vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null);
+                    };
-                },
+
-                .Vector4u32 => |*vec| inline for (0..4) |i| {
+                    (try getValuePrimitiveField(T, bits, dst)).* = out;
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null);
                },
                .Vector3u32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null);
                },
                .Vector2u32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null);
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
        /// Returns a pointer to lane `index` of the right-hand operand's `.Vector`
        /// payload, or null when the operator is unary or no right-hand operand
        /// was supplied.
        fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value {
            if (comptime isUnaryOp()) return null;
            return if (op2_value) |rhs| &rhs.Vector[index] else null;
        }
        /// Applies the unary bit operation lane-by-lane to a fixed-size vector.
        ///
        /// `dst[i]` receives `operationUnary(ElemT, op1[i])` for every lane.
        ///
        /// For binary operators this entry point always reports
        /// `RuntimeError.InvalidSpirV`: it has no `*Runtime` to hand to
        /// `operationBinary`, and `op2_value` cannot be projected to a
        /// `[N]ElemT` without knowing which union field is active. Binary callers
        /// pass their right-hand lanes to `applyFixedVectorBinary` directly.
        fn applyFixedVector(comptime ElemT: type, comptime N: usize, dst: *[N]ElemT, op1: *[N]ElemT, op2_value: ?*Result.Value) RuntimeError!void {
            // Kept only for signature compatibility; see the doc comment above.
            _ = op2_value;

            if (comptime isUnaryOp()) {
                inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]);
            } else {
                return RuntimeError.InvalidSpirV;
            }
        }
        /// Applies the binary bit operation to each of the `N` lanes:
        /// `dst[i] = operationBinary(ElemT, rt, op1[i], op2[i])`.
        /// The first lane error, if any, is propagated to the caller.
        fn applyFixedVectorBinary(
            comptime ElemT: type,
            comptime N: usize,
            rt: *Runtime,
            dst: *[N]ElemT,
            op1: *[N]ElemT,
            op2: *[N]ElemT,
        ) RuntimeError!void {
            inline for (0..N) |lane| {
                const lhs: ElemT = op1[lane];
                const rhs: ElemT = op2[lane];
                dst[lane] = try operationBinary(ElemT, rt, lhs, rhs);
            }
        }
        /// Applies the unary bit operation to each of the `N` lanes:
        /// `dst[i] = operationUnary(ElemT, op1[i])`.
        /// The first lane error, if any, is propagated to the caller.
        fn applyFixedVectorUnary(
            comptime ElemT: type,
            comptime N: usize,
            dst: *[N]ElemT,
            op1: *[N]ElemT,
        ) RuntimeError!void {
            inline for (0..N) |lane| {
                const input: ElemT = op1[lane];
                dst[lane] = try operationUnary(ElemT, input);
            }
        }
    };
 }
 fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
    return struct {
        /// Executes one bit-manipulation instruction.
        ///
        /// Operand words are consumed from `rt.it` in this order: result type,
        /// destination value, first operand, and (for non-unary operators only)
        /// second operand. The lane bit width is resolved from the result type.
        ///
        /// Dispatch is on the destination's active variant:
        /// - `.Int`: a single scalar operation.
        /// - `.Vector`: one scalar operation per lane. The first operand, and the
        ///   second one when present, must also be `.Vector` values of the same
        ///   length as the destination, otherwise `RuntimeError.InvalidSpirV`.
        /// - `.VectorNi32` / `.VectorNu32`: fixed-width lane loops.
        /// - anything else: `RuntimeError.InvalidSpirV`.
        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
            const dst = try rt.results[try rt.it.next()].getValue();
            const op1 = try rt.results[try rt.it.next()].getValue();
            const operator = BitOperator(T, Op);
            // Unary operators carry no second operand word, so nothing is read for them.
            const op2_value: ?*Result.Value = if (comptime operator.isUnaryOp()) null else try rt.results[try rt.it.next()].getValue();
            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
            switch (dst.*) {
                // `op2_value` is already null for unary operators.
                .Int => try operator.applyScalarBits(rt, lane_bits, dst, op1, op2_value),
                .Vector => |dst_vec| {
                    // Validate operand shapes up front so a malformed module is
                    // reported as an error instead of reading an inactive union
                    // field or indexing past the end of a shorter operand.
                    const op1_vec = switch (op1.*) {
                        .Vector => |lanes| lanes,
                        else => return RuntimeError.InvalidSpirV,
                    };
                    if (dst_vec.len != op1_vec.len) return RuntimeError.InvalidSpirV;
                    if (op2_value) |rhs| {
                        switch (rhs.*) {
                            .Vector => |lanes| {
                                if (lanes.len != dst_vec.len) return RuntimeError.InvalidSpirV;
                            },
                            else => return RuntimeError.InvalidSpirV,
                        }
                    }
                    for (dst_vec, op1_vec, 0..) |*d_lane, a_lane, i| {
                        const b_ptr = operator.laneRhsPtr(op2_value, i);
                        try operator.applyScalarBits(rt, lane_bits, d_lane, &a_lane, b_ptr);
                    }
                },
                .Vector4i32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(i32, 4, d, &op1.Vector4i32)
                    else
                        try operator.applyFixedVectorBinary(i32, 4, rt, d, &op1.Vector4i32, &op2_value.?.Vector4i32);
                },
                .Vector3i32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(i32, 3, d, &op1.Vector3i32)
                    else
                        try operator.applyFixedVectorBinary(i32, 3, rt, d, &op1.Vector3i32, &op2_value.?.Vector3i32);
                },
                .Vector2i32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(i32, 2, d, &op1.Vector2i32)
                    else
                        try operator.applyFixedVectorBinary(i32, 2, rt, d, &op1.Vector2i32, &op2_value.?.Vector2i32);
                },
                .Vector4u32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(u32, 4, d, &op1.Vector4u32)
                    else
                        try operator.applyFixedVectorBinary(u32, 4, rt, d, &op1.Vector4u32, &op2_value.?.Vector4u32);
                },
                .Vector3u32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(u32, 3, d, &op1.Vector3u32)
                    else
                        try operator.applyFixedVectorBinary(u32, 3, rt, d, &op1.Vector3u32, &op2_value.?.Vector3u32);
                },
                .Vector2u32 => |*d| {
                    if (comptime operator.isUnaryOp())
                        try operator.applyFixedVectorUnary(u32, 2, d, &op1.Vector2u32)
                    else
                        try operator.applyFixedVectorBinary(u32, 2, rt, d, &op1.Vector2u32, &op2_value.?.Vector2u32);
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
    };
 }
 fn CondOperator(comptime T: ValueType, comptime Op: CondOp) type {
    return struct {
        /// Evaluates the two-operand comparison or logical operator `Op` on
        /// `a` and `b`. Operators not listed here yield
        /// `RuntimeError.InvalidSpirV`.
        fn operation(comptime TT: type, a: TT, b: TT) RuntimeError!bool {
            switch (Op) {
                .Equal, .LogicalEqual => return a == b,
                .NotEqual, .LogicalNotEqual => return a != b,
                .Greater => return a > b,
                .GreaterEqual => return a >= b,
                .Less => return a < b,
                .LessEqual => return a <= b,
                .LogicalAnd => return a and b,
                .LogicalOr => return a or b,
                else => return RuntimeError.InvalidSpirV,
            }
        }
        /// Evaluates the single-operand operator `Op` on `a`. Only
        /// `.LogicalNot` is supported; every other operator yields
        /// `RuntimeError.InvalidSpirV`.
        fn operationUnary(comptime TT: type, a: TT) RuntimeError!bool {
            switch (Op) {
                .LogicalNot => return !a,
                else => return RuntimeError.InvalidSpirV,
            }
        }
        /// Computes one boolean result lane and stores it in `dst_bool.Bool`.
        ///
        /// `bit_count` selects the primitive field read from the operands
        /// (8, 16, 32 or 64 bits; 8-bit floats are rejected). For every operator
        /// except `.LogicalNot` a non-null `b_v` is required; a missing one is
        /// reported as `RuntimeError.InvalidSpirV`.
        fn applyLane(bit_count: SpvWord, dst_bool: *Result.Value, a_v: *const Result.Value, b_v: ?*const Result.Value) RuntimeError!void {
            switch (bit_count) {
                inline 8, 16, 32, 64 => |width| {
                    if (width == 8 and T == .Float) return RuntimeError.InvalidSpirV;
                    const LaneT = getValuePrimitiveFieldType(T, width);
                    const lhs = (try getValuePrimitiveField(T, width, @constCast(a_v))).*;
                    const outcome: bool = if (comptime Op == .LogicalNot)
                        try operationUnary(LaneT, lhs)
                    else binary: {
                        const rhs_value = b_v orelse return RuntimeError.InvalidSpirV;
                        const rhs = (try getValuePrimitiveField(T, width, @constCast(rhs_value))).*;
                        break :binary try operation(LaneT, lhs, rhs);
                    };
                    dst_bool.Bool = outcome;
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
        /// Returns a pointer to lane `index` of the right-hand operand's
        /// `.Vector` payload, or null for `.LogicalNot` or when no right-hand
        /// operand was supplied.
        fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value {
            if (comptime Op == .LogicalNot) return null;
            return if (op2_value) |rhs| &rhs.Vector[index] else null;
        }
    };
 }
@@ -397,199 +534,151 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type {
                else => return RuntimeError.InvalidSpirV,
            }
-            const value = try rt.results[try rt.it.next()].getValue();
+            const dst = try rt.results[try rt.it.next()].getValue();
            const op1_result = &rt.results[try rt.it.next()];
            const op1_type = try op1_result.getValueTypeWord();
            const op1_value = try op1_result.getValue();
            const op2_value: ?*Result.Value = switch (Op) {
                .LogicalNot => null,
                else => try rt.results[try rt.it.next()].getValue(),
            };
-            const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) {
+            const op2_value: ?*Result.Value = if (comptime Op == .LogicalNot) null else try rt.results[try rt.it.next()].getValue();
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            };
-            const operator = struct {
+            const lane_bits = try Result.resolveLaneBitWidth((try rt.results[op1_type].getVariant()).Type, rt);
                fn operation(comptime TT: type, op1: TT, op2: ?TT) RuntimeError!bool {
                    return switch (Op) {
                        .Equal, .LogicalEqual => op1 == op2 orelse return RuntimeError.InvalidSpirV,
                        .NotEqual, .LogicalNotEqual => op1 != op2 orelse return RuntimeError.InvalidSpirV,
                        .Greater => op1 > op2 orelse return RuntimeError.InvalidSpirV,
                        .GreaterEqual => op1 >= op2 orelse return RuntimeError.InvalidSpirV,
                        .Less => op1 < op2 orelse return RuntimeError.InvalidSpirV,
                        .LessEqual => op1 <= op2 orelse return RuntimeError.InvalidSpirV,
                        .LogicalAnd => (op1 != @as(TT, 0)) and ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)),
                        .LogicalOr => (op1 != @as(TT, 0)) or ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)),
                        .LogicalNot => (op1 == @as(TT, 0)),
                    };
                }
-                fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void {
+            const operator = CondOperator(T, Op);
                    switch (bit_count) {
                        inline 8, 16, 32, 64 => |i| {
                            if (i == 8 and T == .Float) { // No f8
                                return RuntimeError.InvalidSpirV;
                            }
                            v.Bool = try operation(
                                getValuePrimitiveFieldType(T, i),
                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
                                if (op2_v) |val| (try getValuePrimitiveField(T, i, @constCast(val))).* else null,
                            );
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
            };
-            switch (value.*) {
+            switch (dst.*) {
-                .Bool => try operator.process(size, value, op1_value, op2_value),
+                .Bool => try operator.applyLane(lane_bits, dst, op1_value, op2_value),
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| {
+
-                    try operator.process(size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null);
+                .Vector => |dst_vec| for (dst_vec, op1_value.Vector, 0..) |*d_lane, a_lane, i| {
                    const b_ptr = operator.laneRhsPtr(op2_value, i);
                    try operator.applyLane(lane_bits, d_lane, &a_lane, b_ptr);
                },
-                // No Vector specializations for booleans
+
                else => return RuntimeError.InvalidSpirV,
            }
        }
    };
 }
-fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type {
+fn ConversionEngine(comptime from_kind: ValueType, comptime to_kind: ValueType) type {
    return struct {
        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
-            const value = try rt.results[try rt.it.next()].getValue();
+            const dst_value = try rt.results[try rt.it.next()].getValue();
            const op_result = &rt.results[try rt.it.next()];
            const op_type = try op_result.getValueTypeWord();
            const op_value = try op_result.getValue();
-            const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) {
+            const src_result = &rt.results[try rt.it.next()];
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
+            const src_type_word = try src_result.getValueTypeWord();
-                .Vector4f32,
+            const src_value = try src_result.getValue();
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            };
-            const to_size = sw: switch (target_type) {
+            const from_bits = try Result.resolveLaneBitWidth((try rt.results[src_type_word].getVariant()).Type, rt);
-                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
+            const to_bits = try Result.resolveLaneBitWidth(target_type, rt);
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            };
-            const operator = struct {
+            const caster = struct {
-                fn process(from_bit_count: SpvWord, to_bit_count: SpvWord, to: *Result.Value, from: *Result.Value) RuntimeError!void {
+                fn castLane(comptime ToT: type, from_bit_count: SpvWord, from: *Result.Value) RuntimeError!ToT {
                    return switch (from_bit_count) {
                        inline 8, 16, 32, 64 => |bits| blk: {
                            if (bits == 8 and from_kind == .Float) return RuntimeError.InvalidSpirV; // No f8
                            const v = (try getValuePrimitiveField(from_kind, bits, from)).*;
                            break :blk std.math.lossyCast(ToT, v);
                        },
                        else => return RuntimeError.InvalidSpirV,
                    };
                }
                fn applyScalar(from_bit_count: SpvWord, to_bit_count: SpvWord, dst: *Result.Value, from: *Result.Value) RuntimeError!void {
                    switch (to_bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
+                        inline 8, 16, 32, 64 => |bits| {
-                            if (i == 8 and To == .Float) {
+                            if (bits == 8 and to_kind == .Float) return RuntimeError.InvalidSpirV; // No f8
-                                return RuntimeError.InvalidSpirV; // No f8
+                            const ToT = getValuePrimitiveFieldType(to_kind, bits);
-                            }
+                            (try getValuePrimitiveField(to_kind, bits, dst)).* = try castLane(ToT, from_bit_count, from);
                            const ToType = getValuePrimitiveFieldType(To, i);
                            (try getValuePrimitiveField(To, i, to)).* = std.math.lossyCast(
                                ToType,
                                switch (from_bit_count) {
                                    inline 8, 16, 32, 64 => |j| blk: {
                                        if (j == 8 and From == .Float) {
                                            return RuntimeError.InvalidSpirV; // Same
                                        }
                                        break :blk (try getValuePrimitiveField(From, j, from)).*;
                                    },
                                    else => return RuntimeError.InvalidSpirV,
                                },
                            );
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
-                fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T {
+                fn castSIMDVector(comptime ToT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]ToT) void {
-                    return switch (from.*) {
+                    inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]);
-                        .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]),
+                }
-                        .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]),
+
-                        .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]),
+                fn castSIMDVectorFromOther(comptime ToT: type, comptime FromT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]FromT) void {
-                        .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]),
+                    inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]);
                        .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]),
                        inline else => switch (from_bit_count) {
                            inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: {
                                if (i == 8 and From == .Float) {
                                    return RuntimeError.InvalidSpirV;
                                }
                                break :blk (try getValuePrimitiveField(From, i, from)).*;
                            }),
                            else => return RuntimeError.InvalidSpirV,
                        },
                    };
                }
            };
-            switch (value.*) {
+            switch (dst_value.*) {
-                .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
+                .Float => {
-                .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
+                    if (to_kind != .Float) return RuntimeError.InvalidSpirV;
-                .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v),
+                    try caster.applyScalar(from_bits, to_bits, dst_value, src_value);
                .Vector4f32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
                },
-                .Vector3f32 => |*vec| inline for (0..3) |i| {
+                .Int => {
-                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
+                    if (to_kind != .SInt and to_kind != .UInt) return RuntimeError.InvalidSpirV;
                    try caster.applyScalar(from_bits, to_bits, dst_value, src_value);
                },
-                .Vector2f32 => |*vec| inline for (0..2) |i| {
+                .Vector => |dst_vec| {
-                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
+                    const src_vec = src_value.Vector;
                    if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV;
                    for (dst_vec, src_vec) |*d_lane, *s_lane| {
                        try caster.applyScalar(from_bits, to_bits, d_lane, s_lane);
                    }
                },
-                .Vector4i32 => |*vec| inline for (0..4) |i| {
+
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+                .Vector4f32 => |*dst| switch (src_value.*) {
                    .Vector4f32 => caster.castSIMDVector(f32, 4, dst, &src_value.Vector4f32),
                    .Vector4i32 => caster.castSIMDVectorFromOther(f32, i32, 4, dst, &src_value.Vector4i32),
                    .Vector4u32 => caster.castSIMDVectorFromOther(f32, u32, 4, dst, &src_value.Vector4u32),
                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector3i32 => |*vec| inline for (0..3) |i| {
+                .Vector3f32 => |*dst| switch (src_value.*) {
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+                    .Vector3f32 => caster.castSIMDVector(f32, 3, dst, &src_value.Vector3f32),
                    .Vector3i32 => caster.castSIMDVectorFromOther(f32, i32, 3, dst, &src_value.Vector3i32),
                    .Vector3u32 => caster.castSIMDVectorFromOther(f32, u32, 3, dst, &src_value.Vector3u32),
                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector2i32 => |*vec| inline for (0..2) |i| {
+                .Vector2f32 => |*dst| switch (src_value.*) {
-                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
+                    .Vector2f32 => caster.castSIMDVector(f32, 2, dst, &src_value.Vector2f32),
                    .Vector2i32 => caster.castSIMDVectorFromOther(f32, i32, 2, dst, &src_value.Vector2i32),
                    .Vector2u32 => caster.castSIMDVectorFromOther(f32, u32, 2, dst, &src_value.Vector2u32),
                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector4u32 => |*vec| inline for (0..4) |i| {
+
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+                .Vector4i32 => |*dst| switch (src_value.*) {
                    .Vector4f32 => caster.castSIMDVectorFromOther(i32, f32, 4, dst, &src_value.Vector4f32),
                    .Vector4i32 => caster.castSIMDVector(i32, 4, dst, &src_value.Vector4i32),
                    .Vector4u32 => caster.castSIMDVectorFromOther(i32, u32, 4, dst, &src_value.Vector4u32),
                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector3u32 => |*vec| inline for (0..3) |i| {
+                .Vector3i32 => |*dst| switch (src_value.*) {
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+                    .Vector3f32 => caster.castSIMDVectorFromOther(i32, f32, 3, dst, &src_value.Vector3f32),
                    .Vector3i32 => caster.castSIMDVector(i32, 3, dst, &src_value.Vector3i32),
                    .Vector3u32 => caster.castSIMDVectorFromOther(i32, u32, 3, dst, &src_value.Vector3u32),
                    else => return RuntimeError.InvalidSpirV,
                },
-                .Vector2u32 => |*vec| inline for (0..2) |i| {
+                .Vector2i32 => |*dst| switch (src_value.*) {
-                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
+                    .Vector2f32 => caster.castSIMDVectorFromOther(i32, f32, 2, dst, &src_value.Vector2f32),
                    .Vector2i32 => caster.castSIMDVector(i32, 2, dst, &src_value.Vector2i32),
                    .Vector2u32 => caster.castSIMDVectorFromOther(i32, u32, 2, dst, &src_value.Vector2u32),
                    else => return RuntimeError.InvalidSpirV,
                },
                .Vector4u32 => |*dst| switch (src_value.*) {
                    .Vector4f32 => caster.castSIMDVectorFromOther(u32, f32, 4, dst, &src_value.Vector4f32),
                    .Vector4i32 => caster.castSIMDVectorFromOther(u32, i32, 4, dst, &src_value.Vector4i32),
                    .Vector4u32 => caster.castSIMDVector(u32, 4, dst, &src_value.Vector4u32),
                    else => return RuntimeError.InvalidSpirV,
                },
                .Vector3u32 => |*dst| switch (src_value.*) {
                    .Vector3f32 => caster.castSIMDVectorFromOther(u32, f32, 3, dst, &src_value.Vector3f32),
                    .Vector3i32 => caster.castSIMDVectorFromOther(u32, i32, 3, dst, &src_value.Vector3i32),
                    .Vector3u32 => caster.castSIMDVector(u32, 3, dst, &src_value.Vector3u32),
                    else => return RuntimeError.InvalidSpirV,
                },
                .Vector2u32 => |*dst| switch (src_value.*) {
                    .Vector2f32 => caster.castSIMDVectorFromOther(u32, f32, 2, dst, &src_value.Vector2f32),
                    .Vector2i32 => caster.castSIMDVectorFromOther(u32, i32, 2, dst, &src_value.Vector2i32),
                    .Vector2u32 => caster.castSIMDVector(u32, 2, dst, &src_value.Vector2u32),
                    else => return RuntimeError.InvalidSpirV,
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -600,26 +689,11 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
    return struct {
        fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
            const target_type = (try rt.results[try rt.it.next()].getVariant()).Type;
-            const value = try rt.results[try rt.it.next()].getValue();
+            const dst = try rt.results[try rt.it.next()].getValue();
-            const op1_value = try rt.results[try rt.it.next()].getValue();
+            const lhs = try rt.results[try rt.it.next()].getValue();
-            const op2_value = try rt.results[try rt.it.next()].getValue();
+            const rhs = try rt.results[try rt.it.next()].getValue();
-            const size = sw: switch (target_type) {
+            const lane_bits = try Result.resolveLaneBitWidth(target_type, rt);
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
            };
            const operator = struct {
                fn operation(comptime TT: type, op1: TT, op2: TT) RuntimeError!TT {
@@ -637,68 +711,77 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
                    };
                }
-                fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: *const Result.Value) RuntimeError!void {
+                fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *Result.Value, r: *Result.Value) RuntimeError!void {
                    // Applies `operation` to a single scalar pair and stores the result in the destination value.
                    // `bit_count` selects which 8/16/32/64-bit primitive field is read from both operands and
                    // written in the destination (looked up through getValuePrimitiveField for value kind `T`).
                    // An 8-bit width combined with `T == .Float`, or any width outside 8/16/32/64, yields
                    // RuntimeError.InvalidSpirV; errors from the field lookup or from `operation` propagate.
                    // NOTE(review): this listing looks like a rendered diff — `-` lines appear to be the
                    // pre-change text and `+` lines their replacement; confirm before reading it as one body.
                    switch (bit_count) {
-                        inline 8, 16, 32, 64 => |i| {
+                        inline 8, 16, 32, 64 => |bits| {
-                            if (i == 8 and T == .Float) { // No f8
+                            if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV;
-                                return RuntimeError.InvalidSpirV;
+
-                            }
+                            const ScalarT = getValuePrimitiveFieldType(T, bits);
-                            (try getValuePrimitiveField(T, i, v)).* = try operation(
+                            const d_field = try getValuePrimitiveField(T, bits, d);
-                                getValuePrimitiveFieldType(T, i),
+                            const l_field = try getValuePrimitiveField(T, bits, l);
-                                (try getValuePrimitiveField(T, i, @constCast(op1_v))).*,
+                            const r_field = try getValuePrimitiveField(T, bits, r);
-                                (try getValuePrimitiveField(T, i, @constCast(op2_v))).*,
+                            d_field.* = try operation(ScalarT, l_field.*, r_field.*);
                            );
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
                /// Multiplies each element of `l` by the scalar `r` and stores the product in the element of
                /// `d` at the same position. Both operands are accessed through their `.Float.float32` field.
                /// `d` and `l` are walked together by one multi-object `for`, so they must have equal lengths.
                inline fn applyVectorTimesScalarF32(d: []Result.Value, l: []const Result.Value, r: f32) void {
                    for (d, l) |*out_lane, in_lane| {
                        const scaled = in_lane.Float.float32 * r;
                        out_lane.Float.float32 = scaled;
                    }
                }
                /// Runs `operation` lane by lane over two `N`-wide vectors of `ElemT` and writes each result
                /// into the matching lane of `d`. The loop is unrolled at comptime. The first error returned
                /// by `operation` is propagated immediately, leaving the remaining lanes of `d` unwritten.
                inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void {
                    inline for (0..N) |lane| {
                        const lhs_lane: ElemT = l[lane];
                        const rhs_lane: ElemT = r[lane];
                        d[lane] = try operation(ElemT, lhs_lane, rhs_lane);
                    }
                }
                /// Scales every lane of the f32 vector `l` by `r` and stores the products in `d`.
                /// `N` is the comptime lane count shared by both vectors.
                inline fn applyVectorSIMDTimesScalarF32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: f32) void {
                    // Broadcast the scalar so the whole product is one element-wise vector multiply.
                    const factor: @Vector(N, f32) = @splat(r);
                    d.* = l.* * factor;
                }
                /// Dispatches an `N`-lane f32 vector operation according to the comptime `Op`.
                /// For `.VectorTimesScalar`, `r` is read as a scalar (`r.Float.float32`) and every lane of
                /// `l` is scaled by it. For any other `Op`, `r` is read as the `N`-lane f32 vector field and
                /// combined with `l` through `applySIMDVector`, whose error (if any) is propagated.
                inline fn applySIMDVectorf32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: *const Result.Value) RuntimeError!void {
                    if (Op == .VectorTimesScalar) {
                        applyVectorSIMDTimesScalarF32(N, d, l, r.Float.float32);
                    } else {
                        // Pick the field whose lane count matches `N`; only 2, 3 and 4 are handled.
                        const rhs_lanes: *const @Vector(N, f32) = switch (N) {
                            2 => &r.Vector2f32,
                            3 => &r.Vector3f32,
                            4 => &r.Vector4f32,
                            else => unreachable,
                        };
                        try applySIMDVector(f32, N, d, l, rhs_lanes);
                    }
                }
            };
-            switch (value.*) {
+            switch (dst.*) {
-                .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
+                .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs),
-                .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
+
-                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| {
+                .Vector => |dst_vec| switch (Op) {
-                    switch (Op) {
+                    .VectorTimesScalar => operator.applyVectorTimesScalarF32(dst_vec, lhs.Vector, rhs.Float.float32),
-                        .VectorTimesScalar => try operator.process(size, val, &op1_v, op2_value),
+                    else => for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, *l_lane, *r_lane| {
-                        else => try operator.process(size, val, &op1_v, &op2_value.Vector[i]),
+                        try operator.applyScalar(lane_bits, d_lane, l_lane, r_lane);
-                    }
+                    },
                },
                .Vector4f32 => |*vec| inline for (0..4) |i| {
                    switch (Op) {
                        .VectorTimesScalar => vec[i] = op1_value.Vector4f32[i] * op2_value.Float.float32,
                        else => vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]),
                    }
                },
                .Vector3f32 => |*vec| inline for (0..3) |i| {
                    switch (Op) {
                        .VectorTimesScalar => vec[i] = op1_value.Vector3f32[i] * op2_value.Float.float32,
                        else => vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]),
                    }
                },
                .Vector2f32 => |*vec| inline for (0..2) |i| {
                    switch (Op) {
                        .VectorTimesScalar => vec[i] = op1_value.Vector2f32[i] * op2_value.Float.float32,
                        else => vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]),
                    }
                },
                .Vector4i32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]);
                },
                .Vector3i32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]);
                },
                .Vector2i32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]);
                },
                .Vector4u32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]);
                },
                .Vector3u32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]);
                },
                .Vector2u32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]);
                },
                .Vector4f32 => |*d| try operator.applySIMDVectorf32(4, d, &lhs.Vector4f32, rhs),
                .Vector3f32 => |*d| try operator.applySIMDVectorf32(3, d, &lhs.Vector3f32, rhs),
                .Vector2f32 => |*d| try operator.applySIMDVectorf32(2, d, &lhs.Vector2f32, rhs),
                .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32),
                .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32),
                .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32),
                .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32),
                .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32),
                .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32),
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -784,20 +867,21 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
 }
 /// Copies `src` into `dst`.
 /// When `src` is a Vector, Matrix, Array or Structure, its elements are copied one by one through a
 /// recursive call; `dst` is then expected to be one of those same composite kinds (any other `dst`
 /// reaches `unreachable`). Every other kind of value is copied with a plain assignment.
 /// NOTE(review): this listing looks like a rendered diff — `-` lines appear to be the pre-change text
 /// and `+`/unmarked lines the replacement; confirm before reading it as one body.
 fn copyValue(dst: *Result.Value, src: *const Result.Value) void {
-    if (src.getCompositeDataOrNull()) |src_slice| {
+    switch (src.*) {
-        if (dst.getCompositeDataOrNull()) |dst_slice| {
+        .Vector, .Matrix, .Array, .Structure => |src_slice| {
            const dst_slice = switch (dst.*) {
                .Vector, .Matrix, .Array, .Structure => |d| d,
                else => unreachable,
            };
            // @min bounds the walk to the shorter slice, so a length mismatch never indexes past either one.
            for (0..@min(dst_slice.len, src_slice.len)) |i| {
                copyValue(&dst_slice[i], &src_slice[i]);
            }
-        } else {
+        },
-            unreachable;
+        else => dst.* = src.*,
        }
    } else {
        dst.* = src.*;
    }
 }
-fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) {
+pub fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) {
    return switch (T) {
        .Bool => &v.Bool,
        .Float => switch (BitCount) {
@@ -815,7 +899,7 @@ fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v:
    };
 }
-fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type {
+pub fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type {
    return switch (T) {
        .Bool => bool,
        .Float => std.meta.Float(BitCount),
@@ -1112,7 +1196,7 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru
    const set = try rt.it.next();
    const inst = try rt.it.next();
-    switch (try rt.results[set].getVariant()) {
+    switch ((try rt.results[set].getVariant()).*) {
        .Extension => |ext| if (ext.dispatcher[inst]) |pfn| {
            try pfn(allocator, target_type, id, word_count, rt);
        },
@@ -1122,10 +1206,11 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru
 /// Handles an extended-instruction-set import.
 /// Reads the result id from the instruction stream, then the set name through `readStringN` over
 /// `word_count - 1` words, stores that name on `rt.mod.results[id]`, and sets the result's variant
 /// to `.Extension`. In the `+` lines the dispatcher is looked up in `extensions_map` by that name;
 /// a name with no entry returns `RuntimeError.UnsupportedExtension`. Errors from the stream read
 /// and from `readStringN` propagate.
 /// NOTE(review): this listing looks like a rendered diff — `-` lines appear to be the pre-change text
 /// and `+`/unmarked lines the replacement; confirm before reading it as one body.
 fn opExtInstImport(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) RuntimeError!void {
    const id = try rt.it.next();
-    rt.mod.results[id].name = try readStringN(allocator, &rt.it, word_count - 1);
+    const name = try readStringN(allocator, &rt.it, word_count - 1);
    rt.mod.results[id].name = name;
    rt.mod.results[id].variant = .{
        .Extension = .{
            // When the lookup fails and this returns, the name has already been stored on the result above.
            // NOTE(review): confirm the stored name is released by the module's cleanup on that path.
-            .dispatcher = undefined,
+            .dispatcher = if (extensions_map.get(name)) |map| map else return RuntimeError.UnsupportedExtension,
        },
    };
 }