adding SIMD vector specializations

2026-01-21 14:52:37 +01:00
parent 45adad727d
commit 19687251b0
7 changed files with 455 additions and 56 deletions
@@ -31,6 +31,7 @@ pub fn build(b: *std.Build) void {
            .optimize = optimize,
            .imports = &.{
                .{ .name = "spv", .module = mod },
                .{ .name = "pretty", .module = pretty.module("pretty") },
            },
        }),
    });
@@ -42,6 +42,12 @@ pub fn main() !void {
            const tri_h = bottom_y - top_y;
            const max_half_w = @divTrunc(screen_width, 2) - margin_x;
            var timer = try std.time.Timer.start();
            defer {
                const ns = timer.lap();
                std.log.info("Took {d:.3}s to render", .{@as(f32, @floatFromInt(ns)) / std.time.ns_per_s});
            }
            for (top_y..bottom_y) |y| {
                const t: f32 = @as(f32, @floatFromInt(y - top_y)) / @as(f32, @floatFromInt(tri_h));
                const half_w: usize = @intFromFloat((t * @as(f32, @floatFromInt(max_half_w))) + 0.5);
@@ -79,7 +85,7 @@ pub fn main() !void {
        try window.updateSurface();
-        std.Thread.sleep(10_000_000_000);
+        std.Thread.sleep(2_000_000_000);
    }
    std.log.info("Successfully executed", .{});
 }
@@ -19,6 +19,10 @@ const Value = Result.Value;
 const Self = @This();
 /// Options supplied by the caller when a module is created; stored in the module's `options` field.
 pub const ModuleOptions = struct {
    /// When true (the default), vector types whose components are 32-bit floats or
    /// integers and that have 2, 3 or 4 members are registered as the dedicated
    /// `VectorNf32` / `VectorNi32` / `VectorNu32` types instead of the generic `Vector`.
    use_simd_vectors_specializations: bool = true,
 };
 const SpvEntryPoint = struct {
    exec_model: spv.SpvExecutionModel,
    id: SpvWord,
@@ -40,6 +44,8 @@ const ModuleError = error{
    OutOfMemory,
 };
 options: ModuleOptions,
 it: WordIterator,
 version_major: SpvByte,
@@ -77,8 +83,9 @@ output_locations: std.ArrayList(SpvWord),
 bindings: std.AutoHashMap(SpvBinding, Value),
 push_constants: []Value,
-pub fn init(allocator: std.mem.Allocator, source: []const SpvWord) ModuleError!Self {
+pub fn init(allocator: std.mem.Allocator, source: []const SpvWord, options: ModuleOptions) ModuleError!Self {
    var self: Self = std.mem.zeroInit(Self, .{
        .options = options,
        .code = allocator.dupe(SpvWord, source) catch return ModuleError.OutOfMemory,
        .files = std.ArrayList(SpvSource).empty,
        .extensions = std.ArrayList([]const u8).empty,
@@ -8,6 +8,18 @@ const SpvByte = spv.SpvByte;
 const SpvWord = spv.SpvWord;
 const SpvBool = spv.SpvBool;
 // SIMD vector aliases with 2, 3 or 4 lanes of f32, i32 or u32.
 // They are the payload types of the matching `Value.VectorN{f32,i32,u32}` union fields.
 pub const Vec4f32 = @Vector(4, f32);
 pub const Vec3f32 = @Vector(3, f32);
 pub const Vec2f32 = @Vector(2, f32);
 pub const Vec4i32 = @Vector(4, i32);
 pub const Vec3i32 = @Vector(3, i32);
 pub const Vec2i32 = @Vector(2, i32);
 pub const Vec4u32 = @Vector(4, u32);
 pub const Vec3u32 = @Vector(3, u32);
 pub const Vec2u32 = @Vector(2, u32);
 pub const Variant = enum {
    String,
    Extension,
@@ -26,6 +38,15 @@ pub const Type = enum {
    Int,
    Float,
    Vector,
    Vector4f32,
    Vector3f32,
    Vector2f32,
    Vector4i32,
    Vector3i32,
    Vector2i32,
    Vector4u32,
    Vector3u32,
    Vector2u32,
    Matrix,
    Array,
    RuntimeArray,
@@ -73,6 +94,15 @@ pub const Value = union(Type) {
        float64: f64,
    },
    Vector: []Value,
    Vector4f32: Vec4f32,
    Vector3f32: Vec3f32,
    Vector2f32: Vec2f32,
    Vector4i32: Vec4i32,
    Vector3i32: Vec3i32,
    Vector2i32: Vec2i32,
    Vector4u32: Vec4u32,
    Vector3u32: Vec3u32,
    Vector2u32: Vec2u32,
    Matrix: []Value,
    Array: []Value,
    RuntimeArray: struct {},
@@ -108,6 +138,15 @@ pub const Value = union(Type) {
                    }
                    break :blk self;
                },
                .Vector4f32 => .{ .Vector4f32 = Vec4f32{ 0.0, 0.0, 0.0, 0.0 } },
                .Vector3f32 => .{ .Vector3f32 = Vec3f32{ 0.0, 0.0, 0.0 } },
                .Vector2f32 => .{ .Vector2f32 = Vec2f32{ 0.0, 0.0 } },
                .Vector4i32 => .{ .Vector4i32 = Vec4i32{ 0, 0, 0, 0 } },
                .Vector3i32 => .{ .Vector3i32 = Vec3i32{ 0, 0, 0 } },
                .Vector2i32 => .{ .Vector2i32 = Vec2i32{ 0, 0 } },
                .Vector4u32 => .{ .Vector4u32 = Vec4u32{ 0, 0, 0, 0 } },
                .Vector3u32 => .{ .Vector3u32 = Vec3u32{ 0, 0, 0 } },
                .Vector2u32 => .{ .Vector2u32 = Vec2u32{ 0, 0 } },
                .Matrix => |m| blk: {
                    var self: Value = .{ .Matrix = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory };
                    errdefer self.deinit(allocator);
@@ -205,6 +244,15 @@ pub const VariantData = union(Variant) {
            components_type: Type,
            member_count: SpvWord,
        },
        Vector4f32: struct {},
        Vector3f32: struct {},
        Vector2f32: struct {},
        Vector4i32: struct {},
        Vector3i32: struct {},
        Vector2i32: struct {},
        Vector4u32: struct {},
        Vector3u32: struct {},
        Vector2u32: struct {},
        Matrix: struct {
            column_type_word: SpvWord,
            column_type: Type,
@@ -253,7 +301,7 @@ pub const VariantData = union(Variant) {
    },
    AccessChain: struct {
        target: SpvWord,
-        value: Value,
+        value: *Value,
    },
    FunctionParameter: struct {
        type_word: SpvWord,
@@ -335,7 +383,7 @@ pub fn getValue(self: *Self) RuntimeError!*Value {
    return switch ((try self.getVariant()).*) {
        .Variable => |*v| &v.value,
        .Constant => |*c| &c.value,
-        .AccessChain => |*a| &a.value,
+        .AccessChain => |a| a.value,
        .FunctionParameter => |*p| p.value_ptr orelse return RuntimeError.InvalidSpirV,
        else => RuntimeError.InvalidSpirV,
    };
@@ -438,6 +486,9 @@ pub fn getMemberCounts(self: *const Self) usize {
            .Type => |t| switch (t) {
                .Bool, .Int, .Float, .Image, .Sampler => return 1,
                .Vector => |v| return v.member_count,
                .Vector4f32, .Vector4i32, .Vector4u32 => return 4,
                .Vector3f32, .Vector3i32, .Vector3u32 => return 3,
                .Vector2f32, .Vector2i32, .Vector2u32 => return 2,
                .Matrix => |m| return m.member_count,
                .Array => |a| return a.member_count,
                .SampledImage => return 2,
@@ -466,6 +517,15 @@ pub fn initValue(allocator: std.mem.Allocator, member_count: usize, results: []c
                }
                break :blk value;
            },
            .Vector4f32 => .{ .Vector4f32 = Vec4f32{ 0.0, 0.0, 0.0, 0.0 } },
            .Vector3f32 => .{ .Vector3f32 = Vec3f32{ 0.0, 0.0, 0.0 } },
            .Vector2f32 => .{ .Vector2f32 = Vec2f32{ 0.0, 0.0 } },
            .Vector4i32 => .{ .Vector4i32 = Vec4i32{ 0, 0, 0, 0 } },
            .Vector3i32 => .{ .Vector3i32 = Vec3i32{ 0, 0, 0 } },
            .Vector2i32 => .{ .Vector2i32 = Vec2i32{ 0, 0 } },
            .Vector4u32 => .{ .Vector4u32 = Vec4u32{ 0, 0, 0, 0 } },
            .Vector3u32 => .{ .Vector3u32 = Vec3u32{ 0, 0, 0 } },
            .Vector2u32 => .{ .Vector2u32 = Vec2u32{ 0, 0 } },
            .Matrix => |m| blk: {
                const value: Value = .{ .Matrix = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory };
                errdefer allocator.free(value.Matrix);
@@ -476,7 +536,7 @@ pub fn initValue(allocator: std.mem.Allocator, member_count: usize, results: []c
            },
            .Array => |a| blk: {
                const value: Value = .{ .Array = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory };
-                errdefer allocator.free(value.Vector);
+                errdefer allocator.free(value.Array);
                for (value.Array) |*val| {
                    val.* = try Value.init(allocator, results, a.components_type_word);
                }
@@ -199,6 +199,42 @@ fn readValue(self: *const Self, comptime T: type, output: []T, value: *const Res
                inline else => return RuntimeError.InvalidValueType,
            }
        },
        .Vector4f32 => |vec| inline for (0..4) |i| switch (T) {
            f32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3f32 => |vec| inline for (0..3) |i| switch (T) {
            f32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2f32 => |vec| inline for (0..2) |i| switch (T) {
            f32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector4i32 => |vec| inline for (0..4) |i| switch (T) {
            i32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3i32 => |vec| inline for (0..3) |i| switch (T) {
            i32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2i32 => |vec| inline for (0..2) |i| switch (T) {
            i32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector4u32 => |vec| inline for (0..4) |i| switch (T) {
            u32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3u32 => |vec| inline for (0..3) |i| switch (T) {
            u32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2u32 => |vec| inline for (0..2) |i| switch (T) {
            u32 => output[i] = vec[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector, .Matrix, .Array, .Structure => |values| for (values, 0..) |v, i| try self.readValue(T, output[i..], &v),
        else => return RuntimeError.InvalidValueType,
    }
@@ -234,6 +270,42 @@ fn writeValue(self: *const Self, comptime T: type, input: []const T, value: *Res
                inline else => return RuntimeError.InvalidValueType,
            }
        },
        .Vector4f32 => |vec| inline for (0..4) |i| switch (T) {
            f32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3f32 => |vec| inline for (0..3) |i| switch (T) {
            f32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2f32 => |vec| inline for (0..2) |i| switch (T) {
            f32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector4i32 => |vec| inline for (0..4) |i| switch (T) {
            i32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3i32 => |vec| inline for (0..3) |i| switch (T) {
            i32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2i32 => |vec| inline for (0..2) |i| switch (T) {
            i32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector4u32 => |vec| inline for (0..4) |i| switch (T) {
            u32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector3u32 => |vec| inline for (0..3) |i| switch (T) {
            u32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector2u32 => |vec| inline for (0..2) |i| switch (T) {
            u32 => vec[i] = input[i],
            inline else => return RuntimeError.InvalidValueType,
        },
        .Vector, .Matrix, .Array, .Structure => |*values| for (values.*, 0..) |*v, i| try self.writeValue(T, input[i..], v),
        else => return RuntimeError.InvalidValueType,
    }
@@ -239,6 +239,16 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
            const size = sw: switch (target_type) {
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Int => |i| i.bit_length,
                else => return RuntimeError.InvalidSpirV,
            };
@@ -249,7 +259,7 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
                }
                inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT {
-                    const mask = bitMask(count) << @intCast(offset);
+                    const mask: TT = @intCast(bitMask(count) << @intCast(offset));
                    return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask)));
                }
@@ -314,6 +324,25 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type {
                .Int => try operator.process(rt, size, value, op1_value, op2_value),
                .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i|
                    try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null),
                // No bit manipulation on VectorXf32
                .Vector4i32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null);
                },
                .Vector3i32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null);
                },
                .Vector2i32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null);
                },
                .Vector4u32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null);
                },
                .Vector3u32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null);
                },
                .Vector2u32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null);
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -337,6 +366,16 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type {
            const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) {
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
@@ -374,6 +413,7 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type {
            switch (value.*) {
                .Bool => try operator.process(size, value, op1_value, op2_value),
                .Vector => |vec| for (vec, op1_value.Vector, op2_value.Vector) |*val, op1_v, op2_v| try operator.process(size, val, &op1_v, &op2_v),
                // No Vector specializations for booleans
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -391,6 +431,16 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type {
            const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) {
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
@@ -398,6 +448,16 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type {
            const to_size = sw: switch (target_type) {
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
@@ -428,12 +488,61 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type {
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
                /// Converts element `index` of the source operand `from` to `T`.
                ///
                /// When `from` is one of the specialized vectors (`VectorNf32`, `VectorNi32`,
                /// `VectorNu32`), lane `index` is read directly and converted with
                /// `std.math.lossyCast`. Any other operand is read through
                /// `getValuePrimitiveField`, using `from_bit_count` as its bit width.
                ///
                /// Returns `RuntimeError.InvalidSpirV` when `from_bit_count` is not 8, 16, 32
                /// or 64, or when it is 8 and `From` is `.Float`; errors raised by
                /// `getValuePrimitiveField` are propagated.
                fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T {
                    return switch (from.*) {
                        // This prong was missing: a 4-lane f32 source used to fall through to the
                        // primitive path below instead of being read lane-wise like its siblings.
                        .Vector4f32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]),
                        .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]),
                        // Non-specialized operands: dispatch on the runtime bit width so the
                        // primitive field can be selected at comptime.
                        inline else => switch (from_bit_count) {
                            inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: {
                                // There is no 8-bit float representation to read from.
                                if (i == 8 and From == .Float) {
                                    return RuntimeError.InvalidSpirV;
                                }
                                break :blk (try getValuePrimitiveField(From, i, from)).*;
                            }),
                            else => return RuntimeError.InvalidSpirV,
                        },
                    };
                }
            };
            switch (value.*) {
                .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
                .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV,
                .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v),
                .Vector4f32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
                },
                .Vector3f32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
                },
                .Vector2f32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.processVecSpe(f32, from_size, op_value, i);
                },
                .Vector4i32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
                },
                .Vector3i32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
                },
                .Vector2i32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.processVecSpe(i32, from_size, op_value, i);
                },
                .Vector4u32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
                },
                .Vector3u32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
                },
                .Vector2u32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.processVecSpe(u32, from_size, op_value, i);
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -450,6 +559,16 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
            const size = sw: switch (target_type) {
                .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type,
                .Vector4f32,
                .Vector3f32,
                .Vector2f32,
                .Vector4i32,
                .Vector3i32,
                .Vector2i32,
                .Vector4u32,
                .Vector3u32,
                .Vector2u32,
                => 32,
                .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV,
                .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV,
                else => return RuntimeError.InvalidSpirV,
@@ -491,6 +610,33 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
                .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
                .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV,
                .Vector => |vec| for (vec, op1_value.Vector, op2_value.Vector) |*val, op1_v, op2_v| try operator.process(size, val, &op1_v, &op2_v),
                .Vector4f32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]);
                },
                .Vector3f32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]);
                },
                .Vector2f32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]);
                },
                .Vector4i32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]);
                },
                .Vector3i32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]);
                },
                .Vector2i32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]);
                },
                .Vector4u32 => |*vec| inline for (0..4) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]);
                },
                .Vector3u32 => |*vec| inline for (0..3) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]);
                },
                .Vector2u32 => |*vec| inline for (0..2) |i| {
                    vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]);
                },
                else => return RuntimeError.InvalidSpirV,
            }
        }
@@ -570,6 +716,7 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
    switch (to_value.*) {
        .Int, .Float => try caster.cast(to_value, from_value),
        .Vector => |vec| for (vec, from_value.Vector) |*t, *f| try caster.cast(t, f),
        // TODO: vectors specializations
        else => return RuntimeError.InvalidSpirV,
    }
 }
@@ -622,31 +769,44 @@ fn opAccessChain(_: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Runtim
    var value_ptr = try base.getValue();
    const index_count = word_count - 3;
    for (0..index_count) |_| {
        const member = &rt.results[try rt.it.next()];
        const member_value = switch ((try member.getVariant()).*) {
            .Constant => |c| &c.value,
            .Variable => |v| &v.value,
            else => return RuntimeError.InvalidSpirV,
        };
        switch (member_value.*) {
            .Int => |i| {
                switch (value_ptr.*) {
                    .Vector, .Matrix, .Array, .Structure => |v| {
                        if (i.uint32 > v.len) return RuntimeError.InvalidSpirV;
                        value_ptr = &v[i.uint32];
                    },
                    else => return RuntimeError.InvalidSpirV,
                }
            },
            else => return RuntimeError.InvalidSpirV,
        }
    }
    rt.results[id].variant = .{
        .AccessChain = .{
            .target = var_type,
-            .value = value_ptr.*,
+            .value = blk: {
                for (0..index_count) |_| {
                    const member = &rt.results[try rt.it.next()];
                    const member_value = switch ((try member.getVariant()).*) {
                        .Constant => |c| &c.value,
                        .Variable => |v| &v.value,
                        else => return RuntimeError.InvalidSpirV,
                    };
                    switch (member_value.*) {
                        .Int => |i| {
                            switch (value_ptr.*) {
                                .Vector, .Matrix, .Array, .Structure => |v| {
                                    if (i.uint32 > v.len) return RuntimeError.InvalidSpirV;
                                    value_ptr = &v[i.uint32];
                                },
                                //.Vector4f32 => |v| {
                                //    if (i.uint32 > 4) return RuntimeError.InvalidSpirV;
                                //    break :blk .{
                                //        .Float = .{ .float32 = v[i.uint32] },
                                //    };
                                //},
                                //.Vector2f32 => |v| {
                                //    if (i.uint32 > 2) return RuntimeError.InvalidSpirV;
                                //    break :blk .{
                                //        .Float = .{ .float32 = v[i.uint32] },
                                //    };
                                //},
                                else => return RuntimeError.InvalidSpirV,
                            }
                        },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
                break :blk value_ptr;
            },
        },
    };
 }
@@ -685,10 +845,44 @@ fn opCompositeConstruct(_: std.mem.Allocator, word_count: SpvWord, rt: *Runtime)
    const id = try rt.it.next();
    const index_count = word_count - 2;
-    const target = (try rt.results[id].getVariant()).Constant.value.getCompositeDataOrNull() orelse return RuntimeError.InvalidSpirV;
+    const value = &(try rt.results[id].getVariant()).Constant.value;
-    for (target[0..index_count]) |*elem| {
+    if (value.getCompositeDataOrNull()) |target| {
-        const value = (try rt.results[try rt.it.next()].getVariant()).Constant.value;
+        for (target[0..index_count]) |*elem| {
-        elem.* = value;
+            const elem_value = (try rt.results[try rt.it.next()].getVariant()).Constant.value;
            elem.* = elem_value;
        }
        return;
    }
    switch (value.*) {
        .Vector4f32 => |*vec| inline for (0..4) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32;
        },
        .Vector3f32 => |*vec| inline for (0..3) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32;
        },
        .Vector2f32 => |*vec| inline for (0..2) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32;
        },
        .Vector4i32 => |*vec| inline for (0..4) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32;
        },
        .Vector3i32 => |*vec| inline for (0..3) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32;
        },
        .Vector2i32 => |*vec| inline for (0..2) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32;
        },
        .Vector4u32 => |*vec| inline for (0..4) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32;
        },
        .Vector3u32 => |*vec| inline for (0..3) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32;
        },
        .Vector2u32 => |*vec| inline for (0..2) |i| {
            vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32;
        },
        else => return RuntimeError.InvalidSpirV,
    }
 }
@@ -696,13 +890,8 @@ fn opCompositeExtract(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Ru
    const res_type = try rt.it.next();
    const id = try rt.it.next();
    const composite_id = try rt.it.next();
    const index_count = word_count - 3;
-    var composite = (try rt.results[composite_id].getVariant()).Constant.value;
+
    for (0..index_count) |_| {
        const member_id = try rt.it.next();
        composite = (composite.getCompositeDataOrNull() orelse return RuntimeError.InvalidSpirV)[member_id];
    }
    rt.results[id].variant = .{
        .Constant = .{
            .type_word = res_type,
@@ -710,7 +899,29 @@ fn opCompositeExtract(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Ru
                .Type => |t| @as(Result.Type, t),
                else => return RuntimeError.InvalidSpirV,
            },
-            .value = try composite.dupe(allocator),
+            .value = blk: {
                var composite = (try rt.results[composite_id].getVariant()).Constant.value;
                for (0..index_count) |_| {
                    const member_id = try rt.it.next();
                    if (composite.getCompositeDataOrNull()) |v| {
                        composite = v[member_id];
                        continue;
                    }
                    switch (composite) {
                        .Vector4f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } },
                        .Vector3f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } },
                        .Vector2f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } },
                        .Vector4i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } },
                        .Vector3i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } },
                        .Vector2i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } },
                        .Vector4u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } },
                        .Vector3u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } },
                        .Vector2u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } },
                        else => return RuntimeError.InvalidSpirV,
                    }
                }
                break :blk try composite.dupe(allocator);
            },
        },
    };
 }
@@ -1139,18 +1350,49 @@ fn opTypeStruct(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime)
 /// Handles an OpTypeVector instruction: reads the result id, the component
 /// type id and the member count from `rt.it`, then stores a vector `Type`
 /// variant in `rt.mod.results[id]`.
 ///
 /// Returns `RuntimeError.InvalidSpirV` when the component type id does not
 /// resolve to a `.Type` variant; errors from `rt.it.next()` and
 /// `getVariant()` are propagated with `try`.
 ///
 /// NOTE(review): this span is a diff hunk; lines starting with `-` are the
 /// pre-change version and lines starting with `+` the post-change version.
 fn opTypeVector(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
    const id = try rt.it.next();
    const components_type_word = try rt.it.next();
-    rt.mod.results[id].variant = .{
+    var components_type_size: usize = 0;
-        .Type = .{
+    const components_type_concrete = try rt.mod.results[components_type_word].getVariant();
    // Resolve the component type and, for Int/Float components, remember the
    // bit length; every other component type leaves the size at 0.
    const components_type = switch (components_type_concrete.*) {
        .Type => |t| blk: {
            switch (t) {
                .Int => |i| components_type_size = i.bit_length,
                .Float => |f| components_type_size = f.bit_length,
                else => {},
            }
            break :blk @as(Result.Type, t);
        },
        else => return RuntimeError.InvalidSpirV,
    };
    const member_count = try rt.it.next();
    rt.mod.results[id].variant = .{ .Type = blk: {
        // Specialized variants are only chosen for 32-bit components and only
        // when the module option enables them; unmatched cases fall through
        // to the generic `.Vector` variant below.
        if (components_type_size == 32 and rt.mod.options.use_simd_vectors_specializations) {
            switch (components_type) {
                .Float => switch (member_count) {
                    2 => break :blk .{ .Vector2f32 = .{} },
                    3 => break :blk .{ .Vector3f32 = .{} },
                    4 => break :blk .{ .Vector4f32 = .{} },
                    else => {},
                },
                .Int => {
                    // Signedness selects between the i32 and u32 variants.
                    const is_signed = components_type_concrete.Type.Int.is_signed;
                    switch (member_count) {
                        2 => break :blk if (is_signed) .{ .Vector2i32 = .{} } else .{ .Vector2u32 = .{} },
                        3 => break :blk if (is_signed) .{ .Vector3i32 = .{} } else .{ .Vector3u32 = .{} },
                        4 => break :blk if (is_signed) .{ .Vector4i32 = .{} } else .{ .Vector4u32 = .{} },
                        else => {},
                    }
                },
                else => {},
            }
        }
        // Generic fallback: keeps the component type id, the resolved
        // component type and the member count.
        break :blk .{
            .Vector = .{
                .components_type_word = components_type_word,
-                .components_type = switch ((try rt.mod.results[components_type_word].getVariant()).*) {
+                .components_type = components_type,
-                    .Type => |t| @as(Result.Type, t),
+                .member_count = member_count,
                // NOTE(review): the next three unmarked lines look like the
                // tail of the removed inline switch / old member_count read
                // that lost their `-` marker — confirm against the commit.
                    else => return RuntimeError.InvalidSpirV,
                },
                .member_count = try rt.it.next(),
            },
-        },
+        };
-    };
+    } };
 }
 fn opTypeVoid(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
@@ -23,17 +23,28 @@ pub const case = struct {
    /// Test helper: builds a module from `source`, calls its "main" entry
    /// point, reads the result named `output_name` into a `[len]T` buffer and
    /// checks it against `expected` with `std.testing.expectEqualSlices`.
    ///
    /// In the post-change (`+`) version the whole sequence is run once per
    /// entry of `module_options`, i.e. with `use_simd_vectors_specializations`
    /// set to `true` and then to `false`.
    ///
    /// Allocations use `std.testing.allocator`; any error from module/runtime
    /// setup, execution, readback or the comparison is propagated with `try`.
    ///
    /// NOTE(review): this span is a diff hunk; lines starting with `-` are the
    /// pre-change version and lines starting with `+` the post-change version.
    pub fn expectOutput(comptime T: type, comptime len: usize, source: []const u32, output_name: []const u8, expected: []const T) !void {
        const allocator = std.testing.allocator;
-        var module = try spv.Module.init(allocator, source);
+        const module_options = [_]spv.Module.ModuleOptions{
-        defer module.deinit(allocator);
+            .{
                .use_simd_vectors_specializations = true,
            },
            .{
                .use_simd_vectors_specializations = false,
            },
        };
-        var rt = try spv.Runtime.init(allocator, &module);
+        for (module_options) |opt| {
-        defer rt.deinit(allocator);
+            var module = try spv.Module.init(allocator, source, opt);
            // The defers sit inside the loop body, so module and runtime are
            // released at the end of each iteration.
            defer module.deinit(allocator);
-        try rt.callEntryPoint(allocator, try rt.getEntryPointByName("main"));
+            var rt = try spv.Runtime.init(allocator, &module);
-        var output: [len]T = undefined;
+            defer rt.deinit(allocator);
        // NOTE(review): the next unmarked line sits at the old indentation and
        // looks like a removed line that lost its `-` marker — confirm.
        try rt.readOutput(T, output[0..len], try rt.getResultByName(output_name));
-        try std.testing.expectEqualSlices(T, expected, &output);
+            try rt.callEntryPoint(allocator, try rt.getEntryPointByName("main"));
            // `output` is fully written by readOutput before it is compared.
            var output: [len]T = undefined;
            try rt.readOutput(T, output[0..len], try rt.getResultByName(output_name));
            try std.testing.expectEqualSlices(T, expected, &output);
        }
    }
    pub fn random(comptime T: type) T {