From 19687251b0093d2f14a9fca8c2f4224fcdb1ec29 Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Wed, 21 Jan 2026 14:52:37 +0100 Subject: [PATCH] adding SIMD vector specializations --- build.zig | 1 + example/main.zig | 8 +- src/Module.zig | 9 +- src/Result.zig | 66 +++++++++- src/Runtime.zig | 72 +++++++++++ src/opcodes.zig | 328 ++++++++++++++++++++++++++++++++++++++++------- test/root.zig | 27 ++-- 7 files changed, 455 insertions(+), 56 deletions(-) diff --git a/build.zig b/build.zig index fc20121..fc20ead 100644 --- a/build.zig +++ b/build.zig @@ -31,6 +31,7 @@ pub fn build(b: *std.Build) void { .optimize = optimize, .imports = &.{ .{ .name = "spv", .module = mod }, + .{ .name = "pretty", .module = pretty.module("pretty") }, }, }), }); diff --git a/example/main.zig b/example/main.zig index f0f400f..d263c6e 100644 --- a/example/main.zig +++ b/example/main.zig @@ -42,6 +42,12 @@ pub fn main() !void { const tri_h = bottom_y - top_y; const max_half_w = @divTrunc(screen_width, 2) - margin_x; + var timer = try std.time.Timer.start(); + defer { + const ns = timer.lap(); + std.log.info("Took {d:.3}s to render", .{@as(f32, @floatFromInt(ns)) / std.time.ns_per_s}); + } + for (top_y..bottom_y) |y| { const t: f32 = @as(f32, @floatFromInt(y - top_y)) / @as(f32, @floatFromInt(tri_h)); const half_w: usize = @intFromFloat((t * @as(f32, @floatFromInt(max_half_w))) + 0.5); @@ -79,7 +85,7 @@ pub fn main() !void { try window.updateSurface(); - std.Thread.sleep(10_000_000_000); + std.Thread.sleep(2_000_000_000); } std.log.info("Successfully executed", .{}); } diff --git a/src/Module.zig b/src/Module.zig index e67d33e..2076b94 100644 --- a/src/Module.zig +++ b/src/Module.zig @@ -19,6 +19,10 @@ const Value = Result.Value; const Self = @This(); +pub const ModuleOptions = struct { + use_simd_vectors_specializations: bool = true, +}; + const SpvEntryPoint = struct { exec_model: spv.SpvExecutionModel, id: SpvWord, @@ -40,6 +44,8 @@ const ModuleError = error{ OutOfMemory, }; +options: ModuleOptions, + it: WordIterator, version_major: SpvByte, @@ -77,8 +83,9 @@ output_locations: std.ArrayList(SpvWord), bindings: std.AutoHashMap(SpvBinding, Value), push_constants: []Value, -pub fn init(allocator: std.mem.Allocator, source: []const SpvWord) ModuleError!Self { +pub fn init(allocator: std.mem.Allocator, source: []const SpvWord, options: ModuleOptions) ModuleError!Self { var self: Self = std.mem.zeroInit(Self, .{ + .options = options, .code = allocator.dupe(SpvWord, source) catch return ModuleError.OutOfMemory, .files = std.ArrayList(SpvSource).empty, .extensions = std.ArrayList([]const u8).empty, diff --git a/src/Result.zig b/src/Result.zig index 48d103f..4cbbce0 100644 --- a/src/Result.zig +++ b/src/Result.zig @@ -8,6 +8,18 @@ const SpvByte = spv.SpvByte; const SpvWord = spv.SpvWord; const SpvBool = spv.SpvBool; +pub const Vec4f32 = @Vector(4, f32); +pub const Vec3f32 = @Vector(3, f32); +pub const Vec2f32 = @Vector(2, f32); + +pub const Vec4i32 = @Vector(4, i32); +pub const Vec3i32 = @Vector(3, i32); +pub const Vec2i32 = @Vector(2, i32); + +pub const Vec4u32 = @Vector(4, u32); +pub const Vec3u32 = @Vector(3, u32); +pub const Vec2u32 = @Vector(2, u32); + pub const Variant = enum { String, Extension, @@ -26,6 +38,15 @@ pub const Type = enum { Int, Float, Vector, + Vector4f32, + Vector3f32, + Vector2f32, + Vector4i32, + Vector3i32, + Vector2i32, + Vector4u32, + Vector3u32, + Vector2u32, Matrix, Array, RuntimeArray, @@ -73,6 +94,15 @@ pub const Value = union(Type) { float64: f64, }, Vector: []Value, + Vector4f32: Vec4f32, + Vector3f32: Vec3f32, + Vector2f32: Vec2f32, + Vector4i32: Vec4i32, + Vector3i32: Vec3i32, + Vector2i32: Vec2i32, + Vector4u32: Vec4u32, + Vector3u32: Vec3u32, + Vector2u32: Vec2u32, Matrix: []Value, Array: []Value, RuntimeArray: struct {}, @@ -108,6 +138,15 @@ pub const Value = union(Type) { } break :blk self; }, + .Vector4f32 => .{ .Vector4f32 = Vec4f32{ 0.0, 0.0, 0.0, 0.0 } }, + .Vector3f32 => .{ .Vector3f32 = Vec3f32{ 0.0, 0.0, 0.0 } }, + .Vector2f32 => .{ .Vector2f32 = Vec2f32{ 0.0, 0.0 } }, + .Vector4i32 => .{ .Vector4i32 = Vec4i32{ 0, 0, 0, 0 } }, + .Vector3i32 => .{ .Vector3i32 = Vec3i32{ 0, 0, 0 } }, + .Vector2i32 => .{ .Vector2i32 = Vec2i32{ 0, 0 } }, + .Vector4u32 => .{ .Vector4u32 = Vec4u32{ 0, 0, 0, 0 } }, + .Vector3u32 => .{ .Vector3u32 = Vec3u32{ 0, 0, 0 } }, + .Vector2u32 => .{ .Vector2u32 = Vec2u32{ 0, 0 } }, .Matrix => |m| blk: { var self: Value = .{ .Matrix = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory }; errdefer self.deinit(allocator); @@ -205,6 +244,15 @@ pub const VariantData = union(Variant) { components_type: Type, member_count: SpvWord, }, + Vector4f32: struct {}, + Vector3f32: struct {}, + Vector2f32: struct {}, + Vector4i32: struct {}, + Vector3i32: struct {}, + Vector2i32: struct {}, + Vector4u32: struct {}, + Vector3u32: struct {}, + Vector2u32: struct {}, Matrix: struct { column_type_word: SpvWord, column_type: Type, @@ -253,7 +301,7 @@ pub const VariantData = union(Variant) { }, AccessChain: struct { target: SpvWord, - value: Value, + value: *Value, }, FunctionParameter: struct { type_word: SpvWord, @@ -335,7 +383,7 @@ pub fn getValue(self: *Self) RuntimeError!*Value { return switch ((try self.getVariant()).*) { .Variable => |*v| &v.value, .Constant => |*c| &c.value, - .AccessChain => |*a| &a.value, + .AccessChain => |a| a.value, .FunctionParameter => |*p| p.value_ptr orelse return RuntimeError.InvalidSpirV, else => RuntimeError.InvalidSpirV, }; @@ -438,6 +486,9 @@ pub fn getMemberCounts(self: *const Self) usize { .Type => |t| switch (t) { .Bool, .Int, .Float, .Image, .Sampler => return 1, .Vector => |v| return v.member_count, + .Vector4f32, .Vector4i32, .Vector4u32 => return 4, + .Vector3f32, .Vector3i32, .Vector3u32 => return 3, + .Vector2f32, .Vector2i32, .Vector2u32 => return 2, .Matrix => |m| return m.member_count, .Array => |a| return a.member_count, .SampledImage => return 2, @@ -466,6 +517,15 @@ pub fn initValue(allocator: std.mem.Allocator, member_count: usize, results: []c } break :blk value; }, + .Vector4f32 => .{ .Vector4f32 = Vec4f32{ 0.0, 0.0, 0.0, 0.0 } }, + .Vector3f32 => .{ .Vector3f32 = Vec3f32{ 0.0, 0.0, 0.0 } }, + .Vector2f32 => .{ .Vector2f32 = Vec2f32{ 0.0, 0.0 } }, + .Vector4i32 => .{ .Vector4i32 = Vec4i32{ 0, 0, 0, 0 } }, + .Vector3i32 => .{ .Vector3i32 = Vec3i32{ 0, 0, 0 } }, + .Vector2i32 => .{ .Vector2i32 = Vec2i32{ 0, 0 } }, + .Vector4u32 => .{ .Vector4u32 = Vec4u32{ 0, 0, 0, 0 } }, + .Vector3u32 => .{ .Vector3u32 = Vec3u32{ 0, 0, 0 } }, + .Vector2u32 => .{ .Vector2u32 = Vec2u32{ 0, 0 } }, .Matrix => |m| blk: { const value: Value = .{ .Matrix = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory }; errdefer allocator.free(value.Matrix); @@ -476,7 +536,7 @@ pub fn initValue(allocator: std.mem.Allocator, member_count: usize, results: []c }, .Array => |a| blk: { const value: Value = .{ .Array = allocator.alloc(Value, member_count) catch return RuntimeError.OutOfMemory }; - errdefer allocator.free(value.Vector); + errdefer allocator.free(value.Array); for (value.Array) |*val| { val.* = try Value.init(allocator, results, a.components_type_word); } diff --git a/src/Runtime.zig b/src/Runtime.zig index 9370e21..f0fce82 100644 --- a/src/Runtime.zig +++ b/src/Runtime.zig @@ -199,6 +199,42 @@ fn readValue(self: *const Self, comptime T: type, output: []T, value: *const Res inline else => return RuntimeError.InvalidValueType, } }, + .Vector4f32 => |vec| inline for (0..4) |i| switch (T) { + f32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3f32 => |vec| inline for (0..3) |i| switch (T) { + f32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2f32 => |vec| inline for (0..2) |i| switch (T) { + f32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector4i32 => |vec| inline for (0..4) |i| switch (T) { + i32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3i32 => |vec| inline for (0..3) |i| switch (T) { + i32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2i32 => |vec| inline for (0..2) |i| switch (T) { + i32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector4u32 => |vec| inline for (0..4) |i| switch (T) { + u32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3u32 => |vec| inline for (0..3) |i| switch (T) { + u32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2u32 => |vec| inline for (0..2) |i| switch (T) { + u32 => output[i] = vec[i], + inline else => return RuntimeError.InvalidValueType, + }, .Vector, .Matrix, .Array, .Structure => |values| for (values, 0..) |v, i| try self.readValue(T, output[i..], &v), else => return RuntimeError.InvalidValueType, } @@ -234,6 +270,42 @@ fn writeValue(self: *const Self, comptime T: type, input: []const T, value: *Res inline else => return RuntimeError.InvalidValueType, } }, + .Vector4f32 => |vec| inline for (0..4) |i| switch (T) { + f32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3f32 => |vec| inline for (0..3) |i| switch (T) { + f32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2f32 => |vec| inline for (0..2) |i| switch (T) { + f32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector4i32 => |vec| inline for (0..4) |i| switch (T) { + i32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3i32 => |vec| inline for (0..3) |i| switch (T) { + i32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2i32 => |vec| inline for (0..2) |i| switch (T) { + i32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector4u32 => |vec| inline for (0..4) |i| switch (T) { + u32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector3u32 => |vec| inline for (0..3) |i| switch (T) { + u32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, + .Vector2u32 => |vec| inline for (0..2) |i| switch (T) { + u32 => vec[i] = input[i], + inline else => return RuntimeError.InvalidValueType, + }, .Vector, .Matrix, .Array, .Structure => |*values| for (values.*, 0..) |*v, i| try self.writeValue(T, input[i..], v), else => return RuntimeError.InvalidValueType, } diff --git a/src/opcodes.zig b/src/opcodes.zig index 9c24334..a5d2b26 100644 --- a/src/opcodes.zig +++ b/src/opcodes.zig @@ -239,6 +239,16 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { const size = sw: switch (target_type) { .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => 32, .Int => |i| i.bit_length, else => return RuntimeError.InvalidSpirV, }; @@ -249,7 +259,7 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { } inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT { - const mask = bitMask(count) << @intCast(offset); + const mask: TT = @intCast(bitMask(count) << @intCast(offset)); return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask))); } @@ -314,6 +324,25 @@ fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { .Int => try operator.process(rt, size, value, op1_value, op2_value), .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null), + // No bit manipulation on VectorXf32 + .Vector4i32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null); + }, + .Vector3i32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null); + }, + .Vector2i32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null); + }, + .Vector4u32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null); + }, + .Vector3u32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null); + }, + .Vector2u32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null); + }, else => return RuntimeError.InvalidSpirV, } } @@ -337,6 +366,16 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type { const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) { .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => 32, .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, else => return RuntimeError.InvalidSpirV, @@ -374,6 +413,7 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type { switch (value.*) { .Bool => try operator.process(size, value, op1_value, op2_value), .Vector => |vec| for (vec, op1_value.Vector, op2_value.Vector) |*val, op1_v, op2_v| try operator.process(size, val, &op1_v, &op2_v), + // No Vector specializations for booleans else => return RuntimeError.InvalidSpirV, } } @@ -391,6 +431,16 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type { const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) { .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => 32, .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV, .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, else => return RuntimeError.InvalidSpirV, @@ -398,6 +448,16 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type { const to_size = sw: switch (target_type) { .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => 32, .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV, .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, else => return RuntimeError.InvalidSpirV, @@ -428,12 +488,61 @@ fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type { else => return RuntimeError.InvalidSpirV, } } + + fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T { + return switch (from.*) { + .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]), + .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]), + inline else => switch (from_bit_count) { + inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: { + if (i == 8 and From == .Float) { + return RuntimeError.InvalidSpirV; + } + break :blk (try getValuePrimitiveField(From, i, from)).*; + }), + else => return RuntimeError.InvalidSpirV, + }, + }; + } }; switch (value.*) { .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v), + .Vector4f32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + }, + .Vector3f32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + }, + .Vector2f32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + }, + .Vector4i32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + }, + .Vector3i32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + }, + .Vector2i32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + }, + .Vector4u32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + }, + .Vector3u32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + }, + .Vector2u32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + }, else => return RuntimeError.InvalidSpirV, } } @@ -450,6 +559,16 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { const size = sw: switch (target_type) { .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => 32, .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, else => return RuntimeError.InvalidSpirV, @@ -491,6 +610,33 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, .Vector => |vec| for (vec, op1_value.Vector, op2_value.Vector) |*val, op1_v, op2_v| try operator.process(size, val, &op1_v, &op2_v), + .Vector4f32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]); + }, + .Vector3f32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]); + }, + .Vector2f32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]); + }, + .Vector4i32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]); + }, + .Vector3i32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]); + }, + .Vector2i32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]); + }, + .Vector4u32 => |*vec| inline for (0..4) |i| { + vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]); + }, + .Vector3u32 => |*vec| inline for (0..3) |i| { + vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]); + }, + .Vector2u32 => |*vec| inline for (0..2) |i| { + vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]); + }, else => return RuntimeError.InvalidSpirV, } } @@ -570,6 +716,7 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { switch (to_value.*) { .Int, .Float => try caster.cast(to_value, from_value), .Vector => |vec| for (vec, from_value.Vector) |*t, *f| try caster.cast(t, f), + // TODO: vectors specializations else => return RuntimeError.InvalidSpirV, } } @@ -622,31 +769,44 @@ fn opAccessChain(_: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Runtim var value_ptr = try base.getValue(); const index_count = word_count - 3; - for (0..index_count) |_| { - const member = &rt.results[try rt.it.next()]; - const member_value = switch ((try member.getVariant()).*) { - .Constant => |c| &c.value, - .Variable => |v| &v.value, - else => return RuntimeError.InvalidSpirV, - }; - switch (member_value.*) { - .Int => |i| { - switch (value_ptr.*) { - .Vector, .Matrix, .Array, .Structure => |v| { - if (i.uint32 > v.len) return RuntimeError.InvalidSpirV; - value_ptr = &v[i.uint32]; - }, - else => return RuntimeError.InvalidSpirV, - } - }, - else => return RuntimeError.InvalidSpirV, - } - } - rt.results[id].variant = .{ .AccessChain = .{ .target = var_type, - .value = value_ptr.*, + .value = blk: { + for (0..index_count) |_| { + const member = &rt.results[try rt.it.next()]; + const member_value = switch ((try member.getVariant()).*) { + .Constant => |c| &c.value, + .Variable => |v| &v.value, + else => return RuntimeError.InvalidSpirV, + }; + switch (member_value.*) { + .Int => |i| { + switch (value_ptr.*) { + .Vector, .Matrix, .Array, .Structure => |v| { + if (i.uint32 > v.len) return RuntimeError.InvalidSpirV; + value_ptr = &v[i.uint32]; + }, + //.Vector4f32 => |v| { + // if (i.uint32 > 4) return RuntimeError.InvalidSpirV; + // break :blk .{ + // .Float = .{ .float32 = v[i.uint32] }, + // }; + //}, + //.Vector2f32 => |v| { + // if (i.uint32 > 2) return RuntimeError.InvalidSpirV; + // break :blk .{ + // .Float = .{ .float32 = v[i.uint32] }, + // }; + //}, + else => return RuntimeError.InvalidSpirV, + } + }, + else => return RuntimeError.InvalidSpirV, + } + } + break :blk value_ptr; + }, }, }; } @@ -685,10 +845,44 @@ fn opCompositeConstruct(_: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) const id = try rt.it.next(); const index_count = word_count - 2; - const target = (try rt.results[id].getVariant()).Constant.value.getCompositeDataOrNull() orelse return RuntimeError.InvalidSpirV; - for (target[0..index_count]) |*elem| { - const value = (try rt.results[try rt.it.next()].getVariant()).Constant.value; - elem.* = value; + const value = &(try rt.results[id].getVariant()).Constant.value; + if (value.getCompositeDataOrNull()) |target| { + for (target[0..index_count]) |*elem| { + const elem_value = (try rt.results[try rt.it.next()].getVariant()).Constant.value; + elem.* = elem_value; + } + return; + } + + switch (value.*) { + .Vector4f32 => |*vec| inline for (0..4) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32; + }, + .Vector3f32 => |*vec| inline for (0..3) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32; + }, + .Vector2f32 => |*vec| inline for (0..2) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Float.float32; + }, + .Vector4i32 => |*vec| inline for (0..4) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32; + }, + .Vector3i32 => |*vec| inline for (0..3) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32; + }, + .Vector2i32 => |*vec| inline for (0..2) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.sint32; + }, + .Vector4u32 => |*vec| inline for (0..4) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32; + }, + .Vector3u32 => |*vec| inline for (0..3) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32; + }, + .Vector2u32 => |*vec| inline for (0..2) |i| { + vec[i] = (try rt.results[try rt.it.next()].getVariant()).Constant.value.Int.uint32; + }, + else => return RuntimeError.InvalidSpirV, } } @@ -696,13 +890,8 @@ fn opCompositeExtract(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Ru const res_type = try rt.it.next(); const id = try rt.it.next(); const composite_id = try rt.it.next(); - const index_count = word_count - 3; - var composite = (try rt.results[composite_id].getVariant()).Constant.value; - for (0..index_count) |_| { - const member_id = try rt.it.next(); - composite = (composite.getCompositeDataOrNull() orelse return RuntimeError.InvalidSpirV)[member_id]; - } + rt.results[id].variant = .{ .Constant = .{ .type_word = res_type, @@ -710,7 +899,29 @@ fn opCompositeExtract(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Ru .Type => |t| @as(Result.Type, t), else => return RuntimeError.InvalidSpirV, }, - .value = try composite.dupe(allocator), + .value = blk: { + var composite = (try rt.results[composite_id].getVariant()).Constant.value; + for (0..index_count) |_| { + const member_id = try rt.it.next(); + if (composite.getCompositeDataOrNull()) |v| { + composite = v[member_id]; + continue; + } + switch (composite) { + .Vector4f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } }, + .Vector3f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } }, + .Vector2f32 => |v| break :blk .{ .Float = .{ .float32 = v[member_id] } }, + .Vector4i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } }, + .Vector3i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } }, + .Vector2i32 => |v| break :blk .{ .Int = .{ .sint32 = v[member_id] } }, + .Vector4u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } }, + .Vector3u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } }, + .Vector2u32 => |v| break :blk .{ .Int = .{ .uint32 = v[member_id] } }, + else => return RuntimeError.InvalidSpirV, + } + } + break :blk try composite.dupe(allocator); + }, }, }; } @@ -1139,18 +1350,49 @@ fn opTypeStruct(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) fn opTypeVector(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { const id = try rt.it.next(); const components_type_word = try rt.it.next(); - rt.mod.results[id].variant = .{ - .Type = .{ + var components_type_size: usize = 0; + const components_type_concrete = try rt.mod.results[components_type_word].getVariant(); + const components_type = switch (components_type_concrete.*) { + .Type => |t| blk: { + switch (t) { + .Int => |i| components_type_size = i.bit_length, + .Float => |f| components_type_size = f.bit_length, + else => {}, + } + break :blk @as(Result.Type, t); + }, + else => return RuntimeError.InvalidSpirV, + }; + const member_count = try rt.it.next(); + rt.mod.results[id].variant = .{ .Type = blk: { + if (components_type_size == 32 and rt.mod.options.use_simd_vectors_specializations) { + switch (components_type) { + .Float => switch (member_count) { + 2 => break :blk .{ .Vector2f32 = .{} }, + 3 => break :blk .{ .Vector3f32 = .{} }, + 4 => break :blk .{ .Vector4f32 = .{} }, + else => {}, + }, + .Int => { + const is_signed = components_type_concrete.Type.Int.is_signed; + switch (member_count) { + 2 => break :blk if (is_signed) .{ .Vector2i32 = .{} } else .{ .Vector2u32 = .{} }, + 3 => break :blk if (is_signed) .{ .Vector3i32 = .{} } else .{ .Vector3u32 = .{} }, + 4 => break :blk if (is_signed) .{ .Vector4i32 = .{} } else .{ .Vector4u32 = .{} }, + else => {}, + } + }, + else => {}, + } + } + break :blk .{ .Vector = .{ .components_type_word = components_type_word, - .components_type = switch ((try rt.mod.results[components_type_word].getVariant()).*) { - .Type => |t| @as(Result.Type, t), - else => return RuntimeError.InvalidSpirV, - }, - .member_count = try rt.it.next(), + .components_type = components_type, + .member_count = member_count, }, - }, - }; + }; + } }; } fn opTypeVoid(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { diff --git a/test/root.zig b/test/root.zig index 0f62a4a..6eed822 100644 --- a/test/root.zig +++ b/test/root.zig @@ -23,17 +23,28 @@ pub const case = struct { pub fn expectOutput(comptime T: type, comptime len: usize, source: []const u32, output_name: []const u8, expected: []const T) !void { const allocator = std.testing.allocator; - var module = try spv.Module.init(allocator, source); - defer module.deinit(allocator); + const module_options = [_]spv.Module.ModuleOptions{ + .{ + .use_simd_vectors_specializations = true, + }, + .{ + .use_simd_vectors_specializations = false, + }, + }; - var rt = try spv.Runtime.init(allocator, &module); - defer rt.deinit(allocator); + for (module_options) |opt| { + var module = try spv.Module.init(allocator, source, opt); + defer module.deinit(allocator); - try rt.callEntryPoint(allocator, try rt.getEntryPointByName("main")); - var output: [len]T = undefined; - try rt.readOutput(T, output[0..len], try rt.getResultByName(output_name)); + var rt = try spv.Runtime.init(allocator, &module); + defer rt.deinit(allocator); - try std.testing.expectEqualSlices(T, expected, &output); + try rt.callEntryPoint(allocator, try rt.getEntryPointByName("main")); + var output: [len]T = undefined; + try rt.readOutput(T, output[0..len], try rt.getResultByName(output_name)); + + try std.testing.expectEqualSlices(T, expected, &output); + } } pub fn random(comptime T: type) T {