diff --git a/example/main.zig b/example/main.zig index 0cc17dd..b30b8bb 100644 --- a/example/main.zig +++ b/example/main.zig @@ -4,8 +4,8 @@ const spv = @import("spv"); const shader_source = @embedFile("shader.spv"); -const screen_width = 1250; -const screen_height = 720; +const screen_width = 200; +const screen_height = 200; pub fn main() !void { { @@ -36,10 +36,16 @@ pub fn main() !void { } for (0..screen_height) |_| { + var rt = try spv.Runtime.init(allocator, &module); (try runner_cache.addOne(allocator)).* = .{ .allocator = allocator, .surface = surface, - .rt = try spv.Runtime.init(allocator, &module), + .rt = rt, + .entry = try rt.getEntryPointByName("main"), + .color = try rt.getResultByName("color"), + .time = try rt.getResultByName("time"), + .pos = try rt.getResultByName("pos"), + .res = try rt.getResultByName("res"), }; } @@ -48,9 +54,11 @@ pub fn main() !void { .allocator = allocator, }); + var timer = try std.time.Timer.start(); + var quit = false; while (!quit) { - try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 0.0 }); + try surface.clear(.{ .r = 0.0, .g = 0.0, .b = 0.0, .a = 1.0 }); while (sdl3.events.poll()) |event| switch (event) { @@ -65,17 +73,19 @@ pub fn main() !void { const pixel_map: [*]u32 = @as([*]u32, @ptrCast(@alignCast((surface.getPixels() orelse return).ptr))); - var timer = try std.time.Timer.start(); + var frame_timer = try std.time.Timer.start(); defer { - const ns = timer.lap(); + const ns = frame_timer.lap(); const ms = @as(f32, @floatFromInt(ns)) / std.time.ns_per_s; std.log.info("Took {d:.3}s - {d:.3}fps to render", .{ ms, 1.0 / ms }); } + const delta: f32 = @as(f32, @floatFromInt(timer.read())) / std.time.ns_per_s; + var wait_group: std.Thread.WaitGroup = .{}; for (0..screen_height) |y| { const runner = &runner_cache.items[y]; - thread_pool.spawnWg(&wait_group, Runner.run, .{ runner, y, pixel_map }); + thread_pool.spawnWg(&wait_group, Runner.runWrapper, .{ runner, y, pixel_map, delta }); } thread_pool.waitAndWork(&wait_group); } @@ -92,23 +102,33 @@ const Runner = struct { allocator: std.mem.Allocator, surface: sdl3.surface.Surface, rt: spv.Runtime, + entry: spv.SpvWord, + color: spv.SpvWord, + time: spv.SpvWord, + pos: spv.SpvWord, + res: spv.SpvWord, - fn run(self: *Self, y: usize, pixel_map: [*]u32) void { + fn runWrapper(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) void { + @call(.always_inline, Self.run, .{ self, y, pixel_map, timer }) catch |err| { + std.log.err("{s}", .{@errorName(err)}); + if (@errorReturnTrace()) |trace| { + std.debug.dumpStackTrace(trace.*); + } + std.process.abort(); + }; + } + + fn run(self: *Self, y: usize, pixel_map: [*]u32, timer: f32) !void { var rt = self.rt; // Copy to avoid pointer access of `self` at runtime. Okay as Runtime contains only pointers and trivially copyable fields - const entry = rt.getEntryPointByName("main") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const color = rt.getResultByName("color") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const time = rt.getResultByName("time") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const pos = rt.getResultByName("pos") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - const res = rt.getResultByName("res") catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); var output: [4]f32 = undefined; for (0..screen_width) |x| { - rt.writeInput(f32, &.{@as(f32, @floatFromInt(std.time.milliTimestamp()))}, time) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, res) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, pos) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.callEntryPoint(self.allocator, entry) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); - rt.readOutput(f32, output[0..], color) catch |err| std.debug.panic("Catch error {s}", .{@errorName(err)}); + try rt.writeInput(f32, &.{timer}, self.time); + try rt.writeInput(f32, &.{ @floatFromInt(screen_width), @floatFromInt(screen_height) }, self.res); + try rt.writeInput(f32, &.{ @floatFromInt(x), @floatFromInt(y) }, self.pos); + try rt.callEntryPoint(self.allocator, self.entry); + try rt.readOutput(f32, output[0..], self.color); const rgba = self.surface.mapRgba( @truncate(@as(u32, @intFromFloat(output[0] * 255.0))), diff --git a/example/shader.nzsl b/example/shader.nzsl index 064ce5e..ba00b3f 100644 --- a/example/shader.nzsl +++ b/example/shader.nzsl @@ -3,25 +3,66 @@ module; struct FragIn { - [location(0)] time: f32, - [location(1)] res: vec2[f32], - [location(2)] pos: vec2[f32], + [location(0)] time: f32, + [location(1)] res: vec2[f32], + [location(2)] pos: vec2[f32], } struct FragOut { - [location(0)] color: vec4[f32] + [location(0)] color: vec4[f32] } [entry(frag)] fn main(input: FragIn) -> FragOut { - let output: FragOut; - output.color = vec4[f32]( - input.pos.x / input.res.x, - input.pos.y / input.res.y, - 1.0, - 1.0 - ); - return output; + const I: i32 = 32; + const A: f32 = 7.5; + const MA: f32 = 20.0; + const MI: f32 = 0.001; + + let uv0 = input.pos / input.res * 2.0 - vec2[f32](1.0, 1.0); + let uv = vec2[f32](uv0.x * (input.res.x / input.res.y), uv0.y); + + let col = vec3[f32](0.0, 0.0, 0.0); + let ro = vec3[f32](0.0, 0.0, -2.0); + let rd = vec3[f32](uv.x, uv.y, 1.0); + let dt = 0.0; + let ds = 0.0; + let dm = -1.0; + let p = ro; + let c = vec3[f32](0.0, 0.0, 0.0); + + let l = vec3[f32](0.0, sin(input.time * 0.2) * 4.0, cos(input.time * 0.2) * 4.0); + + for i in 0 -> I + { + p = ro + rd * dt; + ds = length(c - p) - 1.0; + dt += ds; + + if (dm == -1.0 || ds < dm) + dm = ds; + + if (ds <= MI) + { + let value = max(dot(normalize(c - p), normalize(p - l)) - 0.35, 0.0); + col = vec3[f32](value, value, value); + break; + } + + if (ds >= MA) + { + if (dot(normalize(rd), normalize(l - ro)) <= 1.0) + { + let value = max(dot(normalize(rd), normalize(l - ro)) + 0.15, 0.05)/ 1.15 * (1.0 - dm * A); + col = vec3[f32](value, value, value); + } + break; + } + } + + let output: FragOut; + output.color = vec4[f32](col.x, col.y, col.z, 1.0); + return output; } diff --git a/example/shader.spv b/example/shader.spv index 599620a..984e29b 100644 Binary files a/example/shader.spv and b/example/shader.spv differ diff --git a/example/shader.spv.txt b/example/shader.spv.txt index fe4fbe6..10493e2 100644 --- a/example/shader.spv.txt +++ b/example/shader.spv.txt @@ -1,85 +1,281 @@ Version 1.0 Generator: 2560130 -Bound: 50 +Bound: 203 Schema: 0 - OpCapability Capability(Shader) - OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450) - OpEntryPoint ExecutionModel(Fragment) %24 "main" %5 %11 %14 %20 - OpExecutionMode %24 ExecutionMode(OriginUpperLeft) - OpSource SourceLanguage(NZSL) 4198400 - OpSourceExtension "Version: 1.1" - OpName %16 "FragIn" - OpMemberName %16 0 "time" - OpMemberName %16 1 "res" - OpMemberName %16 2 "pos" - OpName %21 "FragOut" - OpMemberName %21 0 "color" - OpName %5 "time" - OpName %11 "res" - OpName %14 "pos" - OpName %20 "color" - OpName %24 "main" - OpDecorate %5 Decoration(Location) 0 - OpDecorate %11 Decoration(Location) 1 - OpDecorate %14 Decoration(Location) 2 - OpDecorate %20 Decoration(Location) 0 - OpMemberDecorate %16 0 Decoration(Offset) 0 - OpMemberDecorate %16 1 Decoration(Offset) 8 - OpMemberDecorate %16 2 Decoration(Offset) 16 - OpMemberDecorate %21 0 Decoration(Offset) 0 - %1 = OpTypeVoid - %2 = OpTypeFunction %1 - %3 = OpTypeFloat 32 - %4 = OpTypePointer StorageClass(Input) %3 - %6 = OpTypeInt 32 1 - %7 = OpConstant %6 i32(0) - %8 = OpTypePointer StorageClass(Function) %3 - %9 = OpTypeVector %3 2 -%10 = OpTypePointer StorageClass(Input) %9 -%12 = OpConstant %6 i32(1) -%13 = OpTypePointer StorageClass(Function) %9 -%15 = OpConstant %6 i32(2) -%16 = OpTypeStruct %3 %9 %9 -%17 = OpTypePointer StorageClass(Function) %16 -%18 = OpTypeVector %3 4 -%19 = OpTypePointer StorageClass(Output) %18 -%21 = OpTypeStruct %18 -%22 = OpTypePointer StorageClass(Function) %21 -%23 = OpConstant %3 f32(1) -%47 = OpTypePointer StorageClass(Function) %18 - %5 = OpVariable %4 StorageClass(Input) -%11 = OpVariable %10 StorageClass(Input) -%14 = OpVariable %10 StorageClass(Input) -%20 = OpVariable %19 StorageClass(Output) -%24 = OpFunction %1 FunctionControl(0) %2 -%25 = OpLabel -%26 = OpVariable %22 StorageClass(Function) -%27 = OpVariable %17 StorageClass(Function) -%28 = OpAccessChain %8 %27 %7 - OpCopyMemory %28 %5 -%29 = OpAccessChain %13 %27 %12 - OpCopyMemory %29 %11 -%30 = OpAccessChain %13 %27 %15 - OpCopyMemory %30 %14 -%31 = OpAccessChain %13 %27 %15 -%32 = OpLoad %9 %31 -%33 = OpCompositeExtract %3 %32 0 -%34 = OpAccessChain %13 %27 %12 -%35 = OpLoad %9 %34 -%36 = OpCompositeExtract %3 %35 0 -%37 = OpFDiv %3 %33 %36 -%38 = OpAccessChain %13 %27 %15 -%39 = OpLoad %9 %38 -%40 = OpCompositeExtract %3 %39 1 -%41 = OpAccessChain %13 %27 %12 -%42 = OpLoad %9 %41 -%43 = OpCompositeExtract %3 %42 1 -%44 = OpFDiv %3 %40 %43 -%45 = OpCompositeConstruct %18 %37 %44 %23 %23 -%46 = OpAccessChain %47 %26 %7 - OpStore %46 %45 -%48 = OpLoad %21 %26 -%49 = OpCompositeExtract %18 %48 0 - OpStore %20 %49 - OpReturn - OpFunctionEnd + OpCapability Capability(Shader) + %42 = OpExtInstImport "GLSL.std.450" + OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450) + OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20 + OpExecutionMode %43 ExecutionMode(OriginUpperLeft) + OpSource SourceLanguage(NZSL) 4198400 + OpSourceExtension "Version: 1.1" + OpName %16 "FragIn" + OpMemberName %16 0 "time" + OpMemberName %16 1 "res" + OpMemberName %16 2 "pos" + OpName %21 "FragOut" + OpMemberName %21 0 "color" + OpName %5 "time" + OpName %11 "res" + OpName %14 "pos" + OpName %20 "color" + OpName %43 "main" + OpDecorate %5 Decoration(Location) 0 + OpDecorate %11 Decoration(Location) 1 + OpDecorate %14 Decoration(Location) 2 + OpDecorate %20 Decoration(Location) 0 + OpMemberDecorate %16 0 Decoration(Offset) 0 + OpMemberDecorate %16 1 Decoration(Offset) 8 + OpMemberDecorate %16 2 Decoration(Offset) 16 + OpMemberDecorate %21 0 Decoration(Offset) 0 + %1 = OpTypeVoid + %2 = OpTypeFunction %1 + %3 = OpTypeFloat 32 + %4 = OpTypePointer StorageClass(Input) %3 + %6 = OpTypeInt 32 1 + %7 = OpConstant %6 i32(0) + %8 = OpTypePointer StorageClass(Function) %3 + %9 = OpTypeVector %3 2 + %10 = OpTypePointer StorageClass(Input) %9 + %12 = OpConstant %6 i32(1) + %13 = OpTypePointer StorageClass(Function) %9 + %15 = OpConstant %6 i32(2) + %16 = OpTypeStruct %3 %9 %9 + %17 = OpTypePointer StorageClass(Function) %16 + %18 = OpTypeVector %3 4 + %19 = OpTypePointer StorageClass(Output) %18 + %21 = OpTypeStruct %18 + %22 = OpConstant %3 f32(2) + %23 = OpConstant %3 f32(1) + %24 = OpConstant %3 f32(0) + %25 = OpTypeVector %3 3 + %26 = OpTypePointer StorageClass(Function) %25 + %27 = OpConstant %3 f32(-2) + %28 = OpConstant %3 f32(-1) + %29 = OpConstant %3 f32(0.2) + %30 = OpConstant %3 f32(4) + %31 = OpTypePointer StorageClass(Function) %6 + %32 = OpConstant %6 i32(32) + %33 = OpTypeBool + %34 = OpConstant %3 f32(0.001) + %35 = OpConstant %3 f32(0.35) + %36 = OpConstant %3 f32(20) + %37 = OpConstant %3 f32(0.15) + %38 = OpConstant %3 f32(0.05) + %39 = OpConstant %3 f32(1.15) + %40 = OpConstant %3 f32(7.5) + %41 = OpTypePointer StorageClass(Function) %21 +%200 = OpTypePointer StorageClass(Function) %18 + %5 = OpVariable %4 StorageClass(Input) + %11 = OpVariable %10 StorageClass(Input) + %14 = OpVariable %10 StorageClass(Input) + %20 = OpVariable %19 StorageClass(Output) + %43 = OpFunction %1 FunctionControl(0) %2 + %44 = OpLabel + %45 = OpVariable %13 StorageClass(Function) + %46 = OpVariable %13 StorageClass(Function) + %47 = OpVariable %26 StorageClass(Function) + %48 = OpVariable %26 StorageClass(Function) + %49 = OpVariable %26 StorageClass(Function) + %50 = OpVariable %8 StorageClass(Function) + %51 = OpVariable %8 StorageClass(Function) + %52 = OpVariable %8 StorageClass(Function) + %53 = OpVariable %26 StorageClass(Function) + %54 = OpVariable %26 StorageClass(Function) + %55 = OpVariable %26 StorageClass(Function) + %56 = OpVariable %31 StorageClass(Function) + %57 = OpVariable %31 StorageClass(Function) + %58 = OpVariable %8 StorageClass(Function) + %59 = OpVariable %8 StorageClass(Function) + %60 = OpVariable %41 StorageClass(Function) + %61 = OpVariable %17 StorageClass(Function) + %62 = OpAccessChain %8 %61 %7 + OpCopyMemory %62 %5 + %63 = OpAccessChain %13 %61 %12 + OpCopyMemory %63 %11 + %64 = OpAccessChain %13 %61 %15 + OpCopyMemory %64 %14 + %65 = OpAccessChain %13 %61 %15 + %66 = OpLoad %9 %65 + %67 = OpAccessChain %13 %61 %12 + %68 = OpLoad %9 %67 + %69 = OpFDiv %9 %66 %68 + %70 = OpVectorTimesScalar %9 %69 %22 + %71 = OpCompositeConstruct %9 %23 %23 + %72 = OpFSub %9 %70 %71 + OpStore %45 %72 + %73 = OpLoad %9 %45 + %74 = OpCompositeExtract %3 %73 0 + %75 = OpAccessChain %13 %61 %12 + %76 = OpLoad %9 %75 + %77 = OpCompositeExtract %3 %76 0 + %78 = OpAccessChain %13 %61 %12 + %79 = OpLoad %9 %78 + %80 = OpCompositeExtract %3 %79 1 + %81 = OpFDiv %3 %77 %80 + %82 = OpFMul %3 %74 %81 + %83 = OpLoad %9 %45 + %84 = OpCompositeExtract %3 %83 1 + %85 = OpCompositeConstruct %9 %82 %84 + OpStore %46 %85 + %86 = OpCompositeConstruct %25 %24 %24 %24 + OpStore %47 %86 + %87 = OpCompositeConstruct %25 %24 %24 %27 + OpStore %48 %87 + %88 = OpLoad %9 %46 + %89 = OpCompositeExtract %3 %88 0 + %90 = OpLoad %9 %46 + %91 = OpCompositeExtract %3 %90 1 + %92 = OpCompositeConstruct %25 %89 %91 %23 + OpStore %49 %92 + OpStore %50 %24 + OpStore %51 %24 + OpStore %52 %28 + %93 = OpLoad %25 %48 + OpStore %53 %93 + %94 = OpCompositeConstruct %25 %24 %24 %24 + OpStore %54 %94 + %95 = OpAccessChain %8 %61 %7 + %96 = OpLoad %3 %95 + %97 = OpFMul %3 %96 %29 + %98 = OpExtInst %3 GLSLstd450 Sin %97 + %99 = OpFMul %3 %98 %30 +%100 = OpAccessChain %8 %61 %7 +%101 = OpLoad %3 %100 +%102 = OpFMul %3 %101 %29 +%103 = OpExtInst %3 GLSLstd450 Cos %102 +%104 = OpFMul %3 %103 %30 +%105 = OpCompositeConstruct %25 %24 %99 %104 + OpStore %55 %105 + OpStore %56 %7 + OpStore %57 %32 + OpBranch %106 +%106 = OpLabel +%110 = OpLoad %6 %56 +%111 = OpLoad %6 %57 +%112 = OpSLessThan %33 %110 %111 + OpLoopMerge %108 %109 LoopControl(0) + OpBranchConditional %112 %107 %108 +%107 = OpLabel +%113 = OpLoad %25 %48 +%114 = OpLoad %25 %49 +%115 = OpLoad %3 %50 +%116 = OpVectorTimesScalar %25 %114 %115 +%117 = OpFAdd %25 %113 %116 + OpStore %53 %117 +%118 = OpLoad %25 %54 +%119 = OpLoad %25 %53 +%120 = OpFSub %25 %118 %119 +%121 = OpExtInst %3 GLSLstd450 Length %120 +%122 = OpFSub %3 %121 %23 + OpStore %51 %122 +%123 = OpLoad %3 %50 +%124 = OpLoad %3 %51 +%125 = OpFAdd %3 %123 %124 + OpStore %50 %125 +%129 = OpLoad %3 %52 +%130 = OpFOrdEqual %33 %129 %28 +%131 = OpLoad %3 %51 +%132 = OpLoad %3 %52 +%133 = OpFOrdLessThan %33 %131 %132 +%134 = OpLogicalOr %33 %130 %133 + OpSelectionMerge %126 SelectionControl(0) + OpBranchConditional %134 %127 %128 +%127 = OpLabel +%135 = OpLoad %3 %51 + OpStore %52 %135 + OpBranch %126 +%128 = OpLabel + OpBranch %126 +%126 = OpLabel +%139 = OpLoad %3 %51 +%140 = OpFOrdLessThanEqual %33 %139 %34 + OpSelectionMerge %136 SelectionControl(0) + OpBranchConditional %140 %137 %138 +%137 = OpLabel +%141 = OpLoad %25 %54 +%142 = OpLoad %25 %53 +%143 = OpFSub %25 %141 %142 +%144 = OpExtInst %25 GLSLstd450 Normalize %143 +%145 = OpLoad %25 %53 +%146 = OpLoad %25 %55 +%147 = OpFSub %25 %145 %146 +%148 = OpExtInst %25 GLSLstd450 Normalize %147 +%149 = OpDot %3 %144 %148 +%150 = OpFSub %3 %149 %35 +%151 = OpExtInst %3 GLSLstd450 FMax %150 %24 + OpStore %58 %151 +%152 = OpLoad %3 %58 +%153 = OpLoad %3 %58 +%154 = OpLoad %3 %58 +%155 = OpCompositeConstruct %25 %152 %153 %154 + OpStore %47 %155 + OpBranch %108 +%138 = OpLabel + OpBranch %136 +%136 = OpLabel +%159 = OpLoad %3 %51 +%160 = OpFOrdGreaterThanEqual %33 %159 %36 + OpSelectionMerge %156 SelectionControl(0) + OpBranchConditional %160 %157 %158 +%157 = OpLabel +%164 = OpLoad %25 %49 +%165 = OpExtInst %25 GLSLstd450 Normalize %164 +%166 = OpLoad %25 %55 +%167 = OpLoad %25 %48 +%168 = OpFSub %25 %166 %167 +%169 = OpExtInst %25 GLSLstd450 Normalize %168 +%170 = OpDot %3 %165 %169 +%171 = OpFOrdLessThanEqual %33 %170 %23 + OpSelectionMerge %161 SelectionControl(0) + OpBranchConditional %171 %162 %163 +%162 = OpLabel +%172 = OpLoad %25 %49 +%173 = OpExtInst %25 GLSLstd450 Normalize %172 +%174 = OpLoad %25 %55 +%175 = OpLoad %25 %48 +%176 = OpFSub %25 %174 %175 +%177 = OpExtInst %25 GLSLstd450 Normalize %176 +%178 = OpDot %3 %173 %177 +%179 = OpFAdd %3 %178 %37 +%180 = OpExtInst %3 GLSLstd450 FMax %179 %38 +%181 = OpFDiv %3 %180 %39 +%182 = OpLoad %3 %52 +%183 = OpFMul %3 %182 %40 +%184 = OpFSub %3 %23 %183 +%185 = OpFMul %3 %181 %184 + OpStore %59 %185 +%186 = OpLoad %3 %59 +%187 = OpLoad %3 %59 +%188 = OpLoad %3 %59 +%189 = OpCompositeConstruct %25 %186 %187 %188 + OpStore %47 %189 + OpBranch %161 +%163 = OpLabel + OpBranch %161 +%161 = OpLabel + OpBranch %108 +%158 = OpLabel + OpBranch %156 +%156 = OpLabel +%190 = OpLoad %6 %56 +%191 = OpIAdd %6 %190 %12 + OpStore %56 %191 + OpBranch %109 +%109 = OpLabel + OpBranch %106 +%108 = OpLabel +%192 = OpLoad %25 %47 +%193 = OpCompositeExtract %3 %192 0 +%194 = OpLoad %25 %47 +%195 = OpCompositeExtract %3 %194 1 +%196 = OpLoad %25 %47 +%197 = OpCompositeExtract %3 %196 2 +%198 = OpCompositeConstruct %18 %193 %195 %197 %23 +%199 = OpAccessChain %200 %60 %7 + OpStore %199 %198 +%201 = OpLoad %21 %60 +%202 = OpCompositeExtract %18 %201 0 + OpStore %20 %202 + OpReturn + OpFunctionEnd diff --git a/sandbox/shader.nzsl b/sandbox/shader.nzsl index c5252e6..ec4e798 100644 --- a/sandbox/shader.nzsl +++ b/sandbox/shader.nzsl @@ -62,8 +62,8 @@ fn main(input: FragIn) -> FragOut } } - if (col == vec3[f32](0.0, 0.0, 0.0)) - discard; + //if (col == vec3[f32](0.0, 0.0, 0.0)) + // discard; let output: FragOut; output.color = vec4[f32](col.x, col.y, col.z, 1.0); diff --git a/sandbox/shader.spv b/sandbox/shader.spv index 4306992..3801e1e 100644 Binary files a/sandbox/shader.spv and b/sandbox/shader.spv differ diff --git a/sandbox/shader.spv.txt b/sandbox/shader.spv.txt index 880ef68..9eecca9 100644 --- a/sandbox/shader.spv.txt +++ b/sandbox/shader.spv.txt @@ -1,12 +1,12 @@ Version 1.0 Generator: 2560130 -Bound: 210 +Bound: 203 Schema: 0 OpCapability Capability(Shader) - %43 = OpExtInstImport "GLSL.std.450" + %42 = OpExtInstImport "GLSL.std.450" OpMemoryModel AddressingModel(Logical) MemoryModel(GLSL450) - OpEntryPoint ExecutionModel(Fragment) %44 "main" %5 %11 %14 %20 - OpExecutionMode %44 ExecutionMode(OriginUpperLeft) + OpEntryPoint ExecutionModel(Fragment) %43 "main" %5 %11 %14 %20 + OpExecutionMode %43 ExecutionMode(OriginUpperLeft) OpSource SourceLanguage(NZSL) 4198400 OpSourceExtension "Version: 1.1" OpName %16 "FragIn" @@ -19,7 +19,7 @@ Schema: 0 OpName %11 "res" OpName %14 "pos" OpName %20 "color" - OpName %44 "main" + OpName %43 "main" OpDecorate %5 Decoration(Location) 0 OpDecorate %11 Decoration(Location) 1 OpDecorate %14 Decoration(Location) 2 @@ -64,229 +64,218 @@ Schema: 0 %38 = OpConstant %3 f32(0.05) %39 = OpConstant %3 f32(1.15) %40 = OpConstant %3 f32(7.5) - %41 = OpTypeVector %33 3 - %42 = OpTypePointer StorageClass(Function) %21 -%207 = OpTypePointer StorageClass(Function) %18 + %41 = OpTypePointer StorageClass(Function) %21 +%200 = OpTypePointer StorageClass(Function) %18 %5 = OpVariable %4 StorageClass(Input) %11 = OpVariable %10 StorageClass(Input) %14 = OpVariable %10 StorageClass(Input) %20 = OpVariable %19 StorageClass(Output) - %44 = OpFunction %1 FunctionControl(0) %2 - %45 = OpLabel + %43 = OpFunction %1 FunctionControl(0) %2 + %44 = OpLabel + %45 = OpVariable %13 StorageClass(Function) %46 = OpVariable %13 StorageClass(Function) - %47 = OpVariable %13 StorageClass(Function) + %47 = OpVariable %26 StorageClass(Function) %48 = OpVariable %26 StorageClass(Function) %49 = OpVariable %26 StorageClass(Function) - %50 = OpVariable %26 StorageClass(Function) + %50 = OpVariable %8 StorageClass(Function) %51 = OpVariable %8 StorageClass(Function) %52 = OpVariable %8 StorageClass(Function) - %53 = OpVariable %8 StorageClass(Function) + %53 = OpVariable %26 StorageClass(Function) %54 = OpVariable %26 StorageClass(Function) %55 = OpVariable %26 StorageClass(Function) - %56 = OpVariable %26 StorageClass(Function) + %56 = OpVariable %31 StorageClass(Function) %57 = OpVariable %31 StorageClass(Function) - %58 = OpVariable %31 StorageClass(Function) + %58 = OpVariable %8 StorageClass(Function) %59 = OpVariable %8 StorageClass(Function) - %60 = OpVariable %8 StorageClass(Function) - %61 = OpVariable %42 StorageClass(Function) - %62 = OpVariable %17 StorageClass(Function) - %63 = OpAccessChain %8 %62 %7 - OpCopyMemory %63 %5 - %64 = OpAccessChain %13 %62 %12 - OpCopyMemory %64 %11 - %65 = OpAccessChain %13 %62 %15 - OpCopyMemory %65 %14 - %66 = OpAccessChain %13 %62 %15 - %67 = OpLoad %9 %66 - %68 = OpAccessChain %13 %62 %12 - %69 = OpLoad %9 %68 - %70 = OpFDiv %9 %67 %69 - %71 = OpVectorTimesScalar %9 %70 %22 - %72 = OpCompositeConstruct %9 %23 %23 - %73 = OpFSub %9 %71 %72 - OpStore %46 %73 - %74 = OpLoad %9 %46 - %75 = OpCompositeExtract %3 %74 0 - %76 = OpAccessChain %13 %62 %12 - %77 = OpLoad %9 %76 - %78 = OpCompositeExtract %3 %77 0 - %79 = OpAccessChain %13 %62 %12 - %80 = OpLoad %9 %79 - %81 = OpCompositeExtract %3 %80 1 - %82 = OpFDiv %3 %78 %81 - %83 = OpFMul %3 %75 %82 - %84 = OpLoad %9 %46 - %85 = OpCompositeExtract %3 %84 1 - %86 = OpCompositeConstruct %9 %83 %85 + %60 = OpVariable %41 StorageClass(Function) + %61 = OpVariable %17 StorageClass(Function) + %62 = OpAccessChain %8 %61 %7 + OpCopyMemory %62 %5 + %63 = OpAccessChain %13 %61 %12 + OpCopyMemory %63 %11 + %64 = OpAccessChain %13 %61 %15 + OpCopyMemory %64 %14 + %65 = OpAccessChain %13 %61 %15 + %66 = OpLoad %9 %65 + %67 = OpAccessChain %13 %61 %12 + %68 = OpLoad %9 %67 + %69 = OpFDiv %9 %66 %68 + %70 = OpVectorTimesScalar %9 %69 %22 + %71 = OpCompositeConstruct %9 %23 %23 + %72 = OpFSub %9 %70 %71 + OpStore %45 %72 + %73 = OpLoad %9 %45 + %74 = OpCompositeExtract %3 %73 0 + %75 = OpAccessChain %13 %61 %12 + %76 = OpLoad %9 %75 + %77 = OpCompositeExtract %3 %76 0 + %78 = OpAccessChain %13 %61 %12 + %79 = OpLoad %9 %78 + %80 = OpCompositeExtract %3 %79 1 + %81 = OpFDiv %3 %77 %80 + %82 = OpFMul %3 %74 %81 + %83 = OpLoad %9 %45 + %84 = OpCompositeExtract %3 %83 1 + %85 = OpCompositeConstruct %9 %82 %84 + OpStore %46 %85 + %86 = OpCompositeConstruct %25 %24 %24 %24 OpStore %47 %86 - %87 = OpCompositeConstruct %25 %24 %24 %24 + %87 = OpCompositeConstruct %25 %24 %24 %27 OpStore %48 %87 - %88 = OpCompositeConstruct %25 %24 %24 %27 - OpStore %49 %88 - %89 = OpLoad %9 %47 - %90 = OpCompositeExtract %3 %89 0 - %91 = OpLoad %9 %47 - %92 = OpCompositeExtract %3 %91 1 - %93 = OpCompositeConstruct %25 %90 %92 %23 - OpStore %50 %93 + %88 = OpLoad %9 %46 + %89 = OpCompositeExtract %3 %88 0 + %90 = OpLoad %9 %46 + %91 = OpCompositeExtract %3 %90 1 + %92 = OpCompositeConstruct %25 %89 %91 %23 + OpStore %49 %92 + OpStore %50 %24 OpStore %51 %24 - OpStore %52 %24 - OpStore %53 %28 - %94 = OpLoad %25 %49 + OpStore %52 %28 + %93 = OpLoad %25 %48 + OpStore %53 %93 + %94 = OpCompositeConstruct %25 %24 %24 %24 OpStore %54 %94 - %95 = OpCompositeConstruct %25 %24 %24 %24 - OpStore %55 %95 - %96 = OpAccessChain %8 %62 %7 - %97 = OpLoad %3 %96 - %98 = OpFMul %3 %97 %29 - %99 = OpExtInst %3 GLSLstd450 Sin %98 -%100 = OpFMul %3 %99 %30 -%101 = OpAccessChain %8 %62 %7 -%102 = OpLoad %3 %101 -%103 = OpFMul %3 %102 %29 -%104 = OpExtInst %3 GLSLstd450 Cos %103 -%105 = OpFMul %3 %104 %30 -%106 = OpCompositeConstruct %25 %24 %100 %105 - OpStore %56 %106 - OpStore %57 %7 - OpStore %58 %32 - OpBranch %107 -%107 = OpLabel + %95 = OpAccessChain %8 %61 %7 + %96 = OpLoad %3 %95 + %97 = OpFMul %3 %96 %29 + %98 = OpExtInst %3 GLSLstd450 Sin %97 + %99 = OpFMul %3 %98 %30 +%100 = OpAccessChain %8 %61 %7 +%101 = OpLoad %3 %100 +%102 = OpFMul %3 %101 %29 +%103 = OpExtInst %3 GLSLstd450 Cos %102 +%104 = OpFMul %3 %103 %30 +%105 = OpCompositeConstruct %25 %24 %99 %104 + OpStore %55 %105 + OpStore %56 %7 + OpStore %57 %32 + OpBranch %106 +%106 = OpLabel +%110 = OpLoad %6 %56 %111 = OpLoad %6 %57 -%112 = OpLoad %6 %58 -%113 = OpSLessThan %33 %111 %112 - OpLoopMerge %109 %110 LoopControl(0) - OpBranchConditional %113 %108 %109 -%108 = OpLabel +%112 = OpSLessThan %33 %110 %111 + OpLoopMerge %108 %109 LoopControl(0) + OpBranchConditional %112 %107 %108 +%107 = OpLabel +%113 = OpLoad %25 %48 %114 = OpLoad %25 %49 -%115 = OpLoad %25 %50 -%116 = OpLoad %3 %51 -%117 = OpVectorTimesScalar %25 %115 %116 -%118 = OpFAdd %25 %114 %117 - OpStore %54 %118 -%119 = OpLoad %25 %55 -%120 = OpLoad %25 %54 -%121 = OpFSub %25 %119 %120 -%122 = OpExtInst %3 GLSLstd450 Length %121 -%123 = OpFSub %3 %122 %23 - OpStore %52 %123 +%115 = OpLoad %3 %50 +%116 = OpVectorTimesScalar %25 %114 %115 +%117 = OpFAdd %25 %113 %116 + OpStore %53 %117 +%118 = OpLoad %25 %54 +%119 = OpLoad %25 %53 +%120 = OpFSub %25 %118 %119 +%121 = OpExtInst %3 GLSLstd450 Length %120 +%122 = OpFSub %3 %121 %23 + OpStore %51 %122 +%123 = OpLoad %3 %50 %124 = OpLoad %3 %51 -%125 = OpLoad %3 %52 -%126 = OpFAdd %3 %124 %125 - OpStore %51 %126 -%130 = OpLoad %3 %53 -%131 = OpFOrdEqual %33 %130 %28 +%125 = OpFAdd %3 %123 %124 + OpStore %50 %125 +%129 = OpLoad %3 %52 +%130 = OpFOrdEqual %33 %129 %28 +%131 = OpLoad %3 %51 %132 = OpLoad %3 %52 -%133 = OpLoad %3 %53 -%134 = OpFOrdLessThan %33 %132 %133 -%135 = OpLogicalOr %33 %131 %134 - OpSelectionMerge %127 SelectionControl(0) - OpBranchConditional %135 %128 %129 -%128 = OpLabel -%136 = OpLoad %3 %52 - OpStore %53 %136 - OpBranch %127 -%129 = OpLabel - OpBranch %127 +%133 = OpFOrdLessThan %33 %131 %132 +%134 = OpLogicalOr %33 %130 %133 + OpSelectionMerge %126 SelectionControl(0) + OpBranchConditional %134 %127 %128 %127 = OpLabel -%140 = OpLoad %3 %52 -%141 = OpFOrdLessThanEqual %33 %140 %34 - OpSelectionMerge %137 SelectionControl(0) - OpBranchConditional %141 %138 %139 -%138 = OpLabel -%142 = OpLoad %25 %55 -%143 = OpLoad %25 %54 -%144 = OpFSub %25 %142 %143 -%145 = OpExtInst %25 GLSLstd450 Normalize %144 -%146 = OpLoad %25 %54 -%147 = OpLoad %25 %56 -%148 = OpFSub %25 %146 %147 -%149 = OpExtInst %25 GLSLstd450 Normalize %148 -%150 = OpDot %3 %145 %149 -%151 = OpFSub %3 %150 %35 -%152 = OpExtInst %3 GLSLstd450 FMax %151 %24 - OpStore %59 %152 -%153 = OpLoad %3 %59 -%154 = OpLoad %3 %59 -%155 = OpLoad %3 %59 -%156 = OpCompositeConstruct %25 %153 %154 %155 - OpStore %48 %156 - OpBranch %109 -%139 = OpLabel - OpBranch %137 +%135 = OpLoad %3 %51 + OpStore %52 %135 + OpBranch %126 +%128 = OpLabel + OpBranch %126 +%126 = OpLabel +%139 = OpLoad %3 %51 +%140 = OpFOrdLessThanEqual %33 %139 %34 + OpSelectionMerge %136 SelectionControl(0) + OpBranchConditional %140 %137 %138 %137 = OpLabel -%160 = OpLoad %3 %52 -%161 = OpFOrdGreaterThanEqual %33 %160 %36 - OpSelectionMerge %157 SelectionControl(0) - OpBranchConditional %161 %158 %159 -%158 = OpLabel -%165 = OpLoad %25 %50 -%166 = OpExtInst %25 GLSLstd450 Normalize %165 -%167 = OpLoad %25 %56 -%168 = OpLoad %25 %49 -%169 = OpFSub %25 %167 %168 -%170 = OpExtInst %25 GLSLstd450 Normalize %169 -%171 = OpDot %3 %166 %170 -%172 = OpFOrdLessThanEqual %33 %171 %23 - OpSelectionMerge %162 SelectionControl(0) - OpBranchConditional %172 %163 %164 -%163 = OpLabel -%173 = OpLoad %25 %50 -%174 = OpExtInst %25 GLSLstd450 Normalize %173 -%175 = OpLoad %25 %56 -%176 = OpLoad %25 %49 -%177 = OpFSub %25 %175 %176 -%178 = OpExtInst %25 GLSLstd450 Normalize %177 -%179 = OpDot %3 %174 %178 -%180 = OpFAdd %3 %179 %37 -%181 = OpExtInst %3 GLSLstd450 FMax %180 %38 -%182 = OpFDiv %3 %181 %39 -%183 = OpLoad %3 %53 -%184 = OpFMul %3 %183 %40 -%185 = OpFSub %3 %23 %184 -%186 = OpFMul %3 %182 %185 - OpStore %60 %186 -%187 = OpLoad %3 %60 -%188 = OpLoad %3 %60 -%189 = OpLoad %3 %60 -%190 = OpCompositeConstruct %25 %187 %188 %189 - OpStore %48 %190 - OpBranch %162 -%164 = OpLabel - OpBranch %162 -%162 = OpLabel - OpBranch %109 -%159 = OpLabel - OpBranch %157 +%141 = OpLoad %25 %54 +%142 = OpLoad %25 %53 +%143 = OpFSub %25 %141 %142 +%144 = OpExtInst %25 GLSLstd450 Normalize %143 +%145 = OpLoad %25 %53 +%146 = OpLoad %25 %55 +%147 = OpFSub %25 %145 %146 +%148 = OpExtInst %25 GLSLstd450 Normalize %147 +%149 = OpDot %3 %144 %148 +%150 = OpFSub %3 %149 %35 +%151 = OpExtInst %3 GLSLstd450 FMax %150 %24 + OpStore %58 %151 +%152 = OpLoad %3 %58 +%153 = OpLoad %3 %58 +%154 = OpLoad %3 %58 +%155 = OpCompositeConstruct %25 %152 %153 %154 + OpStore %47 %155 + OpBranch %108 +%138 = OpLabel + OpBranch %136 +%136 = OpLabel +%159 = OpLoad %3 %51 +%160 = OpFOrdGreaterThanEqual %33 %159 %36 + OpSelectionMerge %156 SelectionControl(0) + OpBranchConditional %160 %157 %158 %157 = OpLabel -%191 = OpLoad %6 %57 -%192 = OpIAdd %6 %191 %12 - OpStore %57 %192 - OpBranch %110 -%110 = OpLabel - OpBranch %107 +%164 = OpLoad %25 %49 +%165 = OpExtInst %25 GLSLstd450 Normalize %164 +%166 = OpLoad %25 %55 +%167 = OpLoad %25 %48 +%168 = OpFSub %25 %166 %167 +%169 = OpExtInst %25 GLSLstd450 Normalize %168 +%170 = OpDot %3 %165 %169 +%171 = OpFOrdLessThanEqual %33 %170 %23 + OpSelectionMerge %161 SelectionControl(0) + OpBranchConditional %171 %162 %163 +%162 = OpLabel +%172 = OpLoad %25 %49 +%173 = OpExtInst %25 GLSLstd450 Normalize %172 +%174 = OpLoad %25 %55 +%175 = OpLoad %25 %48 +%176 = OpFSub %25 %174 %175 +%177 = OpExtInst %25 GLSLstd450 Normalize %176 +%178 = OpDot %3 %173 %177 +%179 = OpFAdd %3 %178 %37 +%180 = OpExtInst %3 GLSLstd450 FMax %179 %38 +%181 = OpFDiv %3 %180 %39 +%182 = OpLoad %3 %52 +%183 = OpFMul %3 %182 %40 +%184 = OpFSub %3 %23 %183 +%185 = OpFMul %3 %181 %184 + OpStore %59 %185 +%186 = OpLoad %3 %59 +%187 = OpLoad %3 %59 +%188 = OpLoad %3 %59 +%189 = OpCompositeConstruct %25 %186 %187 %188 + OpStore %47 %189 + OpBranch %161 +%163 = OpLabel + OpBranch %161 +%161 = OpLabel + OpBranch %108 +%158 = OpLabel + OpBranch %156 +%156 = OpLabel +%190 = OpLoad %6 %56 +%191 = OpIAdd %6 %190 %12 + OpStore %56 %191 + OpBranch %109 %109 = OpLabel -%196 = OpLoad %25 %48 -%197 = OpCompositeConstruct %25 %24 %24 %24 -%198 = OpFOrdEqual %41 %196 %197 - OpSelectionMerge %193 SelectionControl(0) - OpBranchConditional %198 %194 %195 -%194 = OpLabel - OpKill -%195 = OpLabel - OpBranch %193 -%193 = OpLabel -%199 = OpLoad %25 %48 -%200 = OpCompositeExtract %3 %199 0 -%201 = OpLoad %25 %48 -%202 = OpCompositeExtract %3 %201 1 -%203 = OpLoad %25 %48 -%204 = OpCompositeExtract %3 %203 2 -%205 = OpCompositeConstruct %18 %200 %202 %204 %23 -%206 = OpAccessChain %207 %61 %7 - OpStore %206 %205 -%208 = OpLoad %21 %61 -%209 = OpCompositeExtract %18 %208 0 - OpStore %20 %209 + OpBranch %106 +%108 = OpLabel +%192 = OpLoad %25 %47 +%193 = OpCompositeExtract %3 %192 0 +%194 = OpLoad %25 %47 +%195 = OpCompositeExtract %3 %194 1 +%196 = OpLoad %25 %47 +%197 = OpCompositeExtract %3 %196 2 +%198 = OpCompositeConstruct %18 %193 %195 %197 %23 +%199 = OpAccessChain %200 %60 %7 + OpStore %199 %198 +%201 = OpLoad %21 %60 +%202 = OpCompositeExtract %18 %201 0 + OpStore %20 %202 OpReturn OpFunctionEnd diff --git a/src/GLSL_std_450/GLSL_std_450.zig b/src/GLSL_std_450/GLSL_std_450.zig new file mode 100644 index 0000000..0845f31 --- /dev/null +++ b/src/GLSL_std_450/GLSL_std_450.zig @@ -0,0 +1,91 @@ +//! A jam file of translated GLSL std450 header's enums and utils + +pub const GLSLstd450Version: u32 = 100; +pub const GLSLstd450Revision: u32 = 3; + +pub const GLSLOp = enum(u32) { + Bad = 0, + Round = 1, + RoundEven = 2, + Trunc = 3, + FAbs = 4, + SAbs = 5, + FSign = 6, + SSign = 7, + Floor = 8, + Ceil = 9, + Fract = 10, + Radians = 11, + Degrees = 12, + Sin = 13, + Cos = 14, + Tan = 15, + Asin = 16, + Acos = 17, + Atan = 18, + Sinh = 19, + Cosh = 20, + Tanh = 21, + Asinh = 22, + Acosh = 23, + Atanh = 24, + Atan2 = 25, + Pow = 26, + Exp = 27, + Log = 28, + Exp2 = 29, + Log2 = 30, + Sqrt = 31, + InverseSqrt = 32, + Determinant = 33, + MatrixInverse = 34, + Modf = 35, + ModfStruct = 36, + FMin = 37, + UMin = 38, + SMin = 39, + FMax = 40, + UMax = 41, + SMax = 42, + FClamp = 43, + UClamp = 44, + SClamp = 45, + FMix = 46, + IMix = 47, + Step = 48, + SmoothStep = 49, + Fma = 50, + Frexp = 51, + FrexpStruct = 52, + Ldexp = 53, + PackSnorm4x8 = 54, + PackUnorm4x8 = 55, + PackSnorm2x16 = 56, + PackUnorm2x16 = 57, + PackHalf2x16 = 58, + PackDouble2x32 = 59, + UnpackSnorm2x16 = 60, + UnpackUnorm2x16 = 61, + UnpackHalf2x16 = 62, + UnpackSnorm4x8 = 63, + UnpackUnorm4x8 = 64, + UnpackDouble2x32 = 65, + Length = 66, + Distance = 67, + Cross = 68, + Normalize = 69, + FaceForward = 70, + Reflect = 71, + Refract = 72, + FindILsb = 73, + FindSMsb = 74, + FindUMsb = 75, + InterpolateAtCentroid = 76, + InterpolateAtSample = 77, + InterpolateAtOffset = 78, + NMin = 79, + NMax = 80, + NClamp = 81, +}; + +pub const GLSLOpMaxValue: usize = 82; diff --git a/src/GLSL_std_450/opcodes.zig b/src/GLSL_std_450/opcodes.zig new file mode 100644 index 0000000..c48b821 --- /dev/null +++ b/src/GLSL_std_450/opcodes.zig @@ -0,0 +1,312 @@ +const std = @import("std"); +const spv = @import("../spv.zig"); +const ext = @import("GLSL_std_450.zig"); +const opc = @import("../opcodes.zig"); + +const Module = @import("../Module.zig"); +const Runtime = @import("../Runtime.zig"); +const Result = @import("../Result.zig"); +const WordIterator = @import("../WordIterator.zig"); + +const RuntimeError = Runtime.RuntimeError; +const ValueType = opc.ValueType; + +const getValuePrimitiveField = opc.getValuePrimitiveField; +const getValuePrimitiveFieldType = opc.getValuePrimitiveFieldType; + +const SpvVoid = spv.SpvVoid; +const SpvByte = spv.SpvByte; +const SpvWord = spv.SpvWord; +const SpvBool = spv.SpvBool; + +const MathOp = enum { + Acos, + Acosh, + Asin, + Asinh, + Atan, + Atan2, + Atanh, + Ceil, + Cos, + Cosh, + Determinant, + Exp, + Exp2, + FAbs, + FClamp, + FMax, + FMin, + FMix, + FSign, + Floor, + Fract, + IMix, + InverseSqrt, + Log, + Log2, + Modf, + Pow, + Round, + RoundEven, + SAbs, + SClamp, + SMax, + SMin, + SSign, + Sin, + Sinh, + Sqrt, + Tan, + Tanh, + Trunc, + UClamp, + UMax, + UMin, +}; + +pub const OpCodeExtFunc = opc.OpCodeExtFunc; + +/// Not an EnumMap as it is way too slow for this purpose +pub var runtime_dispatcher = [_]?OpCodeExtFunc{null} ** ext.GLSLOpMaxValue; + +pub fn initRuntimeDispatcher() void { + // zig fmt: off + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Cos)] = MathEngine(.Float, .Cos).opSingleOperator; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.FMax)] = MathEngine(.Float, .FMax).opDoubleOperators; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Length)] = opLength; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Normalize)] = opNormalize; + runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sin)] = MathEngine(.Float, .Sin).opSingleOperator; + // zig fmt: on +} + +fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { + return struct { + fn opSingleOperator(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const src = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + const operator = struct { + fn operation(comptime TT: type, x: TT) RuntimeError!TT { + return switch (Op) { + .Sin => @sin(x), + .Cos => @cos(x), + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(bit_count: SpvWord, d: *Result.Value, s: *const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const s_field = try getValuePrimitiveField(T, bits, @constCast(s)); + d_field.* = try operation(ScalarT, s_field.*); + }, + else => return RuntimeError.InvalidSpirV, + } + } + }; + + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, src), + + .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, s_lane| { + try operator.applyScalar(lane_bits, d_lane, &s_lane); + }, + + .Vector4f32 => |*d| d.* = try operator.operation(@Vector(4, f32), src.Vector4f32), + .Vector3f32 => |*d| d.* = try operator.operation(@Vector(3, f32), src.Vector3f32), + .Vector2f32 => |*d| d.* = try operator.operation(@Vector(2, f32), src.Vector2f32), + + //.Vector4i32 => |*d| d.* = try operator.operation(@Vector(4, i32), src.Vector4i32), + //.Vector3i32 => |*d| d.* = try operator.operation(@Vector(3, i32), src.Vector3i32), + //.Vector2i32 => |*d| d.* = try operator.operation(@Vector(2, i32), src.Vector2i32), + + //.Vector4u32 => |*d| d.* = try operator.operation(@Vector(4, u32), src.Vector4u32), + //.Vector3u32 => |*d| d.* = try operator.operation(@Vector(3, u32), src.Vector3u32), + //.Vector2u32 => |*d| d.* = try operator.operation(@Vector(2, u32), src.Vector2u32), + + else => return RuntimeError.InvalidSpirV, + } + } + + fn opDoubleOperators(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const lhs = try rt.results[try rt.it.next()].getValue(); + const rhs = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + const operator = struct { + fn operation(comptime TT: type, l: TT, r: TT) RuntimeError!TT { + return switch (Op) { + .FMax => @max(l, r), + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *const Result.Value, r: *const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const l_field = try getValuePrimitiveField(T, bits, @constCast(l)); + const r_field = try getValuePrimitiveField(T, bits, @constCast(r)); + d_field.* = try operation(ScalarT, l_field.*, r_field.*); + }, + else => return RuntimeError.InvalidSpirV, + } + } + + inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void { + inline for (0..N) |i| { + d[i] = try operation(ElemT, l[i], r[i]); + } + } + }; + + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs), + + .Vector => |dst_vec| for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, l_lane, r_lane| { + try operator.applyScalar(lane_bits, d_lane, &l_lane, &r_lane); + }, + + .Vector4f32 => |*d| try operator.applySIMDVector(f32, 4, d, &lhs.Vector4f32, &rhs.Vector4f32), + .Vector3f32 => |*d| try operator.applySIMDVector(f32, 3, d, &lhs.Vector3f32, &rhs.Vector3f32), + .Vector2f32 => |*d| try operator.applySIMDVector(f32, 2, d, &lhs.Vector2f32, &rhs.Vector2f32), + + .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32), + .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32), + .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32), + + .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32), + .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32), + .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32), + + else => return RuntimeError.InvalidSpirV, + } + } + }; +} + +inline fn sumSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void { + inline for (0..N) |i| { + d.* += v[i]; + } +} + +fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const src = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (lane_bits) { + inline 16, 32, 64 => |bits| { + var sum: std.meta.Float(bits) = 0.0; + const d_field = try getValuePrimitiveField(.Float, bits, dst); + + if (bits == 32) { // More likely to be SIMD if f32 + switch (src.*) { + .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), + .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), + .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + else => {}, + } + } + + switch (src.*) { + .Float => { + // Fast path + const s_field = try getValuePrimitiveField(.Float, bits, src); + d_field.* = s_field.*; + return; + }, + .Vector => |src_vec| for (src_vec) |*s_lane| { + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + sum += s_field.*; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + + d_field.* = @sqrt(sum); + }, + else => return RuntimeError.InvalidSpirV, + } +} + +fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[target_type_id].getVariant()).Type; + const dst = try rt.results[id].getValue(); + const src = try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (lane_bits) { + inline 16, 32, 64 => |bits| { + var sum: std.meta.Float(bits) = 0.0; + + if (bits == 32) { // More likely to be SIMD if f32 + switch (src.*) { + .Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec), + .Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec), + .Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec), + else => {}, + } + } + + switch (src.*) { + .Float => { + const s_field = try getValuePrimitiveField(.Float, bits, src); + sum = s_field.*; + }, + .Vector => |src_vec| for (src_vec) |*s_lane| { + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + sum += s_field.*; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + + sum = @sqrt(sum); + + if (bits == 32) { + switch (dst.*) { + .Vector4f32 => |*dst_vec| inline for (0..4) |i| { + dst_vec[i] = src.Vector4f32[i] / sum; + }, + .Vector3f32 => |*dst_vec| inline for (0..3) |i| { + dst_vec[i] = src.Vector3f32[i] / sum; + }, + .Vector2f32 => |*dst_vec| inline for (0..2) |i| { + dst_vec[i] = src.Vector2f32[i] / sum; + }, + else => {}, + } + } + + switch (dst.*) { + .Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, *s_lane| { + const d_field = try getValuePrimitiveField(.Float, bits, d_lane); + const s_field = try getValuePrimitiveField(.Float, bits, s_lane); + d_field.* = s_field.* / sum; + }, + .Vector4f32, .Vector3f32, .Vector2f32 => {}, + else => return RuntimeError.InvalidSpirV, + } + }, + else => return RuntimeError.InvalidSpirV, + } +} diff --git a/src/Result.zig b/src/Result.zig index c7859ea..2ec4385 100644 --- a/src/Result.zig +++ b/src/Result.zig @@ -2,7 +2,8 @@ const std = @import("std"); const spv = @import("spv.zig"); const op = @import("opcodes.zig"); -const RuntimeError = @import("Runtime.zig").RuntimeError; +const Runtime = @import("Runtime.zig"); +const RuntimeError = Runtime.RuntimeError; const SpvVoid = spv.SpvVoid; const SpvByte = spv.SpvByte; @@ -227,65 +228,67 @@ pub const Value = union(Type) { } }; +pub const TypeData = union(Type) { + Void: struct {}, + Bool: struct {}, + Int: struct { + bit_length: SpvWord, + is_signed: bool, + }, + Float: struct { + bit_length: SpvWord, + }, + Vector: struct { + components_type_word: SpvWord, + components_type: Type, + member_count: SpvWord, + }, + Vector4f32: struct {}, + Vector3f32: struct {}, + Vector2f32: struct {}, + Vector4i32: struct {}, + Vector3i32: struct {}, + Vector2i32: struct {}, + Vector4u32: struct {}, + Vector3u32: struct {}, + Vector2u32: struct {}, + Matrix: struct { + column_type_word: SpvWord, + column_type: Type, + member_count: SpvWord, + }, + Array: struct { + components_type_word: SpvWord, + components_type: Type, + member_count: SpvWord, + }, + RuntimeArray: struct {}, + Structure: struct { + members_type_word: []const SpvWord, + members: []Type, + member_names: std.ArrayList([]const u8), + }, + Function: struct { + source_location: usize, + return_type: SpvWord, + params: []const SpvWord, + }, + Image: struct {}, + Sampler: struct {}, + SampledImage: struct {}, + Pointer: struct { + storage_class: spv.SpvStorageClass, + target: SpvWord, + }, +}; + pub const VariantData = union(Variant) { String: []const u8, Extension: struct { /// Should not be allocated but rather a pointer to a static array - dispatcher: []op.OpCodeExtFunc, - }, - Type: union(Type) { - Void: struct {}, - Bool: struct {}, - Int: struct { - bit_length: SpvWord, - is_signed: bool, - }, - Float: struct { - bit_length: SpvWord, - }, - Vector: struct { - components_type_word: SpvWord, - components_type: Type, - member_count: SpvWord, - }, - Vector4f32: struct {}, - Vector3f32: struct {}, - Vector2f32: struct {}, - Vector4i32: struct {}, - Vector3i32: struct {}, - Vector2i32: struct {}, - Vector4u32: struct {}, - Vector3u32: struct {}, - Vector2u32: struct {}, - Matrix: struct { - column_type_word: SpvWord, - column_type: Type, - member_count: SpvWord, - }, - Array: struct { - components_type_word: SpvWord, - components_type: Type, - member_count: SpvWord, - }, - RuntimeArray: struct {}, - Structure: struct { - members_type_word: []const SpvWord, - members: []Type, - member_names: std.ArrayList([]const u8), - }, - Function: struct { - source_location: usize, - return_type: SpvWord, - params: []const SpvWord, - }, - Image: struct {}, - Sampler: struct {}, - SampledImage: struct {}, - Pointer: struct { - storage_class: spv.SpvStorageClass, - target: SpvWord, - }, + dispatcher: []?op.OpCodeExtFunc, }, + Type: TypeData, Variable: struct { storage_class: spv.SpvStorageClass, type_word: SpvWord, @@ -364,7 +367,7 @@ pub fn deinit(self: *Self, allocator: std.mem.Allocator) void { self.decorations.deinit(allocator); } -pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { +pub inline fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { return switch ((try self.getVariant()).*) { .Variable => |v| v.type_word, .Constant => |c| c.type_word, @@ -374,7 +377,7 @@ pub fn getValueTypeWord(self: *Self) RuntimeError!SpvWord { }; } -pub fn getValueType(self: *Self) RuntimeError!Type { +pub inline fn getValueType(self: *Self) RuntimeError!Type { return switch ((try self.getVariant()).*) { .Variable => |v| v.type, .Constant => |c| c.type, @@ -383,7 +386,7 @@ pub fn getValueType(self: *Self) RuntimeError!Type { }; } -pub fn getValue(self: *Self) RuntimeError!*Value { +pub inline fn getValue(self: *Self) RuntimeError!*Value { return switch ((try self.getVariant()).*) { .Variable => |*v| &v.value, .Constant => |*c| &c.value, @@ -471,6 +474,26 @@ pub fn dupe(self: *const Self, allocator: std.mem.Allocator) RuntimeError!Self { }; } +pub fn resolveLaneBitWidth(target_type: TypeData, rt: *const Runtime) RuntimeError!SpvWord { + return sw: switch (target_type) { + .Bool => 8, + .Float => |f| f.bit_length, + .Int => |i| i.bit_length, + .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, + .Vector4f32, + .Vector3f32, + .Vector2f32, + .Vector4i32, + .Vector3i32, + .Vector2i32, + .Vector4u32, + .Vector3u32, + .Vector2u32, + => return 32, + else => return RuntimeError.InvalidSpirV, + }; +} + pub fn resolveType(self: *const Self, results: []const Self) *const Self { return if (self.variant) |variant| switch (variant) { diff --git a/src/Runtime.zig b/src/Runtime.zig index 0440c63..494daf9 100644 --- a/src/Runtime.zig +++ b/src/Runtime.zig @@ -131,10 +131,7 @@ pub fn callEntryPoint(self: *Self, allocator: std.mem.Allocator, entry_point_ind var it_tmp = self.it; // Save because operations may iter on this iterator if (op.runtime_dispatcher[opcode]) |pfn| { - pfn(allocator, word_count, self) catch |err| switch (err) { - RuntimeError.Killed => return, - else => return err, - }; + try pfn(allocator, word_count, self); } if (!self.it.did_jump) { _ = it_tmp.skipN(word_count); diff --git a/src/ext/GLSL_std_450.zig b/src/ext/GLSL_std_450.zig deleted file mode 100644 index e69de29..0000000 diff --git a/src/lib.zig b/src/lib.zig index edef74f..c5c96cd 100644 --- a/src/lib.zig +++ b/src/lib.zig @@ -36,3 +36,10 @@ pub const Runtime = @import("Runtime.zig"); const opcodes = @import("opcodes.zig"); const spv = @import("spv.zig"); + +pub const SpvVoid = spv.SpvVoid; +pub const SpvByte = spv.SpvByte; +pub const SpvWord = spv.SpvWord; +pub const SpvBool = spv.SpvBool; + +pub const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig"); diff --git a/src/opcodes.zig b/src/opcodes.zig index fb7e097..0009286 100644 --- a/src/opcodes.zig +++ b/src/opcodes.zig @@ -1,6 +1,8 @@ const std = @import("std"); const spv = @import("spv.zig"); +const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig"); + const Module = @import("Module.zig"); const Runtime = @import("Runtime.zig"); const Result = @import("Result.zig"); @@ -13,13 +15,7 @@ const SpvByte = spv.SpvByte; const SpvWord = spv.SpvWord; const SpvBool = spv.SpvBool; -// OpExtInst Sin -// OpExtInst Cos -// OpExtInst Length -// OpExtInst Normalize -// OpExtInst FMax - -const ValueType = enum { +pub const ValueType = enum { Bool, Float, SInt, @@ -230,11 +226,11 @@ pub fn initRuntimeDispatcher() void { runtime_dispatcher[@intFromEnum(spv.SpvOp.ISub)] = MathEngine(.SInt, .Sub).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.Kill)] = opKill; runtime_dispatcher[@intFromEnum(spv.SpvOp.Load)] = opLoad; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)] = CondEngine(.Float, .LogicalAnd).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)] = CondEngine(.Float, .LogicalEqual).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)] = CondEngine(.Float, .LogicalNot).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)] = CondEngine(.Float, .LogicalNotEqual).op; - runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)] = CondEngine(.Float, .LogicalOr).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalAnd)] = CondEngine(.Bool, .LogicalAnd).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalEqual)] = CondEngine(.Bool, .LogicalEqual).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNot)] = CondEngine(.Bool, .LogicalNot).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalNotEqual)] = CondEngine(.Bool, .LogicalNotEqual).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.LogicalOr)] = CondEngine(.Bool, .LogicalOr).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesMatrix)] = MathEngine(.Float, .MatrixTimesMatrix).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesScalar)] = MathEngine(.Float, .MatrixTimesScalar).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.MatrixTimesVector)] = MathEngine(.Float, .MatrixTimesVector).op; // TODO @@ -261,130 +257,271 @@ pub fn initRuntimeDispatcher() void { runtime_dispatcher[@intFromEnum(spv.SpvOp.UMod)] = MathEngine(.UInt, .Mod).op; runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesMatrix)] = MathEngine(.Float, .VectorTimesMatrix).op; // TODO runtime_dispatcher[@intFromEnum(spv.SpvOp.VectorTimesScalar)] = MathEngine(.Float, .VectorTimesScalar).op; + runtime_dispatcher[@intFromEnum(spv.SpvOp.ExtInst)] = opExtInst; // zig fmt: on + + // Extensions init + GLSL_std_450.initRuntimeDispatcher(); } -fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { - if (T == .Float) @compileError("Invalid value type"); +fn extEqlName(a: []const u8, b: []const u8) bool { + for (0..@min(a.len, b.len)) |i| { + if (a[i] != b[i]) return false; + } + return true; +} + +const extensions_map = std.StaticStringMapWithEql([]?OpCodeExtFunc, extEqlName).initComptime(.{ + .{ "GLSL.std.450", GLSL_std_450.runtime_dispatcher[0..] }, +}); + +fn BitOperator(comptime T: ValueType, comptime Op: BitOp) type { return struct { - fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { - const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op1_value = try rt.results[try rt.it.next()].getValue(); - const op2_value: ?*Result.Value = switch (Op) { - .Not, .BitCount, .BitReverse => null, - else => try rt.results[try rt.it.next()].getValue(), + comptime { + if (T == .Float) @compileError("Invalid value type"); + } + + inline fn isUnaryOp() bool { + return comptime switch (Op) { + .Not, .BitCount, .BitReverse => true, + else => false, }; + } - const size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Int => |i| i.bit_length, - else => return RuntimeError.InvalidSpirV, + inline fn bitMask(bits: u64) u64 { + return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1; + } + + inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT { + const mask: TT = @intCast(bitMask(count) << @intCast(offset)); + return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask))); + } + + inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT { + return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count))); + } + + fn operationUnary(comptime TT: type, op1: TT) RuntimeError!TT { + return switch (Op) { + .BitCount => @as(TT, @intCast(@bitSizeOf(TT))), // keep return type TT + .BitReverse => @bitReverse(op1), + .Not => ~op1, + else => RuntimeError.InvalidSpirV, }; + } - const operator = struct { - inline fn bitMask(bits: u64) u64 { - return if (bits >= 32) ~@as(u64, 0) else (@as(u64, 0x1) << @intCast(bits)) - 1; - } + fn operationBinary(comptime TT: type, rt: *Runtime, op1: TT, op2: TT) RuntimeError!TT { + return switch (Op) { + .BitFieldInsert => blk: { + const offset = try rt.results[try rt.it.next()].getValue(); + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitInsert(TT, op1, op2, offset.Int.uint64, count.Int.uint64); + }, + .BitFieldSExtract => blk: { + if (T == .UInt) return RuntimeError.InvalidSpirV; + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitExtract(TT, op1, op2, count.Int.uint64); + }, + .BitFieldUExtract => blk: { + if (T == .SInt) return RuntimeError.InvalidSpirV; + const count = try rt.results[try rt.it.next()].getValue(); + break :blk bitExtract(TT, op1, op2, count.Int.uint64); + }, - inline fn bitInsert(comptime TT: type, base: TT, insert: TT, offset: u64, count: u64) TT { - const mask: TT = @intCast(bitMask(count) << @intCast(offset)); - return @as(TT, @intCast((base & ~mask) | ((insert << @intCast(offset)) & mask))); - } + .BitwiseAnd => op1 & op2, + .BitwiseOr => op1 | op2, + .BitwiseXor => op1 ^ op2, + .ShiftLeft => op1 << @intCast(op2), + .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(op2), - inline fn bitExtract(comptime TT: type, v: TT, offset: TT, count: u64) TT { - return (v >> @intCast(offset)) & @as(TT, @intCast(bitMask(count))); - } - - fn operation(comptime TT: type, rt2: *Runtime, op1: TT, op2: ?TT) RuntimeError!TT { - switch (Op) { - .BitCount => return @bitSizeOf(TT), - .BitReverse => return @bitReverse(op1), - .Not => return ~op1, - else => {}, - } - return if (op2) |v2| - switch (Op) { - .BitFieldInsert => blk: { - const offset = try rt2.results[try rt2.it.next()].getValue(); - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitInsert(TT, op1, v2, offset.Int.uint64, count.Int.uint64); - }, - .BitFieldSExtract => blk: { - if (T == .UInt) return RuntimeError.InvalidSpirV; - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitExtract(TT, op1, v2, count.Int.uint64); - }, - .BitFieldUExtract => blk: { - if (T == .SInt) return RuntimeError.InvalidSpirV; - const count = try rt2.results[try rt2.it.next()].getValue(); - break :blk bitExtract(TT, op1, v2, count.Int.uint64); - }, - .BitwiseAnd => op1 & v2, - .BitwiseOr => op1 | v2, - .BitwiseXor => op1 ^ v2, - .ShiftLeft => op1 << @intCast(v2), - .ShiftRight, .ShiftRightArithmetic => op1 >> @intCast(v2), - else => return RuntimeError.InvalidSpirV, - } - else - RuntimeError.InvalidSpirV; - } - - fn process(rt2: *Runtime, bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { - switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - (try getValuePrimitiveField(T, i, v)).* = try operation( - getValuePrimitiveFieldType(T, i), - rt2, - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - if (op2_v) |v2| - (try getValuePrimitiveField(T, i, @constCast(v2))).* - else - null, - ); - }, - else => return RuntimeError.InvalidSpirV, - } - } + else => RuntimeError.InvalidSpirV, }; + } - switch (value.*) { - .Int => try operator.process(rt, size, value, op1_value, op2_value), - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| - try operator.process(rt, size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null), - // No bit manipulation on VectorXf32 - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector4i32[i], if (op2_value) |op2_v| op2_v.Vector4i32[i] else null); - }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector3i32[i], if (op2_value) |op2_v| op2_v.Vector3i32[i] else null); - }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(i32, rt, op1_value.Vector2i32[i], if (op2_value) |op2_v| op2_v.Vector2i32[i] else null); - }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector4u32[i], if (op2_value) |op2_v| op2_v.Vector4u32[i] else null); - }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector3u32[i], if (op2_value) |op2_v| op2_v.Vector3u32[i] else null); - }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(u32, rt, op1_value.Vector2u32[i], if (op2_value) |op2_v| op2_v.Vector2u32[i] else null); + fn applyScalarBits(rt: *Runtime, bit_count: SpvWord, dst: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + const TT = getValuePrimitiveFieldType(T, bits); + const a = (try getValuePrimitiveField(T, bits, @constCast(op1_v))).*; + + const out = if (comptime isUnaryOp()) blk: { + break :blk try operationUnary(TT, a); + } else blk: { + const b_ptr = op2_v orelse return RuntimeError.InvalidSpirV; + const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*; + break :blk try operationBinary(TT, rt, a, b); + }; + + (try getValuePrimitiveField(T, bits, dst)).* = out; }, else => return RuntimeError.InvalidSpirV, } } + + fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value { + if (comptime isUnaryOp()) return null; + const v = op2_value orelse return null; + return &v.Vector[index]; + } + + fn applyFixedVector(comptime ElemT: type, comptime N: usize, dst: *[N]ElemT, op1: *[N]ElemT, op2_value: ?*Result.Value) RuntimeError!void { + if (comptime isUnaryOp()) { + inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]); + } else { + const op2 = op2_value orelse return RuntimeError.InvalidSpirV; + const b: *const [N]ElemT = switch (N) { + 2 => &op2.*.Vector2u32, // will be overridden by call sites per ElemT/tag + 3 => &op2.*.Vector3u32, + 4 => &op2.*.Vector4u32, + else => unreachable, + }; + // NOTE: the above dummy mapping isn’t type-correct for i32; call sites below pass correct rhs pointer. + _ = b; + return RuntimeError.InvalidSpirV; + } + } + + fn applyFixedVectorBinary( + comptime ElemT: type, + comptime N: usize, + rt: *Runtime, + dst: *[N]ElemT, + op1: *[N]ElemT, + op2: *[N]ElemT, + ) RuntimeError!void { + inline for (0..N) |i| dst[i] = try operationBinary(ElemT, rt, op1[i], op2[i]); + } + + fn applyFixedVectorUnary( + comptime ElemT: type, + comptime N: usize, + dst: *[N]ElemT, + op1: *[N]ElemT, + ) RuntimeError!void { + inline for (0..N) |i| dst[i] = try operationUnary(ElemT, op1[i]); + } + }; +} + +fn BitEngine(comptime T: ValueType, comptime Op: BitOp) type { + return struct { + fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { + const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; + const dst = try rt.results[try rt.it.next()].getValue(); + const op1 = try rt.results[try rt.it.next()].getValue(); + + const operator = BitOperator(T, Op); + + const op2_value: ?*Result.Value = if (comptime operator.isUnaryOp()) null else try rt.results[try rt.it.next()].getValue(); + + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); + + switch (dst.*) { + .Int => try operator.applyScalarBits(rt, lane_bits, dst, op1, if (comptime operator.isUnaryOp()) null else op2_value), + + .Vector => |dst_vec| { + const op1_vec = op1.Vector; + if (dst_vec.len != op1_vec.len) return RuntimeError.InvalidSpirV; + + for (dst_vec, op1_vec, 0..) |*d_lane, a_lane, i| { + var tmp_a = a_lane; + const b_ptr = operator.laneRhsPtr(op2_value, i); + try operator.applyScalarBits(rt, lane_bits, d_lane, &tmp_a, b_ptr); + } + }, + + .Vector4i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 4, d, &op1.Vector4i32) + else + try operator.applyFixedVectorBinary(i32, 4, rt, d, &op1.Vector4i32, &op2_value.?.Vector4i32); + }, + .Vector3i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 3, d, &op1.Vector3i32) + else + try operator.applyFixedVectorBinary(i32, 3, rt, d, &op1.Vector3i32, &op2_value.?.Vector3i32); + }, + .Vector2i32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(i32, 2, d, &op1.Vector2i32) + else + try operator.applyFixedVectorBinary(i32, 2, rt, d, &op1.Vector2i32, &op2_value.?.Vector2i32); + }, + + .Vector4u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 4, d, &op1.Vector4u32) + else + try operator.applyFixedVectorBinary(u32, 4, rt, d, &op1.Vector4u32, &op2_value.?.Vector4u32); + }, + .Vector3u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 3, d, &op1.Vector3u32) + else + try operator.applyFixedVectorBinary(u32, 3, rt, d, &op1.Vector3u32, &op2_value.?.Vector3u32); + }, + .Vector2u32 => |*d| { + if (comptime operator.isUnaryOp()) + try operator.applyFixedVectorUnary(u32, 2, d, &op1.Vector2u32) + else + try operator.applyFixedVectorBinary(u32, 2, rt, d, &op1.Vector2u32, &op2_value.?.Vector2u32); + }, + + else => return RuntimeError.InvalidSpirV, + } + } + }; +} + +fn CondOperator(comptime T: ValueType, comptime Op: CondOp) type { + return struct { + fn operation(comptime TT: type, a: TT, b: TT) RuntimeError!bool { + return switch (Op) { + .Equal, .LogicalEqual => a == b, + .NotEqual, .LogicalNotEqual => a != b, + .Greater => a > b, + .GreaterEqual => a >= b, + .Less => a < b, + .LessEqual => a <= b, + .LogicalAnd => a and b, + .LogicalOr => a or b, + else => RuntimeError.InvalidSpirV, + }; + } + + fn operationUnary(comptime TT: type, a: TT) RuntimeError!bool { + return switch (Op) { + .LogicalNot => !a, + else => RuntimeError.InvalidSpirV, + }; + } + + fn applyLane(bit_count: SpvWord, dst_bool: *Result.Value, a_v: *const Result.Value, b_v: ?*const Result.Value) RuntimeError!void { + switch (bit_count) { + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const TT = getValuePrimitiveFieldType(T, bits); + const a = (try getValuePrimitiveField(T, bits, @constCast(a_v))).*; + + if (comptime Op == .LogicalNot) { + dst_bool.Bool = try operationUnary(TT, a); + } else { + const b_ptr = b_v orelse return RuntimeError.InvalidSpirV; + const b = (try getValuePrimitiveField(T, bits, @constCast(b_ptr))).*; + dst_bool.Bool = try operation(TT, a, b); + } + }, + else => return RuntimeError.InvalidSpirV, + } + } + + fn laneRhsPtr(op2_value: ?*Result.Value, index: usize) ?*const Result.Value { + if (comptime Op == .LogicalNot) return null; + const v = op2_value orelse return null; + return &v.Vector[index]; + } }; } @@ -397,199 +534,151 @@ fn CondEngine(comptime T: ValueType, comptime Op: CondOp) type { else => return RuntimeError.InvalidSpirV, } - const value = try rt.results[try rt.it.next()].getValue(); + const dst = try rt.results[try rt.it.next()].getValue(); + const op1_result = &rt.results[try rt.it.next()]; const op1_type = try op1_result.getValueTypeWord(); const op1_value = try op1_result.getValue(); - const op2_value: ?*Result.Value = switch (Op) { - .LogicalNot => null, - else => try rt.results[try rt.it.next()].getValue(), - }; - const size = sw: switch ((try rt.results[op1_type].getVariant()).Type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const op2_value: ?*Result.Value = if (comptime Op == .LogicalNot) null else try rt.results[try rt.it.next()].getValue(); - const operator = struct { - fn operation(comptime TT: type, op1: TT, op2: ?TT) RuntimeError!bool { - return switch (Op) { - .Equal, .LogicalEqual => op1 == op2 orelse return RuntimeError.InvalidSpirV, - .NotEqual, .LogicalNotEqual => op1 != op2 orelse return RuntimeError.InvalidSpirV, - .Greater => op1 > op2 orelse return RuntimeError.InvalidSpirV, - .GreaterEqual => op1 >= op2 orelse return RuntimeError.InvalidSpirV, - .Less => op1 < op2 orelse return RuntimeError.InvalidSpirV, - .LessEqual => op1 <= op2 orelse return RuntimeError.InvalidSpirV, - .LogicalAnd => (op1 != @as(TT, 0)) and ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)), - .LogicalOr => (op1 != @as(TT, 0)) or ((op2 orelse return RuntimeError.InvalidSpirV) != @as(TT, 0)), - .LogicalNot => (op1 == @as(TT, 0)), - }; - } + const lane_bits = try Result.resolveLaneBitWidth((try rt.results[op1_type].getVariant()).Type, rt); - fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: ?*const Result.Value) RuntimeError!void { - switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and T == .Float) { // No f8 - return RuntimeError.InvalidSpirV; - } - v.Bool = try operation( - getValuePrimitiveFieldType(T, i), - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - if (op2_v) |val| (try getValuePrimitiveField(T, i, @constCast(val))).* else null, - ); - }, - else => return RuntimeError.InvalidSpirV, - } - } - }; + const operator = CondOperator(T, Op); - switch (value.*) { - .Bool => try operator.process(size, value, op1_value, op2_value), - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| { - try operator.process(size, val, &op1_v, if (op2_value) |op2_v| &op2_v.Vector[i] else null); + switch (dst.*) { + .Bool => try operator.applyLane(lane_bits, dst, op1_value, op2_value), + + .Vector => |dst_vec| for (dst_vec, op1_value.Vector, 0..) |*d_lane, a_lane, i| { + const b_ptr = operator.laneRhsPtr(op2_value, i); + try operator.applyLane(lane_bits, d_lane, &a_lane, b_ptr); }, - // No Vector specializations for booleans + else => return RuntimeError.InvalidSpirV, } } }; } -fn ConversionEngine(comptime From: ValueType, comptime To: ValueType) type { +fn ConversionEngine(comptime from_kind: ValueType, comptime to_kind: ValueType) type { return struct { fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op_result = &rt.results[try rt.it.next()]; - const op_type = try op_result.getValueTypeWord(); - const op_value = try op_result.getValue(); + const dst_value = try rt.results[try rt.it.next()].getValue(); - const from_size = sw: switch ((try rt.results[op_type].getVariant()).Type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (From == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (From == .SInt or From == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const src_result = &rt.results[try rt.it.next()]; + const src_type_word = try src_result.getValueTypeWord(); + const src_value = try src_result.getValue(); - const to_size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (To == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (To == .SInt or To == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const from_bits = try Result.resolveLaneBitWidth((try rt.results[src_type_word].getVariant()).Type, rt); + const to_bits = try Result.resolveLaneBitWidth(target_type, rt); - const operator = struct { - fn process(from_bit_count: SpvWord, to_bit_count: SpvWord, to: *Result.Value, from: *Result.Value) RuntimeError!void { + const caster = struct { + fn castLane(comptime ToT: type, from_bit_count: SpvWord, from: *Result.Value) RuntimeError!ToT { + return switch (from_bit_count) { + inline 8, 16, 32, 64 => |bits| blk: { + if (bits == 8 and from_kind == .Float) return RuntimeError.InvalidSpirV; // No f8 + const v = (try getValuePrimitiveField(from_kind, bits, from)).*; + break :blk std.math.lossyCast(ToT, v); + }, + else => return RuntimeError.InvalidSpirV, + }; + } + + fn applyScalar(from_bit_count: SpvWord, to_bit_count: SpvWord, dst: *Result.Value, from: *Result.Value) RuntimeError!void { switch (to_bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and To == .Float) { - return RuntimeError.InvalidSpirV; // No f8 - } - - const ToType = getValuePrimitiveFieldType(To, i); - (try getValuePrimitiveField(To, i, to)).* = std.math.lossyCast( - ToType, - switch (from_bit_count) { - inline 8, 16, 32, 64 => |j| blk: { - if (j == 8 and From == .Float) { - return RuntimeError.InvalidSpirV; // Same - } - break :blk (try getValuePrimitiveField(From, j, from)).*; - }, - else => return RuntimeError.InvalidSpirV, - }, - ); + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and to_kind == .Float) return RuntimeError.InvalidSpirV; // No f8 + const ToT = getValuePrimitiveFieldType(to_kind, bits); + (try getValuePrimitiveField(to_kind, bits, dst)).* = try castLane(ToT, from_bit_count, from); }, else => return RuntimeError.InvalidSpirV, } } - fn processVecSpe(comptime T: type, from_bit_count: SpvWord, from: *Result.Value, index: usize) RuntimeError!T { - return switch (from.*) { - .Vector3f32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2f32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector4i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector3i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2i32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector4u32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector3u32 => |vec| std.math.lossyCast(T, vec[index]), - .Vector2u32 => |vec| std.math.lossyCast(T, vec[index]), - inline else => switch (from_bit_count) { - inline 8, 16, 32, 64 => |i| std.math.lossyCast(T, blk: { - if (i == 8 and From == .Float) { - return RuntimeError.InvalidSpirV; - } - break :blk (try getValuePrimitiveField(From, i, from)).*; - }), - else => return RuntimeError.InvalidSpirV, - }, - }; + fn castSIMDVector(comptime ToT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]ToT) void { + inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]); + } + + fn castSIMDVectorFromOther(comptime ToT: type, comptime FromT: type, comptime N: usize, dst_arr: *[N]ToT, src_arr: *const [N]FromT) void { + inline for (0..N) |i| dst_arr[i] = std.math.lossyCast(ToT, src_arr[i]); } }; - switch (value.*) { - .Float => if (To == .Float) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, - .Int => if (To == .SInt or To == .UInt) try operator.process(from_size, to_size, value, op_value) else return RuntimeError.InvalidSpirV, - .Vector => |vec| for (vec, op_value.Vector) |*val, *op_v| try operator.process(from_size, to_size, val, op_v), - .Vector4f32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + switch (dst_value.*) { + .Float => { + if (to_kind != .Float) return RuntimeError.InvalidSpirV; + try caster.applyScalar(from_bits, to_bits, dst_value, src_value); }, - .Vector3f32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + .Int => { + if (to_kind != .SInt and to_kind != .UInt) return RuntimeError.InvalidSpirV; + try caster.applyScalar(from_bits, to_bits, dst_value, src_value); }, - .Vector2f32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(f32, from_size, op_value, i); + .Vector => |dst_vec| { + const src_vec = src_value.Vector; + if (dst_vec.len != src_vec.len) return RuntimeError.InvalidSpirV; + for (dst_vec, src_vec) |*d_lane, *s_lane| { + try caster.applyScalar(from_bits, to_bits, d_lane, s_lane); + } }, - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + + .Vector4f32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVector(f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVectorFromOther(f32, i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVectorFromOther(f32, u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + .Vector3f32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVector(f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVectorFromOther(f32, i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVectorFromOther(f32, u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(i32, from_size, op_value, i); + .Vector2f32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVector(f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVectorFromOther(f32, i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVectorFromOther(f32, u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + + .Vector4i32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVectorFromOther(i32, f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVector(i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVectorFromOther(i32, u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + .Vector3i32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVectorFromOther(i32, f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVector(i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVectorFromOther(i32, u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.processVecSpe(u32, from_size, op_value, i); + .Vector2i32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVectorFromOther(i32, f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVector(i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVectorFromOther(i32, u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, }, + + .Vector4u32 => |*dst| switch (src_value.*) { + .Vector4f32 => caster.castSIMDVectorFromOther(u32, f32, 4, dst, &src_value.Vector4f32), + .Vector4i32 => caster.castSIMDVectorFromOther(u32, i32, 4, dst, &src_value.Vector4i32), + .Vector4u32 => caster.castSIMDVector(u32, 4, dst, &src_value.Vector4u32), + else => return RuntimeError.InvalidSpirV, + }, + .Vector3u32 => |*dst| switch (src_value.*) { + .Vector3f32 => caster.castSIMDVectorFromOther(u32, f32, 3, dst, &src_value.Vector3f32), + .Vector3i32 => caster.castSIMDVectorFromOther(u32, i32, 3, dst, &src_value.Vector3i32), + .Vector3u32 => caster.castSIMDVector(u32, 3, dst, &src_value.Vector3u32), + else => return RuntimeError.InvalidSpirV, + }, + .Vector2u32 => |*dst| switch (src_value.*) { + .Vector2f32 => caster.castSIMDVectorFromOther(u32, f32, 2, dst, &src_value.Vector2f32), + .Vector2i32 => caster.castSIMDVectorFromOther(u32, i32, 2, dst, &src_value.Vector2i32), + .Vector2u32 => caster.castSIMDVector(u32, 2, dst, &src_value.Vector2u32), + else => return RuntimeError.InvalidSpirV, + }, + else => return RuntimeError.InvalidSpirV, } } @@ -600,26 +689,11 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { return struct { fn op(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { const target_type = (try rt.results[try rt.it.next()].getVariant()).Type; - const value = try rt.results[try rt.it.next()].getValue(); - const op1_value = try rt.results[try rt.it.next()].getValue(); - const op2_value = try rt.results[try rt.it.next()].getValue(); + const dst = try rt.results[try rt.it.next()].getValue(); + const lhs = try rt.results[try rt.it.next()].getValue(); + const rhs = try rt.results[try rt.it.next()].getValue(); - const size = sw: switch (target_type) { - .Vector => |v| continue :sw (try rt.results[v.components_type_word].getVariant()).Type, - .Vector4f32, - .Vector3f32, - .Vector2f32, - .Vector4i32, - .Vector3i32, - .Vector2i32, - .Vector4u32, - .Vector3u32, - .Vector2u32, - => 32, - .Float => |f| if (T == .Float) f.bit_length else return RuntimeError.InvalidSpirV, - .Int => |i| if (T == .SInt or T == .UInt) i.bit_length else return RuntimeError.InvalidSpirV, - else => return RuntimeError.InvalidSpirV, - }; + const lane_bits = try Result.resolveLaneBitWidth(target_type, rt); const operator = struct { fn operation(comptime TT: type, op1: TT, op2: TT) RuntimeError!TT { @@ -637,68 +711,77 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type { }; } - fn process(bit_count: SpvWord, v: *Result.Value, op1_v: *const Result.Value, op2_v: *const Result.Value) RuntimeError!void { + fn applyScalar(bit_count: SpvWord, d: *Result.Value, l: *Result.Value, r: *Result.Value) RuntimeError!void { switch (bit_count) { - inline 8, 16, 32, 64 => |i| { - if (i == 8 and T == .Float) { // No f8 - return RuntimeError.InvalidSpirV; - } - (try getValuePrimitiveField(T, i, v)).* = try operation( - getValuePrimitiveFieldType(T, i), - (try getValuePrimitiveField(T, i, @constCast(op1_v))).*, - (try getValuePrimitiveField(T, i, @constCast(op2_v))).*, - ); + inline 8, 16, 32, 64 => |bits| { + if (bits == 8 and T == .Float) return RuntimeError.InvalidSpirV; + + const ScalarT = getValuePrimitiveFieldType(T, bits); + const d_field = try getValuePrimitiveField(T, bits, d); + const l_field = try getValuePrimitiveField(T, bits, l); + const r_field = try getValuePrimitiveField(T, bits, r); + d_field.* = try operation(ScalarT, l_field.*, r_field.*); }, else => return RuntimeError.InvalidSpirV, } } + + inline fn applyVectorTimesScalarF32(d: []Result.Value, l: []const Result.Value, r: f32) void { + for (d, l) |*d_v, l_v| { + d_v.Float.float32 = l_v.Float.float32 * r; + } + } + + inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void { + inline for (0..N) |i| { + d[i] = try operation(ElemT, l[i], r[i]); + } + } + + inline fn applyVectorSIMDTimesScalarF32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: f32) void { + inline for (0..N) |i| { + d[i] = l[i] * r; + } + } + + inline fn applySIMDVectorf32(comptime N: usize, d: *@Vector(N, f32), l: *const @Vector(N, f32), r: *const Result.Value) RuntimeError!void { + switch (Op) { + .VectorTimesScalar => applyVectorSIMDTimesScalarF32(N, d, l, r.Float.float32), + else => { + const rh: *const @Vector(N, f32) = switch (N) { + 2 => &r.Vector2f32, + 3 => &r.Vector3f32, + 4 => &r.Vector4f32, + else => unreachable, + }; + try applySIMDVector(f32, N, d, l, rh); + }, + } + } }; - switch (value.*) { - .Float => if (T == .Float) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, - .Int => if (T == .SInt or T == .UInt) try operator.process(size, value, op1_value, op2_value) else return RuntimeError.InvalidSpirV, - .Vector => |vec| for (vec, op1_value.Vector, 0..) |*val, op1_v, i| { - switch (Op) { - .VectorTimesScalar => try operator.process(size, val, &op1_v, op2_value), - else => try operator.process(size, val, &op1_v, &op2_value.Vector[i]), - } - }, - .Vector4f32 => |*vec| inline for (0..4) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector4f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector4f32[i], op2_value.Vector4f32[i]), - } - }, - .Vector3f32 => |*vec| inline for (0..3) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector3f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector3f32[i], op2_value.Vector3f32[i]), - } - }, - .Vector2f32 => |*vec| inline for (0..2) |i| { - switch (Op) { - .VectorTimesScalar => vec[i] = op1_value.Vector2f32[i] * op2_value.Float.float32, - else => vec[i] = try operator.operation(f32, op1_value.Vector2f32[i], op2_value.Vector2f32[i]), - } - }, - .Vector4i32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector4i32[i], op2_value.Vector4i32[i]); - }, - .Vector3i32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector3i32[i], op2_value.Vector3i32[i]); - }, - .Vector2i32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(i32, op1_value.Vector2i32[i], op2_value.Vector2i32[i]); - }, - .Vector4u32 => |*vec| inline for (0..4) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector4u32[i], op2_value.Vector4u32[i]); - }, - .Vector3u32 => |*vec| inline for (0..3) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector3u32[i], op2_value.Vector3u32[i]); - }, - .Vector2u32 => |*vec| inline for (0..2) |i| { - vec[i] = try operator.operation(u32, op1_value.Vector2u32[i], op2_value.Vector2u32[i]); + switch (dst.*) { + .Int, .Float => try operator.applyScalar(lane_bits, dst, lhs, rhs), + + .Vector => |dst_vec| switch (Op) { + .VectorTimesScalar => operator.applyVectorTimesScalarF32(dst_vec, lhs.Vector, rhs.Float.float32), + else => for (dst_vec, lhs.Vector, rhs.Vector) |*d_lane, *l_lane, *r_lane| { + try operator.applyScalar(lane_bits, d_lane, l_lane, r_lane); + }, }, + + .Vector4f32 => |*d| try operator.applySIMDVectorf32(4, d, &lhs.Vector4f32, rhs), + .Vector3f32 => |*d| try operator.applySIMDVectorf32(3, d, &lhs.Vector3f32, rhs), + .Vector2f32 => |*d| try operator.applySIMDVectorf32(2, d, &lhs.Vector2f32, rhs), + + .Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32), + .Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32), + .Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32), + + .Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32), + .Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32), + .Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32), + else => return RuntimeError.InvalidSpirV, } } @@ -784,20 +867,21 @@ fn opBitcast(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void { } fn copyValue(dst: *Result.Value, src: *const Result.Value) void { - if (src.getCompositeDataOrNull()) |src_slice| { - if (dst.getCompositeDataOrNull()) |dst_slice| { + switch (src.*) { + .Vector, .Matrix, .Array, .Structure => |src_slice| { + const dst_slice = switch (dst.*) { + .Vector, .Matrix, .Array, .Structure => |d| d, + else => unreachable, + }; for (0..@min(dst_slice.len, src_slice.len)) |i| { copyValue(&dst_slice[i], &src_slice[i]); } - } else { - unreachable; - } - } else { - dst.* = src.*; + }, + else => dst.* = src.*, } } -fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) { +pub fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: *Result.Value) RuntimeError!*getValuePrimitiveFieldType(T, BitCount) { return switch (T) { .Bool => &v.Bool, .Float => switch (BitCount) { @@ -815,7 +899,7 @@ fn getValuePrimitiveField(comptime T: ValueType, comptime BitCount: SpvWord, v: }; } -fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type { +pub fn getValuePrimitiveFieldType(comptime T: ValueType, comptime BitCount: SpvWord) type { return switch (T) { .Bool => bool, .Float => std.meta.Float(BitCount), @@ -1112,7 +1196,7 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru const set = try rt.it.next(); const inst = try rt.it.next(); - switch (try rt.results[set].getVariant()) { + switch ((try rt.results[set].getVariant()).*) { .Extension => |ext| if (ext.dispatcher[inst]) |pfn| { try pfn(allocator, target_type, id, word_count, rt); }, @@ -1122,10 +1206,11 @@ fn opExtInst(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) Ru fn opExtInstImport(allocator: std.mem.Allocator, word_count: SpvWord, rt: *Runtime) RuntimeError!void { const id = try rt.it.next(); - rt.mod.results[id].name = try readStringN(allocator, &rt.it, word_count - 1); + const name = try readStringN(allocator, &rt.it, word_count - 1); + rt.mod.results[id].name = name; rt.mod.results[id].variant = .{ .Extension = .{ - .dispatcher = undefined, + .dispatcher = if (extensions_map.get(name)) |map| map else return RuntimeError.UnsupportedExtension, }, }; }