fixing shader execution
This commit is contained in:
@@ -2,6 +2,7 @@ const std = @import("std");
|
||||
const spv = @import("../spv.zig");
|
||||
const ext = @import("GLSL_std_450.zig");
|
||||
const opc = @import("../opcodes.zig");
|
||||
const zm = @import("zmath");
|
||||
|
||||
const Module = @import("../Module.zig");
|
||||
const Runtime = @import("../Runtime.zig");
|
||||
@@ -72,11 +73,23 @@ pub var runtime_dispatcher = [_]?OpCodeExtFunc{null} ** ext.GLSLOpMaxValue;
|
||||
|
||||
/// Registers the GLSL.std.450 extended-instruction handlers in `runtime_dispatcher`.
///
/// Each assignment stores a handler in the slot indexed by the integer value of
/// the corresponding `ext.GLSLOp` tag. Slots that are not assigned here are
/// left untouched by this function.
pub fn initRuntimeDispatcher() void {
    // zig fmt: off
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Ceil)]      = MathEngine(.Float, .Ceil).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Cos)]       = MathEngine(.Float, .Cos).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Exp)]       = MathEngine(.Float, .Exp).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Exp2)]      = MathEngine(.Float, .Exp2).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.FAbs)]      = MathEngine(.Float, .FAbs).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.FMax)]      = MathEngine(.Float, .FMax).opDoubleOperators;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Floor)]     = MathEngine(.Float, .Floor).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Length)]    = opLength;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Log)]       = MathEngine(.Float, .Log).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Log2)]      = MathEngine(.Float, .Log2).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Normalize)] = opNormalize;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Round)]     = MathEngine(.Float, .Round).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.SAbs)]      = MathEngine(.SInt, .SAbs).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sin)]       = MathEngine(.Float, .Sin).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Sqrt)]      = MathEngine(.Float, .Sqrt).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Tan)]       = MathEngine(.Float, .Tan).opSingleOperator;
    runtime_dispatcher[@intFromEnum(ext.GLSLOp.Trunc)]     = MathEngine(.Float, .Trunc).opSingleOperator;
    // zig fmt: on
}
|
||||
|
||||
@@ -92,8 +105,20 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
|
||||
const operator = struct {
|
||||
/// Applies the comptime-selected unary `Op` to `x` and returns the result.
///
/// `.SAbs` is only accepted when `TT` is an integer type; for any other `TT`
/// it returns `RuntimeError.InvalidSpirV`. Any `Op` without a prong below
/// also yields `RuntimeError.InvalidSpirV`.
fn operation(comptime TT: type, x: TT) RuntimeError!TT {
    return switch (Op) {
        .Ceil => @ceil(x),
        .Cos => @cos(x),
        .Exp => @exp(x),
        .Exp2 => @exp2(x),
        .FAbs => @abs(x),
        .Floor => @floor(x),
        .Log => @log(x),
        .Log2 => @log2(x),
        .Round => @round(x),
        // @abs on a signed integer yields the unsigned counterpart, so cast back to TT.
        .SAbs => if (comptime @typeInfo(TT) == .int) @intCast(@abs(x)) else return RuntimeError.InvalidSpirV,
        // `.Sin` is listed exactly once: a second prong for the same tag is a duplicate switch value.
        .Sin => @sin(x),
        .Sqrt => @sqrt(x),
        .Tan => @tan(x),
        .Trunc => @trunc(x),
        else => RuntimeError.InvalidSpirV,
    };
}
|
||||
@@ -166,12 +191,6 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies the two-operand `operation` lane by lane over `N`-wide vectors.
///
/// For every lane index `i` in `0..N`, stores `operation(ElemT, l[i], r[i])`
/// into `d[i]`. The loop is unrolled at comptime; the first error returned by
/// `operation` is propagated and the remaining lanes are not written.
inline fn applySIMDVector(comptime ElemT: type, comptime N: usize, d: *@Vector(N, ElemT), l: *const @Vector(N, ElemT), r: *const @Vector(N, ElemT)) RuntimeError!void {
    inline for (0..N) |i| {
        d[i] = try operation(ElemT, l[i], r[i]);
    }
}
|
||||
};
|
||||
|
||||
switch (dst.*) {
|
||||
@@ -181,17 +200,17 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
|
||||
try operator.applyScalar(lane_bits, d_lane, &l_lane, &r_lane);
|
||||
},
|
||||
|
||||
.Vector4f32 => |*d| try operator.applySIMDVector(f32, 4, d, &lhs.Vector4f32, &rhs.Vector4f32),
|
||||
.Vector3f32 => |*d| try operator.applySIMDVector(f32, 3, d, &lhs.Vector3f32, &rhs.Vector3f32),
|
||||
.Vector2f32 => |*d| try operator.applySIMDVector(f32, 2, d, &lhs.Vector2f32, &rhs.Vector2f32),
|
||||
.Vector4f32 => |*d| d.* = try operator.operation(@Vector(4, f32), lhs.Vector4f32, rhs.Vector4f32),
|
||||
.Vector3f32 => |*d| d.* = try operator.operation(@Vector(3, f32), lhs.Vector3f32, rhs.Vector3f32),
|
||||
.Vector2f32 => |*d| d.* = try operator.operation(@Vector(2, f32), lhs.Vector2f32, rhs.Vector2f32),
|
||||
|
||||
.Vector4i32 => |*d| try operator.applySIMDVector(i32, 4, d, &lhs.Vector4i32, &rhs.Vector4i32),
|
||||
.Vector3i32 => |*d| try operator.applySIMDVector(i32, 3, d, &lhs.Vector3i32, &rhs.Vector3i32),
|
||||
.Vector2i32 => |*d| try operator.applySIMDVector(i32, 2, d, &lhs.Vector2i32, &rhs.Vector2i32),
|
||||
.Vector4i32 => |*d| d.* = try operator.operation(@Vector(4, i32), lhs.Vector4i32, rhs.Vector4i32),
|
||||
.Vector3i32 => |*d| d.* = try operator.operation(@Vector(3, i32), lhs.Vector3i32, rhs.Vector3i32),
|
||||
.Vector2i32 => |*d| d.* = try operator.operation(@Vector(2, i32), lhs.Vector2i32, rhs.Vector2i32),
|
||||
|
||||
.Vector4u32 => |*d| try operator.applySIMDVector(u32, 4, d, &lhs.Vector4u32, &rhs.Vector4u32),
|
||||
.Vector3u32 => |*d| try operator.applySIMDVector(u32, 3, d, &lhs.Vector3u32, &rhs.Vector3u32),
|
||||
.Vector2u32 => |*d| try operator.applySIMDVector(u32, 2, d, &lhs.Vector2u32, &rhs.Vector2u32),
|
||||
.Vector4u32 => |*d| d.* = try operator.operation(@Vector(4, u32), lhs.Vector4u32, rhs.Vector4u32),
|
||||
.Vector3u32 => |*d| d.* = try operator.operation(@Vector(3, u32), lhs.Vector3u32, rhs.Vector3u32),
|
||||
.Vector2u32 => |*d| d.* = try operator.operation(@Vector(2, u32), lhs.Vector2u32, rhs.Vector2u32),
|
||||
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
@@ -199,12 +218,6 @@ fn MathEngine(comptime T: ValueType, comptime Op: MathOp) type {
|
||||
};
|
||||
}
|
||||
|
||||
/// Adds every lane of `v` to the accumulator pointed to by `d`.
///
/// The accumulator is not reset: its incoming value is kept and the `N` lanes
/// are added to it in index order (lane 0 first). The loop is unrolled at
/// comptime.
inline fn sumSIMDVector(comptime ElemT: type, comptime N: usize, d: *ElemT, v: *const @Vector(N, ElemT)) void {
    inline for (0..N) |i| {
        d.* += v[i];
    }
}
|
||||
|
||||
fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWord, rt: *Runtime) RuntimeError!void {
|
||||
const target_type = (try rt.results[target_type_id].getVariant()).Type;
|
||||
const dst = try rt.results[id].getValue();
|
||||
@@ -219,9 +232,18 @@ fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWo
|
||||
|
||||
if (bits == 32) { // More likely to be SIMD if f32
|
||||
switch (src.*) {
|
||||
.Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec),
|
||||
.Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec),
|
||||
.Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec),
|
||||
.Vector4f32 => |src_vec| {
|
||||
d_field.* = zm.length4(src_vec)[0];
|
||||
return;
|
||||
},
|
||||
.Vector3f32 => |src_vec| {
|
||||
d_field.* = zm.length3(zm.f32x4(src_vec[0], src_vec[1], src_vec[2], 0.0))[0];
|
||||
return;
|
||||
},
|
||||
.Vector2f32 => |src_vec| {
|
||||
d_field.* = zm.length2(zm.f32x4(src_vec[0], src_vec[1], 0.0, 0.0))[0];
|
||||
return;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
@@ -237,7 +259,6 @@ fn opLength(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: SpvWo
|
||||
const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
|
||||
sum += s_field.*;
|
||||
},
|
||||
.Vector4f32, .Vector3f32, .Vector2f32 => {},
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
|
||||
@@ -256,17 +277,31 @@ fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: Sp
|
||||
|
||||
switch (lane_bits) {
|
||||
inline 16, 32, 64 => |bits| {
|
||||
var sum: std.meta.Float(bits) = 0.0;
|
||||
|
||||
if (bits == 32) { // More likely to be SIMD if f32
|
||||
switch (src.*) {
|
||||
.Vector4f32 => |src_vec| sumSIMDVector(f32, 4, &sum, &src_vec),
|
||||
.Vector3f32 => |src_vec| sumSIMDVector(f32, 3, &sum, &src_vec),
|
||||
.Vector2f32 => |src_vec| sumSIMDVector(f32, 2, &sum, &src_vec),
|
||||
.Vector4f32 => |src_vec| {
|
||||
dst.Vector4f32 = zm.normalize4(src_vec);
|
||||
return;
|
||||
},
|
||||
.Vector3f32 => |src_vec| {
|
||||
const normed = zm.normalize3(zm.f32x4(src_vec[0], src_vec[1], src_vec[2], 0.0));
|
||||
dst.Vector3f32[0] = normed[0];
|
||||
dst.Vector3f32[1] = normed[1];
|
||||
dst.Vector3f32[2] = normed[2];
|
||||
return;
|
||||
},
|
||||
.Vector2f32 => |src_vec| {
|
||||
const normed = zm.normalize2(zm.f32x4(src_vec[0], src_vec[1], 0.0, 0.0));
|
||||
dst.Vector2f32[0] = normed[0];
|
||||
dst.Vector2f32[1] = normed[1];
|
||||
return;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
var sum: std.meta.Float(bits) = 0.0;
|
||||
|
||||
switch (src.*) {
|
||||
.Float => {
|
||||
const s_field = try getValuePrimitiveField(.Float, bits, src);
|
||||
@@ -276,34 +311,17 @@ fn opNormalize(_: std.mem.Allocator, target_type_id: SpvWord, id: SpvWord, _: Sp
|
||||
const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
|
||||
sum += s_field.*;
|
||||
},
|
||||
.Vector4f32, .Vector3f32, .Vector2f32 => {},
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
|
||||
sum = @sqrt(sum);
|
||||
|
||||
if (bits == 32) {
|
||||
switch (dst.*) {
|
||||
.Vector4f32 => |*dst_vec| inline for (0..4) |i| {
|
||||
dst_vec[i] = src.Vector4f32[i] / sum;
|
||||
},
|
||||
.Vector3f32 => |*dst_vec| inline for (0..3) |i| {
|
||||
dst_vec[i] = src.Vector3f32[i] / sum;
|
||||
},
|
||||
.Vector2f32 => |*dst_vec| inline for (0..2) |i| {
|
||||
dst_vec[i] = src.Vector2f32[i] / sum;
|
||||
},
|
||||
else => {},
|
||||
}
|
||||
}
|
||||
|
||||
switch (dst.*) {
|
||||
.Vector => |dst_vec| for (dst_vec, src.Vector) |*d_lane, *s_lane| {
|
||||
const d_field = try getValuePrimitiveField(.Float, bits, d_lane);
|
||||
const s_field = try getValuePrimitiveField(.Float, bits, s_lane);
|
||||
d_field.* = s_field.* / sum;
|
||||
},
|
||||
.Vector4f32, .Vector3f32, .Vector2f32 => {},
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
},
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
//! A small footprint SPIR-V interpreter with zero dependencies to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently.
|
||||
//! A small footprint SPIR-V interpreter to execute SPIR-V shaders on the CPU. It is designed to be used with multiple runtimes concurrently.
|
||||
//!
|
||||
//! ```zig
|
||||
//! const std = @import("std");
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
const std = @import("std");
|
||||
const spv = @import("spv.zig");
|
||||
const zm = @import("zmath");
|
||||
|
||||
const GLSL_std_450 = @import("GLSL_std_450/opcodes.zig");
|
||||
|
||||
@@ -1141,14 +1142,14 @@ fn opDot(_: std.mem.Allocator, _: SpvWord, rt: *Runtime) RuntimeError!void {
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
},
|
||||
.Vector4f32 => |*vec| inline for (0..4) |i| {
|
||||
value.Float.float32 += vec[i] * op2_value.Vector4f32[i];
|
||||
.Vector4f32 => |vec| value.Float.float32 = zm.dot4(vec, op2_value.Vector4f32)[0],
|
||||
.Vector3f32 => |vec| {
|
||||
const op2_vec = op2_value.Vector3f32;
|
||||
value.Float.float32 = zm.dot3(zm.f32x4(vec[0], vec[1], vec[2], 0.0), zm.f32x4(op2_vec[0], op2_vec[1], op2_vec[2], 0.0))[0];
|
||||
},
|
||||
.Vector3f32 => |*vec| inline for (0..3) |i| {
|
||||
value.Float.float32 += vec[i] * op2_value.Vector3f32[i];
|
||||
},
|
||||
.Vector2f32 => |*vec| inline for (0..2) |i| {
|
||||
value.Float.float32 += vec[i] * op2_value.Vector2f32[i];
|
||||
.Vector2f32 => |vec| {
|
||||
const op2_vec = op2_value.Vector2f32;
|
||||
value.Float.float32 = zm.dot2(zm.f32x4(vec[0], vec[1], 0.0, 0.0), zm.f32x4(op2_vec[0], op2_vec[1], 0.0, 0.0))[0];
|
||||
},
|
||||
else => return RuntimeError.InvalidSpirV,
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user