From 8fd62b365b9189f700266bc9d761be69022b44be Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Tue, 4 Mar 2025 17:42:10 +0100 Subject: [PATCH] software --- Examples/Software/main.c | 32 ++++++++ Examples/Software/shader.nzsl | 6 +- Examples/Vulkan/main.c | 2 +- Examples/Vulkan/shader.nzsl | 2 +- Examples/WebGPU/main.c | 4 +- Sources/Backends/Software/SoftBuffer.c | 5 +- Sources/Backends/Software/SoftBuffer.h | 2 +- Sources/Backends/Software/SoftCommandList.c | 76 ++++++++++++++++--- Sources/Backends/Software/SoftCommandList.h | 19 +---- Sources/Backends/Software/SoftComputePass.c | 12 +++ .../Backends/Software/SoftComputePipeline.c | 43 +++++++++++ .../Backends/Software/SoftComputePipeline.h | 4 + Sources/Backends/Software/SoftDevice.c | 2 + Sources/Backends/Software/SoftDevice.h | 5 +- Sources/Backends/Software/SoftEnums.h | 4 - .../Backends/WebGPU/WebGPUComputePipeline.c | 1 - 16 files changed, 173 insertions(+), 46 deletions(-) diff --git a/Examples/Software/main.c b/Examples/Software/main.c index ada2add..57e85cd 100644 --- a/Examples/Software/main.c +++ b/Examples/Software/main.c @@ -29,6 +29,38 @@ int main(void) buffer_create_info.usage = PULSE_BUFFER_USAGE_STORAGE_READ | PULSE_BUFFER_USAGE_STORAGE_WRITE | PULSE_BUFFER_USAGE_TRANSFER_DOWNLOAD; PulseBuffer buffer = PulseCreateBuffer(device, &buffer_create_info); + // GPU computations + { + const uint8_t shader_bytecode[] = { + #include "shader.spv.h" + }; + + PulseComputePipelineCreateInfo info = { 0 }; + info.code_size = sizeof(shader_bytecode); + info.code = shader_bytecode; + info.entrypoint = "main"; + info.format = PULSE_SHADER_FORMAT_SPIRV_BIT; + info.num_readwrite_storage_buffers = 1; + PulseComputePipeline pipeline = PulseCreateComputePipeline(device, &info); + + PulseFence fence = PulseCreateFence(device); + PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_GENERAL); + + PulseComputePass pass = PulseBeginComputePass(cmd); + //PulseBindStorageBuffers(pass, &buffer, 1); + PulseBindComputePipeline(pass, pipeline); + PulseDispatchComputations(pass, 16, 1, 1); + PulseEndComputePass(pass); + + PulseSubmitCommandList(device, cmd, fence); + PulseWaitForFences(device, &fence, 1, true); + + PulseReleaseCommandList(device, cmd); + PulseDestroyFence(device, fence); + PulseDestroyComputePipeline(device, pipeline); + } + + // Get result and read it on CPU { PulseBufferCreateInfo staging_buffer_create_info = { 0 }; diff --git a/Examples/Software/shader.nzsl b/Examples/Software/shader.nzsl index 7fd42e7..a6ad83d 100644 --- a/Examples/Software/shader.nzsl +++ b/Examples/Software/shader.nzsl @@ -14,12 +14,12 @@ struct SSBO external { - [set(1), binding(0)] ssbo: storage[SSBO], + //[set(1), binding(0)] ssbo: storage[SSBO], } [entry(compute)] -[workgroup(32, 32, 1)] +[workgroup(16, 16, 1)] fn main(input: Input) { - ssbo.data[input.indices.x * input.indices.y] = i32(input.indices.x * input.indices.y); + //ssbo.data[input.indices.x * input.indices.y] = i32(input.indices.x * input.indices.y); } diff --git a/Examples/Vulkan/main.c b/Examples/Vulkan/main.c index 97204d2..757376d 100644 --- a/Examples/Vulkan/main.c +++ b/Examples/Vulkan/main.c @@ -49,7 +49,7 @@ int main(void) PulseComputePass pass = PulseBeginComputePass(cmd); PulseBindStorageBuffers(pass, &buffer, 1); PulseBindComputePipeline(pass, pipeline); - PulseDispatchComputations(pass, 32, 32, 1); + PulseDispatchComputations(pass, 16, 1, 1); PulseEndComputePass(pass); PulseSubmitCommandList(device, cmd, fence); diff --git a/Examples/Vulkan/shader.nzsl b/Examples/Vulkan/shader.nzsl index 7fd42e7..682eec6 100644 --- a/Examples/Vulkan/shader.nzsl +++ b/Examples/Vulkan/shader.nzsl @@ -18,7 +18,7 @@ external } [entry(compute)] -[workgroup(32, 32, 1)] +[workgroup(16, 16, 1)] fn main(input: Input) { ssbo.data[input.indices.x * input.indices.y] = i32(input.indices.x * input.indices.y); diff --git a/Examples/WebGPU/main.c b/Examples/WebGPU/main.c index 6bb1ceb..45bb3a1 100644 --- a/Examples/WebGPU/main.c +++ b/Examples/WebGPU/main.c @@ -24,7 +24,7 @@ void DebugCallBack(PulseDebugMessageSeverity severity, const char* message) const char* wgsl_source = WGSL_SOURCE( @group(1) @binding(0) var ssbo: array; - @compute @workgroup_size(32, 32, 1) + @compute @workgroup_size(16, 16, 1) fn main(@builtin(global_invocation_id) grid: vec3) { ssbo[grid.x * grid.y] = i32(grid.x * grid.y); @@ -58,7 +58,7 @@ int main(void) PulseComputePass pass = PulseBeginComputePass(cmd); PulseBindStorageBuffers(pass, &buffer, 1); PulseBindComputePipeline(pass, pipeline); - PulseDispatchComputations(pass, 32, 32, 1); + PulseDispatchComputations(pass, 16, 1, 1); PulseEndComputePass(pass); PulseSubmitCommandList(device, cmd, fence); diff --git a/Sources/Backends/Software/SoftBuffer.c b/Sources/Backends/Software/SoftBuffer.c index 457286c..0f3bc54 100644 --- a/Sources/Backends/Software/SoftBuffer.c +++ b/Sources/Backends/Software/SoftBuffer.c @@ -15,7 +15,7 @@ PulseBuffer SoftCreateBuffer(PulseDevice device, const PulseBufferCreateInfo* cr PulseBuffer buffer = (PulseBuffer)calloc(1, sizeof(PulseBufferHandler)); PULSE_CHECK_ALLOCATION_RETVAL(buffer, PULSE_NULL_HANDLE); - SoftBuffer* soft_buffer = (SoftBuffer*)calloc(1, sizeof(SoftBuffer) + _Alignof(SoftBuffer) + create_infos->size); + SoftBuffer* soft_buffer = (SoftBuffer*)calloc(1, sizeof(SoftBuffer)); PULSE_CHECK_ALLOCATION_RETVAL(soft_buffer, PULSE_NULL_HANDLE); buffer->device = device; @@ -23,7 +23,7 @@ PulseBuffer SoftCreateBuffer(PulseDevice device, const PulseBufferCreateInfo* cr buffer->size = create_infos->size; buffer->usage = create_infos->usage; - soft_buffer->buffer = soft_buffer + sizeof(SoftBuffer) + _Alignof(SoftBuffer); + soft_buffer->buffer = (uint8_t*)malloc(create_infos->size); return buffer; } @@ -85,6 +85,7 @@ void SoftDestroyBuffer(PulseDevice device, PulseBuffer buffer) { PULSE_UNUSED(device); SoftBuffer* soft_buffer = SOFT_RETRIEVE_DRIVER_DATA_AS(buffer, SoftBuffer*); + free(soft_buffer->buffer); free(soft_buffer); free(buffer); } diff --git a/Sources/Backends/Software/SoftBuffer.h b/Sources/Backends/Software/SoftBuffer.h index 5fb3ebe..14573d4 100644 --- a/Sources/Backends/Software/SoftBuffer.h +++ b/Sources/Backends/Software/SoftBuffer.h @@ -13,7 +13,7 @@ typedef struct SoftBuffer { - void* buffer; + uint8_t* buffer; void* map; PulseMapMode current_map_mode; } SoftBuffer; diff --git a/Sources/Backends/Software/SoftCommandList.c b/Sources/Backends/Software/SoftCommandList.c index eb1ab4b..a1d30e6 100644 --- a/Sources/Backends/Software/SoftCommandList.c +++ b/Sources/Backends/Software/SoftCommandList.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include "../../PulseInternal.h" @@ -13,8 +14,11 @@ #include "SoftDevice.h" #include "SoftCommandList.h" #include "SoftComputePass.h" +#include "SoftComputePipeline.h" #include "SoftBuffer.h" +#include + static void SoftCommandCopyBufferToBuffer(SoftCommand* cmd) { const PulseBufferRegion* src = cmd->CopyBufferToBuffer.src; @@ -22,8 +26,59 @@ static void SoftCommandCopyBufferToBuffer(SoftCommand* cmd) SoftBuffer* src_buffer = SOFT_RETRIEVE_DRIVER_DATA_AS(src->buffer, SoftBuffer*); SoftBuffer* dst_buffer = SOFT_RETRIEVE_DRIVER_DATA_AS(dst->buffer, SoftBuffer*); memcpy(dst_buffer->buffer + dst->offset, src_buffer->buffer + src->offset, (src->size < dst->size ? src->size : dst->size)); - //free((void*)src); - //free((void*)dst); + free((void*)src); + free((void*)dst); +} + +static int SoftCommandDispatchCore(void* arg) +{ + SoftComputePipeline* soft_pipeline = (SoftComputePipeline*)arg; + spvm_state_t state = spvm_state_create(soft_pipeline->program); + spvm_ext_opcode_func* glsl_ext_data = spvm_build_glsl450_ext(); + spvm_result_t glsl_std_450 = spvm_state_get_result(state, "GLSL.std.450"); + if(glsl_std_450) + glsl_std_450->extension = glsl_ext_data; + spvm_word main = spvm_state_get_result_location(state, (spvm_string)soft_pipeline->entry_point); + spvm_state_prepare(state, main); + spvm_state_call_function(state); + spvm_state_delete(state); + return 0; +} + +static void SoftCommandDispatch(SoftCommand* cmd) +{ + SoftComputePipeline* soft_pipeline = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd->Dispatch.pipeline, SoftComputePipeline*); + uint32_t local_size = soft_pipeline->program->local_size_x * soft_pipeline->program->local_size_y * soft_pipeline->program->local_size_z; + uint32_t invocations_count = cmd->Dispatch.groupcount_x * cmd->Dispatch.groupcount_y * cmd->Dispatch.groupcount_z * local_size; + thrd_t* invocations = (thrd_t*)malloc(invocations_count * sizeof(thrd_t)); + PULSE_CHECK_PTR(invocations); + + printf("test2 %d %d\n", invocations_count, local_size); + uint32_t invocation_index = 0; + for(uint32_t z = 0; z < cmd->Dispatch.groupcount_z; z++) + { + for(uint32_t y = 0; y < cmd->Dispatch.groupcount_y; y++) + { + for(uint32_t x = 0; x < cmd->Dispatch.groupcount_x; x++) + { + for(uint32_t i = 0; i < local_size; i++) + { + printf("\r%d", invocation_index); + thrd_create(&invocations[invocation_index], SoftCommandDispatchCore, soft_pipeline); + invocation_index++; + } + } + } + } + printf("\ntest %d %d %d\n", invocation_index, invocations_count, local_size); + for(uint32_t i = 0; i < invocations_count; i++) + { + printf("test %d\n", i); + int res; + thrd_join(invocations[i], &res); + PULSE_UNUSED(res); + } + free(invocations); } static int SoftCommandsRunner(void* arg) @@ -39,15 +94,10 @@ static int SoftCommandsRunner(void* arg) SoftCommand* command = &soft_cmd->commands[i]; switch(command->type) { - case SOFT_COMMAND_BIND_COMPUTE_PIPELINE: break; - case SOFT_COMMAND_BIND_STORAGE_BUFFERS: break; - case SOFT_COMMAND_BIND_STORAGE_IMAGES: break; - case SOFT_COMMAND_BIND_UNIFORM_BUFFERS: break; - case SOFT_COMMAND_BLIT_IMAGES: break; case SOFT_COMMAND_COPY_BUFFER_TO_BUFFER: SoftCommandCopyBufferToBuffer(command); break; case SOFT_COMMAND_COPY_BUFFER_TO_IMAGE: break; case SOFT_COMMAND_COPY_IMAGE_TO_BUFFER: break; - case SOFT_COMMAND_DISPATCH: break; + case SOFT_COMMAND_DISPATCH: SoftCommandDispatch(command); break; case SOFT_COMMAND_DISPATCH_INDIRECT: break; default: break; @@ -100,15 +150,19 @@ bool SoftSubmitCommandList(PulseDevice device, PulseCommandList cmd, PulseFence PULSE_UNUSED(device); SoftCommandList* soft_cmd = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd, SoftCommandList*); cmd->state = PULSE_COMMAND_LIST_STATE_SENT; - soft_cmd->fence = fence; + if(fence != PULSE_NULL_HANDLE) + { + SoftFence* soft_fence = SOFT_RETRIEVE_DRIVER_DATA_AS(fence, SoftFence*); + soft_cmd->fence = fence; + fence->cmd = cmd; + atomic_store(&soft_fence->signal, false); + } return thrd_create(&soft_cmd->thread, SoftCommandsRunner, cmd) == thrd_success; } -#include void SoftReleaseCommandList(PulseDevice device, PulseCommandList cmd) { SoftCommandList* soft_cmd = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd, SoftCommandList*); - printf("%p, %p, %p\n", cmd, soft_cmd, cmd->pass); SoftDestroyComputePass(device, cmd->pass); free(soft_cmd); free(cmd); diff --git a/Sources/Backends/Software/SoftCommandList.h b/Sources/Backends/Software/SoftCommandList.h index f757a8e..065ca35 100644 --- a/Sources/Backends/Software/SoftCommandList.h +++ b/Sources/Backends/Software/SoftCommandList.h @@ -20,23 +20,6 @@ typedef struct SoftCommand SoftCommandType type; union { - struct - { - PulseComputePipeline pipeline; - } BindComputePipeline; - - struct - { - } BindStorageBuffers; - - struct - { - } BindStorageImages; - - struct - { - } BindUniformBuffers; - struct { const PulseImageRegion* src; @@ -63,6 +46,7 @@ typedef struct SoftCommand struct { + PulseComputePipeline pipeline; uint32_t groupcount_x; uint32_t groupcount_y; uint32_t groupcount_z; @@ -70,6 +54,7 @@ typedef struct SoftCommand struct { + PulseComputePipeline pipeline; PulseBuffer buffer; uint32_t offset; } DispatchIndirect; diff --git a/Sources/Backends/Software/SoftComputePass.c b/Sources/Backends/Software/SoftComputePass.c index 6034125..002f8e7 100644 --- a/Sources/Backends/Software/SoftComputePass.c +++ b/Sources/Backends/Software/SoftComputePass.c @@ -6,6 +6,7 @@ #include "../../PulseInternal.h" #include "Soft.h" #include "SoftComputePass.h" +#include "SoftCommandList.h" PulseComputePass SoftCreateComputePass(PulseDevice device, PulseCommandList cmd) { @@ -31,10 +32,12 @@ void SoftDestroyComputePass(PulseDevice device, PulseComputePass pass) PulseComputePass SoftBeginComputePass(PulseCommandList cmd) { + return cmd->pass; } void SoftEndComputePass(PulseComputePass pass) { + PULSE_UNUSED(pass); } void SoftBindStorageBuffers(PulseComputePass pass, const PulseBuffer* buffers, uint32_t num_buffers) @@ -51,8 +54,17 @@ void SoftBindStorageImages(PulseComputePass pass, const PulseImage* images, uint void SoftBindComputePipeline(PulseComputePass pass, PulseComputePipeline pipeline) { + PULSE_UNUSED(pass); + PULSE_UNUSED(pipeline); } void SoftDispatchComputations(PulseComputePass pass, uint32_t groupcount_x, uint32_t groupcount_y, uint32_t groupcount_z) { + SoftCommand command = { 0 }; + command.type = SOFT_COMMAND_DISPATCH; + command.Dispatch.groupcount_x = groupcount_x; + command.Dispatch.groupcount_y = groupcount_y; + command.Dispatch.groupcount_z = groupcount_z; + command.Dispatch.pipeline = pass->current_pipeline; + SoftQueueCommand(pass->cmd, command); } diff --git a/Sources/Backends/Software/SoftComputePipeline.c b/Sources/Backends/Software/SoftComputePipeline.c index cbd3065..6aa6c54 100644 --- a/Sources/Backends/Software/SoftComputePipeline.c +++ b/Sources/Backends/Software/SoftComputePipeline.c @@ -2,15 +2,58 @@ // This file is part of "Pulse" // For conditions of distribution and use, see copyright notice in LICENSE +#include + #include #include "../../PulseInternal.h" #include "Soft.h" +#include "SoftDevice.h" #include "SoftComputePipeline.h" PulseComputePipeline SoftCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info) { + SoftDevice* soft_device = SOFT_RETRIEVE_DRIVER_DATA_AS(device, SoftDevice*); + + PulseComputePipelineHandler* pipeline = (PulseComputePipelineHandler*)calloc(1, sizeof(PulseComputePipelineHandler)); + PULSE_CHECK_ALLOCATION_RETVAL(pipeline, PULSE_NULL_HANDLE); + + SoftComputePipeline* soft_pipeline = (SoftComputePipeline*)calloc(1, sizeof(SoftComputePipeline)); + PULSE_CHECK_ALLOCATION_RETVAL(soft_pipeline, PULSE_NULL_HANDLE); + + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(device->backend)) + { + if(info->code == PULSE_NULLPTR) + PulseLogError(device->backend, "invalid code pointer passed to PulseComputePipelineCreateInfo"); + if(info->entrypoint == PULSE_NULLPTR) + PulseLogError(device->backend, "invalid entrypoint pointer passed to PulseComputePipelineCreateInfo"); + if(info->format == PULSE_SHADER_FORMAT_SPIRV_BIT && (device->backend->supported_shader_formats & PULSE_SHADER_FORMAT_SPIRV_BIT) == 0) + PulseLogError(device->backend, "invalid shader format passed to PulseComputePipelineCreateInfo"); + } + + soft_pipeline->program = spvm_program_create(soft_device->spv_context, (spvm_source)info->code, info->code_size / sizeof(spvm_word)); + soft_pipeline->entry_point = calloc(1, strlen(info->entrypoint)); + strcpy((char*)soft_pipeline->entry_point, info->entrypoint); + + pipeline->driver_data = soft_pipeline; + + if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) + PulseLogInfoFmt(device->backend, "(Soft) created new compute pipeline %p", pipeline); + return pipeline; } void SoftDestroyComputePipeline(PulseDevice device, PulseComputePipeline pipeline) { + if(pipeline == PULSE_NULL_HANDLE) + { + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(device->backend)) + PulseLogWarning(device->backend, "compute pipeline is NULL, this may be a bug in your application"); + return; + } + PULSE_UNUSED(device); + SoftComputePipeline* soft_pipeline = SOFT_RETRIEVE_DRIVER_DATA_AS(pipeline, SoftComputePipeline*); + spvm_program_delete(soft_pipeline->program); + free(soft_pipeline); + if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) + PulseLogInfoFmt(device->backend, "(Soft) destroyed compute pipeline %p", pipeline); + free(pipeline); } diff --git a/Sources/Backends/Software/SoftComputePipeline.h b/Sources/Backends/Software/SoftComputePipeline.h index 40f5f3b..c57d001 100644 --- a/Sources/Backends/Software/SoftComputePipeline.h +++ b/Sources/Backends/Software/SoftComputePipeline.h @@ -10,9 +10,13 @@ #define PULSE_SOFTWARE_COMPUTE_PIPELINE_H_ #include "Soft.h" +#include +#include typedef struct SoftComputePipeline { + spvm_program_t program; + const char* entry_point; } SoftComputePipeline; PulseComputePipeline SoftCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info); diff --git a/Sources/Backends/Software/SoftDevice.c b/Sources/Backends/Software/SoftDevice.c index e182066..723b89c 100644 --- a/Sources/Backends/Software/SoftDevice.c +++ b/Sources/Backends/Software/SoftDevice.c @@ -29,6 +29,7 @@ PulseDevice SoftCreateDevice(PulseBackend backend, PulseDevice* forbiden_devices PULSE_CHECK_ALLOCATION_RETVAL(device, PULSE_NULL_HANDLE); device->device = cpuinfo_get_current_processor(); + device->spv_context = spvm_context_initialize(); pulse_device->driver_data = device; pulse_device->backend = backend; @@ -44,6 +45,7 @@ void SoftDestroyDevice(PulseDevice device) SoftDevice* soft_device = SOFT_RETRIEVE_DRIVER_DATA_AS(device, SoftDevice*); if(soft_device == PULSE_NULLPTR) return; + spvm_context_deinitialize(soft_device->spv_context); if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) PulseLogInfoFmt(device->backend, "(Soft) destroyed device created from %s", soft_device->device->package->name); free(soft_device); diff --git a/Sources/Backends/Software/SoftDevice.h b/Sources/Backends/Software/SoftDevice.h index f4f08ac..0f24662 100644 --- a/Sources/Backends/Software/SoftDevice.h +++ b/Sources/Backends/Software/SoftDevice.h @@ -10,15 +10,14 @@ #define PULSE_SOFTWARE_DEVICE_H_ #include +#include #include "Soft.h" typedef struct SoftDevice { const struct cpuinfo_processor* device; - PulseCommandList* available_command_lists; - uint32_t available_command_lists_capacity; - uint32_t available_command_lists_size; + spvm_context_t spv_context; } SoftDevice; PulseDevice SoftCreateDevice(PulseBackend backend, PulseDevice* forbiden_devices, uint32_t forbiden_devices_count); diff --git a/Sources/Backends/Software/SoftEnums.h b/Sources/Backends/Software/SoftEnums.h index c1ed127..af33a33 100644 --- a/Sources/Backends/Software/SoftEnums.h +++ b/Sources/Backends/Software/SoftEnums.h @@ -10,10 +10,6 @@ typedef enum SoftCommandType { SOFT_COMMAND_NONE = 0, - SOFT_COMMAND_BIND_COMPUTE_PIPELINE, - SOFT_COMMAND_BIND_STORAGE_BUFFERS, - SOFT_COMMAND_BIND_STORAGE_IMAGES, - SOFT_COMMAND_BIND_UNIFORM_BUFFERS, SOFT_COMMAND_BLIT_IMAGES, SOFT_COMMAND_COPY_BUFFER_TO_BUFFER, SOFT_COMMAND_COPY_BUFFER_TO_IMAGE, diff --git a/Sources/Backends/WebGPU/WebGPUComputePipeline.c b/Sources/Backends/WebGPU/WebGPUComputePipeline.c index bb2a7ff..c0cf706 100644 --- a/Sources/Backends/WebGPU/WebGPUComputePipeline.c +++ b/Sources/Backends/WebGPU/WebGPUComputePipeline.c @@ -218,7 +218,6 @@ PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const Pulse if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) PulseLogInfoFmt(device->backend, "(WebGPU) created new compute pipeline %p", pipeline); - return pipeline; }