From ae758c34cd22838e106385ebb9d0dbcd38f97115 Mon Sep 17 00:00:00 2001 From: Kbz-8 Date: Sat, 5 Apr 2025 21:52:53 +0200 Subject: [PATCH] yess --- Examples/Software/main.c | 2 +- Examples/Software/shader.nzsl | 8 +- Sources/Backends/Software/SoftCommandList.c | 46 +++++--- Sources/Backends/Software/SoftCommandList.h | 16 +++ Sources/Backends/Software/SoftComputePass.c | 29 ++++- Sources/Backends/Software/SoftComputePass.h | 5 - .../Backends/Software/SoftComputePipeline.c | 109 +++++++++++++++++- .../Backends/Software/SoftComputePipeline.h | 13 +++ Sources/Backends/Software/SoftFence.c | 4 + Sources/PulseDevice.c | 1 - Tests/Image.c | 1 - xmake.lua | 2 +- 12 files changed, 205 insertions(+), 31 deletions(-) diff --git a/Examples/Software/main.c b/Examples/Software/main.c index 57e85cd..97b021d 100644 --- a/Examples/Software/main.c +++ b/Examples/Software/main.c @@ -47,7 +47,7 @@ int main(void) PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_GENERAL); PulseComputePass pass = PulseBeginComputePass(cmd); - //PulseBindStorageBuffers(pass, &buffer, 1); + PulseBindStorageBuffers(pass, &buffer, 1); PulseBindComputePipeline(pass, pipeline); PulseDispatchComputations(pass, 16, 1, 1); PulseEndComputePass(pass); diff --git a/Examples/Software/shader.nzsl b/Examples/Software/shader.nzsl index a6ad83d..548318e 100644 --- a/Examples/Software/shader.nzsl +++ b/Examples/Software/shader.nzsl @@ -3,23 +3,23 @@ module; struct Input { - [builtin(global_invocation_indices)] indices: vec3[u32] + [builtin(global_invocation_indices)] indices: vec3[u32] } [layout(std430)] struct SSBO { - data: dyn_array[i32] + data: dyn_array[i32] } external { - //[set(1), binding(0)] ssbo: storage[SSBO], + [set(1), binding(0)] ssbo: storage[SSBO], } [entry(compute)] [workgroup(16, 16, 1)] fn main(input: Input) { - //ssbo.data[input.indices.x * input.indices.y] = i32(input.indices.x * input.indices.y); + ssbo.data[input.indices.x * input.indices.y] = i32(input.indices.x * input.indices.y); } diff --git a/Sources/Backends/Software/SoftCommandList.c b/Sources/Backends/Software/SoftCommandList.c index 3879f0d..c70e63c 100644 --- a/Sources/Backends/Software/SoftCommandList.c +++ b/Sources/Backends/Software/SoftCommandList.c @@ -17,8 +17,6 @@ #include "SoftComputePipeline.h" #include "SoftBuffer.h" -#include - static void SoftCommandCopyBufferToBuffer(SoftCommand* cmd) { const PulseBufferRegion* src = cmd->CopyBufferToBuffer.src; @@ -32,16 +30,26 @@ static void SoftCommandCopyBufferToBuffer(SoftCommand* cmd) static int SoftCommandDispatchCore(void* arg) { - SoftComputePipeline* soft_pipeline = (SoftComputePipeline*)arg; - spvm_state_t state = spvm_state_create(soft_pipeline->program); + SoftCommand* cmd = (SoftCommand*)arg; + SoftComputePipeline* soft_pipeline = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd->Dispatch.pipeline, SoftComputePipeline*); + + mtx_lock(&cmd->Dispatch.dispatch_mutex); + spvm_state_t state = spvm_state_create(soft_pipeline->program); + mtx_unlock(&cmd->Dispatch.dispatch_mutex); + spvm_ext_opcode_func* glsl_ext_data = spvm_build_glsl450_ext(); spvm_result_t glsl_std_450 = spvm_state_get_result(state, "GLSL.std.450"); if(glsl_std_450) glsl_std_450->extension = glsl_ext_data; spvm_word main = spvm_state_get_result_location(state, (spvm_string)soft_pipeline->entry_point); + + spvm_word mem_count = 0; + spvm_member_t local_invocation_id = spvm_state_get_builtin(state, SpvBuiltInLocalInvocationId, &mem_count); + spvm_state_prepare(state, main); spvm_state_call_function(state); spvm_state_delete(state); + atomic_fetch_sub(&cmd->cmd_list->commands_running, 1); return 0; } @@ -62,15 +70,13 @@ static void SoftCommandDispatch(SoftCommand* cmd) { for(uint32_t i = 0; i < local_size; i++) { - thrd_create(&invocations[invocation_index], SoftCommandDispatchCore, soft_pipeline); - //thrd_join(invocations[invocation_index], NULL); + atomic_fetch_add(&cmd->cmd_list->commands_running, 1); + thrd_create(&invocations[invocation_index], SoftCommandDispatchCore, cmd); invocation_index++; } } } } - for(uint32_t i = 0; i < invocations_count; i++) - thrd_join(invocations[i], PULSE_NULLPTR); free(invocations); } @@ -97,11 +103,8 @@ static int SoftCommandsRunner(void* arg) } } - if(soft_cmd->fence != PULSE_NULL_HANDLE) - { - SoftFence* fence = SOFT_RETRIEVE_DRIVER_DATA_AS(soft_cmd->fence, SoftFence*); - atomic_store(&fence->signal, true); - } + atomic_fetch_sub(&soft_cmd->commands_running, 1); // Remove fence safety + cmd->state = PULSE_COMMAND_LIST_STATE_READY; return 0; } @@ -126,6 +129,7 @@ PulseCommandList SoftRequestCommandList(PulseDevice device, PulseCommandListUsag cmd->pass = SoftCreateComputePass(device, cmd); cmd->state = PULSE_COMMAND_LIST_STATE_RECORDING; cmd->is_available = false; + atomic_store(&soft_cmd->commands_running, 0); return cmd; } @@ -133,6 +137,7 @@ PulseCommandList SoftRequestCommandList(PulseDevice device, PulseCommandListUsag void SoftQueueCommand(PulseCommandList cmd, SoftCommand command) { SoftCommandList* soft_cmd = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd, SoftCommandList*); + command.cmd_list = soft_cmd; PULSE_EXPAND_ARRAY_IF_NEEDED(soft_cmd->commands, SoftCommand, soft_cmd->commands_count, soft_cmd->commands_capacity, 8); soft_cmd->commands[soft_cmd->commands_count] = command; soft_cmd->commands_count++; @@ -150,6 +155,7 @@ bool SoftSubmitCommandList(PulseDevice device, PulseCommandList cmd, PulseFence fence->cmd = cmd; atomic_store(&soft_fence->signal, false); } + atomic_fetch_add(&soft_cmd->commands_running, 1); // Fence safety to avoid fence being signaled before first command being sumitted return thrd_create(&soft_cmd->thread, SoftCommandsRunner, cmd) == thrd_success; } @@ -157,6 +163,20 @@ void SoftReleaseCommandList(PulseDevice device, PulseCommandList cmd) { SoftCommandList* soft_cmd = SOFT_RETRIEVE_DRIVER_DATA_AS(cmd, SoftCommandList*); SoftDestroyComputePass(device, cmd->pass); + + for(uint32_t i = 0; i < soft_cmd->commands_count; i++) + { + SoftCommand* command = &soft_cmd->commands[i]; + switch(command->type) + { + // Lock/Unlock to make sure the mutex is not in use + case SOFT_COMMAND_DISPATCH: mtx_lock(&command->Dispatch.dispatch_mutex); mtx_unlock(&command->Dispatch.dispatch_mutex); break; + case SOFT_COMMAND_DISPATCH_INDIRECT: mtx_lock(&command->DispatchIndirect.dispatch_mutex); mtx_unlock(&command->DispatchIndirect.dispatch_mutex); break; + + default: break; + } + } + free(soft_cmd->commands); free(soft_cmd); free(cmd); } diff --git a/Sources/Backends/Software/SoftCommandList.h b/Sources/Backends/Software/SoftCommandList.h index 065ca35..a62c882 100644 --- a/Sources/Backends/Software/SoftCommandList.h +++ b/Sources/Backends/Software/SoftCommandList.h @@ -9,6 +9,7 @@ #ifndef PULSE_SOFTWARE_COMMAND_LIST_H_ #define PULSE_SOFTWARE_COMMAND_LIST_H_ +#include #include #include "Soft.h" @@ -17,6 +18,7 @@ typedef struct SoftCommand { + struct SoftCommandList* cmd_list; SoftCommandType type; union { @@ -50,6 +52,7 @@ typedef struct SoftCommand uint32_t groupcount_x; uint32_t groupcount_y; uint32_t groupcount_z; + mtx_t dispatch_mutex; } Dispatch; struct @@ -57,8 +60,20 @@ typedef struct SoftCommand PulseComputePipeline pipeline; PulseBuffer buffer; uint32_t offset; + mtx_t dispatch_mutex; } DispatchIndirect; }; + union + { + struct + { + uint32_t global_invocation_id[3]; + uint32_t local_invocation_id[3]; + uint32_t workgroup_count[3]; + uint32_t workgroup_index[3]; + uint32_t local_invocation_index; + } Dipsatch; + } Private; } SoftCommand; typedef struct SoftCommandList @@ -68,6 +83,7 @@ typedef struct SoftCommandList SoftCommand* commands; uint32_t commands_count; uint32_t commands_capacity; + atomic_ullong commands_running; } SoftCommandList; PulseCommandList SoftRequestCommandList(PulseDevice device, PulseCommandListUsage usage); diff --git a/Sources/Backends/Software/SoftComputePass.c b/Sources/Backends/Software/SoftComputePass.c index 002f8e7..c59194c 100644 --- a/Sources/Backends/Software/SoftComputePass.c +++ b/Sources/Backends/Software/SoftComputePass.c @@ -14,11 +14,7 @@ PulseComputePass SoftCreateComputePass(PulseDevice device, PulseCommandList cmd) PulseComputePass pass = (PulseComputePass)calloc(1, sizeof(PulseComputePassHandler)); PULSE_CHECK_ALLOCATION_RETVAL(pass, PULSE_NULL_HANDLE); - SoftComputePass* soft_pass = (SoftComputePass*)calloc(1, sizeof(SoftComputePass)); - PULSE_CHECK_ALLOCATION_RETVAL(soft_pass, PULSE_NULL_HANDLE); - pass->cmd = cmd; - pass->driver_data = soft_pass; return pass; } @@ -42,6 +38,30 @@ void SoftEndComputePass(PulseComputePass pass) void SoftBindStorageBuffers(PulseComputePass pass, const PulseBuffer* buffers, uint32_t num_buffers) { + PulseBufferUsageFlags usage = buffers[0]->usage; + bool is_readwrite = (usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0; + PulseBuffer* array = is_readwrite ? pass->readwrite_storage_buffers : pass->readonly_storage_buffers; + + for(uint32_t i = 0; i < num_buffers; i++) + { + if(is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) == 0) + { + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend)) + PulseLogError(pass->cmd->device->backend, "cannot bind a read only buffer with read-write buffers"); + PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE); + return; + } + else if(!is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0) + { + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend)) + PulseLogError(pass->cmd->device->backend, "cannot bind a read-write buffer with read only buffers"); + PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE); + return; + } + if(array[i] == buffers[i]) + continue; + array[i] = buffers[i]; + } } void SoftBindUniformData(PulseComputePass pass, uint32_t slot, const void* data, uint32_t data_size) @@ -66,5 +86,6 @@ void SoftDispatchComputations(PulseComputePass pass, uint32_t groupcount_x, uint command.Dispatch.groupcount_y = groupcount_y; command.Dispatch.groupcount_z = groupcount_z; command.Dispatch.pipeline = pass->current_pipeline; + mtx_init(&command.Dispatch.dispatch_mutex, mtx_plain); SoftQueueCommand(pass->cmd, command); } diff --git a/Sources/Backends/Software/SoftComputePass.h b/Sources/Backends/Software/SoftComputePass.h index 39a56bf..cdd7b5e 100644 --- a/Sources/Backends/Software/SoftComputePass.h +++ b/Sources/Backends/Software/SoftComputePass.h @@ -11,11 +11,6 @@ #include "Soft.h" -typedef struct SoftComputePass -{ - int dummy; -} SoftComputePass; - PulseComputePass SoftCreateComputePass(PulseDevice device, PulseCommandList cmd); void SoftDestroyComputePass(PulseDevice device, PulseComputePass pass); diff --git a/Sources/Backends/Software/SoftComputePipeline.c b/Sources/Backends/Software/SoftComputePipeline.c index f008022..741da1d 100644 --- a/Sources/Backends/Software/SoftComputePipeline.c +++ b/Sources/Backends/Software/SoftComputePipeline.c @@ -10,6 +10,105 @@ #include "SoftDevice.h" #include "SoftComputePipeline.h" +void SoftAllocateWorkgroupMemory(struct spvm_state* state, spvm_word result_id, spvm_word type_id) +{ + SoftComputePipeline* pipeline = (SoftComputePipeline*)state->owner->user_data; + mtx_lock(&pipeline->workgroup_memory_allocations_mutex); + spvm_result_t result = &state->results[result_id]; + PULSE_EXPAND_ARRAY_IF_NEEDED(pipeline->workgroup_memory_allocations, SoftSharedMemoryEntry, pipeline->workgroup_memory_allocations_size, pipeline->workgroup_memory_allocations_capacity, 16); + SoftSharedMemoryEntry* entry = &pipeline->workgroup_memory_allocations[pipeline->workgroup_memory_allocations_size]; + entry->destination = &state->results[result_id]; + entry->slot = result_id; + memcpy(&entry->data, entry->destination, sizeof(spvm_result)); + spvm_result_allocate_typed_value(result, state->results, type_id); + pipeline->workgroup_memory_allocations_size++; + mtx_unlock(&pipeline->workgroup_memory_allocations_mutex); +} +/* +void SoftWriteWorkgroupMemory(struct spvm_state* state, spvm_word result_id, spvm_word val_id) +{ + spvm_result_t ptr = &state->results[result_id]; + if(ptr->source_location != PULSE_NULLPTR) + { + spvm_word word_count = ptr->source_word_count; + spvm_source code = ptr->source_location; + + spvm_word var_type = SPVM_READ_WORD(code); + spvm_word id = SPVM_READ_WORD(code); + spvm_word memory_id = SPVM_READ_WORD(code); + + spvm_result_t sharedData = PULSE_NULLPTR; + + ed::DebugInformation* dbgr = (ed::DebugInformation*)state->owner->user_data; + + for(int i = 0; i < dbgr->SharedMemory.size(); i++) + { + if(dbgr->SharedMemory[i].Slot == memory_id) + { + sharedData = &dbgr->SharedMemory[i].Data; + break; + } + } + + if(sharedData == PULSE_NULLPTR) + return; + + spvm_word index_count = word_count - 4; + + spvm_word index_id = SPVM_READ_WORD(code); + spvm_word index = state->results[index_id].members[0].value.s; + + spvm_member_t result = sharedData->members + MIN(index, sharedData->member_count - 1); + + while(index_count) + { + index_id = SPVM_READ_WORD(code); + index = state->results[index_id].members[0].value.s; + + result = result->members + MIN(index, result->member_count - 1); + + index_count--; + } + + spvm_member* members = PULSE_NULLPTR; + spvm_word member_count = 0; + if(result->member_count != 0) + { + member_count = result->member_count; + members = result->members; + } + else + { + member_count = 1; + members = result; + } + spvm_member_memcpy(members, state->results[val_id].members, member_count); + } +} + +void SoftControlBarrier(struct spvm_state* state, spvm_word exec, spvm_word mem, spvm_word sem) +{ + ed::DebugInformation* dbgr = (ed::DebugInformation*)state->owner->user_data; + + // copy memory + for(int i = 0; i < dbgr->SharedMemory.size(); i++) + { + const ed::DebugInformation::SharedMemoryEntry& entry = dbgr->SharedMemory[i]; + spvm_member_memcpy(entry.Data.members, entry.Destination->members, entry.Data.member_count); + } + + // synchronize threads + dbgr->SyncWorkgroup(); + + // copy memory + for(int i = 0; i < dbgr->SharedMemory.size(); i++) + { + const ed::DebugInformation::SharedMemoryEntry& entry = dbgr->SharedMemory[i]; + spvm_member_memcpy(entry.Destination->members, entry.Data.members, entry.Destination->member_count); + } +} +*/ + PulseComputePipeline SoftCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info) { SoftDevice* soft_device = SOFT_RETRIEVE_DRIVER_DATA_AS(device, SoftDevice*); @@ -32,12 +131,18 @@ PulseComputePipeline SoftCreateComputePipeline(PulseDevice device, const PulseCo soft_pipeline->program = spvm_program_create(soft_device->spv_context, (spvm_source)info->code, info->code_size / sizeof(spvm_word)); soft_pipeline->entry_point = calloc(1, strlen(info->entrypoint)); + PULSE_CHECK_ALLOCATION_RETVAL(soft_pipeline->entry_point, PULSE_NULL_HANDLE); strcpy((char*)soft_pipeline->entry_point, info->entrypoint); + soft_pipeline->program->user_data = soft_pipeline; + soft_pipeline->program->allocate_workgroup_memory = SoftAllocateWorkgroupMemory; + // Create dummy state to retrieve informations from the spirv spvm_state_t state = spvm_state_create(soft_pipeline->program); spvm_state_delete(state); + mtx_init(&soft_pipeline->workgroup_memory_allocations_mutex, mtx_plain); + pipeline->driver_data = soft_pipeline; if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) @@ -56,7 +161,9 @@ void SoftDestroyComputePipeline(PulseDevice device, PulseComputePipeline pipelin PULSE_UNUSED(device); SoftComputePipeline* soft_pipeline = SOFT_RETRIEVE_DRIVER_DATA_AS(pipeline, SoftComputePipeline*); spvm_program_delete(soft_pipeline->program); - free(soft_pipeline->entry_point); + mtx_destroy(&soft_pipeline->workgroup_memory_allocations_mutex); + free(soft_pipeline->workgroup_memory_allocations); + free((void*)soft_pipeline->entry_point); free(soft_pipeline); if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) PulseLogInfoFmt(device->backend, "(Soft) destroyed compute pipeline %p", pipeline); diff --git a/Sources/Backends/Software/SoftComputePipeline.h b/Sources/Backends/Software/SoftComputePipeline.h index c57d001..b8e33cf 100644 --- a/Sources/Backends/Software/SoftComputePipeline.h +++ b/Sources/Backends/Software/SoftComputePipeline.h @@ -9,14 +9,27 @@ #ifndef PULSE_SOFTWARE_COMPUTE_PIPELINE_H_ #define PULSE_SOFTWARE_COMPUTE_PIPELINE_H_ +#include + #include "Soft.h" #include #include +typedef struct SoftSharedMemoryEntry +{ + spvm_result data; + spvm_result_t destination; + spvm_word slot; +} SoftSharedMemoryEntry; + typedef struct SoftComputePipeline { spvm_program_t program; const char* entry_point; + SoftSharedMemoryEntry* workgroup_memory_allocations; + uint32_t workgroup_memory_allocations_size; + uint32_t workgroup_memory_allocations_capacity; + mtx_t workgroup_memory_allocations_mutex; } SoftComputePipeline; PulseComputePipeline SoftCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info); diff --git a/Sources/Backends/Software/SoftFence.c b/Sources/Backends/Software/SoftFence.c index 77512e2..68899df 100644 --- a/Sources/Backends/Software/SoftFence.c +++ b/Sources/Backends/Software/SoftFence.c @@ -6,6 +6,7 @@ #include "../../PulseInternal.h" #include "Soft.h" #include "SoftFence.h" +#include "SoftCommandList.h" PulseFence SoftCreateFence(PulseDevice device) { @@ -35,6 +36,9 @@ bool SoftIsFenceReady(PulseDevice device, PulseFence fence) { PULSE_UNUSED(device); SoftFence* soft_fence = SOFT_RETRIEVE_DRIVER_DATA_AS(fence, SoftFence*); + SoftCommandList* soft_cmd = SOFT_RETRIEVE_DRIVER_DATA_AS(fence->cmd, SoftCommandList*); + if(atomic_load(&soft_cmd->commands_running) == 0) + atomic_store(&soft_fence->signal, true); return atomic_load(&soft_fence->signal); } diff --git a/Sources/PulseDevice.c b/Sources/PulseDevice.c index 787f5f8..27c2b50 100644 --- a/Sources/PulseDevice.c +++ b/Sources/PulseDevice.c @@ -25,7 +25,6 @@ PULSE_API void PulseDestroyDevice(PulseDevice device) free(device->allocated_buffers); free(device->allocated_images); device->PFN_DestroyDevice(device); - device->driver_data = PULSE_NULLPTR; } PULSE_API PulseBackendBits PulseGetBackendInUseByDevice(PulseDevice device) diff --git a/Tests/Image.c b/Tests/Image.c index cdfbbc5..d3974c8 100644 --- a/Tests/Image.c +++ b/Tests/Image.c @@ -39,7 +39,6 @@ void TestImageCreation() * This test may crash some Nouveau NVK drivers (wtf ???). * It seems to be comming exclusively from 3D read-only images */ - if(false) { PulseImageCreateInfo image_create_info = { 0 }; image_create_info.type = PULSE_IMAGE_TYPE_3D; diff --git a/xmake.lua b/xmake.lua index 2c55a69..bff8a10 100644 --- a/xmake.lua +++ b/xmake.lua @@ -33,7 +33,7 @@ local backends = { Software = { option = "software", default = true, - packages = { "spirv-vm", "cpuinfo" } + packages = { "spirv-vm", "cpuinfo", "spirv-reflect" } }, OpenGL = { option = "opengl",