From 93a69d37e92f085ef0ddf770a0331f815b138a7a Mon Sep 17 00:00:00 2001
From: Kbz-8
Date: Sun, 2 Mar 2025 18:15:54 +0100
Subject: [PATCH] adding storage buffer management to webgpu

---
 Examples/WebGPU/main.c                      |   7 +-
 Sources/Backends/WebGPU/WebGPU.c            |   6 +
 Sources/Backends/WebGPU/WebGPU.h            |   1 +
 Sources/Backends/WebGPU/WebGPUBuffer.c      |  19 +-
 Sources/Backends/WebGPU/WebGPUCommandList.c |   1 -
 Sources/Backends/WebGPU/WebGPUComputePass.c | 173 +++++++++++++++
 Sources/Backends/WebGPU/WebGPUComputePass.h |   7 +
 .../Backends/WebGPU/WebGPUComputePipeline.c | 197 ++++++++++++++++--
 .../Backends/WebGPU/WebGPUComputePipeline.h |   6 +-
 Sources/Backends/WebGPU/WebGPUFence.c       |   1 +
 10 files changed, 391 insertions(+), 27 deletions(-)

diff --git a/Examples/WebGPU/main.c b/Examples/WebGPU/main.c
index d70ae03..6bb1ceb 100644
--- a/Examples/WebGPU/main.c
+++ b/Examples/WebGPU/main.c
@@ -22,9 +22,12 @@ void DebugCallBack(PulseDebugMessageSeverity severity, const char* message)
 #define BUFFER_SIZE (256 * sizeof(uint32_t))
 
 const char* wgsl_source = WGSL_SOURCE(
+	@group(1) @binding(0) var<storage, read_write> ssbo: array<i32>;
+
 	@compute @workgroup_size(32, 32, 1)
-	fn main(@builtin(global_invocation_id) grid: vec3u)
+	fn main(@builtin(global_invocation_id) grid: vec3<u32>)
 	{
+		ssbo[grid.x * grid.y] = i32(grid.x * grid.y);
 	}
 );
@@ -53,7 +56,7 @@ int main(void)
 
 	PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_GENERAL);
 	PulseComputePass pass = PulseBeginComputePass(cmd);
-	// PulseBindStorageBuffers(pass, &buffer, 1);
+	PulseBindStorageBuffers(pass, &buffer, 1);
 	PulseBindComputePipeline(pass, pipeline);
 	PulseDispatchComputations(pass, 32, 32, 1);
 	PulseEndComputePass(pass);
diff --git a/Sources/Backends/WebGPU/WebGPU.c b/Sources/Backends/WebGPU/WebGPU.c
index 4ff778b..bb7a487 100644
--- a/Sources/Backends/WebGPU/WebGPU.c
+++ b/Sources/Backends/WebGPU/WebGPU.c
@@ -8,6 +8,12 @@
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
 
+void WebGPUDeviceTick(PulseDevice device)
+{
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
+	wgpuQueueSubmit(webgpu_device->queue, 0, PULSE_NULLPTR); // Submitting nothing just to check for ongoing asynchronous operations and call their callbacks if needed
+}
+
 PulseBackendFlags WebGPUCheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used)
 {
 	if(candidates != PULSE_BACKEND_ANY && (candidates & PULSE_BACKEND_WEBGPU) == 0)
diff --git a/Sources/Backends/WebGPU/WebGPU.h b/Sources/Backends/WebGPU/WebGPU.h
index 35a1298..867b7cc 100644
--- a/Sources/Backends/WebGPU/WebGPU.h
+++ b/Sources/Backends/WebGPU/WebGPU.h
@@ -19,6 +19,7 @@ typedef struct WebGPUDriverData
 } WebGPUDriverData;
 
 PulseBackendFlags WebGPUCheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used); // Return PULSE_BACKEND_WEBGPU in case of success and PULSE_BACKEND_INVALID otherwise
+void WebGPUDeviceTick(PulseDevice device);
 
 #endif // PULSE_WEBGPU_H_
diff --git a/Sources/Backends/WebGPU/WebGPUBuffer.c b/Sources/Backends/WebGPU/WebGPUBuffer.c
index 4de3f41..9eeb3f2 100644
--- a/Sources/Backends/WebGPU/WebGPUBuffer.c
+++ b/Sources/Backends/WebGPU/WebGPUBuffer.c
@@ -9,8 +9,8 @@
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
-#include "webgpu.h"
 #include "WebGPUBuffer.h"
+#include "WebGPUCommandList.h"
 
 PulseBuffer WebGPUCreateBuffer(PulseDevice device, const PulseBufferCreateInfo* create_infos)
 {
@@ -58,13 +58,10 @@ PulseBuffer WebGPUCreateBuffer(PulseDevice device, const PulseBufferCreateInfo*
 	return buffer;
 }
 
-#include <stdio.h>
-
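+// Note: wgpuBufferMapAsync completes asynchronously; this callback only flips
+// an atomic flag that WebGPUMapBuffer polls while ticking the device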
 static void WebGPUMapBufferCallback(WGPUMapAsyncStatus status, WGPUStringView message, void* userdata1, void* userdata2)
 {
 	atomic_int* mapping_finished = (atomic_int*)userdata1;
 	PulseBuffer buffer = (PulseBuffer)userdata2;
-	puts("test");
 	if(status == WGPUMapAsyncStatus_Success)
 		atomic_store(mapping_finished, 1);
 	else
@@ -90,10 +87,7 @@ bool WebGPUMapBuffer(PulseBuffer buffer, PulseMapMode mode, void** data)
 	// https://toji.dev/webgpu-best-practices/buffer-uploads.html
 	if(mode == PULSE_MAP_WRITE)
 	{
-		if(webgpu_buffer->map == PULSE_NULLPTR)
-			webgpu_buffer->map = malloc(buffer->size);
-		else
-			webgpu_buffer->map = realloc(webgpu_buffer->map, buffer->size);
+		webgpu_buffer->map = malloc(buffer->size);
 		PULSE_CHECK_ALLOCATION_RETVAL(webgpu_buffer->map, false);
 	}
 	else
@@ -115,6 +109,7 @@ bool WebGPUMapBuffer(PulseBuffer buffer, PulseMapMode mode, void** data)
 	while(atomic_load(&mapping_finished) == 0)
 	{
+		WebGPUDeviceTick(buffer->device);
 		clock_t elapsed = clock() - start;
 		if(elapsed > timeout)
 		{
@@ -141,7 +136,10 @@ void WebGPUUnmapBuffer(PulseBuffer buffer)
 	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(buffer->device, WebGPUDevice*);
 	WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(buffer, WebGPUBuffer*);
 	if(webgpu_buffer->current_map_mode == PULSE_MAP_WRITE)
+	{
 		wgpuQueueWriteBuffer(webgpu_device->queue, webgpu_buffer->buffer, 0, webgpu_buffer->map, buffer->size);
+		free(webgpu_buffer->map);
+	}
 	else
 		wgpuBufferUnmap(webgpu_buffer->buffer);
 	webgpu_buffer->map = PULSE_NULLPTR;
@@ -149,6 +147,11 @@ bool WebGPUCopyBufferToBuffer(PulseCommandList cmd, const PulseBufferRegion* src
 {
+	WebGPUBuffer* webgpu_src_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(src->buffer, WebGPUBuffer*);
+	WebGPUBuffer* webgpu_dst_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(dst->buffer, WebGPUBuffer*);
+	WebGPUCommandList* webgpu_cmd = WEBGPU_RETRIEVE_DRIVER_DATA_AS(cmd, WebGPUCommandList*);
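+	// Copy only as many bytes as both regions can hold (the smaller of the two region sizes)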
+	wgpuCommandEncoderCopyBufferToBuffer(webgpu_cmd->encoder, webgpu_src_buffer->buffer, src->offset, webgpu_dst_buffer->buffer, dst->offset, (src->size < dst->size ? src->size : dst->size));
+	return true;
 }
 
 bool WebGPUCopyBufferToImage(PulseCommandList cmd, const PulseBufferRegion* src, const PulseImageRegion* dst)
diff --git a/Sources/Backends/WebGPU/WebGPUCommandList.c b/Sources/Backends/WebGPU/WebGPUCommandList.c
index 5a2b057..c06c22b 100644
--- a/Sources/Backends/WebGPU/WebGPUCommandList.c
+++ b/Sources/Backends/WebGPU/WebGPUCommandList.c
@@ -43,7 +43,6 @@ PulseCommandList WebGPURequestCommandList(PulseDevice device, PulseCommandListUs
 
 static void WebGPUFenceCallback(WGPUQueueWorkDoneStatus status, void* userdata1, void* userdata2)
 {
-	PULSE_UNUSED(userdata2);
 	WebGPUFence* webgpu_fence = (WebGPUFence*)userdata1;
 	PulseCommandList cmd = (PulseCommandList)userdata2;
 	if(status == WGPUQueueWorkDoneStatus_Success)
diff --git a/Sources/Backends/WebGPU/WebGPUComputePass.c b/Sources/Backends/WebGPU/WebGPUComputePass.c
index c60bd6a..59f2cc2 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePass.c
+++ b/Sources/Backends/WebGPU/WebGPUComputePass.c
@@ -2,6 +2,8 @@
 // This file is part of "Pulse"
 // For conditions of distribution and use, see copyright notice in LICENSE
 
+#include <string.h>
+
 #include <stdlib.h>
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
@@ -45,10 +47,48 @@ void WebGPUEndComputePass(PulseComputePass pass)
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
 	wgpuComputePassEncoderEnd(webgpu_pass->encoder);
 	wgpuComputePassEncoderRelease(webgpu_pass->encoder);
+
+	if(webgpu_pass->read_only_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->read_only_bind_group);
+	if(webgpu_pass->read_write_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->read_write_bind_group);
+	if(webgpu_pass->uniform_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->uniform_bind_group);
+	webgpu_pass->read_only_bind_group = PULSE_NULLPTR;
+	webgpu_pass->read_write_bind_group = PULSE_NULLPTR;
+	webgpu_pass->uniform_bind_group = PULSE_NULLPTR;
 }
 
 void WebGPUBindStorageBuffers(PulseComputePass pass, const PulseBuffer* buffers, uint32_t num_buffers)
 {
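+	// Pulse splits storage buffers across two WebGPU bind groups: group 0
+	// holds read-only resources and group 1 holds read-write ones, so a single
+	// call may only mix buffers of one kind (see WebGPUBindBindGroups below)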
+	PulseBufferUsageFlags usage = buffers[0]->usage;
+	bool is_readwrite = (usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0;
+	PulseBuffer* array = is_readwrite ? pass->readwrite_storage_buffers : pass->readonly_storage_buffers;
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+
+	for(uint32_t i = 0; i < num_buffers; i++)
+	{
+		if(is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) == 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-only buffer with read-write buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+		else if(!is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-write buffer with read-only buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+
+		if(array[i] == buffers[i])
+			continue;
+		array[i] = buffers[i];
+
+		if(is_readwrite)
+			webgpu_pass->should_recreate_write_bind_group = true;
+		else
+			webgpu_pass->should_recreate_read_only_bind_group = true;
+	}
 }
 
 void WebGPUBindUniformData(PulseComputePass pass, uint32_t slot, const void* data, uint32_t data_size)
@@ -57,6 +97,134 @@ void WebGPUBindUniformData(PulseComputePass pass, uint32_t slot, const void* dat
 
 void WebGPUBindStorageImages(PulseComputePass pass, const PulseImage* images, uint32_t num_images)
 {
+	PulseImageUsageFlags usage = images[0]->usage;
+	bool is_readwrite = (usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) != 0;
+	PulseImage* array = is_readwrite ? pass->readwrite_images : pass->readonly_images;
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+
+	for(uint32_t i = 0; i < num_images; i++)
+	{
+		if(is_readwrite && (images[i]->usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) == 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-only image with read-write images");
+			PulseSetInternalError(PULSE_ERROR_INVALID_IMAGE_USAGE);
+			return;
+		}
+		else if(!is_readwrite && (images[i]->usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) != 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-write image with read-only images");
+			PulseSetInternalError(PULSE_ERROR_INVALID_IMAGE_USAGE);
+			return;
+		}
+
+		if(array[i] == images[i])
+			continue;
+		array[i] = images[i];
+
+		if(is_readwrite)
+			webgpu_pass->should_recreate_write_bind_group = true;
+		else
+			webgpu_pass->should_recreate_read_only_bind_group = true;
+	}
 }
+
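+// Lazily (re)creates and binds the bind groups right before a dispatch, and
+// only those whose bound resources changed since the last dispatch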
+static void WebGPUBindBindGroups(PulseComputePass pass)
+{
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->cmd->device, WebGPUDevice*);
+	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->current_pipeline, WebGPUComputePipeline*);
+
+	if(!webgpu_pass->should_recreate_read_only_bind_group && !webgpu_pass->should_recreate_write_bind_group && !webgpu_pass->should_recreate_uniform_bind_group)
+		return;
+
+	WGPUBindGroupEntry* read_only_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_readonly_storage_images + pass->current_pipeline->num_readonly_storage_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(read_only_entries);
+	WGPUBindGroupEntry* read_write_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_readwrite_storage_images + pass->current_pipeline->num_readwrite_storage_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(read_write_entries);
+	WGPUBindGroupEntry* uniform_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_uniform_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(uniform_entries);
+
+	if(webgpu_pass->should_recreate_read_only_bind_group && webgpu_pipeline->readonly_group != PULSE_NULLPTR)
+	{
+		uint32_t entry_index = 0;
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readonly_storage_images; i++, entry_index++)
+		{
+			WGPUBindGroupEntry* entry = &read_only_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+		}
+
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readonly_storage_buffers; i++, entry_index++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->readonly_storage_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &read_only_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = pass->current_pipeline->num_readonly_storage_images + i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->readonly_storage_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->readonly_group;
+		descriptor.entryCount = pass->current_pipeline->num_readonly_storage_images + pass->current_pipeline->num_readonly_storage_buffers;
+		descriptor.entries = read_only_entries;
+		if(webgpu_pass->read_only_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->read_only_bind_group);
+		webgpu_pass->read_only_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 0, webgpu_pass->read_only_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_read_only_bind_group = false;
+	}
+	if(webgpu_pass->should_recreate_write_bind_group && webgpu_pipeline->readwrite_group != PULSE_NULLPTR)
+	{
+		uint32_t entry_index = 0;
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_images; i++, entry_index++)
+		{
+			WGPUBindGroupEntry* entry = &read_write_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+		}
+
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_buffers; i++, entry_index++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->readwrite_storage_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &read_write_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = pass->current_pipeline->num_readwrite_storage_images + i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->readwrite_storage_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->readwrite_group;
+		descriptor.entryCount = pass->current_pipeline->num_readwrite_storage_images + pass->current_pipeline->num_readwrite_storage_buffers;
+		descriptor.entries = read_write_entries;
+		if(webgpu_pass->read_write_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->read_write_bind_group);
+		webgpu_pass->read_write_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 1, webgpu_pass->read_write_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_write_bind_group = false;
+	}
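+	// Uniform buffers land in bind group 2, one buffer per binding slot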
+	if(webgpu_pass->should_recreate_uniform_bind_group && webgpu_pipeline->uniform_group != PULSE_NULLPTR)
+	{
+		for(uint32_t i = 0; i < pass->current_pipeline->num_uniform_buffers; i++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->uniform_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &uniform_entries[i];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->uniform_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->uniform_group;
+		descriptor.entryCount = pass->current_pipeline->num_uniform_buffers;
+		descriptor.entries = uniform_entries;
+		if(webgpu_pass->uniform_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->uniform_bind_group);
+		webgpu_pass->uniform_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 2, webgpu_pass->uniform_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_uniform_bind_group = false;
+	}
+
+	free(read_only_entries);
+	free(read_write_entries);
+	free(uniform_entries);
+}
 
 void WebGPUBindComputePipeline(PulseComputePass pass, PulseComputePipeline pipeline)
@@ -64,10 +232,15 @@ void WebGPUBindComputePipeline(PulseComputePass pass, PulseComputePipel
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
 	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pipeline, WebGPUComputePipeline*);
 	wgpuComputePassEncoderSetPipeline(webgpu_pass->encoder, webgpu_pipeline->pipeline);
+
+	webgpu_pass->should_recreate_read_only_bind_group = true;
+	webgpu_pass->should_recreate_write_bind_group = true;
+	webgpu_pass->should_recreate_uniform_bind_group = true;
 }
 
 void WebGPUDispatchComputations(PulseComputePass pass, uint32_t groupcount_x, uint32_t groupcount_y, uint32_t groupcount_z)
 {
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+	WebGPUBindBindGroups(pass);
 	wgpuComputePassEncoderDispatchWorkgroups(webgpu_pass->encoder, groupcount_x, groupcount_y, groupcount_z);
 }
diff --git a/Sources/Backends/WebGPU/WebGPUComputePass.h b/Sources/Backends/WebGPU/WebGPUComputePass.h
index 20c2512..48fea54 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePass.h
+++ b/Sources/Backends/WebGPU/WebGPUComputePass.h
@@ -16,6 +16,13 @@ typedef struct WebGPUComputePass
 {
 	WGPUComputePassEncoder encoder;
+	WGPUBindGroup read_only_bind_group;
+	WGPUBindGroup read_write_bind_group;
+	WGPUBindGroup uniform_bind_group;
+
+	bool should_recreate_read_only_bind_group;
+	bool should_recreate_write_bind_group;
+	bool should_recreate_uniform_bind_group;
 } WebGPUComputePass;
 
 PulseComputePass WebGPUCreateComputePass(PulseDevice device, PulseCommandList cmd);
diff --git a/Sources/Backends/WebGPU/WebGPUComputePipeline.c b/Sources/Backends/WebGPU/WebGPUComputePipeline.c
index d4db46a..bb2a7ff 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePipeline.c
+++ b/Sources/Backends/WebGPU/WebGPUComputePipeline.c
@@ -6,8 +6,162 @@
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
+#include "webgpu.h"
 #include "WebGPUComputePipeline.h"
 
+static WGPUBindGroupLayout WebGPUCreateBindGroupLayout(PulseDevice device,
+                                                       uint32_t read_storage_images_count,
+                                                       uint32_t read_storage_buffers_count,
+                                                       uint32_t write_storage_images_count,
+                                                       uint32_t write_storage_buffers_count,
+                                                       uint32_t uniform_buffers_count)
+{
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
+
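+	// Builds the layout for exactly one of the three Pulse bind groups:
+	// category 1 = read-only resources (group 0), category 2 = read-write
+	// resources (group 1), category 3 = uniform buffers (group 2)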
+	uint8_t category;
+	if(uniform_buffers_count != 0)
+		category = 3;
+	else if(write_storage_images_count != 0 || write_storage_buffers_count != 0)
+		category = 2;
+	else
+		category = 1;
+
+	uint32_t count = 0;
+	WGPUBindGroupLayoutEntry entries[PULSE_MAX_READ_BUFFERS_BOUND + PULSE_MAX_READ_TEXTURES_BOUND + PULSE_MAX_WRITE_BUFFERS_BOUND + PULSE_MAX_WRITE_TEXTURES_BOUND + PULSE_MAX_UNIFORM_BUFFERS_BOUND] = { 0 };
+
+	if(category == 1)
+	{
+		for(uint32_t i = 0; i < read_storage_images_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_BindingNotUsed;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+
+		for(uint32_t i = read_storage_images_count; i < read_storage_images_count + read_storage_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+	else if(category == 2)
+	{
+		for(uint32_t i = 0; i < write_storage_images_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_BindingNotUsed;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+
+		for(uint32_t i = write_storage_images_count; i < write_storage_images_count + write_storage_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_Storage;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+	else if(category == 3)
+	{
+		for(uint32_t i = 0; i < uniform_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_Uniform;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+
+	WGPUBindGroupLayoutDescriptor descriptor = { 0 };
+	descriptor.entryCount = count;
+	descriptor.entries = entries;
+	return wgpuDeviceCreateBindGroupLayout(webgpu_device->device, &descriptor);
+}
+
 PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info)
 {
 	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
@@ -30,27 +184,36 @@ PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const Pulse
 		PulseLogError(device->backend, "invalid shader format passed to PulseComputePipelineCreateInfo");
 	}
 
-	WGPUStringView code = { 0 };
-	code.length = info->code_size;
-	code.data = (const char*)info->code;
-	WGPUChainedStruct chain = { 0 };
-	chain.next = PULSE_NULLPTR;
-	chain.sType = WGPUSType_ShaderSourceWGSL;
 	WGPUShaderSourceWGSL source = { 0 };
-	source.chain = chain;
-	source.code = code;
+	source.chain.next = PULSE_NULLPTR;
+	source.chain.sType = WGPUSType_ShaderSourceWGSL;
+	source.code.length = info->code_size;
+	source.code.data = (const char*)info->code;
+
 	WGPUShaderModuleDescriptor shader_descriptor = { 0 };
 	shader_descriptor.nextInChain = (const WGPUChainedStruct*)&source;
 	webgpu_pipeline->shader = wgpuDeviceCreateShaderModule(webgpu_device->device, &shader_descriptor);
 
-	WGPUStringView entrypoint = { 0 };
-	entrypoint.length = WGPU_STRLEN;
-	entrypoint.data = info->entrypoint;
-	WGPUProgrammableStageDescriptor state = { 0 };
-	state.module = webgpu_pipeline->shader;
-	state.entryPoint = entrypoint;
+	webgpu_pipeline->readonly_group = WebGPUCreateBindGroupLayout(device, info->num_readonly_storage_images, info->num_readonly_storage_buffers, 0, 0, 0);
+	webgpu_pipeline->readwrite_group = WebGPUCreateBindGroupLayout(device, 0, 0, info->num_readwrite_storage_images, info->num_readwrite_storage_buffers, 0);
+	webgpu_pipeline->uniform_group = WebGPUCreateBindGroupLayout(device, 0, 0, 0, 0, info->num_uniform_buffers);
+
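+	// Group order must match the indices used when binding: 0 = read-only,
+	// 1 = read-write, 2 = uniforms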
+	WGPUBindGroupLayout bind_group_layouts[3] = {
+		webgpu_pipeline->readonly_group,
+		webgpu_pipeline->readwrite_group,
+		webgpu_pipeline->uniform_group,
+	};
+
+	WGPUPipelineLayoutDescriptor layout_descriptor = { 0 };
+	layout_descriptor.bindGroupLayoutCount = 3;
+	layout_descriptor.bindGroupLayouts = bind_group_layouts;
+	webgpu_pipeline->layout = wgpuDeviceCreatePipelineLayout(webgpu_device->device, &layout_descriptor);
+
 	WGPUComputePipelineDescriptor pipeline_descriptor = { 0 };
-	pipeline_descriptor.compute = state;
+	pipeline_descriptor.compute.module = webgpu_pipeline->shader;
+	pipeline_descriptor.compute.entryPoint.length = WGPU_STRLEN;
+	pipeline_descriptor.compute.entryPoint.data = info->entrypoint;
+	pipeline_descriptor.layout = webgpu_pipeline->layout;
 	webgpu_pipeline->pipeline = wgpuDeviceCreateComputePipeline(webgpu_device->device, &pipeline_descriptor);
 
 	if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend))
@@ -70,6 +233,10 @@ void WebGPUDestroyComputePipeline(PulseDevice device, PulseComputePipeline pipel
 	PULSE_UNUSED(device);
 	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pipeline, WebGPUComputePipeline*);
 
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->readonly_group);
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->readwrite_group);
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->uniform_group);
+	wgpuPipelineLayoutRelease(webgpu_pipeline->layout);
 	wgpuComputePipelineRelease(webgpu_pipeline->pipeline);
 	wgpuShaderModuleRelease(webgpu_pipeline->shader);
 	free(webgpu_pipeline);
diff --git a/Sources/Backends/WebGPU/WebGPUComputePipeline.h b/Sources/Backends/WebGPU/WebGPUComputePipeline.h
index 85cc295..da80fed 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePipeline.h
+++ b/Sources/Backends/WebGPU/WebGPUComputePipeline.h
@@ -13,8 +13,12 @@
 
 typedef struct WebGPUComputePipeline
 {
-	WGPUComputePipeline pipeline;
 	WGPUShaderModule shader;
+	WGPUPipelineLayout layout;
+	WGPUComputePipeline pipeline;
+	WGPUBindGroupLayout readonly_group;
+	WGPUBindGroupLayout readwrite_group;
+	WGPUBindGroupLayout uniform_group;
 } WebGPUComputePipeline;
 
 PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info);
diff --git a/Sources/Backends/WebGPU/WebGPUFence.c b/Sources/Backends/WebGPU/WebGPUFence.c
index dc33005..f24ef9f 100644
--- a/Sources/Backends/WebGPU/WebGPUFence.c
+++ b/Sources/Backends/WebGPU/WebGPUFence.c
@@ -46,6 +46,7 @@ bool WebGPUWaitForFences(PulseDevice device, const PulseFence* fences, uint32_t
 	uint32_t fences_to_wait = fences_count;
 	while(fences_to_wait != 0)
 	{
+		WebGPUDeviceTick(device);
 		for(uint32_t i = 0; i < fences_count; i++)
 		{
 			if(WebGPUIsFenceReady(device, fences[i]))