From 93a69d37e92f085ef0ddf770a0331f815b138a7a Mon Sep 17 00:00:00 2001
From: Kbz-8
Date: Sun, 2 Mar 2025 18:15:54 +0100
Subject: [PATCH] adding storage buffer management to webgpu

---
 Examples/WebGPU/main.c                      |   7 +-
 Sources/Backends/WebGPU/WebGPU.c            |   6 +
 Sources/Backends/WebGPU/WebGPU.h            |   1 +
 Sources/Backends/WebGPU/WebGPUBuffer.c      |  19 +-
 Sources/Backends/WebGPU/WebGPUCommandList.c |   1 -
 Sources/Backends/WebGPU/WebGPUComputePass.c | 173 +++++++++++++++
 Sources/Backends/WebGPU/WebGPUComputePass.h |   7 +
 .../Backends/WebGPU/WebGPUComputePipeline.c | 197 ++++++++++++++++--
 .../Backends/WebGPU/WebGPUComputePipeline.h |   6 +-
 Sources/Backends/WebGPU/WebGPUFence.c       |   1 +
 10 files changed, 391 insertions(+), 27 deletions(-)

diff --git a/Examples/WebGPU/main.c b/Examples/WebGPU/main.c
index d70ae03..6bb1ceb 100644
--- a/Examples/WebGPU/main.c
+++ b/Examples/WebGPU/main.c
@@ -22,9 +22,12 @@ void DebugCallBack(PulseDebugMessageSeverity severity, const char* message)
 #define BUFFER_SIZE (256 * sizeof(uint32_t))
 
 const char* wgsl_source = WGSL_SOURCE(
+	@group(1) @binding(0) var<storage, read_write> ssbo: array<i32>;
+
 	@compute @workgroup_size(32, 32, 1)
-	fn main(@builtin(global_invocation_id) grid: vec3u)
+	fn main(@builtin(global_invocation_id) grid: vec3<u32>)
 	{
+		ssbo[grid.x * grid.y] = i32(grid.x * grid.y);
 	}
 );
@@ -53,7 +56,7 @@ int main(void)
 
 	PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_GENERAL);
 	PulseComputePass pass = PulseBeginComputePass(cmd);
-	// PulseBindStorageBuffers(pass, &buffer, 1);
+	PulseBindStorageBuffers(pass, &buffer, 1);
 	PulseBindComputePipeline(pass, pipeline);
 	PulseDispatchComputations(pass, 32, 32, 1);
 	PulseEndComputePass(pass);
diff --git a/Sources/Backends/WebGPU/WebGPU.c b/Sources/Backends/WebGPU/WebGPU.c
index 4ff778b..bb7a487 100644
--- a/Sources/Backends/WebGPU/WebGPU.c
+++ b/Sources/Backends/WebGPU/WebGPU.c
@@ -8,6 +8,12 @@
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
 
+void WebGPUDeviceTick(PulseDevice device)
+{
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
+	wgpuQueueSubmit(webgpu_device->queue, 0, PULSE_NULLPTR); // Submitting nothing just to check for ongoing asynchronous operations and call their callbacks if needed
+}
+
 PulseBackendFlags WebGPUCheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used)
 {
 	if(candidates != PULSE_BACKEND_ANY && (candidates & PULSE_BACKEND_WEBGPU) == 0)
diff --git a/Sources/Backends/WebGPU/WebGPU.h b/Sources/Backends/WebGPU/WebGPU.h
index 35a1298..867b7cc 100644
--- a/Sources/Backends/WebGPU/WebGPU.h
+++ b/Sources/Backends/WebGPU/WebGPU.h
@@ -19,6 +19,7 @@ typedef struct WebGPUDriverData
 } WebGPUDriverData;
 
 PulseBackendFlags WebGPUCheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used); // Return PULSE_BACKEND_WEBGPU in case of success and PULSE_BACKEND_INVALID otherwise
+void WebGPUDeviceTick(PulseDevice device);
 
 #endif // PULSE_WEBGPU_H_
diff --git a/Sources/Backends/WebGPU/WebGPUBuffer.c b/Sources/Backends/WebGPU/WebGPUBuffer.c
index 4de3f41..9eeb3f2 100644
--- a/Sources/Backends/WebGPU/WebGPUBuffer.c
+++ b/Sources/Backends/WebGPU/WebGPUBuffer.c
@@ -9,8 +9,8 @@
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
-#include "webgpu.h"
 #include "WebGPUBuffer.h"
+#include "WebGPUCommandList.h"
 
 PulseBuffer WebGPUCreateBuffer(PulseDevice device, const PulseBufferCreateInfo* create_infos)
 {
@@ -58,13 +58,10 @@ PulseBuffer WebGPUCreateBuffer(PulseDevice device, const PulseBufferCreateInfo*
 	return buffer;
 }
 
-#include <stdio.h>
-
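+// Note: wgpuBufferMapAsync completes asynchronously; this callback only flips
+// an atomic flag that WebGPUMapBuffer polls while ticking the device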
 static void WebGPUMapBufferCallback(WGPUMapAsyncStatus status, WGPUStringView message, void* userdata1, void* userdata2)
 {
 	atomic_int* mapping_finished = (atomic_int*)userdata1;
 	PulseBuffer buffer = (PulseBuffer)userdata2;
-	puts("test");
 	if(status == WGPUMapAsyncStatus_Success)
 		atomic_store(mapping_finished, 1);
 	else
@@ -90,10 +87,7 @@ bool WebGPUMapBuffer(PulseBuffer buffer, PulseMapMode mode, void** data)
 	// https://toji.dev/webgpu-best-practices/buffer-uploads.html
 	if(mode == PULSE_MAP_WRITE)
 	{
-		if(webgpu_buffer->map == PULSE_NULLPTR)
-			webgpu_buffer->map = malloc(buffer->size);
-		else
-			webgpu_buffer->map = realloc(webgpu_buffer->map, buffer->size);
+		webgpu_buffer->map = malloc(buffer->size);
 		PULSE_CHECK_ALLOCATION_RETVAL(webgpu_buffer->map, false);
 	}
 	else
@@ -115,6 +109,7 @@ bool WebGPUMapBuffer(PulseBuffer buffer, PulseMapMode mode, void** data)
 	while(atomic_load(&mapping_finished) == 0)
 	{
+		WebGPUDeviceTick(buffer->device);
 		clock_t elapsed = clock() - start;
 		if(elapsed > timeout)
 		{
@@ -141,7 +136,10 @@ void WebGPUUnmapBuffer(PulseBuffer buffer)
 	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(buffer->device, WebGPUDevice*);
 	WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(buffer, WebGPUBuffer*);
 	if(webgpu_buffer->current_map_mode == PULSE_MAP_WRITE)
+	{
 		wgpuQueueWriteBuffer(webgpu_device->queue, webgpu_buffer->buffer, 0, webgpu_buffer->map, buffer->size);
+		free(webgpu_buffer->map);
+	}
 	else
 		wgpuBufferUnmap(webgpu_buffer->buffer);
 	webgpu_buffer->map = PULSE_NULLPTR;
@@ -149,6 +147,11 @@ bool WebGPUCopyBufferToBuffer(PulseCommandList cmd, const PulseBufferRegion* src
 {
+	WebGPUBuffer* webgpu_src_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(src->buffer, WebGPUBuffer*);
+	WebGPUBuffer* webgpu_dst_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(dst->buffer, WebGPUBuffer*);
+	WebGPUCommandList* webgpu_cmd = WEBGPU_RETRIEVE_DRIVER_DATA_AS(cmd, WebGPUCommandList*);
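+	// Copy only as many bytes as both regions can hold (the smaller of the two region sizes)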
+	wgpuCommandEncoderCopyBufferToBuffer(webgpu_cmd->encoder, webgpu_src_buffer->buffer, src->offset, webgpu_dst_buffer->buffer, dst->offset, (src->size < dst->size ? src->size : dst->size));
+	return true;
 }
 
 bool WebGPUCopyBufferToImage(PulseCommandList cmd, const PulseBufferRegion* src, const PulseImageRegion* dst)
diff --git a/Sources/Backends/WebGPU/WebGPUCommandList.c b/Sources/Backends/WebGPU/WebGPUCommandList.c
index 5a2b057..c06c22b 100644
--- a/Sources/Backends/WebGPU/WebGPUCommandList.c
+++ b/Sources/Backends/WebGPU/WebGPUCommandList.c
@@ -43,7 +43,6 @@ PulseCommandList WebGPURequestCommandList(PulseDevice device, PulseCommandListUs
 
 static void WebGPUFenceCallback(WGPUQueueWorkDoneStatus status, void* userdata1, void* userdata2)
 {
-	PULSE_UNUSED(userdata2);
 	WebGPUFence* webgpu_fence = (WebGPUFence*)userdata1;
 	PulseCommandList cmd = (PulseCommandList)userdata2;
 	if(status == WGPUQueueWorkDoneStatus_Success)
diff --git a/Sources/Backends/WebGPU/WebGPUComputePass.c b/Sources/Backends/WebGPU/WebGPUComputePass.c
index c60bd6a..59f2cc2 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePass.c
+++ b/Sources/Backends/WebGPU/WebGPUComputePass.c
@@ -2,6 +2,8 @@
 // This file is part of "Pulse"
 // For conditions of distribution and use, see copyright notice in LICENSE
 
+#include <string.h>
+
 #include <stdlib.h>
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
@@ -45,10 +47,48 @@ void WebGPUEndComputePass(PulseComputePass pass)
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
 	wgpuComputePassEncoderEnd(webgpu_pass->encoder);
 	wgpuComputePassEncoderRelease(webgpu_pass->encoder);
+
+	if(webgpu_pass->read_only_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->read_only_bind_group);
+	if(webgpu_pass->read_write_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->read_write_bind_group);
+	if(webgpu_pass->uniform_bind_group != PULSE_NULLPTR)
+		wgpuBindGroupRelease(webgpu_pass->uniform_bind_group);
+	webgpu_pass->read_only_bind_group = PULSE_NULLPTR;
+	webgpu_pass->read_write_bind_group = PULSE_NULLPTR;
+	webgpu_pass->uniform_bind_group = PULSE_NULLPTR;
 }
 
 void WebGPUBindStorageBuffers(PulseComputePass pass, const PulseBuffer* buffers, uint32_t num_buffers)
 {
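+	// Pulse splits storage buffers across two WebGPU bind groups: group 0
+	// holds read-only resources and group 1 holds read-write ones, so a single
+	// call may only mix buffers of one kind (see WebGPUBindBindGroups below)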
+	PulseBufferUsageFlags usage = buffers[0]->usage;
+	bool is_readwrite = (usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0;
+	PulseBuffer* array = is_readwrite ? pass->readwrite_storage_buffers : pass->readonly_storage_buffers;
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+
+	for(uint32_t i = 0; i < num_buffers; i++)
+	{
+		if(is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) == 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-only buffer with read-write buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+		else if(!is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-write buffer with read-only buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+
+		if(array[i] == buffers[i])
+			continue;
+		array[i] = buffers[i];
+
+		if(is_readwrite)
+			webgpu_pass->should_recreate_write_bind_group = true;
+		else
+			webgpu_pass->should_recreate_read_only_bind_group = true;
+	}
 }
 
 void WebGPUBindUniformData(PulseComputePass pass, uint32_t slot, const void* data, uint32_t data_size)
@@ -57,6 +97,134 @@ void WebGPUBindUniformData(PulseComputePass pass, uint32_t slot, const void* dat
 
 void WebGPUBindStorageImages(PulseComputePass pass, const PulseImage* images, uint32_t num_images)
 {
+	PulseImageUsageFlags usage = images[0]->usage;
+	bool is_readwrite = (usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) != 0;
+	PulseImage* array = is_readwrite ? pass->readwrite_images : pass->readonly_images;
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+
+	for(uint32_t i = 0; i < num_images; i++)
+	{
+		if(is_readwrite && (images[i]->usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) == 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-only image with read-write images");
+			PulseSetInternalError(PULSE_ERROR_INVALID_IMAGE_USAGE);
+			return;
+		}
+		else if(!is_readwrite && (images[i]->usage & PULSE_IMAGE_USAGE_STORAGE_WRITE) != 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-write image with read-only images");
+			PulseSetInternalError(PULSE_ERROR_INVALID_IMAGE_USAGE);
+			return;
+		}
+
+		if(array[i] == images[i])
+			continue;
+		array[i] = images[i];
+
+		if(is_readwrite)
+			webgpu_pass->should_recreate_write_bind_group = true;
+		else
+			webgpu_pass->should_recreate_read_only_bind_group = true;
+	}
 }
+
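+// Lazily (re)creates and binds the bind groups right before a dispatch, and
+// only those whose bound resources changed since the last dispatch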
+static void WebGPUBindBindGroups(PulseComputePass pass)
+{
+	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->cmd->device, WebGPUDevice*);
+	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->current_pipeline, WebGPUComputePipeline*);
+
+	if(!webgpu_pass->should_recreate_read_only_bind_group && !webgpu_pass->should_recreate_write_bind_group && !webgpu_pass->should_recreate_uniform_bind_group)
+		return;
+
+	WGPUBindGroupEntry* read_only_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_readonly_storage_images + pass->current_pipeline->num_readonly_storage_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(read_only_entries);
+	WGPUBindGroupEntry* read_write_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_readwrite_storage_images + pass->current_pipeline->num_readwrite_storage_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(read_write_entries);
+	WGPUBindGroupEntry* uniform_entries = (WGPUBindGroupEntry*)calloc(pass->current_pipeline->num_uniform_buffers, sizeof(WGPUBindGroupEntry));
+	PULSE_CHECK_ALLOCATION(uniform_entries);
+
+	if(webgpu_pass->should_recreate_read_only_bind_group && webgpu_pipeline->readonly_group != PULSE_NULLPTR)
+	{
+		uint32_t entry_index = 0;
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readonly_storage_images; i++, entry_index++)
+		{
+			WGPUBindGroupEntry* entry = &read_only_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+		}
+
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readonly_storage_buffers; i++, entry_index++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->readonly_storage_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &read_only_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = pass->current_pipeline->num_readonly_storage_images + i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->readonly_storage_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->readonly_group;
+		descriptor.entryCount = pass->current_pipeline->num_readonly_storage_images + pass->current_pipeline->num_readonly_storage_buffers;
+		descriptor.entries = read_only_entries;
+		if(webgpu_pass->read_only_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->read_only_bind_group);
+		webgpu_pass->read_only_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 0, webgpu_pass->read_only_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_read_only_bind_group = false;
+	}
+	if(webgpu_pass->should_recreate_write_bind_group && webgpu_pipeline->readwrite_group != PULSE_NULLPTR)
+	{
+		uint32_t entry_index = 0;
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_images; i++, entry_index++)
+		{
+			WGPUBindGroupEntry* entry = &read_write_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+		}
+
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_buffers; i++, entry_index++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->readwrite_storage_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &read_write_entries[entry_index];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = pass->current_pipeline->num_readwrite_storage_images + i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->readwrite_storage_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->readwrite_group;
+		descriptor.entryCount = pass->current_pipeline->num_readwrite_storage_images + pass->current_pipeline->num_readwrite_storage_buffers;
+		descriptor.entries = read_write_entries;
+		if(webgpu_pass->read_write_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->read_write_bind_group);
+		webgpu_pass->read_write_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 1, webgpu_pass->read_write_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_write_bind_group = false;
+	}
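+	// Uniform buffers land in bind group 2, one buffer per binding slot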
+	if(webgpu_pass->should_recreate_uniform_bind_group && webgpu_pipeline->uniform_group != PULSE_NULLPTR)
+	{
+		for(uint32_t i = 0; i < pass->current_pipeline->num_uniform_buffers; i++)
+		{
+			WebGPUBuffer* webgpu_buffer = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass->uniform_buffers[i], WebGPUBuffer*);
+
+			WGPUBindGroupEntry* entry = &uniform_entries[i];
+			memset(entry, 0, sizeof(WGPUBindGroupEntry));
+			entry->binding = i;
+			entry->buffer = webgpu_buffer->buffer;
+			entry->size = pass->uniform_buffers[i]->size;
+			entry->offset = 0;
+		}
+
+		WGPUBindGroupDescriptor descriptor = { 0 };
+		descriptor.layout = webgpu_pipeline->uniform_group;
+		descriptor.entryCount = pass->current_pipeline->num_uniform_buffers;
+		descriptor.entries = uniform_entries;
+		if(webgpu_pass->uniform_bind_group != PULSE_NULLPTR)
+			wgpuBindGroupRelease(webgpu_pass->uniform_bind_group);
+		webgpu_pass->uniform_bind_group = wgpuDeviceCreateBindGroup(webgpu_device->device, &descriptor);
+		wgpuComputePassEncoderSetBindGroup(webgpu_pass->encoder, 2, webgpu_pass->uniform_bind_group, 0, PULSE_NULLPTR);
+		webgpu_pass->should_recreate_uniform_bind_group = false;
+	}
+
+	free(read_only_entries);
+	free(read_write_entries);
+	free(uniform_entries);
+}
 
 void WebGPUBindComputePipeline(PulseComputePass pass, PulseComputePipeline pipeline)
@@ -64,10 +232,15 @@ void WebGPUBindComputePipeline(PulseComputePass pass, PulseComputePipel
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
 	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pipeline, WebGPUComputePipeline*);
 	wgpuComputePassEncoderSetPipeline(webgpu_pass->encoder, webgpu_pipeline->pipeline);
+
+	webgpu_pass->should_recreate_read_only_bind_group = true;
+	webgpu_pass->should_recreate_write_bind_group = true;
+	webgpu_pass->should_recreate_uniform_bind_group = true;
 }
 
 void WebGPUDispatchComputations(PulseComputePass pass, uint32_t groupcount_x, uint32_t groupcount_y, uint32_t groupcount_z)
 {
 	WebGPUComputePass* webgpu_pass = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pass, WebGPUComputePass*);
+	WebGPUBindBindGroups(pass);
 	wgpuComputePassEncoderDispatchWorkgroups(webgpu_pass->encoder, groupcount_x, groupcount_y, groupcount_z);
 }
diff --git a/Sources/Backends/WebGPU/WebGPUComputePass.h b/Sources/Backends/WebGPU/WebGPUComputePass.h
index 20c2512..48fea54 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePass.h
+++ b/Sources/Backends/WebGPU/WebGPUComputePass.h
@@ -16,6 +16,13 @@ typedef struct WebGPUComputePass
 {
 	WGPUComputePassEncoder encoder;
+	WGPUBindGroup read_only_bind_group;
+	WGPUBindGroup read_write_bind_group;
+	WGPUBindGroup uniform_bind_group;
+
+	bool should_recreate_read_only_bind_group;
+	bool should_recreate_write_bind_group;
+	bool should_recreate_uniform_bind_group;
 } WebGPUComputePass;
 
 PulseComputePass WebGPUCreateComputePass(PulseDevice device, PulseCommandList cmd);
diff --git a/Sources/Backends/WebGPU/WebGPUComputePipeline.c b/Sources/Backends/WebGPU/WebGPUComputePipeline.c
index d4db46a..bb2a7ff 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePipeline.c
+++ b/Sources/Backends/WebGPU/WebGPUComputePipeline.c
@@ -6,8 +6,162 @@
 #include "../../PulseInternal.h"
 #include "WebGPU.h"
 #include "WebGPUDevice.h"
+#include "webgpu.h"
 #include "WebGPUComputePipeline.h"
 
+static WGPUBindGroupLayout WebGPUCreateBindGroupLayout(PulseDevice device,
+                                                       uint32_t read_storage_images_count,
+                                                       uint32_t read_storage_buffers_count,
+                                                       uint32_t write_storage_images_count,
+                                                       uint32_t write_storage_buffers_count,
+                                                       uint32_t uniform_buffers_count)
+{
+	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
+
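+	// Builds the layout for exactly one of the three Pulse bind groups:
+	// category 1 = read-only resources (group 0), category 2 = read-write
+	// resources (group 1), category 3 = uniform buffers (group 2)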
+	uint8_t category;
+	if(uniform_buffers_count != 0)
+		category = 3;
+	else if(write_storage_images_count != 0 || write_storage_buffers_count != 0)
+		category = 2;
+	else
+		category = 1;
+
+	uint32_t count = 0;
+	WGPUBindGroupLayoutEntry entries[PULSE_MAX_READ_BUFFERS_BOUND + PULSE_MAX_READ_TEXTURES_BOUND + PULSE_MAX_WRITE_BUFFERS_BOUND + PULSE_MAX_WRITE_TEXTURES_BOUND + PULSE_MAX_UNIFORM_BUFFERS_BOUND] = { 0 };
+
+	if(category == 1)
+	{
+		for(uint32_t i = 0; i < read_storage_images_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_BindingNotUsed;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+
+		for(uint32_t i = read_storage_images_count; i < read_storage_images_count + read_storage_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_ReadOnlyStorage;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+	else if(category == 2)
+	{
+		for(uint32_t i = 0; i < write_storage_images_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_BindingNotUsed;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+
+		for(uint32_t i = write_storage_images_count; i < write_storage_images_count + write_storage_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_Storage;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+	else if(category == 3)
+	{
+		for(uint32_t i = 0; i < uniform_buffers_count; i++, count++)
+		{
+			entries[i].binding = i;
+			entries[i].visibility = WGPUShaderStage_Compute;
+
+			entries[i].buffer.nextInChain = PULSE_NULLPTR;
+			entries[i].buffer.hasDynamicOffset = false;
+			entries[i].buffer.type = WGPUBufferBindingType_Uniform;
+			entries[i].buffer.minBindingSize = 0;
+
+			entries[i].sampler.nextInChain = PULSE_NULLPTR;
+			entries[i].sampler.type = WGPUSamplerBindingType_BindingNotUsed;
+
+			entries[i].storageTexture.nextInChain = PULSE_NULLPTR;
+			entries[i].storageTexture.access = WGPUStorageTextureAccess_BindingNotUsed;
+			entries[i].storageTexture.format = WGPUTextureFormat_Undefined;
+			entries[i].storageTexture.viewDimension = WGPUTextureViewDimension_Undefined;
+
+			entries[i].texture.nextInChain = PULSE_NULLPTR;
+			entries[i].texture.multisampled = false;
+			entries[i].texture.sampleType = WGPUTextureSampleType_BindingNotUsed;
+			entries[i].texture.viewDimension = WGPUTextureViewDimension_Undefined;
+		}
+	}
+
+	WGPUBindGroupLayoutDescriptor descriptor = { 0 };
+	descriptor.entryCount = count;
+	descriptor.entries = entries;
+	return wgpuDeviceCreateBindGroupLayout(webgpu_device->device, &descriptor);
+}
+
 PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info)
 {
 	WebGPUDevice* webgpu_device = WEBGPU_RETRIEVE_DRIVER_DATA_AS(device, WebGPUDevice*);
@@ -30,27 +184,36 @@ PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const Pulse
 		PulseLogError(device->backend, "invalid shader format passed to PulseComputePipelineCreateInfo");
 	}
 
-	WGPUStringView code = { 0 };
-	code.length = info->code_size;
-	code.data = (const char*)info->code;
-	WGPUChainedStruct chain = { 0 };
-	chain.next = PULSE_NULLPTR;
-	chain.sType = WGPUSType_ShaderSourceWGSL;
 	WGPUShaderSourceWGSL source = { 0 };
-	source.chain = chain;
-	source.code = code;
+	source.chain.next = PULSE_NULLPTR;
+	source.chain.sType = WGPUSType_ShaderSourceWGSL;
+	source.code.length = info->code_size;
+	source.code.data = (const char*)info->code;
+
 	WGPUShaderModuleDescriptor shader_descriptor = { 0 };
 	shader_descriptor.nextInChain = (const WGPUChainedStruct*)&source;
 	webgpu_pipeline->shader = wgpuDeviceCreateShaderModule(webgpu_device->device, &shader_descriptor);
 
-	WGPUStringView entrypoint = { 0 };
-	entrypoint.length = WGPU_STRLEN;
-	entrypoint.data = info->entrypoint;
-	WGPUProgrammableStageDescriptor state = { 0 };
-	state.module = webgpu_pipeline->shader;
-	state.entryPoint = entrypoint;
+	webgpu_pipeline->readonly_group = WebGPUCreateBindGroupLayout(device, info->num_readonly_storage_images, info->num_readonly_storage_buffers, 0, 0, 0);
+	webgpu_pipeline->readwrite_group = WebGPUCreateBindGroupLayout(device, 0, 0, info->num_readwrite_storage_images, info->num_readwrite_storage_buffers, 0);
+	webgpu_pipeline->uniform_group = WebGPUCreateBindGroupLayout(device, 0, 0, 0, 0, info->num_uniform_buffers);
+
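+	// Group order must match the indices used when binding: 0 = read-only,
+	// 1 = read-write, 2 = uniforms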
+	WGPUBindGroupLayout bind_group_layouts[3] = {
+		webgpu_pipeline->readonly_group,
+		webgpu_pipeline->readwrite_group,
+		webgpu_pipeline->uniform_group,
+	};
+
+	WGPUPipelineLayoutDescriptor layout_descriptor = { 0 };
+	layout_descriptor.bindGroupLayoutCount = 3;
+	layout_descriptor.bindGroupLayouts = bind_group_layouts;
+	webgpu_pipeline->layout = wgpuDeviceCreatePipelineLayout(webgpu_device->device, &layout_descriptor);
+
 	WGPUComputePipelineDescriptor pipeline_descriptor = { 0 };
-	pipeline_descriptor.compute = state;
+	pipeline_descriptor.compute.module = webgpu_pipeline->shader;
+	pipeline_descriptor.compute.entryPoint.length = WGPU_STRLEN;
+	pipeline_descriptor.compute.entryPoint.data = info->entrypoint;
+	pipeline_descriptor.layout = webgpu_pipeline->layout;
 	webgpu_pipeline->pipeline = wgpuDeviceCreateComputePipeline(webgpu_device->device, &pipeline_descriptor);
 
 	if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend))
@@ -70,6 +233,10 @@ void WebGPUDestroyComputePipeline(PulseDevice device, PulseComputePipeline pipel
 	PULSE_UNUSED(device);
 	WebGPUComputePipeline* webgpu_pipeline = WEBGPU_RETRIEVE_DRIVER_DATA_AS(pipeline, WebGPUComputePipeline*);
 
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->readonly_group);
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->readwrite_group);
+	wgpuBindGroupLayoutRelease(webgpu_pipeline->uniform_group);
+	wgpuPipelineLayoutRelease(webgpu_pipeline->layout);
 	wgpuComputePipelineRelease(webgpu_pipeline->pipeline);
 	wgpuShaderModuleRelease(webgpu_pipeline->shader);
 	free(webgpu_pipeline);
diff --git a/Sources/Backends/WebGPU/WebGPUComputePipeline.h b/Sources/Backends/WebGPU/WebGPUComputePipeline.h
index 85cc295..da80fed 100644
--- a/Sources/Backends/WebGPU/WebGPUComputePipeline.h
+++ b/Sources/Backends/WebGPU/WebGPUComputePipeline.h
@@ -13,8 +13,12 @@
 
 typedef struct WebGPUComputePipeline
 {
-	WGPUComputePipeline pipeline;
 	WGPUShaderModule shader;
+	WGPUPipelineLayout layout;
+	WGPUComputePipeline pipeline;
+	WGPUBindGroupLayout readonly_group;
+	WGPUBindGroupLayout readwrite_group;
+	WGPUBindGroupLayout uniform_group;
 } WebGPUComputePipeline;
 
 PulseComputePipeline WebGPUCreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info);
diff --git a/Sources/Backends/WebGPU/WebGPUFence.c b/Sources/Backends/WebGPU/WebGPUFence.c
index dc33005..f24ef9f 100644
--- a/Sources/Backends/WebGPU/WebGPUFence.c
+++ b/Sources/Backends/WebGPU/WebGPUFence.c
@@ -46,6 +46,7 @@ bool WebGPUWaitForFences(PulseDevice device, const PulseFence* fences, uint32_t
 	uint32_t fences_to_wait = fences_count;
 	while(fences_to_wait != 0)
 	{
+		WebGPUDeviceTick(device);
 		for(uint32_t i = 0; i < fences_count; i++)
 		{
 			if(WebGPUIsFenceReady(device, fences[i]))