diff --git a/Examples/D3D11/main.c b/Examples/D3D11/main.c index 442616f..df7b4d2 100644 --- a/Examples/D3D11/main.c +++ b/Examples/D3D11/main.c @@ -33,7 +33,14 @@ const char* hlsl_source = HLSL_SOURCE( int main(void) { - PulseBackend backend = PulseLoadBackend(PULSE_BACKEND_D3D11, PULSE_SHADER_FORMAT_HLSL_BIT, PULSE_HIGH_DEBUG); + PulseFlags shader_format; + #ifdef PULSE_D3D11_COMPILER_UNAVAILABLE + shader_format = PULSE_SHADER_FORMAT_DXBC_BIT; + #else + shader_format = PULSE_SHADER_FORMAT_HLSL_BIT; + #endif + + PulseBackend backend = PulseLoadBackend(PULSE_BACKEND_D3D11, shader_format, PULSE_HIGH_DEBUG); PulseSetDebugCallback(backend, DebugCallBack); PulseDevice device = PulseCreateDevice(backend, NULL, 0); @@ -45,16 +52,30 @@ int main(void) // GPU computations { PulseComputePipelineCreateInfo info = { 0 }; - info.code_size = strlen(hlsl_source); - info.code = (const uint8_t*)hlsl_source; + #ifdef PULSE_D3D11_COMPILER_UNAVAILABLE + const uint8_t shader_bytecode[] = { + #include "shader.cso.h" + }; + info.code_size = sizeof(shader_bytecode); + info.code = shader_bytecode; + #else + info.code_size = strlen(hlsl_source); + info.code = (const uint8_t*)hlsl_source; + #endif info.entrypoint = "CSMain"; - info.format = PULSE_SHADER_FORMAT_HLSL_BIT; + info.format = shader_format; info.num_readwrite_storage_buffers = 1; PulseComputePipeline pipeline = PulseCreateComputePipeline(device, &info); PulseFence fence = PulseCreateFence(device); PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_GENERAL); + PulseComputePass pass = PulseBeginComputePass(cmd); + PulseBindStorageBuffers(pass, &buffer, 1); + PulseBindComputePipeline(pass, pipeline); + PulseDispatchComputations(pass, 16, 1, 1); + PulseEndComputePass(pass); + PulseSubmitCommandList(device, cmd, fence); PulseWaitForFences(device, &fence, 1, true); @@ -63,6 +84,42 @@ int main(void) PulseDestroyComputePipeline(device, pipeline); } + // Get result and read it on CPU + { + PulseBufferCreateInfo 
staging_buffer_create_info = { 0 }; + staging_buffer_create_info.size = BUFFER_SIZE; + staging_buffer_create_info.usage = PULSE_BUFFER_USAGE_TRANSFER_UPLOAD | PULSE_BUFFER_USAGE_TRANSFER_DOWNLOAD; + PulseBuffer staging_buffer = PulseCreateBuffer(device, &staging_buffer_create_info); + + PulseFence fence = PulseCreateFence(device); + PulseCommandList cmd = PulseRequestCommandList(device, PULSE_COMMAND_LIST_TRANSFER_ONLY); + + PulseBufferRegion src_region = { 0 }; + src_region.buffer = buffer; + src_region.size = BUFFER_SIZE; + + PulseBufferRegion dst_region = { 0 }; + dst_region.buffer = staging_buffer; + dst_region.size = BUFFER_SIZE; + + PulseCopyBufferToBuffer(cmd, &src_region, &dst_region); + + PulseSubmitCommandList(device, cmd, fence); + PulseWaitForFences(device, &fence, 1, true); + + void* ptr; + PulseMapBuffer(staging_buffer, PULSE_MAP_READ, &ptr); + for(uint32_t i = 0; i < BUFFER_SIZE / sizeof(uint32_t); i++) + printf("%d, ", ((int32_t*)ptr)[i]); + puts(""); + PulseUnmapBuffer(staging_buffer); + + PulseDestroyBuffer(device, staging_buffer); + + PulseReleaseCommandList(device, cmd); + PulseDestroyFence(device, fence); + } + PulseDestroyBuffer(device, buffer); PulseDestroyDevice(device); diff --git a/Examples/D3D11/shader.cso.h b/Examples/D3D11/shader.cso.h new file mode 100644 index 0000000..b4e67cb --- /dev/null +++ b/Examples/D3D11/shader.cso.h @@ -0,0 +1,44 @@ +0x44, 0x58, 0x42, 0x43, 0x73, 0x34, 0xc8, 0x1f, 0xdb, 0x93, 0x0e, 0xcb, +0x41, 0xcd, 0x2e, 0x23, 0xde, 0x7c, 0x23, 0x9b, 0x01, 0x00, 0x00, 0x00, +0x08, 0x02, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x34, 0x00, 0x00, 0x00, +0xd0, 0x00, 0x00, 0x00, 0xe0, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, +0x6c, 0x01, 0x00, 0x00, 0x52, 0x44, 0x45, 0x46, 0x94, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x3c, 0x00, 0x00, 0x00, 0x00, 0x05, 0x53, 0x43, 0x00, 0x01, 0x00, 0x00, +0x61, 0x00, 0x00, 0x00, 0x52, 0x44, 0x31, 0x31, 0x3c, 0x00, 0x00, 0x00, +0x18, 0x00, 0x00, 
0x00, 0x20, 0x00, 0x00, 0x00, 0x28, 0x00, 0x00, 0x00, +0x24, 0x00, 0x00, 0x00, 0x0c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x5c, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x73, 0x73, 0x62, 0x6f, +0x00, 0x4d, 0x69, 0x63, 0x72, 0x6f, 0x73, 0x6f, 0x66, 0x74, 0x20, 0x28, +0x52, 0x29, 0x20, 0x48, 0x4c, 0x53, 0x4c, 0x20, 0x53, 0x68, 0x61, 0x64, +0x65, 0x72, 0x20, 0x43, 0x6f, 0x6d, 0x70, 0x69, 0x6c, 0x65, 0x72, 0x20, +0x39, 0x2e, 0x32, 0x39, 0x2e, 0x39, 0x35, 0x32, 0x2e, 0x33, 0x31, 0x31, +0x31, 0x00, 0xab, 0xab, 0x49, 0x53, 0x47, 0x4e, 0x08, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x4f, 0x53, 0x47, 0x4e, +0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, +0x53, 0x48, 0x45, 0x58, 0x74, 0x00, 0x00, 0x00, 0x50, 0x00, 0x05, 0x00, +0x1d, 0x00, 0x00, 0x00, 0x6a, 0x08, 0x00, 0x01, 0x9c, 0x08, 0x00, 0x04, +0x00, 0xe0, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x33, 0x33, 0x00, 0x00, +0x5f, 0x00, 0x00, 0x02, 0x32, 0x00, 0x02, 0x00, 0x68, 0x00, 0x00, 0x02, +0x01, 0x00, 0x00, 0x00, 0x9b, 0x00, 0x00, 0x04, 0x10, 0x00, 0x00, 0x00, +0x10, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x26, 0x00, 0x00, 0x06, +0x00, 0xd0, 0x00, 0x00, 0x12, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, +0x0a, 0x00, 0x02, 0x00, 0x1a, 0x00, 0x02, 0x00, 0xa4, 0x00, 0x00, 0x07, +0xf2, 0xe0, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, +0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, +0x3e, 0x00, 0x00, 0x01, 0x53, 0x54, 0x41, 0x54, 0x94, 0x00, 0x00, 0x00, +0x03, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +0x00, 0x00, 0x00, 0x00 diff --git a/Examples/D3D11/xmake.lua b/Examples/D3D11/xmake.lua index 8643b60..6dc2dbb 100644 --- a/Examples/D3D11/xmake.lua +++ b/Examples/D3D11/xmake.lua @@ -2,6 +2,7 @@ target("D3D11Example") add_deps("pulse_gpu") if is_plat("linux") then set_extension(".x86_64") + add_defines("PULSE_D3D11_COMPILER_UNAVAILABLE") end add_files("*.c") target_end() diff --git a/README.md b/README.md index 828f104..14d5217 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ [![Msys2 build](https://github.com/ft-grmhd/Pulse/actions/workflows/msys2-build.yml/badge.svg)](https://github.com/ft-grmhd/Pulse/actions/workflows/msys2-build.yml) [![Windows build](https://github.com/ft-grmhd/Pulse/actions/workflows/windows-build.yml/badge.svg)](https://github.com/ft-grmhd/Pulse/actions/workflows/windows-build.yml) -Pulse is a low level GPGPU library designed for highly intensive general GPU computations with high control over the hardware. It is built on top of Vulkan. A Metal and WebGPU backends are in development. +Pulse is a kind of low-level GPGPU library with mid-tier control over the hardware. It is built on top of Vulkan/WebGPU/OpenGL/OpenGL ES. Metal and D3D11 backends are in development. 
Unit tests map: | | Linux | Windows | Msys2 (MinGW64) | macOS | diff --git a/Sources/Backends/D3D11/D3D11.c b/Sources/Backends/D3D11/D3D11.c index 0e41e5b..20d8278 100644 --- a/Sources/Backends/D3D11/D3D11.c +++ b/Sources/Backends/D3D11/D3D11.c @@ -10,7 +10,9 @@ #pragma comment(lib,"d3d11.lib") #pragma comment(lib, "dxgi.lib") -#pragma comment(lib,"d3dcompiler.lib") +#ifndef PULSE_D3D11_COMPILER_UNAVAILABLE + #pragma comment(lib,"d3dcompiler.lib") +#endif PulseBackendFlags Direct3D11CheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used) { @@ -32,30 +34,6 @@ void Direct3D11UnloadBackend(PulseBackend backend) { } -#ifndef D3D11_ERROR_FILE_NOT_FOUND - #define D3D11_ERROR_FILE_NOT_FOUND 0x887C0002 -#endif - -#ifndef D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS - #define D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS 0x887C0001 -#endif - -#ifndef D3D11_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS - #define D3D11_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS 0x887C0003 -#endif - -#ifndef D3D11_ERROR_DEFERRED_CONTEXT_MAP_WITHOUT_INITIAL_DISCARD - #define D3D11_ERROR_DEFERRED_CONTEXT_MAP_WITHOUT_INITIAL_DISCARD 0x887C0004 -#endif - -#ifndef D3DERR_INVALIDCALL - #define D3DERR_INVALIDCALL 0x887A0001 -#endif - -#ifndef D3DERR_WASSTILLDRAWING - #define D3DERR_WASSTILLDRAWING 0x887A000A -#endif - const char* D3D11VerbaliseResult(HRESULT res) { switch(res) diff --git a/Sources/Backends/D3D11/D3D11.h b/Sources/Backends/D3D11/D3D11.h index 53d6696..5ecf74f 100644 --- a/Sources/Backends/D3D11/D3D11.h +++ b/Sources/Backends/D3D11/D3D11.h @@ -37,6 +37,30 @@ #define CHECK_D3D11(backend, res, error) CHECK_D3D11_RETVAL(backend, res, error, ) +#ifndef D3D11_ERROR_FILE_NOT_FOUND + #define D3D11_ERROR_FILE_NOT_FOUND 0x887C0002 +#endif + +#ifndef D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS + #define D3D11_ERROR_TOO_MANY_UNIQUE_STATE_OBJECTS 0x887C0001 +#endif + +#ifndef D3D11_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS + #define D3D11_ERROR_TOO_MANY_UNIQUE_VIEW_OBJECTS 0x887C0003 +#endif + 
+#ifndef D3D11_ERROR_DEFERRED_CONTEXT_MAP_WITHOUT_INITIAL_DISCARD + #define D3D11_ERROR_DEFERRED_CONTEXT_MAP_WITHOUT_INITIAL_DISCARD 0x887C0004 +#endif + +#ifndef D3DERR_INVALIDCALL + #define D3DERR_INVALIDCALL 0x887A0001 +#endif + +#ifndef D3DERR_WASSTILLDRAWING + #define D3DERR_WASSTILLDRAWING 0x887A000A +#endif + const char* D3D11VerbaliseResult(HRESULT res); PulseBackendFlags Direct3D11CheckSupport(PulseBackendFlags candidates, PulseShaderFormatsFlags shader_formats_used); // Returns corresponding PULSE_BACKEND enum in case of success and PULSE_BACKEND_INVALID otherwise diff --git a/Sources/Backends/D3D11/D3D11Buffer.c b/Sources/Backends/D3D11/D3D11Buffer.c index 9b0d7c0..378c43c 100644 --- a/Sources/Backends/D3D11/D3D11Buffer.c +++ b/Sources/Backends/D3D11/D3D11Buffer.c @@ -33,23 +33,72 @@ PulseBuffer Direct3D11CreateBuffer(PulseDevice device, const PulseBufferCreateIn description.BindFlags |= D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE; description.MiscFlags |= D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; } + if(create_infos->usage & (PULSE_BUFFER_USAGE_TRANSFER_UPLOAD)) + description.CPUAccessFlags |= D3D11_CPU_ACCESS_WRITE; + if(create_infos->usage & (PULSE_BUFFER_USAGE_TRANSFER_DOWNLOAD)) + description.CPUAccessFlags |= D3D11_CPU_ACCESS_READ; CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateBuffer(d3d11_device->device, &description, PULSE_NULLPTR, &d3d11_buffer->buffer), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE); + if(create_infos->usage & (PULSE_BUFFER_USAGE_STORAGE_READ | PULSE_BUFFER_USAGE_STORAGE_WRITE)) + { + D3D11_UNORDERED_ACCESS_VIEW_DESC unordered_access_view_description; + unordered_access_view_description.Format = DXGI_FORMAT_R32_TYPELESS; + unordered_access_view_description.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + unordered_access_view_description.Buffer.FirstElement = 0; + unordered_access_view_description.Buffer.NumElements = create_infos->size / sizeof(uint32_t); + 
unordered_access_view_description.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; + CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateUnorderedAccessView(d3d11_device->device, (ID3D11Resource*)d3d11_buffer->buffer, &unordered_access_view_description, &d3d11_buffer->unordered_access_view), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE); + + D3D11_SHADER_RESOURCE_VIEW_DESC shader_resource_view_description; + shader_resource_view_description.Format = DXGI_FORMAT_R32_TYPELESS; + shader_resource_view_description.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX; + shader_resource_view_description.BufferEx.FirstElement = 0; + shader_resource_view_description.BufferEx.NumElements = create_infos->size / sizeof(uint32_t); + shader_resource_view_description.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW; + CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateShaderResourceView(d3d11_device->device, (ID3D11Resource*)d3d11_buffer->buffer, &shader_resource_view_description, &d3d11_buffer->shader_resource_view), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE); + } + return buffer; } bool Direct3D11MapBuffer(PulseBuffer buffer, PulseMapMode mode, void** data) { + Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(buffer->device, Direct3D11Device*); + Direct3D11Buffer* d3d11_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(buffer, Direct3D11Buffer*); + + D3D11_MAPPED_SUBRESOURCE subresource; + CHECK_D3D11_RETVAL(buffer->device->backend, ID3D11DeviceContext_Map(d3d11_device->context, (ID3D11Resource*)d3d11_buffer->buffer, 0, (mode == PULSE_MAP_READ) ? 
D3D11_MAP_READ : D3D11_MAP_WRITE, 0, &subresource), PULSE_ERROR_MAP_FAILED, false); + *data = subresource.pData; return true; } void Direct3D11UnmapBuffer(PulseBuffer buffer) { + Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(buffer->device, Direct3D11Device*); + Direct3D11Buffer* d3d11_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(buffer, Direct3D11Buffer*); + ID3D11DeviceContext_Unmap(d3d11_device->context, (ID3D11Resource*)d3d11_buffer->buffer, 0); } bool Direct3D11CopyBufferToBuffer(PulseCommandList cmd, const PulseBufferRegion* src, const PulseBufferRegion* dst) { + Direct3D11Buffer* d3d11_src_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(src->buffer, Direct3D11Buffer*); + Direct3D11Buffer* d3d11_dst_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(dst->buffer, Direct3D11Buffer*); + Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(cmd, Direct3D11CommandList*); + + D3D11_BOX src_box = { src->offset, 0, 0, src->offset + src->size, 1, 1 }; + + ID3D11DeviceContext_CopySubresourceRegion( + d3d11_cmd->context, + (ID3D11Resource*)d3d11_dst_buffer->buffer, + 0, + dst->offset, + 0, + 0, + (ID3D11Resource*)d3d11_src_buffer->buffer, + 0, + &src_box + ); return true; } @@ -61,6 +110,10 @@ bool Direct3D11CopyBufferToImage(PulseCommandList cmd, const PulseBufferRegion* void Direct3D11DestroyBuffer(PulseDevice device, PulseBuffer buffer) { Direct3D11Buffer* d3d11_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(buffer, Direct3D11Buffer*); + if(d3d11_buffer->unordered_access_view) + ID3D11UnorderedAccessView_Release(d3d11_buffer->unordered_access_view); + if(d3d11_buffer->shader_resource_view) + ID3D11ShaderResourceView_Release(d3d11_buffer->shader_resource_view); ID3D11Buffer_Release(d3d11_buffer->buffer); free(d3d11_buffer); free(buffer); diff --git a/Sources/Backends/D3D11/D3D11Buffer.h b/Sources/Backends/D3D11/D3D11Buffer.h index dc8c5ea..254c21a 100644 --- a/Sources/Backends/D3D11/D3D11Buffer.h +++ b/Sources/Backends/D3D11/D3D11Buffer.h @@ -14,6 +14,8 @@ typedef struct 
Direct3D11Buffer
 {
 	ID3D11Buffer* buffer;
+	ID3D11UnorderedAccessView* unordered_access_view;
+	ID3D11ShaderResourceView* shader_resource_view;
 } Direct3D11Buffer;
 
 PulseBuffer Direct3D11CreateBuffer(PulseDevice device, const PulseBufferCreateInfo* create_infos);
diff --git a/Sources/Backends/D3D11/D3D11CommandList.c b/Sources/Backends/D3D11/D3D11CommandList.c
index 261f891..dbea759 100644
--- a/Sources/Backends/D3D11/D3D11CommandList.c
+++ b/Sources/Backends/D3D11/D3D11CommandList.c
@@ -28,7 +28,7 @@ PulseCommandList Direct3D11RequestCommandList(PulseDevice device, PulseCommandLi
 
 	cmd->driver_data = d3d11_cmd;
 	cmd->thread_id = PulseGetThreadID();
-	cmd->pass = PULSE_NULL_HANDLE;
+	cmd->pass = Direct3D11CreateComputePass(device, cmd);
 	cmd->state = PULSE_COMMAND_LIST_STATE_RECORDING;
 	cmd->is_available = false;
 
@@ -37,13 +37,40 @@ PulseCommandList Direct3D11RequestCommandList(PulseDevice device, PulseCommandLi
 
+// Finishes the command list recorded on cmd's context, executes it on the device context, then ends the fence's query (when a fence is given). Returns false if finishing the command list fails.
 bool Direct3D11SubmitCommandList(PulseDevice device, PulseCommandList cmd, PulseFence fence)
 {
+	Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(device, Direct3D11Device*);
+	Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(cmd, Direct3D11CommandList*);
+	ID3D11CommandList* command_list;
+
+	HRESULT res = ID3D11DeviceContext_FinishCommandList(d3d11_cmd->context, false, &command_list);
+	switch(res)
+	{
+		case S_OK:
+		case S_FALSE: break;
+
+		case DXGI_ERROR_DEVICE_REMOVED: PulseSetInternalError(PULSE_ERROR_DEVICE_LOST); return false;
+		case E_OUTOFMEMORY: PulseSetInternalError(PULSE_ERROR_DEVICE_ALLOCATION_FAILED); return false;
+		case DXGI_ERROR_INVALID_CALL: PulseSetInternalError(PULSE_ERROR_CPU_ALLOCATION_FAILED); return false; // NOTE(review): an invalid call is reported as a CPU allocation failure - confirm this mapping is intended
+		default: return false;
+	}
+	ID3D11DeviceContext_ExecuteCommandList(d3d11_device->context, command_list, false);
+	ID3D11CommandList_Release(command_list);
+
+	if(fence != PULSE_NULL_HANDLE)
+	{
+		// End the event query only after the command list has been handed to the device context: ending it earlier would leave this submission outside of what the fence waits for
+		Direct3D11Fence* d3d11_fence = D3D11_RETRIEVE_DRIVER_DATA_AS(fence, Direct3D11Fence*);
+		fence->cmd = cmd;
+		ID3D11DeviceContext_End(d3d11_device->context, (ID3D11Asynchronous*)d3d11_fence->query);
+	}
+
 	return true;
 }
 
 void Direct3D11ReleaseCommandList(PulseDevice device, PulseCommandList cmd)
 {
-	PULSE_UNUSED(device);
 	Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(cmd, Direct3D11CommandList*);
 	ID3D11DeviceContext_Release(d3d11_cmd->context);
+	Direct3D11DestroyComputePass(device, cmd->pass);
 	free(d3d11_cmd);
 	free(cmd);
 }
diff --git a/Sources/Backends/D3D11/D3D11CommandList.h b/Sources/Backends/D3D11/D3D11CommandList.h
index c02f8e0..6e36389 100644
--- a/Sources/Backends/D3D11/D3D11CommandList.h
+++ b/Sources/Backends/D3D11/D3D11CommandList.h
@@ -9,9 +9,6 @@
 #ifndef PULSE_D3D11_COMMAND_LIST_H_
 #define PULSE_D3D11_COMMAND_LIST_H_
 
-#include
-#include
-
 #include "D3D11.h"
 #include "D3D11Fence.h"
 
diff --git a/Sources/Backends/D3D11/D3D11ComputePass.c b/Sources/Backends/D3D11/D3D11ComputePass.c
index eec8e9f..b10eb35 100644
--- a/Sources/Backends/D3D11/D3D11ComputePass.c
+++ b/Sources/Backends/D3D11/D3D11ComputePass.c
@@ -5,23 +5,78 @@
 #include
 #include "../../PulseInternal.h"
 #include "D3D11.h"
+#include "D3D11Buffer.h"
 #include "D3D11ComputePass.h"
 #include "D3D11CommandList.h"
+#include "D3D11ComputePipeline.h"
+
+static void Direct3D11BindResources(PulseComputePass pass)
+{
+	Direct3D11ComputePass* d3d11_pass = D3D11_RETRIEVE_DRIVER_DATA_AS(pass, Direct3D11ComputePass*);
+	Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->cmd, Direct3D11CommandList*);
+	Direct3D11ComputePipeline* d3d11_pipeline = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->current_pipeline, Direct3D11ComputePipeline*);
+
+	if(d3d11_pass->should_bind_read_only_resources)
+	{
+		uint32_t entry_index = 0;
+		for(uint32_t i = 0; i < pass->current_pipeline->num_readonly_storage_images; i++, entry_index++)
+		{
+		}
+
+		ID3D11ShaderResourceView* buffer_resource_views[PULSE_MAX_READ_BUFFERS_BOUND];
+		for(uint32_t i = 0; i <
pass->current_pipeline->num_readonly_storage_buffers; i++, entry_index++) + { + Direct3D11Buffer* d3d11_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->readonly_storage_buffers[i], Direct3D11Buffer*); + buffer_resource_views[i] = d3d11_buffer->shader_resource_view; + } + ID3D11DeviceContext_CSSetShaderResources(d3d11_cmd->context, pass->current_pipeline->num_readonly_storage_images, pass->current_pipeline->num_readonly_storage_buffers, buffer_resource_views); + + d3d11_pass->should_bind_read_only_resources = false; + } + if(d3d11_pass->should_bind_write_resources) + { + ID3D11UnorderedAccessView* resource_views[PULSE_MAX_WRITE_TEXTURES_BOUND + PULSE_MAX_WRITE_BUFFERS_BOUND]; + uint32_t entry_index = 0; + for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_images; i++, entry_index++) + { + } + + for(uint32_t i = 0; i < pass->current_pipeline->num_readwrite_storage_buffers; i++, entry_index++) + { + Direct3D11Buffer* d3d11_buffer = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->readwrite_storage_buffers[i], Direct3D11Buffer*); + resource_views[entry_index] = d3d11_buffer->unordered_access_view; + } + ID3D11DeviceContext_CSSetUnorderedAccessViews(d3d11_cmd->context, 0, pass->current_pipeline->num_readwrite_storage_images + pass->current_pipeline->num_readwrite_storage_buffers, resource_views, PULSE_NULLPTR); + + d3d11_pass->should_bind_write_resources = false; + } +} PulseComputePass Direct3D11CreateComputePass(PulseDevice device, PulseCommandList cmd) { PULSE_UNUSED(device); PulseComputePass pass = (PulseComputePass)calloc(1, sizeof(PulseComputePassHandler)); PULSE_CHECK_ALLOCATION_RETVAL(pass, PULSE_NULL_HANDLE); + Direct3D11ComputePass* d3d11_pass = (Direct3D11ComputePass*)calloc(1, sizeof(Direct3D11ComputePass)); + PULSE_CHECK_ALLOCATION_RETVAL(d3d11_pass, PULSE_NULL_HANDLE); + + pass->cmd = cmd; + pass->driver_data = d3d11_pass; + return pass; } void Direct3D11DestroyComputePass(PulseDevice device, PulseComputePass pass) { + PULSE_UNUSED(device); + 
Direct3D11ComputePass* d3d11_pass = D3D11_RETRIEVE_DRIVER_DATA_AS(pass, Direct3D11ComputePass*);
+	free(d3d11_pass);
+	free(pass);
 }
 
 PulseComputePass Direct3D11BeginComputePass(PulseCommandList cmd)
 {
+	return cmd->pass;
 }
 
 void Direct3D11EndComputePass(PulseComputePass pass)
@@ -30,6 +85,42 @@ void Direct3D11EndComputePass(PulseComputePass pass)
 
+// Stores `buffers` in the pass's read-write or read-only slots (starting at slot 0) and flags the touched set so it is rebound later. Sets PULSE_ERROR_INVALID_BUFFER_USAGE and stops when read-only and read-write buffers are mixed.
 void Direct3D11BindStorageBuffers(PulseComputePass pass, const PulseBuffer* buffers, uint32_t num_buffers)
 {
+	// Nothing to bind: leave before buffers[0] is read below
+	if(buffers == PULSE_NULLPTR || num_buffers == 0)
+		return;
+
+	// The first buffer decides whether the whole batch is treated as read-write or as read-only
+	bool is_readwrite = (buffers[0]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0;
+	PulseBuffer* array = is_readwrite ? pass->readwrite_storage_buffers : pass->readonly_storage_buffers;
+	Direct3D11ComputePass* d3d11_pass = D3D11_RETRIEVE_DRIVER_DATA_AS(pass, Direct3D11ComputePass*);
+
+	for(uint32_t i = 0; i < num_buffers; i++)
+	{
+		if(is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) == 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read only buffer with read-write buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+		else if(!is_readwrite && (buffers[i]->usage & PULSE_BUFFER_USAGE_STORAGE_WRITE) != 0)
+		{
+			if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(pass->cmd->device->backend))
+				PulseLogError(pass->cmd->device->backend, "cannot bind a read-write buffer with read only buffers");
+			PulseSetInternalError(PULSE_ERROR_INVALID_BUFFER_USAGE);
+			return;
+		}
+
+		if(array[i] == buffers[i]) // already in this slot: no rebind needed
+			continue;
+		array[i] = buffers[i];
+
+		if(is_readwrite)
+			d3d11_pass->should_bind_write_resources = true;
+		else
+			d3d11_pass->should_bind_read_only_resources = true;
+	}
 }
 
 void Direct3D11BindUniformData(PulseComputePass pass, uint32_t slot, const void* data, uint32_t data_size)
@@ -42,8 +133,20 @@ void Direct3D11BindStorageImages(PulseComputePass pass, const PulseImage* images
 
 void Direct3D11BindComputePipeline(PulseComputePass pass, PulseComputePipeline pipeline)
{ + Direct3D11ComputePass* d3d11_pass = D3D11_RETRIEVE_DRIVER_DATA_AS(pass, Direct3D11ComputePass*); + Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->cmd, Direct3D11CommandList*); + Direct3D11ComputePipeline* d3d11_pipeline = D3D11_RETRIEVE_DRIVER_DATA_AS(pipeline, Direct3D11ComputePipeline*); + ID3D11DeviceContext_CSSetShader(d3d11_cmd->context, d3d11_pipeline->shader, PULSE_NULLPTR, 0); + + d3d11_pass->should_bind_read_only_resources = true; + d3d11_pass->should_bind_write_resources = true; + d3d11_pass->should_bind_uniform_resources = true; } void Direct3D11DispatchComputations(PulseComputePass pass, uint32_t groupcount_x, uint32_t groupcount_y, uint32_t groupcount_z) { + Direct3D11ComputePass* d3d11_pass = D3D11_RETRIEVE_DRIVER_DATA_AS(pass, Direct3D11ComputePass*); + Direct3D11CommandList* d3d11_cmd = D3D11_RETRIEVE_DRIVER_DATA_AS(pass->cmd, Direct3D11CommandList*); + Direct3D11BindResources(pass); + ID3D11DeviceContext_Dispatch(d3d11_cmd->context, groupcount_x, groupcount_y, groupcount_z); } diff --git a/Sources/Backends/D3D11/D3D11ComputePass.h b/Sources/Backends/D3D11/D3D11ComputePass.h index 40079e6..dd6c4d6 100644 --- a/Sources/Backends/D3D11/D3D11ComputePass.h +++ b/Sources/Backends/D3D11/D3D11ComputePass.h @@ -11,6 +11,13 @@ #include "D3D11.h" +typedef struct Direct3D11ComputePass +{ + bool should_bind_read_only_resources; + bool should_bind_write_resources; + bool should_bind_uniform_resources; +} Direct3D11ComputePass; + PulseComputePass Direct3D11CreateComputePass(PulseDevice device, PulseCommandList cmd); void Direct3D11DestroyComputePass(PulseDevice device, PulseComputePass pass); diff --git a/Sources/Backends/D3D11/D3D11ComputePipeline.c b/Sources/Backends/D3D11/D3D11ComputePipeline.c index 2ae44a4..7a1c71d 100644 --- a/Sources/Backends/D3D11/D3D11ComputePipeline.c +++ b/Sources/Backends/D3D11/D3D11ComputePipeline.c @@ -7,45 +7,55 @@ #include "D3D11.h" #include "D3D11Device.h" #include "D3D11ComputePipeline.h" - -#include 
#include -static HRESULT CompileComputeShader(PulseDevice device, const unsigned char* src, uint32_t src_size, const char* entry_point, ID3DBlob** blob) -{ - if(!src || !entry_point || !device || !blob) - return E_INVALIDARG; +#ifndef PULSE_D3D11_COMPILER_UNAVAILABLE + #include - Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(device, Direct3D11Device*); - *blob = PULSE_NULLPTR; - - UINT flags = D3DCOMPILE_ENABLE_STRICTNESS; - if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) - flags |= D3DCOMPILE_DEBUG; - - // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware - LPCSTR profile = (ID3D11Device_GetFeatureLevel(d3d11_device->device) >= D3D_FEATURE_LEVEL_11_0) ? "cs_5_0" : "cs_4_0"; - - ID3DBlob* shader_blob = PULSE_NULLPTR; - ID3DBlob* error_blob = PULSE_NULLPTR; - HRESULT hr = D3DCompile(src, src_size, PULSE_NULLPTR, PULSE_NULLPTR, PULSE_NULLPTR, entry_point, profile, flags, 0, &shader_blob, &error_blob); - - if(FAILED(hr)) + static HRESULT CompileComputeShader(PulseDevice device, const unsigned char* src, uint32_t src_size, const char* entry_point, ID3DBlob** blob) { - if(error_blob) + if(!src || !entry_point || !device || !blob) + return E_INVALIDARG; + + Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(device, Direct3D11Device*); + *blob = PULSE_NULLPTR; + + UINT flags = D3DCOMPILE_ENABLE_STRICTNESS; + if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend)) + flags |= D3DCOMPILE_DEBUG; + + // We generally prefer to use the higher CS shader profile when possible as CS 5.0 is better performance on 11-class hardware + LPCSTR profile = (ID3D11Device_GetFeatureLevel(d3d11_device->device) >= D3D_FEATURE_LEVEL_11_0) ? 
"cs_5_0" : "cs_4_0"; + + ID3DBlob* shader_blob = PULSE_NULLPTR; + ID3DBlob* error_blob = PULSE_NULLPTR; + HRESULT hr = D3DCompile(src, src_size, PULSE_NULLPTR, PULSE_NULLPTR, PULSE_NULLPTR, entry_point, profile, flags, 0, &shader_blob, &error_blob); + + if(FAILED(hr)) { - if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(device->backend)) - PulseLogInfoFmt(device->backend, "(D3D11) failed to compile HLSL shader. %s", ID3D10Blob_GetBufferPointer(error_blob)); - ID3D10Blob_Release(error_blob); + if(error_blob) + { + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(device->backend)) + PulseLogInfoFmt(device->backend, "(D3D11) failed to compile HLSL shader. %s", ID3D10Blob_GetBufferPointer(error_blob)); + ID3D10Blob_Release(error_blob); + } + if(shader_blob) + ID3D10Blob_Release(shader_blob); + return hr; } - if(shader_blob) - ID3D10Blob_Release(shader_blob); + + *blob = shader_blob; return hr; } - - *blob = shader_blob; - return hr; -} +#else + static HRESULT CompileComputeShader(PulseDevice device, const unsigned char* src, uint32_t src_size, const char* entry_point, ID3DBlob** blob) + { + if(PULSE_IS_BACKEND_LOW_LEVEL_DEBUG(device->backend)) + PulseLogInfo(device->backend, "(D3D11) on-the-fly shader compilation is not available"); + PulseSetInternalError(PULSE_ERROR_INITIALIZATION_FAILED); + return D3DERR_INVALIDCALL; + } +#endif PulseComputePipeline Direct3D11CreateComputePipeline(PulseDevice device, const PulseComputePipelineCreateInfo* info) { @@ -59,19 +69,25 @@ PulseComputePipeline Direct3D11CreateComputePipeline(PulseDevice device, const P pipeline->driver_data = d3d11_pipeline; - ID3D10Blob* blob = PULSE_NULLPTR; - if(info->format & PULSE_SHADER_FORMAT_HLSL_BIT) - CHECK_D3D11_RETVAL(device->backend, CompileComputeShader(device, info->code, info->code_size, info->entrypoint, &blob), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE); + void* bytecode_data = PULSE_NULLPTR; + size_t bytecode_size; - if(blob == PULSE_NULLPTR) + if(info->format & PULSE_SHADER_FORMAT_HLSL_BIT) { - 
D3DCreateBlob(info->code_size, &blob);
-		memcpy(ID3D10Blob_GetBufferPointer(blob), (void*)info->code, info->code_size);
+		ID3D10Blob* blob = PULSE_NULLPTR;
+		CHECK_D3D11_RETVAL(device->backend, CompileComputeShader(device, info->code, info->code_size, info->entrypoint, &blob), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE);
+		// Create the shader straight from the compiled blob: bytecode_data is still PULSE_NULLPTR at this point and must not be used as a memcpy destination
+		HRESULT creation_result = ID3D11Device_CreateComputeShader(d3d11_device->device, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), PULSE_NULLPTR, &d3d11_pipeline->shader);
+		ID3D10Blob_Release(blob); // released before the result is checked so the blob is freed on the failure path too
+		CHECK_D3D11_RETVAL(device->backend, creation_result, PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE);
 	}
+	else
+	{
+		// Any other format is handed to D3D11 as-is, as precompiled bytecode
+		bytecode_data = (void*)info->code;
+		bytecode_size = info->code_size;
+		CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateComputeShader(d3d11_device->device, bytecode_data, bytecode_size, PULSE_NULLPTR, &d3d11_pipeline->shader), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE);
+	}
 
-	CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateComputeShader(d3d11_device->device, ID3D10Blob_GetBufferPointer(blob), ID3D10Blob_GetBufferSize(blob), PULSE_NULLPTR, &d3d11_pipeline->shader), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE);;
-
-	ID3D10Blob_Release(blob);
-
 	if(PULSE_IS_BACKEND_HIGH_LEVEL_DEBUG(device->backend))
 		PulseLogInfoFmt(device->backend, "(D3D11) created new compute pipeline %p", pipeline);
diff --git a/Sources/Backends/D3D11/D3D11Fence.c b/Sources/Backends/D3D11/D3D11Fence.c
index 615da64..d2e17c5 100644
--- a/Sources/Backends/D3D11/D3D11Fence.c
+++ b/Sources/Backends/D3D11/D3D11Fence.c
@@ -5,27 +5,63 @@
 #include
 #include "../../PulseInternal.h"
 #include "D3D11.h"
+#include "D3D11Device.h"
 #include "D3D11Fence.h"
 #include "D3D11CommandList.h"
 
 PulseFence Direct3D11CreateFence(PulseDevice device)
 {
+	Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(device, Direct3D11Device*);
+
-	PulseFence fence = (PulseFence)calloc(1, sizeof(PulseFence));
+	PulseFence fence = (PulseFence)calloc(1, sizeof(*fence)); // size of the pointed-to handler: sizeof(PulseFence) is only the size of a pointer, too small for the fields written through `fence`
 	PULSE_CHECK_ALLOCATION_RETVAL(fence, PULSE_NULL_HANDLE);
+	Direct3D11Fence* d3d11_fence = (Direct3D11Fence*)calloc(1, sizeof(Direct3D11Fence));
+
PULSE_CHECK_ALLOCATION_RETVAL(d3d11_fence, PULSE_NULL_HANDLE);
+
+	D3D11_QUERY_DESC query_descriptor = { 0 }; // an empty `{}` initializer is not valid C before C23
+	query_descriptor.Query = D3D11_QUERY_EVENT; // becomes signaled when all prior work completes
+	query_descriptor.MiscFlags = 0;
+	CHECK_D3D11_RETVAL(device->backend, ID3D11Device_CreateQuery(d3d11_device->device, &query_descriptor, &d3d11_fence->query), PULSE_ERROR_INITIALIZATION_FAILED, PULSE_NULL_HANDLE);
+
+	fence->driver_data = d3d11_fence;
+
 	return fence;
 }
 
 void Direct3D11DestroyFence(PulseDevice device, PulseFence fence)
 {
+	PULSE_UNUSED(device);
+	Direct3D11Fence* d3d11_fence = D3D11_RETRIEVE_DRIVER_DATA_AS(fence, Direct3D11Fence*);
+	ID3D11Query_Release(d3d11_fence->query);
+	free(d3d11_fence);
 	free(fence);
 }
 
 bool Direct3D11IsFenceReady(PulseDevice device, PulseFence fence)
 {
-	return true;
+	Direct3D11Device* d3d11_device = D3D11_RETRIEVE_DRIVER_DATA_AS(device, Direct3D11Device*);
+	Direct3D11Fence* d3d11_fence = D3D11_RETRIEVE_DRIVER_DATA_AS(fence, Direct3D11Fence*);
+	BOOL done = FALSE;
+	HRESULT res = ID3D11DeviceContext_GetData(d3d11_device->context, (ID3D11Asynchronous*)d3d11_fence->query, &done, sizeof(done), 0); // no D3D11_ASYNC_GETDATA_DONOTFLUSH: this is polled in a loop by Direct3D11WaitForFences, which could otherwise never see the query complete
+	return res == S_OK && done == TRUE; // ready only when the query data is available and reports TRUE
 }
 
 bool Direct3D11WaitForFences(PulseDevice device, const PulseFence* fences, uint32_t fences_count, bool wait_for_all)
 {
+	if(fences_count == 0)
+		return true;
+	uint32_t fences_to_wait = fences_count;
+	while(fences_to_wait != 0)
+	{
+		fences_to_wait = fences_count; // recount from scratch on every poll so a ready fence is never subtracted twice
+		for(uint32_t i = 0; i < fences_count; i++)
+		{
+			if(Direct3D11IsFenceReady(device, fences[i]))
+				fences_to_wait--;
+		}
+		if(fences_to_wait == 0 || (!wait_for_all && fences_to_wait != fences_count))
+			return true; // every fence is ready, or one ready fence is enough
+		PulseSleep(1); // 1ms
+	}
 	return true;
 }
diff --git a/Sources/Backends/D3D11/D3D11Fence.h b/Sources/Backends/D3D11/D3D11Fence.h
index 384dba7..f1acb4c 100644
--- a/Sources/Backends/D3D11/D3D11Fence.h
+++ b/Sources/Backends/D3D11/D3D11Fence.h
@@ -14,7 +14,7 @@
 typedef struct Direct3D11Fence
 {
-	int dummy;
+	ID3D11Query* query;
} Direct3D11Fence; PulseFence Direct3D11CreateFence(PulseDevice device); diff --git a/Sources/Backends/Vulkan/VulkanCommandList.c b/Sources/Backends/Vulkan/VulkanCommandList.c index 10533d0..61c664e 100644 --- a/Sources/Backends/Vulkan/VulkanCommandList.c +++ b/Sources/Backends/Vulkan/VulkanCommandList.c @@ -121,7 +121,7 @@ bool VulkanSubmitCommandList(PulseDevice device, PulseCommandList cmd, PulseFenc switch(cmd->usage) { case PULSE_COMMAND_LIST_TRANSFER_ONLY: vulkan_queue = vulkan_device->queues[VULKAN_QUEUE_TRANSFER]; break; - case PULSE_COMMAND_LIST_GENERAL: // fallthrough + case PULSE_COMMAND_LIST_GENERAL: default: vulkan_queue = vulkan_device->queues[VULKAN_QUEUE_COMPUTE]; break; } @@ -140,11 +140,11 @@ bool VulkanSubmitCommandList(PulseDevice device, PulseCommandList cmd, PulseFenc { case VK_SUCCESS: return true; - case VK_ERROR_OUT_OF_HOST_MEMORY: PulseSetInternalError(PULSE_ERROR_CPU_ALLOCATION_FAILED); return false; - case VK_ERROR_OUT_OF_DEVICE_MEMORY: PulseSetInternalError(PULSE_ERROR_DEVICE_ALLOCATION_FAILED); return false; - case VK_ERROR_DEVICE_LOST: PulseSetInternalError(PULSE_ERROR_DEVICE_LOST); return false; + case VK_ERROR_OUT_OF_HOST_MEMORY: PulseSetInternalError(PULSE_ERROR_CPU_ALLOCATION_FAILED); break; + case VK_ERROR_OUT_OF_DEVICE_MEMORY: PulseSetInternalError(PULSE_ERROR_DEVICE_ALLOCATION_FAILED); break; + case VK_ERROR_DEVICE_LOST: PulseSetInternalError(PULSE_ERROR_DEVICE_LOST); break; - default: return false; + default: break; } return false; } diff --git a/xmake.lua b/xmake.lua index 749821e..6151208 100644 --- a/xmake.lua +++ b/xmake.lua @@ -36,7 +36,13 @@ local backends = { option = "d3d11", default = is_plat("windows", "msys", "mingw"), custom = function() - add_syslinks("d3d11", "d3dcompiler_47", "dxgi", "windowscodecs") + if is_plat("linux") then + add_sysincludedirs("/usr/include/dxvk") + add_syslinks("dxvk_d3d11", "dxvk_dxgi") + add_defines("PULSE_D3D11_COMPILER_UNAVAILABLE") + else + add_syslinks("d3d11", "d3dcompiler_47", 
"dxgi", "windowscodecs") + end end }, Software = {