diff --git a/include/vkcv/DescriptorWrites.hpp b/include/vkcv/DescriptorWrites.hpp index 7cc76c6960b536ff9d207d773e7c75d010e503d6..f28a6c91e189b13413ffefec0f05e5a0a358ee26 100644 --- a/include/vkcv/DescriptorWrites.hpp +++ b/include/vkcv/DescriptorWrites.hpp @@ -4,9 +4,12 @@ namespace vkcv { struct SampledImageDescriptorWrite { - inline SampledImageDescriptorWrite(uint32_t binding, ImageHandle image) : binding(binding), image(image) {}; + inline SampledImageDescriptorWrite(uint32_t binding, ImageHandle image, uint32_t mipLevel = 0, bool useGeneralLayout = false) + : binding(binding), image(image), mipLevel(mipLevel), useGeneralLayout(useGeneralLayout) {}; uint32_t binding; ImageHandle image; + uint32_t mipLevel; + bool useGeneralLayout; }; struct StorageImageDescriptorWrite { diff --git a/projects/bloom/resources/shaders/blur.comp b/projects/bloom/resources/shaders/blur.comp index 9fd2ccbf933abd34457a4685149dfae7c2f611a4..51834627abe54a9029a866fb558c41b811135c6c 100644 --- a/projects/bloom/resources/shaders/blur.comp +++ b/projects/bloom/resources/shaders/blur.comp @@ -1,37 +1,77 @@ #version 450 #extension GL_ARB_separate_shader_objects : enable -layout(set=0, binding=0) uniform texture2D inImage; +layout(set=0, binding=0) uniform texture2D inBlurImage; layout(set=0, binding=1) uniform sampler inImageSampler; -layout(set=0, binding=2, r11f_g11f_b10f) uniform writeonly image2D outImage; +layout(set=0, binding=2, r11f_g11f_b10f) uniform writeonly image2D outBlurImage; layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; void main() { - if(any(greaterThanEqual(gl_GlobalInvocationID.xy, imageSize(outImage)))){ + if(any(greaterThanEqual(gl_GlobalInvocationID.xy, imageSize(outBlurImage)))){ return; } - const int kernel_size = 10; - const float kernel_weight = (2 * kernel_size + 1) * (2 * kernel_size + 1); ivec2 pixel_coord = ivec2(gl_GlobalInvocationID.xy); - vec2 pixel_size = vec2(1.0f) / textureSize(sampler2D(inImage, inImageSampler), 0); + vec2 pixel_size = vec2(1.0f) / imageSize(outBlurImage); vec2 UV = pixel_coord.xy * pixel_size; + vec2 UV_offset = UV + 0.5f * pixel_size; - vec4 sampled_color = vec4(0.0f); + vec2 color_fetches[13] = { + // center neighbourhood (RED) + vec2(-1, 1), // LT + vec2(-1, -1), // LB + vec2( 1, -1), // RB + vec2( 1, 1), // RT - for(int i = -kernel_size; i <= kernel_size; i++) + vec2(-2, 2), // LT + vec2( 0, 2), // CT + vec2( 2, 2), // RT + + vec2(0 ,-2), // LC + vec2(0 , 0), // CC + vec2(2, 0), // CR + + vec2(-2, -2), // LB + vec2(0 , -2), // CB + vec2(2 , -2) // RB + }; + + float color_weights[13] = { + // 0.5f + 1.f/8.f, + 1.f/8.f, + 1.f/8.f, + 1.f/8.f, + + // 0.125f + 1.f/32.f, + 1.f/16.f, + 1.f/32.f, + + // 0.25f + 1.f/16.f, + 1.f/8.f, + 1.f/16.f, + + // 0.125f + 1.f/32.f, + 1.f/16.f, + 1.f/32.f + }; + + vec3 sampled_color = vec3(0.0f); + + for(uint i = 0; i < 13; i++) { - for(int j = -kernel_size; j <= kernel_size; j++) - { - vec2 sample_coord = UV + vec2(j, i) * pixel_size + 0.5f * pixel_size * sign(vec2(j, i)); - sampled_color.rgb += texture(sampler2D(inImage, inImageSampler), sample_coord).rgb; - } + vec2 color_fetch = UV_offset + color_fetches[i] * pixel_size; + vec3 color = texture(sampler2D(inBlurImage, inImageSampler), color_fetch).rgb; + color *= color_weights[i]; + sampled_color += color; } - sampled_color /= kernel_weight; - imageStore(outImage, pixel_coord, sampled_color); + imageStore(outBlurImage, pixel_coord, vec4(sampled_color, 1.f)); } \ No newline at end of file diff --git a/projects/bloom/resources/shaders/comp.spv b/projects/bloom/resources/shaders/comp.spv deleted file mode 100644 index e0112a50deceb3b818434636e194d4a2f169184b..0000000000000000000000000000000000000000 Binary files a/projects/bloom/resources/shaders/comp.spv and /dev/null differ diff --git a/projects/bloom/resources/shaders/compositeBloom.comp b/projects/bloom/resources/shaders/compositeBloom.comp index d01e758254de37d315d678fe32292260da7579db..5435a240dffa9c06331b99f78bf2972c3489e4a3 100644 --- a/projects/bloom/resources/shaders/compositeBloom.comp +++ b/projects/bloom/resources/shaders/compositeBloom.comp @@ -23,7 +23,7 @@ void main() vec3 blur_color = texture(sampler2D(blurImage, linearSampler), UV).rgb; vec3 main_color = imageLoad(colorBuffer, pixel_coord).rgb; - composite_color.rgb = mix(main_color, blur_color, 0.1f); + composite_color.rgb = mix(main_color, blur_color, 0.25f); imageStore(colorBuffer, pixel_coord, composite_color); } \ No newline at end of file diff --git a/projects/bloom/resources/shaders/upsample.comp b/projects/bloom/resources/shaders/upsample.comp new file mode 100644 index 0000000000000000000000000000000000000000..0ddeedb5b5af9e476dc19012fed6430544006c0e --- /dev/null +++ b/projects/bloom/resources/shaders/upsample.comp @@ -0,0 +1,45 @@ +#version 450 +#extension GL_ARB_separate_shader_objects : enable + +layout(set=0, binding=0) uniform texture2D inUpsampleImage; +layout(set=0, binding=1) uniform sampler inImageSampler; +layout(set=0, binding=2, r11f_g11f_b10f) uniform image2D outUpsampleImage; + +layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in; + +void main() +{ + if(any(greaterThanEqual(gl_GlobalInvocationID.xy, imageSize(outUpsampleImage)))){ + return; + } + + + ivec2 pixel_coord = ivec2(gl_GlobalInvocationID.xy); + vec2 pixel_size = vec2(1.0f) / imageSize(outUpsampleImage); + vec2 UV = pixel_coord.xy * pixel_size; + + const float gauss_kernel[3] = {1.f, 2.f, 1.f}; + const float gauss_weight = 16.f; + + vec3 sampled_color = vec3(0.f); + + for(int i = -1; i <= 1; i++) + { + for(int j = -1; j <= 1; j++) + { + vec2 sample_location = UV + vec2(j, i) * pixel_size; + vec3 color = texture(sampler2D(inUpsampleImage, inImageSampler), sample_location).rgb; + color *= gauss_kernel[j+1]; + color *= gauss_kernel[i+1]; + color /= gauss_weight; + + sampled_color += color; + } + } + + //vec3 prev_color = imageLoad(outUpsampleImage, pixel_coord).rgb; + //float bloomRimStrength = 0.75f; // adjust this to change strength of bloom + //sampled_color = mix(prev_color, sampled_color, bloomRimStrength); + + imageStore(outUpsampleImage, pixel_coord, vec4(sampled_color, 1.f)); +} \ No newline at end of file diff --git a/projects/bloom/src/main.cpp b/projects/bloom/src/main.cpp index 6a56b768ee4e3463f8ed609ffc576c2de1204b7b..d93cbc2824817d1b658ad77e8cdb56e4906ed267 100644 --- a/projects/bloom/src/main.cpp +++ b/projects/bloom/src/main.cpp @@ -223,8 +223,8 @@ int main(int argc, const char** argv) { } vkcv::ImageHandle depthBuffer = core.createImage(depthBufferFormat, windowWidth, windowHeight).getHandle(); - vkcv::ImageHandle colorBuffer = core.createImage(colorBufferFormat, windowWidth, windowHeight, 1, true, true).getHandle(); - vkcv::ImageHandle blurBuffer = core.createImage(colorBufferFormat, windowWidth, windowHeight, 1, true, false).getHandle(); + vkcv::ImageHandle colorBuffer = core.createImage(colorBufferFormat, windowWidth, windowHeight, 1, false, true, true).getHandle(); + vkcv::Image blurBuffer = core.createImage(colorBufferFormat, windowWidth, windowHeight, 1, true, true, false); vkcv::SamplerHandle linearSampler = core.createSampler(vkcv::SamplerFilterType::LINEAR, vkcv::SamplerFilterType::LINEAR, vkcv::SamplerMipmapMode::LINEAR, @@ -279,9 +279,31 @@ int main(int argc, const char** argv) { { blurProgram.addShader(shaderStage, path); }); - vkcv::DescriptorSetHandle blurDescriptorSet = core.createDescriptorSet(blurProgram.getReflectedDescriptors()[0]); + // create descriptor sets for each mip level + std::vector<vkcv::DescriptorSetHandle> blurDescriptorSets; + for(uint32_t mipLevel = 0; mipLevel < blurBuffer.getMipCount(); mipLevel++) + { + blurDescriptorSets.push_back(core.createDescriptorSet(blurProgram.getReflectedDescriptors()[0])); + } vkcv::PipelineHandle blurPipeline = core.createComputePipeline(blurProgram, - { core.getDescriptorSet(blurDescriptorSet).layout }); + { core.getDescriptorSet(blurDescriptorSets[0]).layout }); + + // upsample compute shader + vkcv::ShaderProgram upsampleProgram; + compiler.compile(vkcv::ShaderStage::COMPUTE, + "resources/shaders/upsample.comp", + [&](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) + { + upsampleProgram.addShader(shaderStage, path); + }); + // create descriptor sets for each mip level + std::vector<vkcv::DescriptorSetHandle> upsampleDescriptorSets; + for(uint32_t mipLevel = 0; mipLevel < blurBuffer.getMipCount(); mipLevel++) + { + upsampleDescriptorSets.push_back(core.createDescriptorSet(upsampleProgram.getReflectedDescriptors()[0])); + } + vkcv::PipelineHandle upsamplePipeline = core.createComputePipeline(upsampleProgram, + { core.getDescriptorSet(upsampleDescriptorSets[0]).layout }); // bloom composite shader vkcv::ShaderProgram compositeBloomProgram; @@ -338,7 +360,7 @@ int main(int argc, const char** argv) { if ((swapchainWidth != windowWidth) || ((swapchainHeight != windowHeight))) { depthBuffer = core.createImage(depthBufferFormat, swapchainWidth, swapchainHeight).getHandle(); colorBuffer = core.createImage(colorBufferFormat, swapchainWidth, swapchainHeight, 1, true, true).getHandle(); - blurBuffer = core.createImage(colorBufferFormat, swapchainWidth, swapchainHeight, 1, true, false).getHandle(); + //blurBuffer = core.createImage(colorBufferFormat, swapchainWidth, swapchainHeight, 1, true, false).getHandle(); windowWidth = swapchainWidth; windowHeight = swapchainHeight; @@ -348,18 +370,11 @@ int main(int argc, const char** argv) { auto deltatime = std::chrono::duration_cast<std::chrono::microseconds>(end - start); // update descriptor sets which use swapchain image - // blur - vkcv::DescriptorWrites blurDescriptorWrites; - blurDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, colorBuffer)}; - blurDescriptorWrites.samplerWrites = {vkcv::SamplerDescriptorWrite(1, linearSampler)}; - blurDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, blurBuffer) }; - core.writeDescriptorSet(blurDescriptorSet, blurDescriptorWrites); - // composite bloom vkcv::DescriptorWrites compositeBloomDescriptorWrites; - compositeBloomDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, blurBuffer)}; + compositeBloomDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, blurBuffer.getHandle())}; compositeBloomDescriptorWrites.samplerWrites = {vkcv::SamplerDescriptorWrite(1, linearSampler)}; - compositeBloomDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, colorBuffer)}; + compositeBloomDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, colorBuffer, 0)}; core.writeDescriptorSet(compositeBloomDescriptorSet, compositeBloomDescriptorWrites); // gamma correction @@ -431,26 +446,117 @@ int main(int argc, const char** argv) { drawcalls, renderTargets); - const uint32_t gammaCorrectionLocalGroupSize = 8; - const uint32_t gammaCorrectionDispatchCount[3] = { - static_cast<uint32_t>(glm::ceil(windowWidth / static_cast<float>(gammaCorrectionLocalGroupSize))), - static_cast<uint32_t>(glm::ceil(windowHeight / static_cast<float>(gammaCorrectionLocalGroupSize))), - 1 - }; - core.prepareImageForSampling(cmdStream, colorBuffer); - core.prepareImageForStorage(cmdStream, blurBuffer); + auto windowWidthByLocalGroup = static_cast<float>(windowWidth) / 8.0f; + auto windowHeightByLocalGroup = static_cast<float>(windowHeight) / 8.0f; + + uint32_t initialBlurDispatchCount[3] = { + static_cast<uint32_t>(glm::ceil(windowWidthByLocalGroup)), + static_cast<uint32_t>(glm::ceil(windowHeightByLocalGroup)), + 1 + }; // blur dispatch - core.recordComputeDispatchToCmdStream( - cmdStream, - blurPipeline, - gammaCorrectionDispatchCount, - {vkcv::DescriptorSetUsage(0, core.getDescriptorSet(blurDescriptorSet).vulkanHandle)}, + core.prepareImageForSampling(cmdStream, colorBuffer); + core.prepareImageForStorage(cmdStream, blurBuffer.getHandle()); + // blur dispatch of original color attachment + vkcv::DescriptorWrites firstBlurDescriptorWrites; + firstBlurDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, colorBuffer)}; + firstBlurDescriptorWrites.samplerWrites = {vkcv::SamplerDescriptorWrite(1, linearSampler)}; + firstBlurDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, blurBuffer.getHandle(), 0) }; + core.writeDescriptorSet(blurDescriptorSets[0], firstBlurDescriptorWrites); + core.recordComputeDispatchToCmdStream( + cmdStream, + blurPipeline, + initialBlurDispatchCount, + {vkcv::DescriptorSetUsage(0, core.getDescriptorSet(blurDescriptorSets[0]).vulkanHandle)}, vkcv::PushConstantData(nullptr, 0)); + // blur dispatches of blur buffer's mip maps + for(uint32_t mipLevel = 1; mipLevel < blurBuffer.getMipCount(); mipLevel++) + { + // mip descriptor writes + vkcv::DescriptorWrites mipBlurDescriptorWrites; + mipBlurDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, blurBuffer.getHandle(), mipLevel - 1, true)}; + mipBlurDescriptorWrites.samplerWrites = {vkcv::SamplerDescriptorWrite(1, linearSampler)}; + mipBlurDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, blurBuffer.getHandle(), mipLevel) }; + core.writeDescriptorSet(blurDescriptorSets[mipLevel], mipBlurDescriptorWrites); + + // mip dispatch calculation + windowWidthByLocalGroup /= 2.0f; + windowHeightByLocalGroup /= 2.0f; + + uint32_t mipBlurDispatchCount[3] = { + static_cast<uint32_t>(glm::ceil(windowWidthByLocalGroup)), + static_cast<uint32_t>(glm::ceil(windowHeightByLocalGroup)), + 1 + }; + + if(mipBlurDispatchCount[0] == 0) + mipBlurDispatchCount[0] = 1; + if(mipBlurDispatchCount[1] == 0) + mipBlurDispatchCount[1] = 1; + + // mip blur dispatch + core.recordComputeDispatchToCmdStream( + cmdStream, + blurPipeline, + mipBlurDispatchCount, + {vkcv::DescriptorSetUsage(0, core.getDescriptorSet(blurDescriptorSets[mipLevel]).vulkanHandle)}, + vkcv::PushConstantData(nullptr, 0)); + + // image barrier between mips + core.recordImageMemoryBarrier(cmdStream, blurBuffer.getHandle()); + } + + // upsample dispatch + + uint32_t upsampleMipLevels = std::min(blurBuffer.getMipCount(), static_cast<uint32_t>(5)); + + // upsample dispatch for each mip map + for(uint32_t mipLevel = upsampleMipLevels; mipLevel > 0; mipLevel--) + { + // mip descriptor writes + vkcv::DescriptorWrites mipUpsampleDescriptorWrites; + mipUpsampleDescriptorWrites.sampledImageWrites = {vkcv::SampledImageDescriptorWrite(0, blurBuffer.getHandle(), mipLevel, true)}; + mipUpsampleDescriptorWrites.samplerWrites = {vkcv::SamplerDescriptorWrite(1, linearSampler)}; + mipUpsampleDescriptorWrites.storageImageWrites = {vkcv::StorageImageDescriptorWrite(2, blurBuffer.getHandle(), mipLevel - 1) }; + core.writeDescriptorSet(upsampleDescriptorSets[mipLevel], mipUpsampleDescriptorWrites); + + auto mipDivisor = glm::pow(2.0f, static_cast<float>(mipLevel) - 1.0f); + + auto upsampleDispatchWidth = static_cast<float>(windowWidth) / mipDivisor; + auto upsampleDispatchHeight = static_cast<float>(windowHeight) / mipDivisor; + + upsampleDispatchWidth /= 8.0f; + upsampleDispatchHeight /= 8.0f; + + const uint32_t upsampleDispatchCount[3] = { + static_cast<uint32_t>(glm::ceil(upsampleDispatchWidth)), + static_cast<uint32_t>(glm::ceil(upsampleDispatchHeight)), + 1 + }; + + core.recordComputeDispatchToCmdStream( + cmdStream, + upsamplePipeline, + upsampleDispatchCount, + {vkcv::DescriptorSetUsage(0, core.getDescriptorSet(upsampleDescriptorSets[mipLevel]).vulkanHandle)}, + vkcv::PushConstantData(nullptr, 0) + ); + // image barrier between mips + core.recordImageMemoryBarrier(cmdStream, blurBuffer.getHandle()); + } + core.prepareImageForStorage(cmdStream, colorBuffer); - core.prepareImageForSampling(cmdStream, blurBuffer); + core.prepareImageForSampling(cmdStream, blurBuffer.getHandle()); + + const uint32_t gammaCorrectionLocalGroupSize = 8; + const uint32_t gammaCorrectionDispatchCount[3] = { + static_cast<uint32_t>(glm::ceil(static_cast<float>(windowWidth) / static_cast<float>(gammaCorrectionLocalGroupSize))), + static_cast<uint32_t>(glm::ceil(static_cast<float>(windowHeight) / static_cast<float>(gammaCorrectionLocalGroupSize))), + 1 + }; // bloom composite dispatch core.recordComputeDispatchToCmdStream(cmdStream, diff --git a/src/vkcv/DescriptorManager.cpp b/src/vkcv/DescriptorManager.cpp index 265532232304106f7271fdd445d52074b7c011a1..8e565a766cd407dc33c0291d3d07b01d6d3066e7 100644 --- a/src/vkcv/DescriptorManager.cpp +++ b/src/vkcv/DescriptorManager.cpp @@ -107,10 +107,11 @@ namespace vkcv std::vector<WriteDescriptorSetInfo> writeInfos; for (const auto& write : writes.sampledImageWrites) { + vk::ImageLayout layout = write.useGeneralLayout ? vk::ImageLayout::eGeneral : vk::ImageLayout::eShaderReadOnlyOptimal; const vk::DescriptorImageInfo imageInfo( nullptr, - imageManager.getVulkanImageView(write.image), - vk::ImageLayout::eShaderReadOnlyOptimal + imageManager.getVulkanImageView(write.image, write.mipLevel), + layout ); imageInfos.push_back(imageInfo);