Skip to content
Snippets Groups Projects
Commit ab528a93 authored by Tobias Frisch's avatar Tobias Frisch
Browse files

Merge branch '87-mesh-shader-implementation' into 'develop'

Resolve "Mesh Shader Implementation"

Closes #87

See merge request !74
parents c9ac0291 38e22ec2
No related branches found
No related tags found
1 merge request!74Resolve "Mesh Shader Implementation"
Pipeline #26788 passed
Showing
with 963 additions and 11 deletions
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <vk_mem_alloc.hpp> #include <vk_mem_alloc.hpp>
#include "QueueManager.hpp" #include "QueueManager.hpp"
#include "DrawcallRecording.hpp"
namespace vkcv namespace vkcv
{ {
......
...@@ -249,13 +249,21 @@ namespace vkcv ...@@ -249,13 +249,21 @@ namespace vkcv
bool beginFrame(uint32_t& width, uint32_t& height); bool beginFrame(uint32_t& width, uint32_t& height);
void recordDrawcallsToCmdStream( void recordDrawcallsToCmdStream(
const CommandStreamHandle cmdStreamHandle, const CommandStreamHandle cmdStreamHandle,
const PassHandle renderpassHandle, const PassHandle renderpassHandle,
const PipelineHandle pipelineHandle, const PipelineHandle pipelineHandle,
const PushConstants &pushConstants, const PushConstants &pushConstants,
const std::vector<DrawcallInfo> &drawcalls, const std::vector<DrawcallInfo> &drawcalls,
const std::vector<ImageHandle> &renderTargets); const std::vector<ImageHandle> &renderTargets);
void recordMeshShaderDrawcalls(
const CommandStreamHandle cmdStreamHandle,
const PassHandle renderpassHandle,
const PipelineHandle pipelineHandle,
const PushConstants& pushConstantData,
const std::vector<MeshShaderDrawcall>& drawcalls,
const std::vector<ImageHandle>& renderTargets);
void recordComputeDispatchToCmdStream( void recordComputeDispatchToCmdStream(
CommandStreamHandle cmdStream, CommandStreamHandle cmdStream,
PipelineHandle computePipeline, PipelineHandle computePipeline,
......
...@@ -13,6 +13,11 @@ namespace vkcv { ...@@ -13,6 +13,11 @@ namespace vkcv {
vk::Buffer buffer; vk::Buffer buffer;
}; };
enum class IndexBitCount{
Bit16,
Bit32
};
struct DescriptorSetUsage { struct DescriptorSetUsage {
inline DescriptorSetUsage(uint32_t setLocation, vk::DescriptorSet vulkanHandle, inline DescriptorSetUsage(uint32_t setLocation, vk::DescriptorSet vulkanHandle,
const std::vector<uint32_t>& dynamicOffsets = {}) noexcept const std::vector<uint32_t>& dynamicOffsets = {}) noexcept
...@@ -24,12 +29,14 @@ namespace vkcv { ...@@ -24,12 +29,14 @@ namespace vkcv {
}; };
struct Mesh { struct Mesh {
inline Mesh(std::vector<VertexBufferBinding> vertexBufferBindings, vk::Buffer indexBuffer, size_t indexCount) noexcept inline Mesh(std::vector<VertexBufferBinding> vertexBufferBindings, vk::Buffer indexBuffer, size_t indexCount, IndexBitCount indexBitCount = IndexBitCount::Bit16) noexcept
: vertexBufferBindings(vertexBufferBindings), indexBuffer(indexBuffer), indexCount(indexCount){} : vertexBufferBindings(vertexBufferBindings), indexBuffer(indexBuffer), indexCount(indexCount), indexBitCount(indexBitCount){}
std::vector<VertexBufferBinding> vertexBufferBindings; std::vector<VertexBufferBinding> vertexBufferBindings;
vk::Buffer indexBuffer; vk::Buffer indexBuffer;
size_t indexCount; size_t indexCount;
IndexBitCount indexBitCount;
}; };
struct DrawcallInfo { struct DrawcallInfo {
...@@ -48,4 +55,21 @@ namespace vkcv { ...@@ -48,4 +55,21 @@ namespace vkcv {
const PushConstants &pushConstants, const PushConstants &pushConstants,
const size_t drawcallIndex); const size_t drawcallIndex);
} void InitMeshShaderDrawFunctions(vk::Device device);
\ No newline at end of file
struct MeshShaderDrawcall {
inline MeshShaderDrawcall(const std::vector<DescriptorSetUsage> descriptorSets, uint32_t taskCount)
: descriptorSets(descriptorSets), taskCount(taskCount) {}
std::vector<DescriptorSetUsage> descriptorSets;
uint32_t taskCount;
};
void recordMeshShaderDrawcall(
vk::CommandBuffer cmdBuffer,
vk::PipelineLayout pipelineLayout,
const PushConstants& pushConstantData,
const uint32_t pushConstantOffset,
const MeshShaderDrawcall& drawcall,
const uint32_t firstTask);
}
...@@ -9,7 +9,11 @@ namespace vkcv { ...@@ -9,7 +9,11 @@ namespace vkcv {
TESS_EVAL, TESS_EVAL,
GEOMETRY, GEOMETRY,
FRAGMENT, FRAGMENT,
COMPUTE COMPUTE,
TASK,
MESH
}; };
} }
...@@ -4,6 +4,7 @@ add_subdirectory(asset_loader) ...@@ -4,6 +4,7 @@ add_subdirectory(asset_loader)
add_subdirectory(camera) add_subdirectory(camera)
add_subdirectory(gui) add_subdirectory(gui)
add_subdirectory(material) add_subdirectory(material)
add_subdirectory(meshlet)
add_subdirectory(scene) add_subdirectory(scene)
add_subdirectory(shader_compiler) add_subdirectory(shader_compiler)
add_subdirectory(testing) add_subdirectory(testing)
......
# Build script of the vkcv_meshlet module: a static library that bundles the
# meshlet generation code together with the Tipsify and Forsyth index reordering.
cmake_minimum_required(VERSION 3.16)
project(vkcv_meshlet)
# setting c++ standard for the module
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# directories holding the private sources and the public headers of the module
set(vkcv_meshlet_source ${PROJECT_SOURCE_DIR}/src)
set(vkcv_meshlet_include ${PROJECT_SOURCE_DIR}/include)
# Add source and header files to the module
set(vkcv_meshlet_sources
${vkcv_meshlet_include}/vkcv/meshlet/Meshlet.hpp
${vkcv_meshlet_source}/vkcv/meshlet/Meshlet.cpp
${vkcv_meshlet_include}/vkcv/meshlet/Tipsify.hpp
${vkcv_meshlet_source}/vkcv/meshlet/Tipsify.cpp
${vkcv_meshlet_include}/vkcv/meshlet/Forsyth.hpp
${vkcv_meshlet_source}/vkcv/meshlet/Forsyth.cpp)
# adding source files to the module
add_library(vkcv_meshlet STATIC ${vkcv_meshlet_sources})
# link the required libraries to the module
# NOTE(review): vkcv is linked here and again in the last call of this file - one of the two looks redundant
target_link_libraries(vkcv_meshlet vkcv ${vkcv_libraries})
# including headers of dependencies and the VkCV framework
target_include_directories(vkcv_meshlet SYSTEM BEFORE PRIVATE ${vkcv_include} ${vkcv_includes} ${vkcv_asset_loader_include} ${vkcv_camera_include})
# add the own include directory for public headers
target_include_directories(vkcv_meshlet BEFORE PUBLIC ${vkcv_meshlet_include})
# linking with libraries from all dependencies and the VkCV framework
target_link_libraries(vkcv_meshlet vkcv vkcv_asset_loader vkcv_camera)
#pragma once
#include "Meshlet.hpp"
namespace vkcv::meshlet
{
/**
* Reorders the index buffer, simulating a LRU cache, so that vertices are grouped together in close triangle patches
* @param idxBuf current IndexBuffer
* @param vertexCount of the mesh
* @return new reordered index buffer to replace the input index buffer
* References:
* https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html
* https://www.martin.st/thesis/efficient_triangle_reordering.pdf
* https://github.com/vivkin/forsyth/blob/master/forsyth.h
*/
VertexCacheReorderResult forsythReorder(const std::vector<uint32_t> &idxBuf, const size_t vertexCount);
}
#pragma once
#include <vector>
#include <map>
#include <glm/glm.hpp>
#include <vkcv/asset/asset_loader.hpp>
namespace vkcv::meshlet {
/**
 * Vertex as it is stored in the meshlet vertex list: a position and a normal,
 * each followed by one unused float.
 * NOTE(review): the padding floats presumably extend every vec3 to 16 bytes so
 * the struct matches the layout of a GPU storage buffer - confirm against the shaders.
 */
struct Vertex {
// position of the vertex
glm::vec3 position;
// unused filler after the position
float padding0;
// normal of the vertex
glm::vec3 normal;
// unused filler after the normal
float padding1;
};
/**
 * Describes one meshlet as a range inside the vertex list and the local index list
 * of a MeshShaderModelData, plus a bounding sphere around its vertices.
 */
struct Meshlet {
// position of the meshlet's first vertex in MeshShaderModelData::vertices
uint32_t vertexOffset;
// number of vertices that belong to the meshlet
uint32_t vertexCount;
// position of the meshlet's first index in MeshShaderModelData::localIndices
uint32_t indexOffset;
// number of indices (three per triangle) that belong to the meshlet
uint32_t indexCount;
// average of the positions of all vertices of the meshlet, used as centre of the bounding sphere
glm::vec3 meanPosition;
// largest distance between meanPosition and any vertex of the meshlet
float boundingSphereRadius;
};
/**
 * Result of a vertex cache reordering pass (see forsythReorder and tipsifyMesh).
 */
struct VertexCacheReorderResult {
    /// the reordered index buffer that replaces the input index buffer
    std::vector<uint32_t> indexBuffer;
    /// positions in the reordered index buffer at which the reordering had to
    /// jump to an unrelated triangle, i.e. where a spatial break occurs
    std::vector<uint32_t> skippedIndices;

    /**
     * Stores copies of both lists in the result.
     * @param indexBuffer new indexBuffer
     * @param skippedIndices indices that have a spatial break
     */
    VertexCacheReorderResult(const std::vector<uint32_t> indexBuffer, const std::vector<uint32_t> skippedIndices)
        : indexBuffer(indexBuffer)
        , skippedIndices(skippedIndices)
    {
    }
};
/**
 * Geometry of a model split into meshlets, as produced by createMeshShaderModelData.
 */
struct MeshShaderModelData {
// vertices of all meshlets, stored meshlet after meshlet
std::vector<Vertex> vertices;
// triangle indices of all meshlets; every index is relative to the first vertex of its meshlet
std::vector<uint32_t> localIndices;
// one entry per meshlet, referencing ranges of vertices and localIndices
std::vector<Meshlet> meshlets;
};
/**
 * Extracts position and normal of every vertex from raw vertex data.
 * An attribute stride of 0 is treated as tightly packed glm::vec3 values.
 * @param vertexData raw bytes of the vertex buffer
 * @param vertexCount number of vertices to extract
 * @param positionAttribute attribute describing offset and stride of the positions
 * @param normalAttribute attribute describing offset and stride of the normals
 * @return one Vertex per extracted vertex
 */
std::vector<Vertex> convertToVertices(
const std::vector<uint8_t>& vertexData,
const uint64_t vertexCount,
const vkcv::asset::VertexAttribute& positionAttribute,
const vkcv::asset::VertexAttribute& normalAttribute);
/**
 * Splits an indexed triangle list into meshlets with a bounded vertex and index count.
 * @param inVertices vertices of the mesh
 * @param inIndices triangle list indexing into inVertices
 * @param deadEndIndices positions in inIndices at which the current meshlet is closed,
 *        e.g. the skippedIndices of a VertexCacheReorderResult
 * @return vertices, meshlet-local indices and meshlet descriptions
 */
MeshShaderModelData createMeshShaderModelData(
const std::vector<Vertex>& inVertices,
const std::vector<uint32_t>& inIndices,
const std::vector<uint32_t>& deadEndIndices = {});
/**
 * Converts raw 16 bit or 32 bit index data of the asset loader into 32 bit indices.
 * Any other index type is reported as an error and yields an empty list.
 * @param indexData raw bytes of the index buffer
 * @param indexType type of the indices stored in indexData
 * @return the indices widened to 32 bit
 */
std::vector<uint32_t> assetLoaderIndicesTo32BitIndices(
const std::vector<uint8_t>& indexData,
vkcv::asset::IndexType indexType);
}
\ No newline at end of file
#pragma once
#include "Meshlet.hpp"
#include <algorithm>
#include <iostream>
namespace vkcv::meshlet {
/**
* Reorders the index buffer so that triangles sharing a vertex end up as close to each other as possible
* @param indexBuffer32Bit current IndexBuffer
* @param vertexCount of the mesh
* @param cacheSize of the priority cache <br>
* Recommended: 20. Keep the value between 5 and 50 <br>
* low: more random and patchy<br>
* high: closer vertices have higher chance -> leads to sinuous lines
* @return new IndexBuffer that replaces the input IndexBuffer, and the indices that are skipped
*
* https://gfx.cs.princeton.edu/pubs/Sander_2007_%3ETR/tipsy.pdf
* https://www.martin.st/thesis/efficient_triangle_reordering.pdf
*/
VertexCacheReorderResult tipsifyMesh(const std::vector<uint32_t> &indexBuffer32Bit,
const int vertexCount, const unsigned int cacheSize = 20);
}
\ No newline at end of file
#include "vkcv/meshlet/Forsyth.hpp"
#include <vkcv/Logger.hpp>
#include <array>
#include <cmath>
namespace vkcv::meshlet
{
/*
* CACHE AND VALENCE
* SIZE AND SCORE CONSTANTS
* CHANGE AS NEEDED
*/
// set these to adjust performance and result quality
const size_t VERTEX_CACHE_SIZE = 8;
const size_t CACHE_FUNCTION_LENGTH = 32;
// score function constants
const float CACHE_DECAY_POWER = 1.5f;
const float LAST_TRI_SCORE = 0.75f;
const float VALENCE_BOOST_SCALE = 2.0f;
const float VALENCE_BOOST_POWER = 0.5f;
// sizes for precalculated tables
// make sure that CACHE_SCORE_TABLE_SIZE is always >= VERTEX_CACHE_SIZE
const size_t CACHE_SCORE_TABLE_SIZE = 32;
const size_t VALENCE_SCORE_TABLE_SIZE = 32;
// precalculated tables
std::array<float, CACHE_SCORE_TABLE_SIZE> cachePositionScore = {};
std::array<float, VALENCE_SCORE_TABLE_SIZE> valenceScore = {};
/**
 * Fills the cache position score table and the valence score table.
 * The three most recent cache positions share the fixed LAST_TRI_SCORE, all later
 * positions decay with CACHE_DECAY_POWER. The valence score grows the fewer
 * triangles a vertex has left.
 */
void initScoreTables()
{
    // the scaler does not depend on the table slot, so it is computed once
    const float scaler = 1.0f / static_cast<float>(CACHE_FUNCTION_LENGTH - 3);

    for (size_t slot = 0; slot < CACHE_SCORE_TABLE_SIZE; ++slot)
    {
        if (slot < 3)
        {
            // vertices of the most recently emitted triangle
            cachePositionScore[slot] = LAST_TRI_SCORE;
            continue;
        }

        const float linearFalloff = 1.0f - (slot - 3) * scaler;
        cachePositionScore[slot] = std::pow(linearFalloff, CACHE_DECAY_POWER);
    }

    for (size_t valence = 0; valence < VALENCE_SCORE_TABLE_SIZE; ++valence)
    {
        // the entry for valence 0 is never read: findVertexScore returns early for it
        const float valenceBoost = std::pow(valence, -VALENCE_BOOST_POWER);
        valenceScore[valence] = VALENCE_BOOST_SCALE * valenceBoost;
    }
}
/**
* Return the vertex' score, depending on its current active triangle count and cache position
* Add a valence boost to score, if active triangles are below VALENCE_SCORE_TABLE_SIZE
* @param numActiveTris the active triangles on this vertex
* @param cachePos the vertex' position in the cache
* @return vertex' score
*/
float findVertexScore(uint32_t numActiveTris, int32_t cachePos)
{
if(numActiveTris == 0)
return 0.0f;
float score = 0.0f;
if (cachePos >= 0)
score = cachePositionScore[cachePos];
if (numActiveTris < VALENCE_SCORE_TABLE_SIZE)
score += valenceScore[numActiveTris];
return score;
}
/**
 * Reorders the triangles of an index buffer so that vertices are reused while they
 * are still inside a simulated LRU vertex cache (Forsyth reordering).
 *
 * Every vertex gets a score from its cache position and its number of remaining
 * triangles, every triangle gets the sum of its vertices' scores. The triangle with
 * the best score is emitted, its vertices are moved to the front of the cache and
 * the scores of all cached vertices are updated. If no cached vertex has a triangle
 * left, the triangle list is scanned linearly and the jump is recorded as skipped index.
 *
 * @param idxBuf current index buffer (triangle list)
 * @param vertexCount number of vertices of the mesh, every index has to be smaller
 * @return the reordered index buffer and the indices at which a jump happened;
 *         an empty result if the mesh is not supported (a vertex is used by more than
 *         255 triangles, or an index is out of range)
 */
VertexCacheReorderResult forsythReorder(const std::vector<uint32_t> &idxBuf, const size_t vertexCount)
{
    std::vector<uint32_t> skippedIndices;

    initScoreTables();

    // get the total triangle count from the index buffer
    const size_t triangleCount = idxBuf.size() / 3;

    // per-vertex active triangle count
    std::vector<uint8_t> numActiveTris(vertexCount, 0);

    // iterate over indices, count total occurrences of each vertex
    for (const auto index : idxBuf)
    {
        // every per-vertex array below is indexed by the index values,
        // so an index outside of the vertex range must be rejected up front
        if (index >= vertexCount)
        {
            vkcv_log(LogLevel::ERROR, "Unsupported mesh.");
            vkcv_log(LogLevel::ERROR, "Index references a vertex outside of the vertex range.");
            return VertexCacheReorderResult({}, {});
        }

        if (numActiveTris[index] == UINT8_MAX)
        {
            vkcv_log(LogLevel::ERROR, "Unsupported mesh.");
            vkcv_log(LogLevel::ERROR, "Vertex shared by too many triangles.");
            return VertexCacheReorderResult({}, {});
        }

        numActiveTris[index]++;
    }

    // allocate remaining vectors
    /**
     * offsets: contains the vertices' offset into the triangleIndices vector
     * Offset itself is the sum of triangles required by the previous vertices
     *
     * lastScore: the vertices' most recent calculated score
     *
     * cacheTag: the vertices' most recent cache position, -1 if not cached
     *
     * triangleAdded: boolean flags to denote whether a triangle has been processed or not
     *
     * triangleScore: total score of the three vertices making up the triangle
     *
     * triangleIndices: indices for the triangles
     */
    std::vector<uint32_t> offsets(vertexCount, 0);
    std::vector<float> lastScore(vertexCount, 0.0f);
    std::vector<int8_t> cacheTag(vertexCount, -1);

    std::vector<bool> triangleAdded(triangleCount, false);
    std::vector<float> triangleScore(triangleCount, 0.0f);

    std::vector<int32_t> triangleIndices(idxBuf.size(), 0);

    // sum the number of active triangles for all previous vertices
    // null the number of active triangles afterwards for recalculation in second loop
    uint32_t sum = 0;
    for (size_t i = 0; i < vertexCount; i++)
    {
        offsets[i] = sum;
        sum += numActiveTris[i];
        numActiveTris[i] = 0;
    }

    // create the triangle indices, using the newly calculated offsets, and increment numActiveTris
    // every vertex should be referenced by a triangle index now
    for (size_t i = 0; i < triangleCount; i++)
    {
        for (size_t j = 0; j < 3; j++)
        {
            uint32_t v = idxBuf[3 * i + j];
            triangleIndices[offsets[v] + numActiveTris[v]] = static_cast<int32_t>(i);
            numActiveTris[v]++;
        }
    }

    // calculate and initialize the triangle score, by summing the vertices' score
    for (size_t i = 0; i < vertexCount; i++)
    {
        lastScore[i] = findVertexScore(numActiveTris[i], static_cast<int32_t>(cacheTag[i]));

        for (size_t j = 0; j < numActiveTris[i]; j++)
        {
            triangleScore[triangleIndices[offsets[i] + j]] += lastScore[i];
        }
    }

    // find best triangle to start reordering with
    int32_t bestTriangle = -1;
    float bestScore = -1.0f;
    for (size_t i = 0; i < triangleCount; i++)
    {
        if (triangleScore[i] > bestScore)
        {
            bestScore = triangleScore[i];
            bestTriangle = static_cast<int32_t>(i);
        }
    }

    // allocate output triangles
    std::vector<int32_t> outTriangles(triangleCount, 0);
    uint32_t outPos = 0;

    // initialize cache (with -1)
    // the three extra slots take the vertices that get pushed out while a triangle is inserted
    std::array<int32_t, VERTEX_CACHE_SIZE + 3> cache = {};
    for (auto &element : cache)
    {
        element = -1;
    }

    uint32_t scanPos = 0;

    // begin reordering routine
    // output the currently best triangle, as long as there are triangles left to output
    while (bestTriangle >= 0)
    {
        // mark best triangle as added
        triangleAdded[bestTriangle] = true;
        // output this triangle
        outTriangles[outPos++] = bestTriangle;

        // push best triangle's vertices into the cache
        for (size_t i = 0; i < 3; i++)
        {
            uint32_t v = idxBuf[3 * bestTriangle + i];

            // get vertex' cache position, if its -1, set its position to the end
            int8_t endPos = cacheTag[v];
            if (endPos < 0)
                endPos = static_cast<int8_t>(VERTEX_CACHE_SIZE + i);

            // shift vertices' cache entries forward by one
            for (int8_t j = endPos; j > i; j--)
            {
                cache[j] = cache[j - 1];

                // if cache slot is valid vertex,
                // update the vertex cache tag accordingly
                if (cache[j] >= 0)
                    cacheTag[cache[j]]++;
            }

            // insert current vertex into its new target slot
            cache[i] = static_cast<int32_t>(v);
            cacheTag[v] = static_cast<int8_t>(i);

            // find current triangle in the list of active triangles
            // remove it by moving the last triangle into the slot the current triangle is holding.
            for (size_t j = 0; j < numActiveTris[v]; j++)
            {
                if (triangleIndices[offsets[v] + j] == bestTriangle)
                {
                    triangleIndices[offsets[v] + j] = triangleIndices[offsets[v] + numActiveTris[v] - 1];
                    break;
                }
            }

            // shorten the list
            numActiveTris[v]--;
        }

        // update scores of all triangles in cache
        for (size_t i = 0; i < cache.size(); i++)
        {
            int32_t v = cache[i];
            if (v < 0)
                break;

            // this vertex has been pushed outside of the actual cache
            if (i >= VERTEX_CACHE_SIZE)
            {
                cacheTag[v] = -1;
                cache[i] = -1;
            }

            float newScore = findVertexScore(numActiveTris[v], cacheTag[v]);
            float diff = newScore - lastScore[v];

            for (size_t j = 0; j < numActiveTris[v]; j++)
            {
                triangleScore[triangleIndices[offsets[v] + j]] += diff;
            }

            lastScore[v] = newScore;
        }

        // find best triangle reference by vertices in cache
        bestTriangle = -1;
        bestScore = -1.0f;
        for (size_t i = 0; i < VERTEX_CACHE_SIZE; i++)
        {
            if (cache[i] < 0)
                break;

            int32_t v = cache[i];
            for (size_t j = 0; j < numActiveTris[v]; j++)
            {
                int32_t t = triangleIndices[offsets[v] + j];
                if (triangleScore[t] > bestScore)
                {
                    bestTriangle = t;
                    bestScore = triangleScore[t];
                }
            }
        }

        // if no triangle was found at all, continue scanning whole list of triangles
        if (bestTriangle < 0)
        {
            for (; scanPos < triangleCount; scanPos++)
            {
                if (!triangleAdded[scanPos])
                {
                    bestTriangle = scanPos;

                    // the next triangle is unrelated to the cache content,
                    // remember the output position at which this jump happens
                    skippedIndices.push_back(3 * outPos);

                    break;
                }
            }
        }
    }

    // convert triangle index array into full triangle list
    std::vector<uint32_t> outIndices(idxBuf.size(), 0);
    outPos = 0;
    for (size_t i = 0; i < triangleCount; i++)
    {
        int32_t t = outTriangles[i];
        for (size_t j = 0; j < 3; j++)
        {
            int32_t v = idxBuf[3 * t + j];
            outIndices[outPos++] = static_cast<uint32_t>(v);
        }
    }

    return VertexCacheReorderResult(outIndices, skippedIndices);
}
}
\ No newline at end of file
#include "vkcv/meshlet/Meshlet.hpp"
#include <cassert>
#include <cstring>
#include <iostream>
#include <vkcv/Logger.hpp>
namespace vkcv::meshlet {
/**
 * Extracts position and normal of every vertex from raw vertex data.
 *
 * Both attributes are read as three consecutive floats. An attribute stride of 0
 * is treated as tightly packed glm::vec3 values. The padding members of the
 * resulting vertices are set to zero.
 *
 * @param vertexData raw bytes of the vertex buffer
 * @param vertexCount number of vertices to extract
 * @param positionAttribute attribute describing offset and stride of the positions
 * @param normalAttribute attribute describing offset and stride of the normals
 * @return one Vertex per extracted vertex; extraction stops early (with an error
 *         log) if vertexData is too small for the requested vertex count
 */
std::vector<vkcv::meshlet::Vertex> convertToVertices(
        const std::vector<uint8_t>& vertexData,
        const uint64_t vertexCount,
        const vkcv::asset::VertexAttribute& positionAttribute,
        const vkcv::asset::VertexAttribute& normalAttribute) {

    assert(positionAttribute.type == vkcv::asset::PrimitiveType::POSITION);
    assert(normalAttribute.type == vkcv::asset::PrimitiveType::NORMAL);

    std::vector<vkcv::meshlet::Vertex> vertices;
    vertices.reserve(vertexCount);

    const size_t positionStepSize = positionAttribute.stride == 0 ? sizeof(glm::vec3) : positionAttribute.stride;
    const size_t normalStepSize = normalAttribute.stride == 0 ? sizeof(glm::vec3) : normalAttribute.stride;

    for (uint64_t i = 0; i < vertexCount; i++) {
        const size_t positionOffset = positionAttribute.offset + positionStepSize * i;
        const size_t normalOffset = normalAttribute.offset + normalStepSize * i;

        // never read past the end of the raw data
        const bool positionInRange = positionOffset + sizeof(glm::vec3) <= vertexData.size();
        const bool normalInRange = normalOffset + sizeof(glm::vec3) <= vertexData.size();
        if (!positionInRange || !normalInRange) {
            vkcv_log(vkcv::LogLevel::ERROR, "Vertex data is too small for the requested vertex count");
            break;
        }

        Vertex v;
        // the raw bytes carry no alignment guarantee, so they are copied instead of reinterpreted
        std::memcpy(&v.position, vertexData.data() + positionOffset, sizeof(glm::vec3));
        std::memcpy(&v.normal, vertexData.data() + normalOffset, sizeof(glm::vec3));
        // keep the padding deterministic, the vertices are handed on as a whole
        v.padding0 = 0.0f;
        v.padding1 = 0.0f;

        vertices.push_back(v);
    }
    return vertices;
}
/**
 * Splits an indexed triangle list into meshlets.
 *
 * Triangles are consumed in order and appended to the current meshlet until the
 * meshlet runs out of space for indices or vertices, or until the current position
 * in the index list matches the next entry of deadEndIndices. Every meshlet gets
 * its own vertex list, indices local to that list, the mean position of its
 * vertices and the radius of a bounding sphere around that mean position.
 *
 * An incomplete trailing triangle (index count not divisible by three) is ignored
 * and no empty meshlets are emitted.
 *
 * @param inVertices vertices of the mesh, every index has to address one of them
 * @param inIndices triangle list indexing into inVertices
 * @param deadEndIndices ascending positions in inIndices at which a meshlet is closed
 * @return vertices, meshlet-local indices and meshlet descriptions
 */
MeshShaderModelData createMeshShaderModelData(
        const std::vector<Vertex>& inVertices,
        const std::vector<uint32_t>& inIndices,
        const std::vector<uint32_t>& deadEndIndices) {

    MeshShaderModelData data;
    size_t currentIndex = 0;

    // NOTE(review): both limits are compared with '<' below, so a meshlet holds at most
    // 63 vertices and 125 triangles - confirm whether the full 64 / 126 were intended
    const size_t maxVerticesPerMeshlet = 64;
    const size_t maxIndicesPerMeshlet = 126 * 3;

    bool indicesAreLeft = true;
    size_t deadEndIndicesIndex = 0;

    while (indicesAreLeft) {
        Meshlet meshlet;

        meshlet.indexCount = 0;
        meshlet.vertexCount = 0;

        meshlet.indexOffset = static_cast<uint32_t>(data.localIndices.size());
        meshlet.vertexOffset = static_cast<uint32_t>(data.vertices.size());

        std::map<uint32_t, uint32_t> globalToLocalIndexMap;
        std::vector<uint32_t> globalIndicesOrdered;

        while (true) {
            // a dead end marks a spatial jump in the index list, so the meshlet is closed here
            if (deadEndIndicesIndex < deadEndIndices.size()) {
                const uint32_t deadEndIndex = deadEndIndices[deadEndIndicesIndex];
                if (deadEndIndex == currentIndex) {
                    deadEndIndicesIndex++;
                    break;
                }
            }

            // a whole triangle has to be left, otherwise the reads below would run out of bounds
            indicesAreLeft = currentIndex + 3 <= inIndices.size();
            if (!indicesAreLeft) {
                break;
            }

            const bool enoughSpaceForIndices = meshlet.indexCount + 3 < maxIndicesPerMeshlet;
            if (!enoughSpaceForIndices) {
                break;
            }

            // count the vertices the triangle would add; a vertex repeated inside the
            // triangle is only counted once, because it is only stored once
            size_t vertexCountToAdd = 0;
            for (size_t i = 0; i < 3; i++) {
                const uint32_t globalIndex = inIndices[currentIndex + i];
                bool isNewVertex = globalToLocalIndexMap.find(globalIndex) == globalToLocalIndexMap.end();
                for (size_t j = 0; isNewVertex && j < i; j++) {
                    if (inIndices[currentIndex + j] == globalIndex) {
                        isNewVertex = false;
                    }
                }
                if (isNewVertex) {
                    vertexCountToAdd++;
                }
            }

            const bool enoughSpaceForVertices = meshlet.vertexCount + vertexCountToAdd < maxVerticesPerMeshlet;
            if (!enoughSpaceForVertices) {
                break;
            }

            for (size_t i = 0; i < 3; i++) {
                const uint32_t globalIndex = inIndices[currentIndex + i];

                uint32_t localIndex;
                const auto knownIndex = globalToLocalIndexMap.find(globalIndex);
                if (knownIndex != globalToLocalIndexMap.end()) {
                    localIndex = knownIndex->second;
                }
                else {
                    localIndex = static_cast<uint32_t>(globalToLocalIndexMap.size());
                    globalToLocalIndexMap[globalIndex] = localIndex;
                    globalIndicesOrdered.push_back(globalIndex);
                }

                data.localIndices.push_back(localIndex);
            }

            meshlet.indexCount += 3;
            currentIndex += 3;
            meshlet.vertexCount += static_cast<uint32_t>(vertexCountToAdd);
        }

        // nothing was collected (no input left or a dead end right at the start):
        // an empty meshlet would only produce a division by zero below
        if (meshlet.vertexCount == 0) {
            continue;
        }

        for (const uint32_t globalIndex : globalIndicesOrdered) {
            data.vertices.push_back(inVertices[globalIndex]);
        }

        // compute mean position
        meshlet.meanPosition = glm::vec3(0);
        const uint32_t meshletLastVertexIndex = meshlet.vertexOffset + meshlet.vertexCount;

        for (uint32_t vertexIndex = meshlet.vertexOffset; vertexIndex < meshletLastVertexIndex; vertexIndex++) {
            const Vertex& v = data.vertices[vertexIndex];
            meshlet.meanPosition += v.position;
        }
        meshlet.meanPosition /= static_cast<float>(meshlet.vertexCount);

        // compute bounding sphere radius
        meshlet.boundingSphereRadius = 0.f;
        for (uint32_t vertexIndex = meshlet.vertexOffset; vertexIndex < meshletLastVertexIndex; vertexIndex++) {
            const Vertex& v = data.vertices[vertexIndex];
            const float d = glm::distance(v.position, meshlet.meanPosition);
            meshlet.boundingSphereRadius = glm::max(meshlet.boundingSphereRadius, d);
        }

        data.meshlets.push_back(meshlet);
    }

    return data;
}
/**
 * Converts raw index data of the asset loader into 32 bit indices.
 *
 * Only complete indices are converted: trailing bytes that do not form a whole
 * index are ignored. Any index type other than UINT16 and UINT32 is reported as
 * an error and yields an empty list.
 *
 * @param indexData raw bytes of the index buffer
 * @param indexType type of the indices stored in indexData
 * @return the indices widened to 32 bit
 */
std::vector<uint32_t> assetLoaderIndicesTo32BitIndices(const std::vector<uint8_t>& indexData, vkcv::asset::IndexType indexType) {
    std::vector<uint32_t> indices;

    if (indexType == vkcv::asset::IndexType::UINT16) {
        const size_t indexCount = indexData.size() / sizeof(uint16_t);
        indices.reserve(indexCount);

        for (size_t i = 0; i < indexCount; i++) {
            // the raw bytes carry no alignment guarantee, so they are copied instead of reinterpreted
            uint16_t index16Bit;
            std::memcpy(&index16Bit, indexData.data() + i * sizeof(uint16_t), sizeof(uint16_t));
            indices.push_back(static_cast<uint32_t>(index16Bit));
        }
    } else if (indexType == vkcv::asset::IndexType::UINT32) {
        const size_t indexCount = indexData.size() / sizeof(uint32_t);
        indices.reserve(indexCount);

        for (size_t i = 0; i < indexCount; i++) {
            uint32_t index32Bit;
            std::memcpy(&index32Bit, indexData.data() + i * sizeof(uint32_t), sizeof(uint32_t));
            indices.push_back(index32Bit);
        }
    } else {
        vkcv_log(vkcv::LogLevel::ERROR, "Unsupported index type");
    }

    return indices;
}
}
\ No newline at end of file
#include <vkcv/Logger.hpp>
#include "vkcv/meshlet/Tipsify.hpp"
#include <iostream>
namespace vkcv::meshlet {
// capacity of the ring buffer that remembers the most recently used vertices
const int maxUsedVertices = 128;

/**
 * modulo operation with maxUsedVertices that never yields a negative result
 * @param number for modulo operation, may be negative
 * @return number between 0 and maxUsedVertices - 1
 */
int mod( int number ){
    // reduce first: this keeps very large values from overflowing and
    // leaves a remainder in the open range (-maxUsedVertices, maxUsedVertices)
    const int remainder = number % maxUsedVertices;
    return remainder < 0 ? remainder + maxUsedVertices : remainder;
}
/**
 * Called when no candidate of the current fan is usable anymore: picks the vertex to continue with.
 * First the stack of recently used vertices is walked from the newest entry to the oldest,
 * afterwards the vertices are scanned in ascending order, starting behind lowestLivingVertexIndex.
 * @param livingTriangles number of not yet emitted triangles per vertex
 * @param usedVerticeStack ring buffer of the most recently used vertices
 * @param usedVerticeCount write position of the ring buffer, decremented for every entry taken off
 * @param usedVerticeOffset position of the oldest entry of the ring buffer
 * @param vertexCount number of vertices of the mesh
 * @param lowestLivingVertexIndex last vertex reached by the ascending scan, advanced by this call
 * @param currentTriangleIndex number of triangles emitted so far
 * @param skippedIndices receives the current output position if the ascending scan had to be used
 * @return a VertexIndex to be used as fanningVertexIndex, -1 if no vertex has a triangle left
 */
int skipDeadEnd(
        const std::vector<uint8_t> &livingTriangles,
        const std::vector<uint32_t> &usedVerticeStack,
        int &usedVerticeCount,
        int &usedVerticeOffset,
        int vertexCount,
        int &lowestLivingVertexIndex,
        int &currentTriangleIndex,
        std::vector<uint32_t> &skippedIndices) {

    // take entries off the stack, newest first, until one still has a triangle left
    while (mod(usedVerticeCount) != usedVerticeOffset) {
        --usedVerticeCount;

        const int recentVertex = usedVerticeStack[mod(usedVerticeCount)];
        if (livingTriangles[recentVertex] != 0) {
            return recentVertex;
        }
    }

    // the stack is used up: fall back to the next vertex in index order that has a triangle left
    for (int candidate = lowestLivingVertexIndex + 1; candidate < vertexCount; ++candidate) {
        lowestLivingVertexIndex = candidate;

        if (livingTriangles[candidate] == 0) {
            continue;
        }

        // this vertex is unrelated to the previous triangles, remember where the jump happens
        skippedIndices.push_back(static_cast<uint32_t>(currentTriangleIndex * 3));
        return candidate;
    }

    return -1;
}
/**
 * Chooses the vertex to fan out next.
 * Among the candidates that still have triangles left, the one that entered the cache the
 * longest time ago is taken, as long as it is expected to stay inside the cache after its
 * remaining triangles were emitted. If no candidate qualifies, skipDeadEnd decides.
 * @param vertexCount number of vertices of the mesh
 * @param lowestLivingVertexIndex forwarded to skipDeadEnd
 * @param cacheSize size of the simulated cache
 * @param possibleCandidates vertices of the triangles emitted by the last fan
 * @param numPossibleCandidates number of valid entries in possibleCandidates
 * @param lastTimestampCache time stamp at which each vertex entered the cache
 * @param currentTimeStamp current time stamp
 * @param livingTriangles number of not yet emitted triangles per vertex
 * @param usedVerticeStack forwarded to skipDeadEnd
 * @param usedVerticeCount forwarded to skipDeadEnd
 * @param usedVerticeOffset forwarded to skipDeadEnd
 * @param currentTriangleIndex forwarded to skipDeadEnd
 * @param skippedIndices forwarded to skipDeadEnd
 * @return a VertexIndex to be used as fanningVertexIndex
 */
int getNextVertexIndex(int vertexCount,
        int &lowestLivingVertexIndex,
        int cacheSize,
        const std::vector<uint32_t> &possibleCandidates,
        int numPossibleCandidates,
        const std::vector<uint32_t> &lastTimestampCache,
        int currentTimeStamp,
        const std::vector<uint8_t> &livingTriangles,
        const std::vector<uint32_t> &usedVerticeStack,
        int &usedVerticeCount,
        int &usedVerticeOffset,
        int &currentTriangleIndex,
        std::vector<uint32_t> &skippedIndices) {

    int bestCandidate = -1;
    int bestPriority = -1;

    for (int slot = 0; slot < numPossibleCandidates; slot++) {
        const int candidate = possibleCandidates[slot];

        // a candidate that is already fanned out is of no use
        if (livingTriangles[candidate] == 0) {
            continue;
        }

        // time passed since the candidate entered the cache (unsigned, like the time stamps)
        const auto age = currentTimeStamp - lastTimestampCache[candidate];

        // every remaining triangle may push up to two further vertices into the cache;
        // a candidate that would drop out of the cache meanwhile is not considered
        if (age + 2 * livingTriangles[candidate] > cacheSize) {
            continue;
        }

        // prefer the candidate that is closest to leaving the cache
        const int priority = static_cast<int>(age);
        if (priority > bestPriority) {
            bestPriority = priority;
            bestCandidate = candidate;
        }
    }

    if (bestCandidate != -1) {
        return bestCandidate;
    }

    // no candidate is alive, try and find another one
    return skipDeadEnd(
            livingTriangles,
            usedVerticeStack,
            usedVerticeCount,
            usedVerticeOffset,
            vertexCount,
            lowestLivingVertexIndex,
            currentTriangleIndex,
            skippedIndices);
}
/**
 * Reorders the triangles of an index buffer by repeatedly emitting all remaining
 * triangles around a fanning vertex and choosing the next fanning vertex among the
 * vertices that were used recently (Tipsify reordering).
 *
 * @param indexBuffer32Bit current index buffer (triangle list)
 * @param vertexCount number of vertices of the mesh, every index has to be smaller
 * @param cacheSize size of the simulated cache
 * @return the reordered index buffer and the positions at which the reordering had to
 *         jump to an unrelated vertex. For unusable input (empty index buffer, no
 *         vertices, an index out of range, or a vertex used by more than 255 triangles)
 *         an error is logged and the input index buffer is returned unchanged.
 */
VertexCacheReorderResult tipsifyMesh(
        const std::vector<uint32_t> &indexBuffer32Bit,
        const int vertexCount,
        const unsigned int cacheSize) {

    if (indexBuffer32Bit.empty() || vertexCount <= 0) {
        vkcv_log(LogLevel::ERROR, "Invalid Input.");
        return VertexCacheReorderResult(indexBuffer32Bit , {});
    }
    int triangleCount = static_cast<int>(indexBuffer32Bit.size() / 3);

    // dynamic array for vertexOccurrence
    std::vector<uint8_t> vertexOccurrence(vertexCount, 0);
    // count the occurrences of every vertex among all triangles
    const size_t usedIndexCount = static_cast<size_t>(triangleCount) * 3;
    for (size_t i = 0; i < usedIndexCount; i++) {
        const uint32_t index = indexBuffer32Bit[i];

        // all per-vertex arrays are indexed by the index values
        if (index >= static_cast<uint32_t>(vertexCount)) {
            vkcv_log(LogLevel::ERROR, "Invalid Input.");
            return VertexCacheReorderResult(indexBuffer32Bit , {});
        }

        // the counter would wrap around and corrupt the offsets calculated from it
        if (vertexOccurrence[index] == UINT8_MAX) {
            vkcv_log(LogLevel::ERROR, "Unsupported mesh.");
            vkcv_log(LogLevel::ERROR, "Vertex shared by too many triangles.");
            return VertexCacheReorderResult(indexBuffer32Bit , {});
        }

        vertexOccurrence[index]++;
    }

    int sum = 0;
    std::vector<uint32_t> offsetVertexOccurrence(vertexCount + 1, 0);
    // highest offset for later iteration
    int maxOffset = 0;
    // calculate the offset of each vertex from the start
    for (int i = 0; i < vertexCount; i++) {
        offsetVertexOccurrence[i] = sum;
        sum += vertexOccurrence[i];

        if (vertexOccurrence[i] > maxOffset) {
            maxOffset = vertexOccurrence[i];
        }
        // reset for reuse
        vertexOccurrence[i] = 0;
    }
    offsetVertexOccurrence[vertexCount] = sum;

    // vertexIndexToTriangle = which vertex belongs to which triangle
    std::vector<uint32_t> vertexIndexToTriangle(3 * triangleCount, 0);
    // vertexOccurrence functions as number of usages in all triangles
    for (int i = 0; i < triangleCount; i++) {
        // get the pointer to the first vertex of the triangle
        // this allows us to iterate over the indexBuffer with the first vertex of the triangle as start
        const uint32_t *vertexIndexOfTriangle = &indexBuffer32Bit[i * 3];

        vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[0]] + vertexOccurrence[vertexIndexOfTriangle[0]]] = i;
        vertexOccurrence[vertexIndexOfTriangle[0]]++;

        vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[1]] + vertexOccurrence[vertexIndexOfTriangle[1]]] = i;
        vertexOccurrence[vertexIndexOfTriangle[1]]++;

        vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[2]] + vertexOccurrence[vertexIndexOfTriangle[2]]] = i;
        vertexOccurrence[vertexIndexOfTriangle[2]]++;
    }

    // counts if a triangle still uses this vertex
    std::vector<uint8_t> livingVertices = vertexOccurrence;
    std::vector<uint32_t> lastTimestampCache(vertexCount, 0);

    // stack of already used vertices, if it is full it will write to 0 again
    std::vector<uint32_t> usedVerticeStack(maxUsedVertices, 0);

    // currently used vertices
    int usedVerticeCount = 0;
    // offset if maxUsedVertices was reached and it loops back to 0
    int usedVerticeOffset = 0;

    // saves if a triangle was emitted (used in the IndexBuffer)
    std::vector<bool> isEmittedTriangles(triangleCount, false);

    // reordered Triangles that get rewritten to the new IndexBuffer
    std::vector<uint32_t> reorderedTriangleIndexBuffer(triangleCount, 0);

    // offset to the latest not used triangleIndex
    int triangleOutputOffset = 0;
    // vertexIndex to fan out from (fanning VertexIndex)
    int currentVertexIndex = 0;
    int currentTimeStamp = cacheSize + 1;
    int lowestLivingVertexIndex = 0;

    // a fan emits at most maxOffset triangles with three vertices each
    std::vector<uint32_t> possibleCandidates(3 * maxOffset);

    // number of triangles emitted so far
    int currentTriangleIndex = 0;
    // list of vertex indices where a deadEnd was reached
    // useful to know where the mesh is potentially not contiguous
    std::vector<uint32_t> skippedIndices;

    // run while not all indices are fanned out, -1 equals all are fanned out
    while (currentVertexIndex >= 0) {
        // number of possible candidates for a fanning VertexIndex
        int numPossibleCandidates = 0;
        // offset of currentVertexIndex and the next VertexIndex
        int startOffset = offsetVertexOccurrence[currentVertexIndex];
        int endOffset = offsetVertexOccurrence[currentVertexIndex + 1];
        // iterates over every triangle of currentVertexIndex
        for (int offset = startOffset; offset < endOffset; offset++) {
            int triangleIndex = vertexIndexToTriangle[offset];

            // checks if the triangle is already emitted
            if (!isEmittedTriangles[triangleIndex]) {

                // get the pointer to the first vertex of the triangle
                // this allows us to iterate over the indexBuffer with the first vertex of the triangle as start
                const uint32_t *vertexIndexOfTriangle = &indexBuffer32Bit[3 * triangleIndex];

                currentTriangleIndex++;

                // save emitted vertexIndexOfTriangle to reorderedTriangleIndexBuffer and set it to emitted
                reorderedTriangleIndexBuffer[triangleOutputOffset++] = triangleIndex;
                isEmittedTriangles[triangleIndex] = true;

                // save all vertexIndices of the triangle to reuse as soon as possible
                for (int j = 0; j < 3; j++) {
                    int vertexIndex = vertexIndexOfTriangle[j];

                    // save vertexIndex to reuseStack
                    usedVerticeStack[mod(usedVerticeCount++)] = vertexIndex;

                    // after looping back increase the start, so it only overrides the oldest vertexIndex
                    if ((mod(usedVerticeCount)) ==
                        (mod(usedVerticeOffset))) {
                        usedVerticeOffset = mod(usedVerticeOffset + 1);
                    }
                    // add vertex to next possibleCandidates as fanning vertex
                    possibleCandidates[numPossibleCandidates++] = vertexIndex;

                    // remove one occurrence of the vertex, since the triangle is used
                    livingVertices[vertexIndex]--;

                    // writes the timestamp (number of iteration) of the last usage, if it wasn't used within the last cacheSize iterations
                    if (currentTimeStamp - lastTimestampCache[vertexIndex] > cacheSize) {
                        lastTimestampCache[vertexIndex] = currentTimeStamp;
                        currentTimeStamp++;
                    }
                }
            }
        }

        // search for the next vertexIndex to fan out
        currentVertexIndex = getNextVertexIndex(
                vertexCount, lowestLivingVertexIndex, cacheSize, possibleCandidates, numPossibleCandidates, lastTimestampCache, currentTimeStamp,
                livingVertices, usedVerticeStack, usedVerticeCount, usedVerticeOffset, currentTriangleIndex, skippedIndices);
    }

    std::vector<uint32_t> reorderedIndexBuffer(3 * triangleCount);

    triangleOutputOffset = 0;
    // rewriting the TriangleIndexBuffer to the new IndexBuffer
    for (int i = 0; i < triangleCount; i++) {
        int triangleIndex = reorderedTriangleIndexBuffer[i];
        // rewriting the triangle index to vertices
        for (int j = 0; j < 3; j++) {
            int vertexIndex = indexBuffer32Bit[(3 * triangleIndex) + j];
            reorderedIndexBuffer[triangleOutputOffset++] = vertexIndex;
        }
    }

    return VertexCacheReorderResult(reorderedIndexBuffer, skippedIndices);
}
}
\ No newline at end of file
...@@ -13,7 +13,7 @@ set(vkcv_scene_sources ...@@ -13,7 +13,7 @@ set(vkcv_scene_sources
${vkcv_scene_include}/vkcv/scene/Bounds.hpp ${vkcv_scene_include}/vkcv/scene/Bounds.hpp
${vkcv_scene_source}/vkcv/scene/Bounds.cpp ${vkcv_scene_source}/vkcv/scene/Bounds.cpp
${vkcv_scene_source}/vkcv/scene/Frustum.hpp ${vkcv_scene_include}/vkcv/scene/Frustum.hpp
${vkcv_scene_source}/vkcv/scene/Frustum.cpp ${vkcv_scene_source}/vkcv/scene/Frustum.cpp
${vkcv_scene_include}/vkcv/scene/MeshPart.hpp ${vkcv_scene_include}/vkcv/scene/MeshPart.hpp
...@@ -21,7 +21,7 @@ set(vkcv_scene_sources ...@@ -21,7 +21,7 @@ set(vkcv_scene_sources
${vkcv_scene_include}/vkcv/scene/Mesh.hpp ${vkcv_scene_include}/vkcv/scene/Mesh.hpp
${vkcv_scene_source}/vkcv/scene/Mesh.cpp ${vkcv_scene_source}/vkcv/scene/Mesh.cpp
${vkcv_scene_include}/vkcv/scene/Node.hpp ${vkcv_scene_include}/vkcv/scene/Node.hpp
${vkcv_scene_source}/vkcv/scene/Node.cpp ${vkcv_scene_source}/vkcv/scene/Node.cpp
...@@ -42,4 +42,4 @@ target_include_directories(vkcv_scene SYSTEM BEFORE PRIVATE ${vkcv_include} ${vk ...@@ -42,4 +42,4 @@ target_include_directories(vkcv_scene SYSTEM BEFORE PRIVATE ${vkcv_include} ${vk
target_include_directories(vkcv_scene BEFORE PUBLIC ${vkcv_scene_include}) target_include_directories(vkcv_scene BEFORE PUBLIC ${vkcv_scene_include})
# linking with libraries from all dependencies and the VkCV framework # linking with libraries from all dependencies and the VkCV framework
target_link_libraries(vkcv_scene vkcv vkcv_asset_loader vkcv_material vkcv_camera) target_link_libraries(vkcv_scene vkcv vkcv_asset_loader vkcv_material vkcv_camera)
\ No newline at end of file
#include "Frustum.hpp" #include "vkcv/scene/Frustum.hpp"
namespace vkcv::scene { namespace vkcv::scene {
......
#include "vkcv/scene/Mesh.hpp" #include "vkcv/scene/Mesh.hpp"
#include "vkcv/scene/Scene.hpp" #include "vkcv/scene/Scene.hpp"
#include "Frustum.hpp" #include "vkcv/scene/Frustum.hpp"
namespace vkcv::scene { namespace vkcv::scene {
......
#include "vkcv/scene/Node.hpp" #include "vkcv/scene/Node.hpp"
#include "vkcv/scene/Scene.hpp" #include "vkcv/scene/Scene.hpp"
#include "Frustum.hpp" #include "vkcv/scene/Frustum.hpp"
#include <algorithm> #include <algorithm>
......
...@@ -52,6 +52,10 @@ namespace vkcv::shader { ...@@ -52,6 +52,10 @@ namespace vkcv::shader {
return EShLangFragment; return EShLangFragment;
case ShaderStage::COMPUTE: case ShaderStage::COMPUTE:
return EShLangCompute; return EShLangCompute;
case ShaderStage::TASK:
return EShLangTaskNV;
case ShaderStage::MESH:
return EShLangMeshNV;
default: default:
return EShLangCount; return EShLangCount;
} }
......
...@@ -5,3 +5,4 @@ add_subdirectory(first_mesh) ...@@ -5,3 +5,4 @@ add_subdirectory(first_mesh)
add_subdirectory(first_scene) add_subdirectory(first_scene)
add_subdirectory(particle_simulation) add_subdirectory(particle_simulation)
add_subdirectory(voxelization) add_subdirectory(voxelization)
add_subdirectory(mesh_shader)
first_triangle
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment