diff --git a/include/vkcv/Context.hpp b/include/vkcv/Context.hpp
index 12e02e96a9e81b461495ce5c2a76e35e6d226eb6..824713fd1e29cbb8b7e60b22768c0019daaa9938 100644
--- a/include/vkcv/Context.hpp
+++ b/include/vkcv/Context.hpp
@@ -4,6 +4,7 @@
 #include <vk_mem_alloc.hpp>
 
 #include "QueueManager.hpp"
+#include "DrawcallRecording.hpp"
 
 namespace vkcv
 {
diff --git a/include/vkcv/Core.hpp b/include/vkcv/Core.hpp
index a892f0d2c1a22aed7afc3b21d0dc2f3ef2be61ef..5677dbf6569a182eddba494852d39320f8154711 100644
--- a/include/vkcv/Core.hpp
+++ b/include/vkcv/Core.hpp
@@ -249,13 +249,21 @@ namespace vkcv
 		bool beginFrame(uint32_t& width, uint32_t& height);
 
 		void recordDrawcallsToCmdStream(
-            const CommandStreamHandle       cmdStreamHandle,
+			const CommandStreamHandle       cmdStreamHandle,
 			const PassHandle                renderpassHandle, 
 			const PipelineHandle            pipelineHandle,
 			const PushConstants             &pushConstants,
 			const std::vector<DrawcallInfo> &drawcalls,
 			const std::vector<ImageHandle>  &renderTargets);
 
+		void recordMeshShaderDrawcalls(
+			const CommandStreamHandle               cmdStreamHandle,
+			const PassHandle                        renderpassHandle,
+			const PipelineHandle                    pipelineHandle,
+			const PushConstants&                    pushConstantData,
+            const std::vector<MeshShaderDrawcall>&  drawcalls,
+			const std::vector<ImageHandle>&         renderTargets);
+
 		void recordComputeDispatchToCmdStream(
 			CommandStreamHandle cmdStream,
 			PipelineHandle computePipeline,
diff --git a/include/vkcv/DrawcallRecording.hpp b/include/vkcv/DrawcallRecording.hpp
index 2dfefab328a7cae395118c620ec6e5825b1cf63e..260fbbc6a2a577d0d333656a1eff4f7f3f88cd69 100644
--- a/include/vkcv/DrawcallRecording.hpp
+++ b/include/vkcv/DrawcallRecording.hpp
@@ -13,6 +13,11 @@ namespace vkcv {
         vk::Buffer      buffer;
     };
 
+    enum class IndexBitCount{   // index width selector stored in Mesh::indexBitCount
+        Bit16,                  // presumably 16 bit indices (TODO confirm); default of the Mesh constructor
+        Bit32                   // presumably 32 bit indices (TODO confirm)
+    };
+
     struct DescriptorSetUsage {
         inline DescriptorSetUsage(uint32_t setLocation, vk::DescriptorSet vulkanHandle,
 								  const std::vector<uint32_t>& dynamicOffsets = {}) noexcept
@@ -24,12 +29,14 @@ namespace vkcv {
     };
 
     struct Mesh {
-        inline Mesh(std::vector<VertexBufferBinding> vertexBufferBindings, vk::Buffer indexBuffer, size_t indexCount) noexcept
-            : vertexBufferBindings(vertexBufferBindings), indexBuffer(indexBuffer), indexCount(indexCount){}
+        inline Mesh(std::vector<VertexBufferBinding> vertexBufferBindings, vk::Buffer indexBuffer, size_t indexCount, IndexBitCount indexBitCount = IndexBitCount::Bit16) noexcept
+            : vertexBufferBindings(vertexBufferBindings), indexBuffer(indexBuffer), indexCount(indexCount), indexBitCount(indexBitCount){}
 
         std::vector<VertexBufferBinding>    vertexBufferBindings;
         vk::Buffer                          indexBuffer;
         size_t                              indexCount;
+        IndexBitCount                       indexBitCount;
+
     };
 
     struct DrawcallInfo {
@@ -48,4 +55,21 @@ namespace vkcv {
         const PushConstants     &pushConstants,
         const size_t            drawcallIndex);
 
-}
\ No newline at end of file
+    void InitMeshShaderDrawFunctions(vk::Device device);
+
+    struct MeshShaderDrawcall { // one mesh shader drawcall: the descriptor sets it carries and a task count
+        inline MeshShaderDrawcall(const std::vector<DescriptorSetUsage>& descriptorSets, uint32_t taskCount)
+            : descriptorSets(descriptorSets), taskCount(taskCount) {}
+        // the sets are taken by const reference, so constructing a drawcall copies the vector only once
+        std::vector<DescriptorSetUsage> descriptorSets;
+        uint32_t                        taskCount;
+    };
+
+    void recordMeshShaderDrawcall(
+        vk::CommandBuffer                       cmdBuffer,
+        vk::PipelineLayout                      pipelineLayout,
+        const PushConstants&                 pushConstantData,
+        const uint32_t                          pushConstantOffset,
+        const MeshShaderDrawcall&               drawcall,
+        const uint32_t                          firstTask);
+}
diff --git a/include/vkcv/ShaderStage.hpp b/include/vkcv/ShaderStage.hpp
index dca395bdba82a2f1cb38bb0a25196cfd3dab8019..3893bdf5f73408847ceb2b076abfb7d0902bb2f9 100644
--- a/include/vkcv/ShaderStage.hpp
+++ b/include/vkcv/ShaderStage.hpp
@@ -9,7 +9,11 @@ namespace vkcv {
 		TESS_EVAL,
 		GEOMETRY,
 		FRAGMENT,
-		COMPUTE
+		COMPUTE,
+		TASK,
+		MESH
 	};
 
+
+
 }
diff --git a/modules/CMakeLists.txt b/modules/CMakeLists.txt
index a17dd66933012811de41c075c33e3ed2240f9ff6..4b576e7119ebe769eafd1b6abb033b4fb02a3ec1 100644
--- a/modules/CMakeLists.txt
+++ b/modules/CMakeLists.txt
@@ -4,6 +4,7 @@ add_subdirectory(asset_loader)
 add_subdirectory(camera)
 add_subdirectory(gui)
 add_subdirectory(material)
+add_subdirectory(meshlet)
 add_subdirectory(scene)
 add_subdirectory(shader_compiler)
 add_subdirectory(testing)
diff --git a/modules/meshlet/CMakeLists.txt b/modules/meshlet/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..d576466d3d125d3a19640088a9b5725ac7a46b97
--- /dev/null
+++ b/modules/meshlet/CMakeLists.txt
@@ -0,0 +1,36 @@
+cmake_minimum_required(VERSION 3.16)
+project(vkcv_meshlet)
+
+# build the module as C++17 and treat that standard as a hard requirement
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+# directories of the module's implementation files and of its public headers
+set(vkcv_meshlet_source ${PROJECT_SOURCE_DIR}/src)
+set(vkcv_meshlet_include ${PROJECT_SOURCE_DIR}/include)
+
+# collect the source and header files of the module
+set(vkcv_meshlet_sources
+		${vkcv_meshlet_include}/vkcv/meshlet/Meshlet.hpp
+		${vkcv_meshlet_source}/vkcv/meshlet/Meshlet.cpp
+
+		${vkcv_meshlet_include}/vkcv/meshlet/Tipsify.hpp
+		${vkcv_meshlet_source}/vkcv/meshlet/Tipsify.cpp
+
+		${vkcv_meshlet_include}/vkcv/meshlet/Forsyth.hpp
+		${vkcv_meshlet_source}/vkcv/meshlet/Forsyth.cpp)
+
+# build the module as a static library from the collected files
+add_library(vkcv_meshlet STATIC ${vkcv_meshlet_sources})
+
+
+# link the VkCV framework and the libraries listed in vkcv_libraries to the module
+target_link_libraries(vkcv_meshlet vkcv ${vkcv_libraries})
+
+# include the headers of the dependencies and the VkCV framework as private SYSTEM headers
+target_include_directories(vkcv_meshlet SYSTEM BEFORE PRIVATE ${vkcv_include} ${vkcv_includes} ${vkcv_asset_loader_include} ${vkcv_camera_include})
+
+# add the own include directory for public headers
+target_include_directories(vkcv_meshlet BEFORE PUBLIC ${vkcv_meshlet_include})
+
+# additionally link the asset loader and camera modules (vkcv is listed a second time here)
+target_link_libraries(vkcv_meshlet vkcv vkcv_asset_loader vkcv_camera)
diff --git a/modules/meshlet/include/vkcv/meshlet/Forsyth.hpp b/modules/meshlet/include/vkcv/meshlet/Forsyth.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..43dc9a3b6bb81ea915268de7a7b53b18efd27638
--- /dev/null
+++ b/modules/meshlet/include/vkcv/meshlet/Forsyth.hpp
@@ -0,0 +1,18 @@
+#pragma once
+
+#include "Meshlet.hpp"
+
+namespace vkcv::meshlet
+{
+ /**
+  * Reorders the index buffer, simulating an LRU cache, so that vertices are grouped together in close triangle patches
+  * @param idxBuf current IndexBuffer
+  * @param vertexCount of the mesh
+  * @return new reordered index buffer to replace the input index buffer
+  * References:
+  * https://tomforsyth1000.github.io/papers/fast_vert_cache_opt.html
+  * https://www.martin.st/thesis/efficient_triangle_reordering.pdf
+  * https://github.com/vivkin/forsyth/blob/master/forsyth.h
+  */
+ VertexCacheReorderResult forsythReorder(const std::vector<uint32_t> &idxBuf, const size_t vertexCount);
+}
diff --git a/modules/meshlet/include/vkcv/meshlet/Meshlet.hpp b/modules/meshlet/include/vkcv/meshlet/Meshlet.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..9900dffaf28c85753d367ba79bbdf5c19a2cf479
--- /dev/null
+++ b/modules/meshlet/include/vkcv/meshlet/Meshlet.hpp
@@ -0,0 +1,59 @@
+#pragma once
+
+#include <vector>
+#include <map>
+#include <glm/glm.hpp>
+#include <vkcv/asset/asset_loader.hpp>
+
+namespace vkcv::meshlet {
+
+    struct Vertex {             // vertex record as produced by convertToVertices
+        glm::vec3   position;
+        float       padding0;   // presumably pads position to 16 bytes for a GPU buffer layout - TODO confirm
+        glm::vec3   normal;
+        float       padding1;   // presumably pads normal to 16 bytes for a GPU buffer layout - TODO confirm
+    };
+
+    struct Meshlet {            // one entry of MeshShaderModelData::meshlets
+        uint32_t    vertexOffset;   // index of the meshlet's first vertex in MeshShaderModelData::vertices
+        uint32_t    vertexCount;    // number of vertices belonging to the meshlet
+        uint32_t    indexOffset;    // index of the meshlet's first entry in MeshShaderModelData::localIndices
+        uint32_t    indexCount;     // number of indices of the meshlet, three per triangle
+        glm::vec3   meanPosition;   // average of the meshlet's vertex positions
+        float       boundingSphereRadius;   // largest distance of a meshlet vertex from meanPosition
+    };
+
+    struct VertexCacheReorderResult { // output of a vertex cache reordering pass
+        /**
+         * @param indexBuffer new indexBuffer
+         * @param skippedIndices positions in the new indexBuffer at which a spatial break occurs
+         */
+        VertexCacheReorderResult(const std::vector<uint32_t>& indexBuffer, const std::vector<uint32_t>& skippedIndices)
+                :indexBuffer(indexBuffer), skippedIndices(skippedIndices) {}
+        // both vectors are taken by const reference, so each of them is copied exactly once
+        std::vector<uint32_t> indexBuffer;
+        std::vector<uint32_t>  skippedIndices;
+    };
+
+    struct MeshShaderModelData {    // result of createMeshShaderModelData
+        std::vector<Vertex>     vertices;       // vertex copies stored meshlet after meshlet
+        std::vector<uint32_t>   localIndices;   // indices counted from the first vertex of their meshlet
+        std::vector<Meshlet>    meshlets;       // ranges into vertices and localIndices plus bounds
+    };
+
+    std::vector<Vertex> convertToVertices(
+            const std::vector<uint8_t>&         vertexData,
+            const uint64_t                      vertexCount,
+            const vkcv::asset::VertexAttribute& positionAttribute,
+            const vkcv::asset::VertexAttribute& normalAttribute);
+
+    MeshShaderModelData createMeshShaderModelData(
+            const std::vector<Vertex>&      inVertices,
+            const std::vector<uint32_t>&    inIndices,
+            const std::vector<uint32_t>& deadEndIndices = {});
+
+    std::vector<uint32_t> assetLoaderIndicesTo32BitIndices(
+            const std::vector<uint8_t>& indexData,
+            vkcv::asset::IndexType indexType);
+
+}
\ No newline at end of file
diff --git a/modules/meshlet/include/vkcv/meshlet/Tipsify.hpp b/modules/meshlet/include/vkcv/meshlet/Tipsify.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..6fb4b37d9c17c82642c3b5e7667c3e8acc50b8c0
--- /dev/null
+++ b/modules/meshlet/include/vkcv/meshlet/Tipsify.hpp
@@ -0,0 +1,23 @@
+#pragma once
+
+#include "Meshlet.hpp"
+#include <algorithm>
+#include <iostream>
+
+namespace vkcv::meshlet {
+    /**
+     * reorders the IndexBuffer, so all usages of vertices to triangle are as close as possible
+     * @param indexBuffer32Bit current IndexBuffer
+     * @param vertexCount of the mesh
+     * @param cacheSize of the priority cache <br>
+     * Recommended: 20. Keep the value between 5 and 50 <br>
+     * low:         more random and patchy<br>
+     * high:        closer vertices have higher chance -> leads to sinuous lines
+     * @return new IndexBuffer that replaces the input IndexBuffer, and the indices that are skipped
+     *
+     * https://gfx.cs.princeton.edu/pubs/Sander_2007_%3ETR/tipsy.pdf
+     * https://www.martin.st/thesis/efficient_triangle_reordering.pdf
+     */
+    VertexCacheReorderResult tipsifyMesh(const std::vector<uint32_t> &indexBuffer32Bit,
+                                         const int vertexCount, const unsigned int cacheSize = 20);
+}
\ No newline at end of file
diff --git a/modules/meshlet/src/vkcv/meshlet/Forsyth.cpp b/modules/meshlet/src/vkcv/meshlet/Forsyth.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fd0f160d65b8db81102f9eb6a9d60cf735999d44
--- /dev/null
+++ b/modules/meshlet/src/vkcv/meshlet/Forsyth.cpp
@@ -0,0 +1,317 @@
+#include "vkcv/meshlet/Forsyth.hpp"
+#include <vkcv/Logger.hpp>
+#include <array>
+#include <cmath>
+
+namespace vkcv::meshlet
+{
+    /*
+     * CACHE AND VALENCE
+     * SIZE AND SCORE CONSTANTS
+     * CHANGE AS NEEDED
+     */
+
+    // size of the simulated vertex cache and length over which the cache position score falls off
+    const size_t VERTEX_CACHE_SIZE = 8;
+    const size_t CACHE_FUNCTION_LENGTH = 32;
+
+    // cache score constants: falloff exponent and the score of the three most recent cache positions
+    const float CACHE_DECAY_POWER = 1.5f;
+    const float LAST_TRI_SCORE = 0.75f;
+    // valence boost constants, the boost is VALENCE_BOOST_SCALE * valence^(-VALENCE_BOOST_POWER)
+    const float VALENCE_BOOST_SCALE = 2.0f;
+    const float VALENCE_BOOST_POWER = 0.5f;
+
+    // sizes for precalculated tables
+    // CACHE_SCORE_TABLE_SIZE must be >= VERTEX_CACHE_SIZE, the table is indexed by cache position
+    const size_t CACHE_SCORE_TABLE_SIZE = 32;
+    const size_t VALENCE_SCORE_TABLE_SIZE = 32;
+
+    // precalculated tables, filled by initScoreTables()
+    std::array<float, CACHE_SCORE_TABLE_SIZE> cachePositionScore = {};
+    std::array<float, VALENCE_SCORE_TABLE_SIZE> valenceScore = {};
+
+    /**
+     * Fills the cachePositionScore and valenceScore lookup tables.
+     * The first three cache positions all receive LAST_TRI_SCORE, later positions fall off
+     * linearly over CACHE_FUNCTION_LENGTH and are then raised to CACHE_DECAY_POWER.
+     */
+    void initScoreTables()
+    {
+        const float scaler = 1.0f / static_cast<float>(CACHE_FUNCTION_LENGTH - 3);
+        for(size_t i = 0; i < CACHE_SCORE_TABLE_SIZE; i++)
+        {
+            float score = LAST_TRI_SCORE;
+            if (i >= 3)
+            {
+                score = 1.0f - (i - 3) * scaler;
+                score = std::pow(score, CACHE_DECAY_POWER);
+            }
+            cachePositionScore[i] = score;
+        }
+
+        // a valence of zero gets no boost: pow(0, negative exponent) would store +inf in the table
+        valenceScore[0] = 0.0f;
+        for(size_t i = 1; i < VALENCE_SCORE_TABLE_SIZE; i++)
+        {
+            const float valenceBoost = std::pow(i, -VALENCE_BOOST_POWER);
+            valenceScore[i] = VALENCE_BOOST_SCALE * valenceBoost;
+        }
+    }
+
+    /**
+     * Return the vertex' score, depending on its current active triangle count and cache position.
+     * A valence boost is added if the active triangle count is below VALENCE_SCORE_TABLE_SIZE.
+     * @param numActiveTris the active triangles on this vertex, zero always yields a score of zero
+     * @param cachePos the vertex' position in the cache, negative if the vertex is not cached
+     * @return vertex' score
+     */
+    float findVertexScore(uint32_t numActiveTris, int32_t cachePos)
+    {
+        if(numActiveTris == 0)
+            return 0.0f;
+        float score = 0.0f;
+
+        // positions beyond the table are scored like uncached vertices instead of reading out of bounds
+        if (cachePos >= 0 && static_cast<size_t>(cachePos) < CACHE_SCORE_TABLE_SIZE)
+            score = cachePositionScore[cachePos];
+
+        if (numActiveTris < VALENCE_SCORE_TABLE_SIZE)
+            score += valenceScore[numActiveTris];
+
+        return score;
+    }
+
+    VertexCacheReorderResult forsythReorder(const std::vector<uint32_t> &idxBuf, const size_t vertexCount)
+    {
+        std::vector<uint32_t> skippedIndices;
+        // the score tables are refilled on every call
+        initScoreTables();
+
+        // get the total triangle count from the index buffer
+        const size_t triangleCount = idxBuf.size() / 3;
+        // NOTE(review): the indices in idxBuf are used unchecked to address the per-vertex vectors below
+        // per-vertex active triangle count
+        std::vector<uint8_t> numActiveTris(vertexCount, 0);
+        // iterate over indices, count total occurrences of each vertex
+        for(const auto index : idxBuf)
+        {
+            if(numActiveTris[index] == UINT8_MAX)
+            {
+                vkcv_log(LogLevel::ERROR, "Unsupported mesh.");
+                vkcv_log(LogLevel::ERROR, "Vertex shared by too many triangles.");
+                return VertexCacheReorderResult({}, {});
+            }
+
+            numActiveTris[index]++;
+        }
+
+
+        // allocate remaining vectors
+        /**
+         * offsets: contains the vertices' offset into the triangleIndices vector
+         * Offset itself is the sum of triangles required by the previous vertices
+         *
+         * lastScore: the vertices' most recent calculated score
+         *
+         * cacheTag: the vertices' current position in the cache, -1 if the vertex is not cached
+         *
+         * triangleAdded: boolean flags to denote whether a triangle has been processed or not
+         *
+         * triangleScore: total score of the three vertices making up the triangle
+         *
+         * triangleIndices: per vertex, the indices of the triangles that still use the vertex
+         */
+        std::vector<uint32_t> offsets(vertexCount, 0);
+        std::vector<float> lastScore(vertexCount, 0.0f);
+        std::vector<int8_t> cacheTag(vertexCount, -1);
+
+        std::vector<bool> triangleAdded(triangleCount, false);
+        std::vector<float> triangleScore(triangleCount, 0.0f);
+
+        std::vector<int32_t> triangleIndices(idxBuf.size(), 0);
+
+
+        // sum the number of active triangles for all previous vertices
+        // null the number of active triangles afterwards for recalculation in second loop
+        uint32_t sum = 0;
+        for(size_t i = 0; i < vertexCount; i++)
+        {
+            offsets[i] = sum;
+            sum += numActiveTris[i];
+            numActiveTris[i] = 0;
+        }
+        // create the triangle indices, using the newly calculated offsets, and increment numActiveTris
+        // every vertex should be referenced by a triangle index now
+        for(size_t i = 0; i < triangleCount; i++)
+        {
+            for(size_t j = 0; j < 3; j++)
+            {
+                uint32_t v = idxBuf[3 * i + j];
+                triangleIndices[offsets[v] + numActiveTris[v]] = static_cast<int32_t>(i);
+                numActiveTris[v]++;
+            }
+        }
+
+        // calculate and initialize the triangle score, by summing the vertices' score
+        for (size_t i = 0; i < vertexCount; i++)
+        {
+            lastScore[i] = findVertexScore(numActiveTris[i], static_cast<int32_t>(cacheTag[i]));
+
+            for(size_t j = 0; j < numActiveTris[i]; j++)
+            {
+                triangleScore[triangleIndices[offsets[i] + j]] += lastScore[i];
+            }
+        }
+
+        // find best triangle to start reordering with
+        int32_t bestTriangle = -1;
+        float   bestScore    = -1.0f;
+        for(size_t i = 0; i < triangleCount; i++)
+        {
+            if(triangleScore[i] > bestScore)
+            {
+                bestScore = triangleScore[i];
+                bestTriangle = static_cast<int32_t>(i);
+            }
+        }
+
+        // allocate output triangles
+        std::vector<int32_t> outTriangles(triangleCount, 0);
+        uint32_t outPos = 0;
+        // the cache has three extra slots, vertices shifted into them are evicted during the score update
+        // initialize cache (with -1)
+        std::array<int32_t, VERTEX_CACHE_SIZE + 3> cache = {};
+        for(auto &element : cache)
+        {
+            element = -1;
+        }
+        // position from which the triangle list is scanned when the cache yields no candidate
+        uint32_t scanPos = 0;
+
+        // begin reordering routine
+        // output the currently best triangle, as long as there are triangles left to output
+        while(bestTriangle >= 0)
+        {
+            // mark best triangle as added
+            triangleAdded[bestTriangle] = true;
+            // output this triangle
+            outTriangles[outPos++] = bestTriangle;
+
+            // push best triangle's vertices into the cache
+            for(size_t i = 0; i < 3; i++)
+            {
+                uint32_t v = idxBuf[3 * bestTriangle + i];
+
+                // get vertex' cache position, if it is -1 (not cached), set its position to the end
+                int8_t endPos = cacheTag[v];
+                if(endPos < 0)
+                    endPos = static_cast<int8_t>(VERTEX_CACHE_SIZE + i);
+
+                // shift vertices' cache entries forward by one
+                for(int8_t j = endPos; j > i; j--)
+                {
+                    cache[j] = cache[j - 1];
+
+                    // if cache slot is valid vertex,
+                    // update the vertex cache tag accordingly
+                    if (cache[j] >= 0)
+                        cacheTag[cache[j]]++;
+                }
+
+                // insert current vertex into its new target slot
+                cache[i] = static_cast<int32_t>(v);
+                cacheTag[v] = static_cast<int8_t>(i);
+
+                // find current triangle in the list of active triangles
+                // remove it by moving the last triangle into the slot the current triangle is holding.
+                for (size_t j = 0; j < numActiveTris[v]; j++)
+                {
+                    if(triangleIndices[offsets[v] + j] == bestTriangle)
+                    {
+                        triangleIndices[offsets[v] + j] = triangleIndices[offsets[v] + numActiveTris[v] - 1];
+                        break;
+                    }
+                }
+                // shorten the list
+                numActiveTris[v]--;
+            }
+
+            // update scores of all vertices in the cache and of the triangles still using them
+            for (size_t i = 0; i < cache.size(); i++)
+            {
+                int32_t v = cache[i];
+                if (v < 0)
+                    break;
+
+                // this vertex has been pushed outside of the actual cache
+                if(i >= VERTEX_CACHE_SIZE)
+                {
+                    cacheTag[v] = -1;
+                    cache[i] = -1;
+                }
+
+                float newScore = findVertexScore(numActiveTris[v], cacheTag[v]);
+                float diff = newScore - lastScore[v];
+
+                for(size_t j = 0; j < numActiveTris[v]; j++)
+                {
+                    triangleScore[triangleIndices[offsets[v] + j]] += diff;
+                }
+                lastScore[v] = newScore;
+            }
+
+            // find best triangle referenced by vertices in cache
+            bestTriangle = -1;
+            bestScore = -1.0f;
+            for(size_t i = 0; i < VERTEX_CACHE_SIZE; i++)
+            {
+                if (cache[i] < 0)
+                    break;
+
+                int32_t v = cache[i];
+                for(size_t j = 0; j < numActiveTris[v]; j++)
+                {
+                    int32_t t = triangleIndices[offsets[v] + j];
+                    if(triangleScore[t] > bestScore)
+                    {
+                        bestTriangle = t;
+                        bestScore = triangleScore[t];
+                    }
+                }
+            }
+
+            // if no triangle was found at all, continue scanning whole list of triangles
+            if (bestTriangle < 0)
+            {
+                for(; scanPos < triangleCount; scanPos++)
+                {
+                    if(!triangleAdded[scanPos])
+                    {
+                        bestTriangle = scanPos;
+                        // remember the position in the output index buffer at which the reordering had to jump
+                        skippedIndices.push_back(3 * outPos);
+
+                        break;
+                    }
+                }
+            }
+        }
+
+
+        // convert triangle index array into full triangle list
+        std::vector<uint32_t> outIndices(idxBuf.size(), 0);
+        outPos = 0;
+        for(size_t i = 0; i < triangleCount; i++)
+        {
+            int32_t t = outTriangles[i];
+            for(size_t j = 0; j < 3; j++)
+            {
+                int32_t v = idxBuf[3 * t + j];
+                outIndices[outPos++] = static_cast<uint32_t>(v);
+            }
+        }
+
+        return VertexCacheReorderResult(outIndices, skippedIndices);
+    }
+}
\ No newline at end of file
diff --git a/modules/meshlet/src/vkcv/meshlet/Meshlet.cpp b/modules/meshlet/src/vkcv/meshlet/Meshlet.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..abcad7207ed5a6f80cb292ab2f7e855d3b4c7797
--- /dev/null
+++ b/modules/meshlet/src/vkcv/meshlet/Meshlet.cpp
@@ -0,0 +1,167 @@
+
+#include "vkcv/meshlet/Meshlet.hpp"
+#include <vkcv/Logger.hpp>
+#include <cassert>
+#include <iostream>
+
+namespace vkcv::meshlet {
+
+// Builds one Vertex per input vertex by reading position and normal as glm::vec3 from the raw
+// vertexData bytes at attribute.offset + i * stride; a stride of 0 is replaced by sizeof(glm::vec3).
+std::vector<vkcv::meshlet::Vertex> convertToVertices(
+        const std::vector<uint8_t>&         vertexData,
+        const uint64_t                      vertexCount,
+        const vkcv::asset::VertexAttribute& positionAttribute,
+        const vkcv::asset::VertexAttribute& normalAttribute) {
+    assert(positionAttribute.type   == vkcv::asset::PrimitiveType::POSITION);
+    assert(normalAttribute.type     == vkcv::asset::PrimitiveType::NORMAL);
+
+    std::vector<vkcv::meshlet::Vertex> vertices;
+    vertices.reserve(vertexCount);
+
+    const size_t positionStepSize   = positionAttribute.stride == 0 ? sizeof(glm::vec3) : positionAttribute.stride;
+    const size_t normalStepSize     = normalAttribute.stride   == 0 ? sizeof(glm::vec3) : normalAttribute.stride;
+
+    // the counter has the type of vertexCount, an int would be compared signed/unsigned and could overflow
+    for (uint64_t i = 0; i < vertexCount; i++) {
+        Vertex v{}; // value-initialized: the padding members are zero instead of indeterminate
+        const size_t positionOffset = positionAttribute.offset  + positionStepSize  * i;
+        const size_t normalOffset   = normalAttribute.offset    + normalStepSize    * i;
+        v.position  = *reinterpret_cast<const glm::vec3*>(&(vertexData[positionOffset]));
+        v.normal    = *reinterpret_cast<const glm::vec3*>(&(vertexData[normalOffset]));
+        vertices.push_back(v);
+    }
+    return vertices;
+}
+
+// Splits an indexed triangle list into meshlets of fewer than 64 vertices and fewer than 126 triangles.
+// Each meshlet gets its own copy of the vertices it uses and indices local to that copy. A meshlet is
+// also closed whenever currentIndex reaches the next entry of deadEndIndices (entries are matched in order).
+MeshShaderModelData createMeshShaderModelData(
+        const std::vector<Vertex>&      inVertices,
+        const std::vector<uint32_t>&    inIndices,
+        const std::vector<uint32_t>&    deadEndIndices) {
+    MeshShaderModelData data;
+    size_t currentIndex = 0;
+
+    const size_t maxVerticesPerMeshlet = 64;
+    const size_t maxIndicesPerMeshlet  = 126 * 3;
+    bool indicesAreLeft = true;
+    size_t deadEndIndicesIndex = 0;
+
+    while (indicesAreLeft) {
+        Meshlet meshlet;
+
+        meshlet.indexCount  = 0;
+        meshlet.vertexCount = 0;
+
+        meshlet.indexOffset  = data.localIndices.size();
+        meshlet.vertexOffset = data.vertices.size();
+
+        std::map<uint32_t, uint32_t> globalToLocalIndexMap;
+        std::vector<uint32_t> globalIndicesOrdered;
+
+        while (true) {
+            // a dead end closes the current meshlet before the triangle that starts at currentIndex
+            if (deadEndIndicesIndex < deadEndIndices.size()) {
+                const uint32_t deadEndIndex = deadEndIndices[deadEndIndicesIndex];
+                if (deadEndIndex == currentIndex) {
+                    deadEndIndicesIndex++;
+                    break;
+                }
+            }
+            // a complete triangle is required, trailing indices that do not form one are ignored
+            indicesAreLeft = currentIndex + 3 <= inIndices.size();
+            if (!indicesAreLeft) {
+                break;
+            }
+
+            bool enoughSpaceForIndices = meshlet.indexCount + 3 < maxIndicesPerMeshlet;
+            if (!enoughSpaceForIndices) {
+                break;
+            }
+            // upper bound of new vertices: an index repeated within the triangle is counted repeatedly
+            size_t vertexCountToAdd = 0;
+            for (int i = 0; i < 3; i++) {
+                const uint32_t globalIndex = inIndices[currentIndex + i];
+                const bool containsVertex  = globalToLocalIndexMap.find(globalIndex) != globalToLocalIndexMap.end();
+                if (!containsVertex) {
+                    vertexCountToAdd++;
+                }
+            }
+
+            bool enoughSpaceForVertices = meshlet.vertexCount + vertexCountToAdd < maxVerticesPerMeshlet;
+            if (!enoughSpaceForVertices) {
+                break;
+            }
+
+            for (int i = 0; i < 3; i++) {
+                const uint32_t globalIndex = inIndices[currentIndex + i];
+                uint32_t localIndex;
+                const bool indexAlreadyExists = globalToLocalIndexMap.find(globalIndex) != globalToLocalIndexMap.end();
+                if (indexAlreadyExists) {
+                    localIndex = globalToLocalIndexMap[globalIndex];
+                }
+                else {
+                    localIndex = globalToLocalIndexMap.size();
+                    globalToLocalIndexMap[globalIndex] = localIndex;
+                    globalIndicesOrdered.push_back(globalIndex);
+                }
+
+                data.localIndices.push_back(localIndex);
+            }
+
+            meshlet.indexCount  += 3;
+            currentIndex        += 3;
+            // count the vertices that were really added, a degenerate triangle adds fewer than estimated
+            meshlet.vertexCount = static_cast<uint32_t>(globalIndicesOrdered.size());
+        }
+        // a meshlet closed before its first triangle is skipped, its mean position would be 0 / 0
+        if (meshlet.vertexCount == 0) { continue; }
+        for (const uint32_t globalIndex : globalIndicesOrdered) {
+            const Vertex v = inVertices[globalIndex];
+            data.vertices.push_back(v);
+        }
+
+        // compute mean position
+        meshlet.meanPosition = glm::vec3(0);
+        const uint32_t meshletLastVertexIndex = meshlet.vertexOffset + meshlet.vertexCount;
+        for (uint32_t vertexIndex = meshlet.vertexOffset; vertexIndex < meshletLastVertexIndex; vertexIndex++) {
+            const Vertex& v         = data.vertices[vertexIndex];
+            meshlet.meanPosition    += v.position;
+        }
+        meshlet.meanPosition /= meshlet.vertexCount;
+
+        // compute bounding sphere radius
+        meshlet.boundingSphereRadius = 0.f;
+        for (uint32_t vertexIndex = meshlet.vertexOffset; vertexIndex < meshletLastVertexIndex; vertexIndex++) {
+            const Vertex& v = data.vertices[vertexIndex];
+            const float d                   = glm::distance(v.position, meshlet.meanPosition);
+            meshlet.boundingSphereRadius    = glm::max(meshlet.boundingSphereRadius, d);
+        }
+
+        data.meshlets.push_back(meshlet);
+    }
+
+    return data;
+}
+
+// Widens raw asset index bytes to 32 bit indices; an incomplete trailing index is ignored instead of read past the end.
+std::vector<uint32_t> assetLoaderIndicesTo32BitIndices(const std::vector<uint8_t>& indexData, vkcv::asset::IndexType indexType) {
+    std::vector<uint32_t> indices;
+    if (indexType == vkcv::asset::IndexType::UINT16) {
+        indices.reserve(indexData.size() / 2);
+        for (size_t i = 0; i + 2 <= indexData.size(); i += 2) {
+            indices.push_back(*reinterpret_cast<const uint16_t *>(&(indexData[i])));
+        }
+    } else if (indexType == vkcv::asset::IndexType::UINT32) {
+        indices.reserve(indexData.size() / 4);
+        for (size_t i = 0; i + 4 <= indexData.size(); i += 4) {
+            indices.push_back(*reinterpret_cast<const uint32_t *>(&(indexData[i])));
+        }
+    } else {
+        vkcv_log(vkcv::LogLevel::ERROR, "Unsupported index type");
+    }
+    return indices;
+}
+}
\ No newline at end of file
diff --git a/modules/meshlet/src/vkcv/meshlet/Tipsify.cpp b/modules/meshlet/src/vkcv/meshlet/Tipsify.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c5762100bc37eccbe3e4f6b4c94e5f0e580c53c7
--- /dev/null
+++ b/modules/meshlet/src/vkcv/meshlet/Tipsify.cpp
@@ -0,0 +1,288 @@
+
+#include <vkcv/Logger.hpp>
+#include "vkcv/meshlet/Tipsify.hpp"
+#include <iostream>
+
+namespace vkcv::meshlet {
+
+    const int maxUsedVertices           = 128;
+
+    /**
+     * mathematical modulo with maxUsedVertices, also correct for negative numbers of any magnitude
+     * @param number for modulo operation
+     * @return number between 0 and maxUsedVertices - 1
+     */
+    int mod( int number ){
+        return ((number % maxUsedVertices) + maxUsedVertices) % maxUsedVertices;
+    }
+
+    /**
+     * searches for the most recently used vertex that still has living triangles, otherwise falls back to the next living vertex in index order
+     * @param livingTriangles per vertex number of triangles that are not emitted yet
+     * @param usedVerticeStack ring buffer of recently used vertex indices, indexed through mod()
+     * @param usedVerticeCount running write position of the ring buffer, decremented here for every popped entry
+     * @param usedVerticeOffset ring buffer position at which popping stops, only read here
+     * @param vertexCount number of vertices, upper bound of the fallback scan
+     * @param lowestLivingVertexIndex cursor of the fallback scan, advanced here so later calls continue behind it
+     * @param currentTriangleIndex number of triangles emitted so far, only read here
+     * @param skippedIndices receives currentTriangleIndex * 3 whenever the fallback scan has to be used
+     * @return a VertexIndex to be used as fanningVertexIndex, -1 if no vertex with living triangles is left
+     */
+    int skipDeadEnd(
+            const std::vector<uint8_t> &livingTriangles,
+            const std::vector<uint32_t> &usedVerticeStack,
+            int &usedVerticeCount,
+            int &usedVerticeOffset,
+            int vertexCount,
+            int &lowestLivingVertexIndex,
+            int &currentTriangleIndex,
+            std::vector<uint32_t> &skippedIndices) {
+
+        // returns the latest vertex used that has a living triangle
+        while (mod(usedVerticeCount) != usedVerticeOffset) {
+            // iterate from the latest to the oldest entry, mod() wraps the decremented position back into the ring buffer
+            int nextVertex = usedVerticeStack[mod(--usedVerticeCount)];
+
+            if (livingTriangles[nextVertex] > 0) {
+                return nextVertex;
+            }
+        }
+        // returns any vertexIndex since no last used has a living triangle
+        while (lowestLivingVertexIndex + 1 < vertexCount) {
+            lowestLivingVertexIndex++;
+            if (livingTriangles[lowestLivingVertexIndex] > 0) {
+                // remember the position in the reordered index buffer at which the jump happens (three indices per emitted triangle)
+                skippedIndices.push_back(static_cast<uint32_t>(currentTriangleIndex * 3));
+                return lowestLivingVertexIndex;
+            }
+        }
+        return -1;
+    }
+
+    /**
+     * picks the next fanningVertexIndex from the candidates of the last fan, falls back to skipDeadEnd if none qualifies
+     * @param vertexCount number of vertices, forwarded to skipDeadEnd
+     * @param lowestLivingVertexIndex cursor of the fallback scan, forwarded to skipDeadEnd
+     * @param cacheSize size of the simulated vertex cache
+     * @param possibleCandidates vertex indices of the triangles emitted by the last fan, may contain duplicates
+     * @param numPossibleCandidates number of valid entries at the start of possibleCandidates
+     * @param lastTimestampCache per vertex timestamp of its last entry into the simulated cache
+     * @param currentTimeStamp current timestamp of the simulated cache
+     * @param livingTriangles per vertex number of triangles that are not emitted yet
+     * @param usedVerticeStack ring buffer of recently used vertex indices, forwarded to skipDeadEnd
+     * @param usedVerticeCount write position of the ring buffer, forwarded to skipDeadEnd
+     * @param usedVerticeOffset stop position of the ring buffer, forwarded to skipDeadEnd
+     * @param currentTriangleIndex number of triangles emitted so far, forwarded to skipDeadEnd
+     * @param skippedIndices list of dead end positions, forwarded to skipDeadEnd
+     * @return a VertexIndex to be used as fanningVertexIndex, -1 if skipDeadEnd finds none either
+     */
+    int getNextVertexIndex(int vertexCount,
+                           int &lowestLivingVertexIndex,
+                           int cacheSize,
+                           const std::vector<uint32_t> &possibleCandidates,
+                           int numPossibleCandidates,
+                           const std::vector<uint32_t> &lastTimestampCache,
+                           int currentTimeStamp,
+                           const std::vector<uint8_t> &livingTriangles,
+                           const std::vector<uint32_t> &usedVerticeStack,
+                           int &usedVerticeCount,
+                           int &usedVerticeOffset,
+                           int &currentTriangleIndex,
+                           std::vector<uint32_t> &skippedIndices) {
+        int nextVertexIndex = -1;
+        int maxPriority     = -1;
+        // selects the candidate with the highest priority among the first numPossibleCandidates entries
+        for (int j = 0; j < numPossibleCandidates; j++) {
+            int vertexIndex = possibleCandidates[j];
+
+            // the candidate needs to be not fanned out yet
+            if (livingTriangles[vertexIndex] > 0) {
+                int priority = -1;
+
+                // prioritizes recent used vertices, but tries not to pick one that has many triangles -> fills holes better
+                if ( currentTimeStamp - lastTimestampCache[vertexIndex] + 2 * livingTriangles[vertexIndex] <=
+                    cacheSize) {
+                    priority = currentTimeStamp - lastTimestampCache[vertexIndex];
+                }
+                // select the vertexIndex with the highest priority, a priority of -1 never beats the initial maxPriority
+                if (priority > maxPriority) {
+                    maxPriority     = priority;
+                    nextVertexIndex = vertexIndex;
+                }
+            }
+        }
+
+        // if no candidate is alive or none passed the cache test above, try and find another one
+        if (nextVertexIndex == -1) {
+            nextVertexIndex = skipDeadEnd(
+                    livingTriangles,
+                    usedVerticeStack,
+                    usedVerticeCount,
+                    usedVerticeOffset,
+                    vertexCount,
+                    lowestLivingVertexIndex,
+                    currentTriangleIndex,
+                    skippedIndices);
+        }
+        return nextVertexIndex;
+    }
+
+    VertexCacheReorderResult tipsifyMesh(
+            const std::vector<uint32_t> &indexBuffer32Bit,
+            const int vertexCount,
+            const unsigned int cacheSize) {
+        // reorders the triangles of indexBuffer32Bit by fanning them out around vertices, cacheSize is the size of the simulated vertex cache
+        if (indexBuffer32Bit.empty() || vertexCount <= 0) {
+            vkcv_log(LogLevel::ERROR, "Invalid Input.");
+            return VertexCacheReorderResult(indexBuffer32Bit , {});    // invalid input is handed back unchanged
+        }
+        int triangleCount = indexBuffer32Bit.size() / 3;    // trailing indices that do not form a full triangle are ignored
+
+        // number of triangles that use each vertex
+        std::vector<uint8_t> vertexOccurrence(vertexCount, 0);    // NOTE(review): uint8_t wraps for a vertex used by more than 255 triangles
+        // count the occurrences of each vertex among all triangles
+        for (size_t i = 0; i < triangleCount * 3; i++) {
+            vertexOccurrence[indexBuffer32Bit[i]]++;    // NOTE(review): indices are not checked against vertexCount
+        }
+
+        int sum = 0;
+        std::vector<uint32_t> offsetVertexOccurrence(vertexCount + 1, 0);
+        // highest occurrence of a single vertex, sizes possibleCandidates later
+        int maxOffset = 0;
+        // exclusive prefix sum: offsetVertexOccurrence[v] is where the triangle list of vertex v starts in vertexIndexToTriangle
+        for (int i = 0; i < vertexCount; i++) {
+            offsetVertexOccurrence[i]   = sum;
+            sum                         += vertexOccurrence[i];
+
+            if (vertexOccurrence[i] > maxOffset) {
+                maxOffset = vertexOccurrence[i];
+            }
+            // reset for reuse
+            vertexOccurrence[i] = 0;
+        }
+        offsetVertexOccurrence[vertexCount] = sum;
+
+        // vertexIndexToTriangle = for every vertex the list of triangles that use it
+        std::vector<uint32_t> vertexIndexToTriangle(3 * triangleCount, 0);
+        // vertexOccurrence functions as fill level of each vertex's triangle list and ends up as the occurrence count again
+        // i = number of a triangle
+        for (int i = 0; i < triangleCount; i++) {
+            // get the pointer to the first vertex of the triangle
+            // this allows us to iterate over the indexBuffer with the first vertex of the triangle as start
+            const uint32_t *vertexIndexOfTriangle = &indexBuffer32Bit[i * 3];
+
+            vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[0]] + vertexOccurrence[vertexIndexOfTriangle[0]]] = i;
+            vertexOccurrence[vertexIndexOfTriangle[0]]++;
+
+            vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[1]] + vertexOccurrence[vertexIndexOfTriangle[1]]] = i;
+            vertexOccurrence[vertexIndexOfTriangle[1]]++;
+
+            vertexIndexToTriangle[offsetVertexOccurrence[vertexIndexOfTriangle[2]] + vertexOccurrence[vertexIndexOfTriangle[2]]] = i;
+            vertexOccurrence[vertexIndexOfTriangle[2]]++;
+        }
+
+        // per vertex number of triangles that are not emitted yet
+        std::vector<uint8_t>  livingVertices = vertexOccurrence;
+        std::vector<uint32_t> lastTimestampCache(vertexCount, 0);
+
+        // stack of already used vertices, if it's full it will write to 0 again
+        std::vector<uint32_t> usedVerticeStack(maxUsedVertices, 0);
+
+        //currently used vertices
+        int usedVerticeCount     = 0;
+        // offset if maxUsedVertices was reached and it loops back to 0
+        int usedVerticeOffset    = 0;
+
+        // saves if a triangle was emitted (used in the IndexBuffer)
+        std::vector<bool> isEmittedTriangles(triangleCount, false);
+
+        // reordered Triangles that get rewritten to the new IndexBuffer
+        std::vector<uint32_t> reorderedTriangleIndexBuffer(triangleCount, 0);
+
+        // offset to the latest not used triangleIndex
+        int triangleOutputOffset    = 0;
+        // vertexIndex to fan out from (fanning VertexIndex)
+        int currentVertexIndex      = 0;
+        int currentTimeStamp        = cacheSize + 1;    // starts beyond cacheSize, so no vertex (timestamp 0) counts as cached at first
+        int lowestLivingVertexIndex = 0;
+
+        std::vector<uint32_t> possibleCandidates(3 * maxOffset);    // one fan emits at most maxOffset triangles with three vertices each
+
+        int currentTriangleIndex = 0;
+        // list of vertex indices where a deadEnd was reached
+        // useful to know where the mesh is potentially not contiguous
+        std::vector<uint32_t> skippedIndices;
+
+        // run while not all indices are fanned out, -1 equals all are fanned out
+        while (currentVertexIndex >= 0) {
+            // number of possible candidates for a fanning VertexIndex
+            int numPossibleCandidates   = 0;
+            // offset of currentVertexIndex and the next VertexIndex
+            int startOffset             = offsetVertexOccurrence[currentVertexIndex];
+            int endOffset               = offsetVertexOccurrence[currentVertexIndex + 1];
+            // iterates over every triangle of currentVertexIndex
+            for (int offset = startOffset; offset < endOffset; offset++) {
+                int triangleIndex = vertexIndexToTriangle[offset];
+
+                // checks if the triangle is already emitted
+                if (!isEmittedTriangles[triangleIndex]) {
+
+                    // get the pointer to the first vertex of the triangle
+                    // this allows us to iterate over the indexBuffer with the first vertex of the triangle as start
+                    const uint32_t *vertexIndexOfTriangle        = &indexBuffer32Bit[3 * triangleIndex];
+
+                    currentTriangleIndex++;
+
+                    // save emitted vertexIndexOfTriangle to reorderedTriangleIndexBuffer and set it to emitted
+                    reorderedTriangleIndexBuffer[triangleOutputOffset++]    = triangleIndex;
+                    isEmittedTriangles[triangleIndex]                       = true;
+
+                    // save all vertexIndices of the triangle to reuse as soon as possible
+                    for (int j = 0; j < 3; j++) {
+                        int vertexIndex = vertexIndexOfTriangle[j];
+
+                        //save vertexIndex to reuseStack
+                        usedVerticeStack[mod(usedVerticeCount++)] = vertexIndex;
+
+                        // after looping back increase the start, so it only overrides the oldest vertexIndex
+                        if ((mod(usedVerticeCount)) ==
+                            (mod(usedVerticeOffset))) {
+                            usedVerticeOffset = mod(usedVerticeOffset + 1);
+                        }
+                        // add vertex to next possibleCandidates as fanning vertex
+                        possibleCandidates[numPossibleCandidates++] = vertexIndex;
+
+                        // remove one occurrence of the vertex, since the triangle is used
+                        livingVertices[vertexIndex]--;
+
+                        // writes the timestamp (number of iteration) of the last usage, if it wasn't used within the last cacheSize iterations
+                        if (currentTimeStamp - lastTimestampCache[vertexIndex] > cacheSize) {
+                            lastTimestampCache[vertexIndex] = currentTimeStamp;
+                            currentTimeStamp++;
+                        }
+                    }
+                }
+            }
+
+            // search for the next vertexIndex to fan out
+            currentVertexIndex = getNextVertexIndex(
+                    vertexCount, lowestLivingVertexIndex, cacheSize, possibleCandidates, numPossibleCandidates, lastTimestampCache, currentTimeStamp,
+                    livingVertices, usedVerticeStack, usedVerticeCount, usedVerticeOffset, currentTriangleIndex, skippedIndices);
+        }
+
+        std::vector<uint32_t> reorderedIndexBuffer(3 * triangleCount);
+
+        triangleOutputOffset = 0;
+        // rewriting the TriangleIndexBuffer to the new IndexBuffer
+        for (int i = 0; i < triangleCount; i++) {
+            int triangleIndex = reorderedTriangleIndexBuffer[i];
+            // expand the triangle index back into its three vertex indices
+            for (int j = 0; j < 3; j++) {
+                int vertexIndex = indexBuffer32Bit[(3 * triangleIndex) + j];
+                reorderedIndexBuffer[triangleOutputOffset++] = vertexIndex;
+            }
+        }
+
+        return VertexCacheReorderResult(reorderedIndexBuffer, skippedIndices);
+    }
+}
\ No newline at end of file
diff --git a/modules/scene/CMakeLists.txt b/modules/scene/CMakeLists.txt
index 9aa76883a260d26aa6f46d6dabdc8206e4dad387..5edf9a29ad929b3c07b79d4f1ffcb7f1cf2fcd99 100644
--- a/modules/scene/CMakeLists.txt
+++ b/modules/scene/CMakeLists.txt
@@ -13,7 +13,7 @@ set(vkcv_scene_sources
 		${vkcv_scene_include}/vkcv/scene/Bounds.hpp
 		${vkcv_scene_source}/vkcv/scene/Bounds.cpp
 		
-		${vkcv_scene_source}/vkcv/scene/Frustum.hpp
+		${vkcv_scene_include}/vkcv/scene/Frustum.hpp
 		${vkcv_scene_source}/vkcv/scene/Frustum.cpp
 		
 		${vkcv_scene_include}/vkcv/scene/MeshPart.hpp
@@ -21,7 +21,7 @@ set(vkcv_scene_sources
 		
 		${vkcv_scene_include}/vkcv/scene/Mesh.hpp
 		${vkcv_scene_source}/vkcv/scene/Mesh.cpp
-		
+
 		${vkcv_scene_include}/vkcv/scene/Node.hpp
 		${vkcv_scene_source}/vkcv/scene/Node.cpp
 		
@@ -42,4 +42,4 @@ target_include_directories(vkcv_scene SYSTEM BEFORE PRIVATE ${vkcv_include} ${vk
 target_include_directories(vkcv_scene BEFORE PUBLIC ${vkcv_scene_include})
 
 # linking with libraries from all dependencies and the VkCV framework
-target_link_libraries(vkcv_scene vkcv vkcv_asset_loader vkcv_material vkcv_camera)
\ No newline at end of file
+target_link_libraries(vkcv_scene vkcv vkcv_asset_loader vkcv_material vkcv_camera)
diff --git a/modules/scene/src/vkcv/scene/Frustum.hpp b/modules/scene/include/vkcv/scene/Frustum.hpp
similarity index 100%
rename from modules/scene/src/vkcv/scene/Frustum.hpp
rename to modules/scene/include/vkcv/scene/Frustum.hpp
diff --git a/modules/scene/src/vkcv/scene/Frustum.cpp b/modules/scene/src/vkcv/scene/Frustum.cpp
index c800bb1e4baf4d0feef33c073740fb211da7bf63..1f63eb1d07002d24add81872627777048642dcdb 100644
--- a/modules/scene/src/vkcv/scene/Frustum.cpp
+++ b/modules/scene/src/vkcv/scene/Frustum.cpp
@@ -1,5 +1,5 @@
 
-#include "Frustum.hpp"
+#include "vkcv/scene/Frustum.hpp"
 
 namespace vkcv::scene {
 	
diff --git a/modules/scene/src/vkcv/scene/Mesh.cpp b/modules/scene/src/vkcv/scene/Mesh.cpp
index 53fb81713ed7e14049a21cb91c771d67f2f7086c..af02aedbd71ba4bdfcc30aa7fdcd82796af904f1 100644
--- a/modules/scene/src/vkcv/scene/Mesh.cpp
+++ b/modules/scene/src/vkcv/scene/Mesh.cpp
@@ -1,7 +1,7 @@
 
 #include "vkcv/scene/Mesh.hpp"
 #include "vkcv/scene/Scene.hpp"
-#include "Frustum.hpp"
+#include "vkcv/scene/Frustum.hpp"
 
 namespace vkcv::scene {
 	
diff --git a/modules/scene/src/vkcv/scene/Node.cpp b/modules/scene/src/vkcv/scene/Node.cpp
index 32230099b2f693362bab69d8172a4dee56c4e304..24f62d18e160c7d80f82384829a2130737737ba9 100644
--- a/modules/scene/src/vkcv/scene/Node.cpp
+++ b/modules/scene/src/vkcv/scene/Node.cpp
@@ -1,7 +1,7 @@
 
 #include "vkcv/scene/Node.hpp"
 #include "vkcv/scene/Scene.hpp"
-#include "Frustum.hpp"
+#include "vkcv/scene/Frustum.hpp"
 
 #include <algorithm>
 
diff --git a/modules/shader_compiler/src/vkcv/shader/GLSLCompiler.cpp b/modules/shader_compiler/src/vkcv/shader/GLSLCompiler.cpp
index 7d4bf289d8c4135ba776cfd85a270ea277aa40ed..c8878513bf99054e357f1b076dfe12664be763b3 100644
--- a/modules/shader_compiler/src/vkcv/shader/GLSLCompiler.cpp
+++ b/modules/shader_compiler/src/vkcv/shader/GLSLCompiler.cpp
@@ -52,6 +52,10 @@ namespace vkcv::shader {
 				return EShLangFragment;
 			case ShaderStage::COMPUTE:
 				return EShLangCompute;
+			case ShaderStage::TASK:
+				return EShLangTaskNV;
+			case ShaderStage::MESH:
+				return EShLangMeshNV;
 			default:
 				return EShLangCount;
 		}
diff --git a/projects/CMakeLists.txt b/projects/CMakeLists.txt
index 4196d55d99db115641f0a23cf5d7445bc70e52fe..c4fde45438e5c8446c6a1d50ceec16f830e8ebfd 100644
--- a/projects/CMakeLists.txt
+++ b/projects/CMakeLists.txt
@@ -5,3 +5,4 @@ add_subdirectory(first_mesh)
 add_subdirectory(first_scene)
 add_subdirectory(particle_simulation)
 add_subdirectory(voxelization)
+add_subdirectory(mesh_shader)
diff --git a/projects/mesh_shader/.gitignore b/projects/mesh_shader/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..7e24fd7b853bfb0a29d8b30879ef1cb95ad141c0
--- /dev/null
+++ b/projects/mesh_shader/.gitignore
@@ -0,0 +1 @@
+mesh_shader
\ No newline at end of file
diff --git a/projects/mesh_shader/CMakeLists.txt b/projects/mesh_shader/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1aa5d5ff3977a47dce75a38329216d550b1b9311
--- /dev/null
+++ b/projects/mesh_shader/CMakeLists.txt
@@ -0,0 +1,30 @@
+cmake_minimum_required(VERSION 3.16)
+project(mesh_shader)
+
+# setting c++ standard for the project
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# place the executable in the project directory, this should fix the execution path to load local files from the project
+set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
+
+# adding source files to the project
+add_executable(mesh_shader src/main.cpp)
+
+target_sources(mesh_shader PRIVATE)  # no additional sources listed, main.cpp is added by add_executable above
+
+# this should fix the execution path to load local files from the project (for MSVC)
+if(MSVC)
+	set_target_properties(mesh_shader PROPERTIES RUNTIME_OUTPUT_DIRECTORY_DEBUG ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+	set_target_properties(mesh_shader PROPERTIES RUNTIME_OUTPUT_DIRECTORY_RELEASE ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+
+	# in addition to setting the output directory, the working directory has to be set
+	# by default visual studio sets the working directory to the build directory, when using the debugger
+	set_target_properties(mesh_shader PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY ${CMAKE_RUNTIME_OUTPUT_DIRECTORY})
+endif()
+
+# including headers of dependencies and the VkCV framework
+target_include_directories(mesh_shader SYSTEM BEFORE PRIVATE ${vkcv_include} ${vkcv_includes} ${vkcv_testing_include} ${vkcv_camera_include} ${vkcv_meshlet_include} ${vkcv_shader_compiler_include} ${vkcv_gui_include})
+
+# linking with libraries from all dependencies and the VkCV framework
+target_link_libraries(mesh_shader vkcv ${vkcv_libraries} vkcv_asset_loader ${vkcv_asset_loader_libraries} vkcv_testing vkcv_camera vkcv_meshlet vkcv_shader_compiler vkcv_gui)
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/Bunny/Bunny.glb b/projects/mesh_shader/resources/Bunny/Bunny.glb
new file mode 100644
index 0000000000000000000000000000000000000000..181f1f92f1906e1e1ba900768580203efe19e9be
--- /dev/null
+++ b/projects/mesh_shader/resources/Bunny/Bunny.glb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b8bc6fab11929ca11bdf4e892ffb03b621b10307f705cdea17d82d3dee3b9aae
+size 4045836
diff --git a/projects/mesh_shader/resources/monke.glb b/projects/mesh_shader/resources/monke.glb
new file mode 100644
index 0000000000000000000000000000000000000000..47d0b9131f15a8f0697318d0a47302c71cad1db8
--- /dev/null
+++ b/projects/mesh_shader/resources/monke.glb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:597584db90a3f51088beea6652d8320e82cb025f9d3d036b89e54ad72c732a06
+size 98612
diff --git a/projects/mesh_shader/resources/shaders/common.inc b/projects/mesh_shader/resources/shaders/common.inc
new file mode 100644
index 0000000000000000000000000000000000000000..280ffee215a8b8342b78d1f5558d63a05e16859b
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/common.inc
@@ -0,0 +1,4 @@
+struct ObjectMatrices{
+    mat4 model; // object space to world space, used by the task shader for culling
+    mat4 mvp;   // object space to clip space, multiplied with the vertex position to get gl_Position
+};
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/meshlet.inc b/projects/mesh_shader/resources/shaders/meshlet.inc
new file mode 100644
index 0000000000000000000000000000000000000000..0594f62ceead8ffca09b585305075eb6046f3c46
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/meshlet.inc
@@ -0,0 +1,8 @@
+struct Meshlet{
+    uint    vertexOffset;           // index of the meshlet's first vertex in the vertex buffer
+    uint    vertexCount;            // number of vertices of the meshlet
+    uint    indexOffset;            // index of the meshlet's first entry in the local index buffer
+    uint    indexCount;             // number of local indices, three per triangle
+    vec3    meanPosition;           // mean of the vertex positions, center of the bounding sphere
+    float   boundingSphereRadius;   // largest distance of a meshlet vertex to meanPosition
+};
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.frag b/projects/mesh_shader/resources/shaders/shader.frag
new file mode 100644
index 0000000000000000000000000000000000000000..f4f6982f2089e6c8e102027f3b8763bb38f8e59c
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/shader.frag
@@ -0,0 +1,32 @@
+#version 450
+#extension GL_ARB_separate_shader_objects : enable
+
+layout(location = 0) in  vec3 passNormal;
+layout(location = 1) in  flat uint passTaskIndex;
+layout(location = 0) out vec3 outColor;
+
+uint lowbias32(uint x)  // integer hash, mixes the bits of x with alternating xor-shifts and multiplications
+{
+    x ^= x >> 16;
+    x *= 0x7feb352dU;
+    x ^= x >> 15;
+    x *= 0x846ca68bU;
+    x ^= x >> 16;
+    return x;
+}
+
+float hashToFloat(uint hash){   // maps a hash value to a float in [0, 254/255]
+    return (hash % 255) / 255.f;
+}
+
+vec3 colorFromIndex(uint i){    // color that only depends on i, one hash per channel
+    return vec3(
+        hashToFloat(lowbias32(i+0)),
+        hashToFloat(lowbias32(i+1)),
+        hashToFloat(lowbias32(i+2)));
+}
+
+void main() {
+	outColor = normalize(passNormal) * 0.5 + 0.5;   // normal visualization, has no effect because the next line overwrites it
+    outColor = colorFromIndex(passTaskIndex);       // color by the index passed in passTaskIndex
+}
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.mesh b/projects/mesh_shader/resources/shaders/shader.mesh
new file mode 100644
index 0000000000000000000000000000000000000000..30c98610f4776204ff526c57c1f793e371194629
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/shader.mesh
@@ -0,0 +1,78 @@
+#version 460
+#extension GL_ARB_separate_shader_objects   : enable
+#extension GL_GOOGLE_include_directive      : enable
+#extension GL_NV_mesh_shader                : require
+
+#include "meshlet.inc"
+
+layout(local_size_x=32) in;
+
+layout(triangles) out;
+layout(max_vertices=64, max_primitives=126) out;
+
+layout(location = 0) out vec3 passNormal[];
+layout(location = 1) out uint passTaskIndex[];
+
+struct Vertex
+{
+    vec3 position;  float padding0; // explicit padding after each vec3, presumably mirrors the host side vertex struct - TODO confirm
+    vec3 normal;    float padding1;
+};
+
+layout(std430, binding = 0) readonly buffer vertexBuffer
+{
+    Vertex vertices[];
+};
+
+layout(std430, binding = 1) readonly buffer indexBuffer
+{
+    uint localIndices[]; // breaks for 16 bit indices
+};
+
+layout(std430, binding = 2) readonly buffer meshletBuffer
+{
+    Meshlet meshlets[];
+};
+
+taskNV in Task {
+  uint meshletIndices[32];
+  mat4 mvp;
+} IN;
+
+void main()	{
+    // the task shader stored one meshlet index per launched mesh work group in IN.meshletIndices
+    uint meshletIndex = IN.meshletIndices[gl_WorkGroupID.x];
+    Meshlet meshlet = meshlets[meshletIndex];
+    
+    // set vertices: 32 invocations write up to 2 vertices each, which covers max_vertices=64
+    for(uint i = 0; i < 2; i++){
+    
+        uint workIndex = gl_LocalInvocationID.x + 32 * i;
+        if(workIndex >= meshlet.vertexCount){
+            break;
+        }
+    
+        uint vertexIndex    = meshlet.vertexOffset + workIndex;
+        Vertex vertex       = vertices[vertexIndex];
+    
+        gl_MeshVerticesNV[workIndex].gl_Position    = IN.mvp * vec4(vertex.position, 1);
+        passNormal[workIndex]                       = vertex.normal;
+        passTaskIndex[workIndex]                    = meshletIndex;
+    }
+    
+    // set local indices: 12 rounds of 32 invocations cover up to 384 indices, enough for max_primitives=126 triangles
+    for(uint i = 0; i < 12; i++){
+    
+        uint workIndex = gl_LocalInvocationID.x + i * 32;
+        if(workIndex >= meshlet.indexCount){
+            break;
+        }    
+        
+        uint indexBufferIndex               = meshlet.indexOffset + workIndex;
+        gl_PrimitiveIndicesNV[workIndex]    = localIndices[indexBufferIndex];
+    }
+    
+    if(gl_LocalInvocationID.x == 0){    // a single invocation publishes the triangle count
+        gl_PrimitiveCountNV = meshlet.indexCount / 3;
+    }
+}
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.task b/projects/mesh_shader/resources/shaders/shader.task
new file mode 100644
index 0000000000000000000000000000000000000000..7a692e98e6384767191d76cef940e295ca127d62
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/shader.task
@@ -0,0 +1,78 @@
+#version 460
+#extension GL_ARB_separate_shader_objects   : enable
+#extension GL_NV_mesh_shader                : require
+#extension GL_GOOGLE_include_directive      : enable
+
+#include "meshlet.inc"
+#include "common.inc"
+
+layout(local_size_x=32) in;
+
+taskNV out Task {
+  uint meshletIndices[32];
+  mat4 mvp;
+} OUT;
+
+layout( push_constant ) uniform constants{
+    uint matrixIndex;
+    uint meshletCount;
+};
+
+// TODO: reuse mesh stage binding at location 2 after required fix in framework
+layout(std430, binding = 5) readonly buffer meshletBuffer
+{
+    Meshlet meshlets[];
+};
+
+struct Plane{
+    vec3    pointOnPlane;
+    float   padding0;       // explicit padding after each vec3, same member order as the Plane struct in main.cpp
+    vec3    normal;         // isSphereInsideFrustum treats the side this normal points away from as inside
+    float   padding1;
+};
+
+layout(set=0, binding=3, std140) uniform cameraPlaneBuffer{
+    Plane cameraPlanes[6];
+};
+
+layout(std430, binding = 4) readonly buffer matrixBuffer
+{
+    ObjectMatrices objectMatrices[];
+};
+
+shared uint taskCount;
+
+bool isSphereInsideFrustum(vec3 spherePos, float sphereRadius, Plane cameraPlanes[6]){  // true if the sphere reaches the inner side of all six planes
+    bool isInside = true;
+    for(int i = 0; i < 6; i++){
+        Plane p     = cameraPlanes[i];
+        isInside    = isInside && dot(p.normal, spherePos - p.pointOnPlane) - sphereRadius < 0;  // center distance along the normal minus radius (assumes unit length normals)
+    }
+    return isInside;
+}
+
+// frustum culls one meshlet per invocation and hands the indices of the visible ones to the mesh shader work groups
+void main() {
+    // bound check against the global index: meshletCount counts the meshlets of all work groups, not only of this one
+    uint meshletIndex   = gl_GlobalInvocationID.x;
+    if(gl_LocalInvocationID.x == 0){
+        taskCount = 0;
+    }
+    memoryBarrierShared(); barrier();   // the reset has to be visible before the first atomicAdd, so no invocation returns early
+    
+    if(meshletIndex < meshletCount){
+        Meshlet meshlet = meshlets[meshletIndex];
+        // TODO: scaling support
+        // matrix * vector like the mvp usage, vector * matrix would transform with the transposed model matrix
+        vec3 meshletPositionWorld = (objectMatrices[matrixIndex].model * vec4(meshlet.meanPosition, 1)).xyz;
+        if(isSphereInsideFrustum(meshletPositionWorld, meshlet.boundingSphereRadius, cameraPlanes)){
+            OUT.meshletIndices[atomicAdd(taskCount, 1)] = meshletIndex;
+        }
+    }
+    memoryBarrierShared(); barrier();   // all increments have to be finished before the count is read
+
+    if(gl_LocalInvocationID.x == 0){
+        gl_TaskCountNV              = taskCount;
+        OUT.mvp = objectMatrices[matrixIndex].mvp;
+    }
+}
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.vert b/projects/mesh_shader/resources/shaders/shader.vert
new file mode 100644
index 0000000000000000000000000000000000000000..fca5057976f995183c040195bdbd592c63f1074e
--- /dev/null
+++ b/projects/mesh_shader/resources/shaders/shader.vert
@@ -0,0 +1,29 @@
+#version 450
+#extension GL_ARB_separate_shader_objects   : enable
+#extension GL_GOOGLE_include_directive      : enable
+
+#include "common.inc"
+
+layout(location = 0) in vec3 inPosition;
+layout(location = 1) in vec3 inNormal;
+
+layout(location = 0) out vec3 passNormal;
+layout(location = 1) out uint dummyOutput;
+
+layout(std430, binding = 0) readonly buffer matrixBuffer
+{
+    ObjectMatrices objectMatrices[];
+};
+
+layout( push_constant ) uniform constants{
+    uint matrixIndex;
+    uint padding; // pad to same size as mesh shader constants
+};
+
+
+void main()	{
+	gl_Position = objectMatrices[matrixIndex].mvp * vec4(inPosition, 1.0);
+	passNormal  = inNormal;
+    // dummyOutput occupies location 1, where shader.frag declares its passTaskIndex input
+    dummyOutput = padding * 0;  // padding must be used, else compiler shrinks constant size
+}
\ No newline at end of file
diff --git a/projects/mesh_shader/src/main.cpp b/projects/mesh_shader/src/main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3a94de5842f3e70625729c9755b8c88048ece2ec
--- /dev/null
+++ b/projects/mesh_shader/src/main.cpp
@@ -0,0 +1,392 @@
+#include <iostream>
+#include <vkcv/Core.hpp>
+#include <GLFW/glfw3.h>
+#include <vkcv/camera/CameraManager.hpp>
+#include <chrono>
+
+#include <vkcv/shader/GLSLCompiler.hpp>
+#include <vkcv/gui/GUI.hpp>
+#include <vkcv/asset/asset_loader.hpp>
+#include <vkcv/meshlet/Meshlet.hpp>
+#include <vkcv/meshlet/Tipsify.hpp>
+#include <vkcv/meshlet/Forsyth.hpp>
+
+// A plane described by one point lying on it and its normal vector.
+// Each vec3 is followed by one float of padding, so every member pair occupies four floats.
+// NOTE(review): presumably this mirrors the alignment of the plane struct in the shader's
+// uniform block - confirm against the shader source. The padding values are never written.
+struct Plane {
+	glm::vec3 pointOnPlane;
+	float padding0;
+	glm::vec3 normal;
+	float padding1;
+};
+
+// The six planes bounding the camera frustum.
+// computeCameraPlanes fills them in the order: near, far, top, bottom, right, left.
+struct CameraPlanes {
+	Plane planes[6];
+};
+
+// Computes the six frustum planes (near, far, top, bottom, right, left) of the given camera.
+// Every plane is stored as a point on the plane plus a normalized normal. The near plane normal
+// points against the view direction, the far plane normal along it; the side plane normals are
+// cross products of the respective frustum edge direction with the camera's right/up axis.
+// The padding members of the returned planes are left unwritten.
+CameraPlanes computeCameraPlanes(const vkcv::camera::Camera& camera) {
+	// camera state
+	const float     fieldOfView = camera.getFov();
+	const glm::vec3 eye         = camera.getPosition();
+	const float     aspect      = camera.getRatio();
+	const glm::vec3 viewDir     = glm::normalize(camera.getFront());
+	float nearDist;
+	float farDist;
+	camera.getNearFar(nearDist, farDist);
+
+	// camera basis, derived from the view direction and a fixed (0, -1, 0) helper axis
+	const glm::vec3 helperUp = glm::vec3(0, -1, 0);
+	const glm::vec3 right    = glm::normalize(glm::cross(viewDir, helperUp));
+	const glm::vec3 up       = glm::cross(viewDir, right);
+
+	// centers of the near and far rectangle
+	const glm::vec3 nearCenter = eye + viewDir * nearDist;
+	const glm::vec3 farCenter  = eye + viewDir * farDist;
+
+	const float tanHalfFov = glm::tan(fieldOfView / 2);
+
+	// offsets from the rectangle centers to the centers of their edges
+	const glm::vec3 nearUpOffset    = up    * tanHalfFov * nearDist;
+	const glm::vec3 nearRightOffset = right * tanHalfFov * nearDist * aspect;
+	const glm::vec3 farUpOffset     = up    * tanHalfFov * farDist;
+	const glm::vec3 farRightOffset  = right * tanHalfFov * farDist * aspect;
+
+	// edge centers of the near rectangle
+	const glm::vec3 nearTop    = nearCenter + nearUpOffset;
+	const glm::vec3 nearBottom = nearCenter - nearUpOffset;
+	const glm::vec3 nearRight  = nearCenter + nearRightOffset;
+	const glm::vec3 nearLeft   = nearCenter - nearRightOffset;
+
+	// edge centers of the far rectangle
+	const glm::vec3 farTop     = farCenter + farUpOffset;
+	const glm::vec3 farBottom  = farCenter - farUpOffset;
+	const glm::vec3 farRight   = farCenter + farRightOffset;
+	const glm::vec3 farLeft    = farCenter - farRightOffset;
+
+	CameraPlanes result;
+	const auto setPlane = [&result](int index, const glm::vec3& point, const glm::vec3& normal) {
+		result.planes[index].pointOnPlane = point;
+		result.planes[index].normal       = normal;
+	};
+
+	setPlane(0, nearCenter, -viewDir);                                               // near
+	setPlane(1, farCenter,  viewDir);                                                // far
+	setPlane(2, nearTop,    glm::normalize(glm::cross(farTop - nearTop, right)));       // top
+	setPlane(3, nearBottom, glm::normalize(glm::cross(right, farBottom - nearBottom))); // bot
+	setPlane(4, nearRight,  glm::normalize(glm::cross(up, farRight - nearRight)));      // right
+	setPlane(5, nearLeft,   glm::normalize(glm::cross(farLeft - nearLeft, up)));        // left
+
+	return result;
+}
+
+// Mesh shader demo: loads a scene, uploads its first vertex group and renders it every frame,
+// either with a classic vertex shader pipeline or with a task/mesh shader pipeline.
+// The path can be switched at runtime through an ImGui checkbox.
+// argv[1] optionally overrides the path of the scene file.
+// Returns EXIT_FAILURE if the render pass or one of the pipelines could not be created, 0 otherwise.
+int main(int argc, const char** argv) {
+	const char* applicationName = "Mesh shader";
+
+	const int windowWidth = 1280;
+	const int windowHeight = 720;
+	// NOTE(review): the last argument is presumably the resizable flag - confirm against Window::create
+	vkcv::Window window = vkcv::Window::create(
+		applicationName,
+		windowWidth,
+		windowHeight,
+		false
+	);
+
+	// request transfer, graphics and compute queues; the NV mesh shader extension is passed
+	// next to the swapchain extension
+	vkcv::Core core = vkcv::Core::create(
+		window,
+		applicationName,
+		VK_MAKE_VERSION(0, 0, 1),
+		{ vk::QueueFlagBits::eTransfer,vk::QueueFlagBits::eGraphics, vk::QueueFlagBits::eCompute },
+		{},
+		{ "VK_KHR_swapchain", VK_NV_MESH_SHADER_EXTENSION_NAME }
+	);
+
+    vkcv::gui::GUI gui (core, window);
+
+    // NOTE(review): instance, physicalDevice and device are not referenced again in this function
+    const auto& context = core.getContext();
+    const vk::Instance& instance = context.getInstance();
+    const vk::PhysicalDevice& physicalDevice = context.getPhysicalDevice();
+    const vk::Device& device = context.getDevice();
+
+    // load the scene, by default the bunny model
+    // NOTE(review): any result of loadScene is ignored; only the assert below checks the loaded data
+    vkcv::asset::Scene mesh;
+    const char* path = argc > 1 ? argv[1] : "resources/Bunny/Bunny.glb";
+    vkcv::asset::loadScene(path, mesh);
+
+    assert(!mesh.vertexGroups.empty());
+
+    // raw vertex and index data of the first vertex group, used by the classic vertex pipeline
+    auto vertexBuffer = core.createBuffer<uint8_t>(
+            vkcv::BufferType::VERTEX,
+            mesh.vertexGroups[0].vertexBuffer.data.size(),
+            vkcv::BufferMemoryType::DEVICE_LOCAL
+    );
+    vertexBuffer.fill(mesh.vertexGroups[0].vertexBuffer.data);
+
+    auto indexBuffer = core.createBuffer<uint8_t>(
+            vkcv::BufferType::INDEX,
+            mesh.vertexGroups[0].indexBuffer.data.size(),
+            vkcv::BufferMemoryType::DEVICE_LOCAL
+    );
+    indexBuffer.fill(mesh.vertexGroups[0].indexBuffer.data);
+
+	// format data for mesh shader
+	auto& attributes = mesh.vertexGroups[0].vertexBuffer.attributes;
+
+	// order the attributes by their type value so they can be addressed by fixed indices below
+	// NOTE(review): presumably index 0 is the position and index 1 the normal - confirm against
+	// the ordering of the attribute type enum
+	std::sort(attributes.begin(), attributes.end(), [](const vkcv::asset::VertexAttribute& x, const vkcv::asset::VertexAttribute& y) {
+		return static_cast<uint32_t>(x.type) < static_cast<uint32_t>(y.type);
+	});
+
+	// NOTE(review): three attributes are accessed unconditionally, a vertex group with fewer
+	// attributes would be indexed out of range
+	const std::vector<vkcv::VertexBufferBinding> vertexBufferBindings = {
+			vkcv::VertexBufferBinding(static_cast<vk::DeviceSize>(attributes[0].offset), vertexBuffer.getVulkanHandle()),
+			vkcv::VertexBufferBinding(static_cast<vk::DeviceSize>(attributes[1].offset), vertexBuffer.getVulkanHandle()),
+			vkcv::VertexBufferBinding(static_cast<vk::DeviceSize>(attributes[2].offset), vertexBuffer.getVulkanHandle()) };
+
+	const auto& bunny = mesh.vertexGroups[0];
+	std::vector<vkcv::meshlet::Vertex> interleavedVertices = vkcv::meshlet::convertToVertices(bunny.vertexBuffer.data, bunny.numVertices, attributes[0], attributes[1]);
+	// mesh shader buffers
+	// both reorderings are computed, but only the forsyth result is used to build the meshlets
+	const auto& assetLoaderIndexBuffer                    = mesh.vertexGroups[0].indexBuffer;
+	std::vector<uint32_t> indexBuffer32Bit                = vkcv::meshlet::assetLoaderIndicesTo32BitIndices(assetLoaderIndexBuffer.data, assetLoaderIndexBuffer.type);
+    vkcv::meshlet::VertexCacheReorderResult tipsifyResult = vkcv::meshlet::tipsifyMesh(indexBuffer32Bit, interleavedVertices.size());
+    vkcv::meshlet::VertexCacheReorderResult forsythResult = vkcv::meshlet::forsythReorder(indexBuffer32Bit, interleavedVertices.size());
+
+    const auto meshShaderModelData = createMeshShaderModelData(interleavedVertices, forsythResult.indexBuffer, forsythResult.skippedIndices);
+
+	// storage buffers holding the vertices, local indices and meshlets of the model
+	auto meshShaderVertexBuffer = core.createBuffer<vkcv::meshlet::Vertex>(
+		vkcv::BufferType::STORAGE,
+		meshShaderModelData.vertices.size());
+	meshShaderVertexBuffer.fill(meshShaderModelData.vertices);
+
+	auto meshShaderIndexBuffer = core.createBuffer<uint32_t>(
+		vkcv::BufferType::STORAGE,
+		meshShaderModelData.localIndices.size());
+	meshShaderIndexBuffer.fill(meshShaderModelData.localIndices);
+
+	auto meshletBuffer = core.createBuffer<vkcv::meshlet::Meshlet>(
+		vkcv::BufferType::STORAGE,
+		meshShaderModelData.meshlets.size(),
+		vkcv::BufferMemoryType::DEVICE_LOCAL
+		);
+	meshletBuffer.fill(meshShaderModelData.meshlets);
+
+	// attachments
+	const vkcv::AttachmentDescription present_color_attachment(
+		vkcv::AttachmentOperation::STORE,
+		vkcv::AttachmentOperation::CLEAR,
+		core.getSwapchain().getFormat());
+
+    const vkcv::AttachmentDescription depth_attachment(
+            vkcv::AttachmentOperation::STORE,
+            vkcv::AttachmentOperation::CLEAR,
+            vk::Format::eD32Sfloat
+    );
+
+	// one render pass with a color and a depth attachment, shared by both pipelines
+	vkcv::PassConfig bunnyPassDefinition({ present_color_attachment, depth_attachment });
+	vkcv::PassHandle renderPass = core.createPass(bunnyPassDefinition);
+
+	if (!renderPass)
+	{
+		std::cout << "Error. Could not create renderpass. Exiting." << std::endl;
+		return EXIT_FAILURE;
+	}
+
+	// classic pipeline: vertex + fragment shader
+	vkcv::ShaderProgram bunnyShaderProgram{};
+	vkcv::shader::GLSLCompiler compiler;
+	
+	compiler.compile(vkcv::ShaderStage::VERTEX, std::filesystem::path("resources/shaders/shader.vert"),
+					 [&bunnyShaderProgram](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) {
+		 bunnyShaderProgram.addShader(shaderStage, path);
+	});
+	
+	compiler.compile(vkcv::ShaderStage::FRAGMENT, std::filesystem::path("resources/shaders/shader.frag"),
+					 [&bunnyShaderProgram](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) {
+		bunnyShaderProgram.addShader(shaderStage, path);
+	});
+
+    // one vertex binding per vertex attachment of the shader program
+    const std::vector<vkcv::VertexAttachment> vertexAttachments = bunnyShaderProgram.getVertexAttachments();
+    std::vector<vkcv::VertexBinding> bindings;
+    for (size_t i = 0; i < vertexAttachments.size(); i++) {
+        bindings.push_back(vkcv::VertexBinding(i, { vertexAttachments[i] }));
+    }
+    const vkcv::VertexLayout bunnyLayout (bindings);
+
+	vkcv::DescriptorSetHandle vertexShaderDescriptorSet = core.createDescriptorSet(bunnyShaderProgram.getReflectedDescriptors()[0]);
+
+	const vkcv::PipelineConfig bunnyPipelineDefinition {
+			bunnyShaderProgram,
+			(uint32_t)windowWidth,
+			(uint32_t)windowHeight,
+			renderPass,
+			{ bunnyLayout },
+			{ core.getDescriptorSet(vertexShaderDescriptorSet).layout },
+			false
+	};
+
+	// element type of the matrix storage buffer; the shaders read the members model and mvp
+	// NOTE(review): the shader side declaration is not part of this file - confirm both layouts match
+	struct ObjectMatrices {
+		glm::mat4 model;
+		glm::mat4 mvp;
+	};
+	const size_t objectCount = 1;
+	vkcv::Buffer<ObjectMatrices> matrixBuffer = core.createBuffer<ObjectMatrices>(vkcv::BufferType::STORAGE, objectCount);
+
+	vkcv::DescriptorWrites vertexShaderDescriptorWrites;
+	vertexShaderDescriptorWrites.storageBufferWrites = { vkcv::BufferDescriptorWrite(0, matrixBuffer.getHandle()) };
+	core.writeDescriptorSet(vertexShaderDescriptorSet, vertexShaderDescriptorWrites);
+
+	vkcv::PipelineHandle bunnyPipeline = core.createGraphicsPipeline(bunnyPipelineDefinition);
+
+	if (!bunnyPipeline)
+	{
+		std::cout << "Error. Could not create graphics pipeline. Exiting." << std::endl;
+		return EXIT_FAILURE;
+	}
+
+	// mesh shader
+	vkcv::ShaderProgram meshShaderProgram;
+	compiler.compile(vkcv::ShaderStage::TASK, std::filesystem::path("resources/shaders/shader.task"),
+		[&meshShaderProgram](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) {
+		meshShaderProgram.addShader(shaderStage, path);
+	});
+
+	compiler.compile(vkcv::ShaderStage::MESH, std::filesystem::path("resources/shaders/shader.mesh"),
+		[&meshShaderProgram](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) {
+		meshShaderProgram.addShader(shaderStage, path);
+	});
+
+	compiler.compile(vkcv::ShaderStage::FRAGMENT, std::filesystem::path("resources/shaders/shader.frag"),
+		[&meshShaderProgram](vkcv::ShaderStage shaderStage, const std::filesystem::path& path) {
+		meshShaderProgram.addShader(shaderStage, path);
+	});
+
+	uint32_t setID = 0;
+	vkcv::DescriptorSetHandle meshShaderDescriptorSet = core.createDescriptorSet( meshShaderProgram.getReflectedDescriptors()[setID]);
+	// the mesh shader pipeline reuses the vertex bindings collected for the classic pipeline
+	const vkcv::VertexLayout meshShaderLayout(bindings);
+
+	const vkcv::PipelineConfig meshShaderPipelineDefinition{
+		meshShaderProgram,
+		(uint32_t)windowWidth,
+		(uint32_t)windowHeight,
+		renderPass,
+		{meshShaderLayout},
+		{core.getDescriptorSet(meshShaderDescriptorSet).layout},
+		false
+	};
+
+	vkcv::PipelineHandle meshShaderPipeline = core.createGraphicsPipeline(meshShaderPipelineDefinition);
+
+	if (!meshShaderPipeline)
+	{
+		std::cout << "Error. Could not create mesh shader pipeline. Exiting." << std::endl;
+		return EXIT_FAILURE;
+	}
+
+	// uniform buffer holding the frustum planes of the camera
+	vkcv::Buffer<CameraPlanes> cameraPlaneBuffer = core.createBuffer<CameraPlanes>(vkcv::BufferType::UNIFORM, 1);
+
+	// the meshlet buffer is written to two bindings (2 and 5)
+	// NOTE(review): presumably one binding per shader stage that reads the meshlets - confirm in the shaders
+	vkcv::DescriptorWrites meshShaderWrites;
+	meshShaderWrites.storageBufferWrites = {
+		vkcv::BufferDescriptorWrite(0, meshShaderVertexBuffer.getHandle()),
+		vkcv::BufferDescriptorWrite(1, meshShaderIndexBuffer.getHandle()),
+		vkcv::BufferDescriptorWrite(2, meshletBuffer.getHandle()),
+		vkcv::BufferDescriptorWrite(4, matrixBuffer.getHandle()),
+		vkcv::BufferDescriptorWrite(5, meshletBuffer.getHandle()),
+	};
+	meshShaderWrites.uniformBufferWrites = {
+		vkcv::BufferDescriptorWrite(3, cameraPlaneBuffer.getHandle()),
+	};
+
+    core.writeDescriptorSet( meshShaderDescriptorSet, meshShaderWrites);
+
+    vkcv::ImageHandle depthBuffer = core.createImage(vk::Format::eD32Sfloat, windowWidth, windowHeight, 1, false).getHandle();
+
+    // time stamp of the previous frame, used to compute the frame delta time
+    auto start = std::chrono::system_clock::now();
+
+	// NOTE(review): swapchainImageHandle is not referenced again, swapchainInput below is used instead
+	vkcv::ImageHandle swapchainImageHandle = vkcv::ImageHandle::createSwapchainImageHandle();
+
+    // NOTE(review): the index width is fixed to 32 bit while indexBuffer holds the unconverted index
+    // data of the asset - confirm the asset really stores 32 bit indices
+    const vkcv::Mesh renderMesh(vertexBufferBindings, indexBuffer.getVulkanHandle(), mesh.vertexGroups[0].numIndices, vkcv::IndexBitCount::Bit32);
+
+	const vkcv::ImageHandle swapchainInput = vkcv::ImageHandle::createSwapchainImageHandle();
+
+	vkcv::camera::CameraManager cameraManager(window);
+	uint32_t camIndex0 = cameraManager.addCamera(vkcv::camera::ControllerType::PILOT);
+	
+	cameraManager.getCamera(camIndex0).setPosition(glm::vec3(0, 0, -2));
+
+	// settings controlled through the GUI
+	bool useMeshShader          = true;
+	bool updateFrustumPlanes    = true;
+
+	while (window.isWindowOpen())
+	{
+		vkcv::Window::pollEvents();
+
+		uint32_t swapchainWidth, swapchainHeight; // No resizing = No problem
+		if (!core.beginFrame(swapchainWidth, swapchainHeight)) {
+			continue;
+		}
+		
+		// frame time in microseconds, converted to seconds for the camera update
+		auto end = std::chrono::system_clock::now();
+		auto deltatime = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+		start = end;
+		
+		cameraManager.update(0.000001 * static_cast<double>(deltatime.count()));
+
+		const vkcv::camera::Camera& camera = cameraManager.getActiveCamera();
+
+		// NOTE(review): reinterprets the model matrix storage of the asset as glm::mat4,
+		// which assumes it consists of 16 floats in glm's element order
+		ObjectMatrices objectMatrices;
+		objectMatrices.model    = *reinterpret_cast<glm::mat4*>(&mesh.meshes.front().modelMatrix);
+		objectMatrices.mvp      = camera.getMVP() * objectMatrices.model;
+
+		matrixBuffer.fill({ objectMatrices });
+
+		// push constants shared by both pipelines; the vertex shader declares the second
+		// word as padding
+		struct PushConstants {
+			uint32_t matrixIndex;
+			uint32_t meshletCount;
+		};
+		PushConstants pushConstants{ 0, static_cast<uint32_t>(meshShaderModelData.meshlets.size()) };
+
+		// when the checkbox is off the planes of the last update stay in the buffer
+		if (updateFrustumPlanes) {
+			const CameraPlanes cameraPlanes = computeCameraPlanes(camera);
+			cameraPlaneBuffer.fill({ cameraPlanes });
+		}
+
+		const std::vector<vkcv::ImageHandle> renderTargets = { swapchainInput, depthBuffer };
+		auto cmdStream = core.createCommandStream(vkcv::QueueType::Graphics);
+
+		vkcv::PushConstants pushConstantData(sizeof(pushConstants));
+		pushConstantData.appendDrawcall(pushConstants);
+
+		if (useMeshShader) {
+
+			vkcv::DescriptorSetUsage descriptorUsage(0, core.getDescriptorSet(meshShaderDescriptorSet).vulkanHandle);
+			// meshlet count divided by 32, rounded up
+			// NOTE(review): 32 presumably equals the local workgroup size of the task shader - confirm
+			const uint32_t taskCount = (meshShaderModelData.meshlets.size() + 31) / 32;
+
+			core.recordMeshShaderDrawcalls(
+				cmdStream,
+				renderPass,
+				meshShaderPipeline,
+				pushConstantData,
+				{ vkcv::MeshShaderDrawcall({descriptorUsage}, taskCount)},
+				{ renderTargets });
+		}
+		else {
+
+			vkcv::DescriptorSetUsage descriptorUsage(0, core.getDescriptorSet(vertexShaderDescriptorSet).vulkanHandle);
+
+			core.recordDrawcallsToCmdStream(
+				cmdStream,
+				renderPass,
+				bunnyPipeline,
+				pushConstantData,
+				{ vkcv::DrawcallInfo(renderMesh, { descriptorUsage }) },
+				{ renderTargets });
+		}
+
+		core.prepareSwapchainImageForPresent(cmdStream);
+		core.submitCommandStream(cmdStream);
+		
+		// settings window; the changed values take effect in the next loop iteration
+		gui.beginGUI();
+		
+		ImGui::Begin("Settings");
+		ImGui::Checkbox("Use mesh shader", &useMeshShader);
+		ImGui::Checkbox("Update frustum culling", &updateFrustumPlanes);
+
+		ImGui::End();
+		
+		gui.endGUI();
+
+		core.endFrame();
+	}
+	return 0;
+}
diff --git a/src/vkcv/Context.cpp b/src/vkcv/Context.cpp
index fb863f9d223fc091da924e27ebee4981a2afa110..2e30fb961d0b0931e4ff8796dd92b2cbd0b5f734 100644
--- a/src/vkcv/Context.cpp
+++ b/src/vkcv/Context.cpp
@@ -180,6 +180,15 @@ namespace vkcv
 		return extensions;
 	}
 	
+	/**
+	 * @brief Checks whether a list of C strings contains a given term.
+	 *
+	 * The strings are compared by content, not by pointer value.
+	 *
+	 * @param v List of null-terminated strings, null entries are skipped
+	 * @param term Null-terminated string to search for
+	 * @return true if at least one entry is equal to term, false otherwise
+	 *         (also if v is empty or term is a null pointer)
+	 */
+	bool isPresentInCharPtrVector(const std::vector<const char*>& v, const char* term){
+		if (term == nullptr) {
+			return false;
+		}
+		for (const char* entry : v) {
+			// strcmp returns 0 if both strings are equal
+			if (entry != nullptr && strcmp(entry, term) == 0) {
+				return true;
+			}
+		}
+		return false;
+	}
+	
 	Context Context::create(const char *applicationName,
 							uint32_t applicationVersion,
 							const std::vector<vk::QueueFlagBits>& queueFlags,
@@ -302,6 +311,14 @@ namespace vkcv
 		deviceFeatures2.features.depthClamp = true;
 		deviceFeatures2.features.shaderInt16 = true;
 		
+		const bool usingMeshShaders = isPresentInCharPtrVector(deviceExtensions, VK_NV_MESH_SHADER_EXTENSION_NAME);
+		vk::PhysicalDeviceMeshShaderFeaturesNV meshShadingFeatures;
+		if (usingMeshShaders) {
+			meshShadingFeatures.taskShader = true;
+			meshShadingFeatures.meshShader = true;
+            deviceFeatures2.setPNext(&meshShadingFeatures);
+		}
+		
 		if (shaderFloat16) {
 			deviceFeatures2.setPNext(&deviceShaderFloat16Int8Features);
 		}
@@ -318,6 +335,11 @@ namespace vkcv
 		// jetzt koennen wir mit dem device die queues erstellen
 		
 		vk::Device device = physicalDevice.createDevice(deviceCreateInfo);
+
+		if (usingMeshShaders)
+		{
+			InitMeshShaderDrawFunctions(device);
+		}
 		
 		QueueManager queueManager = QueueManager::create(
 				device,
diff --git a/src/vkcv/Core.cpp b/src/vkcv/Core.cpp
index 39f9839fa8bf8436143bd7c81f2c5692d0336783..e8e172dd236ac5cb49d0e2caf03599c198a07092 100644
--- a/src/vkcv/Core.cpp
+++ b/src/vkcv/Core.cpp
@@ -228,130 +228,246 @@ namespace vkcv
 		return (m_currentSwapchainImageIndex != std::numeric_limits<uint32_t>::max());
 	}
 
-	void Core::recordDrawcallsToCmdStream(
-		const CommandStreamHandle       cmdStreamHandle,
-		const PassHandle                renderpassHandle, 
-		const PipelineHandle            pipelineHandle, 
-        const PushConstants             &pushConstants,
-        const std::vector<DrawcallInfo> &drawcalls,
-		const std::vector<ImageHandle>  &renderTargets) {
+	/**
+	 * @brief Determines the render resolution from a list of render targets.
+	 *
+	 * Only the first render target is inspected: for a swapchain image the swapchain extent is
+	 * used, otherwise the size stored in the image manager. An empty list yields 1x1.
+	 *
+	 * @param renderTargets Image handles of the render targets
+	 * @param swapchain Swapchain used to look up the extent of swapchain images
+	 * @param imageManager Image manager used to look up the size of all other images
+	 * @return Array containing the width at index 0 and the height at index 1
+	 */
+	std::array<uint32_t, 2> getWidthHeightFromRenderTargets(
+		const std::vector<ImageHandle>& renderTargets,
+		const Swapchain& swapchain,
+		const ImageManager& imageManager) {
 
-		if (m_currentSwapchainImageIndex == std::numeric_limits<uint32_t>::max()) {
-			return;
-		}
+		std::array<uint32_t, 2> widthHeight;
 
-		uint32_t width;
-		uint32_t height;
 		if (renderTargets.size() > 0) {
 			const vkcv::ImageHandle firstImage = renderTargets[0];
 			if (firstImage.isSwapchainImage()) {
-				const auto& swapchainExtent = m_swapchain.getExtent();
-				width = swapchainExtent.width;
-				height = swapchainExtent.height;
+				const auto& swapchainExtent = swapchain.getExtent();
+				widthHeight[0] = swapchainExtent.width;
+				widthHeight[1] = swapchainExtent.height;
 			}
 			else {
-				width = m_ImageManager->getImageWidth(firstImage);
-				height = m_ImageManager->getImageHeight(firstImage);
+				widthHeight[0] = imageManager.getImageWidth(firstImage);
+				widthHeight[1] = imageManager.getImageHeight(firstImage);
 			}
 		}
 		else {
-			width = 1;
-			height = 1;
+			// fallback resolution if there are no render targets at all
+			widthHeight[0] = 1;
+			widthHeight[1] = 1;
 		}
 		// TODO: validate that width/height match for all attachments
+		return widthHeight;
+	}
 
-		const vk::RenderPass renderpass = m_PassManager->getVkPass(renderpassHandle);
-		const PassConfig passConfig = m_PassManager->getPassConfig(renderpassHandle);
-
-		const vk::Pipeline pipeline		= m_PipelineManager->getVkPipeline(pipelineHandle);
-		const vk::PipelineLayout pipelineLayout = m_PipelineManager->getVkPipelineLayout(pipelineHandle);
-		const vk::Rect2D renderArea(vk::Offset2D(0, 0), vk::Extent2D(width, height));
+	/**
+	 * @brief Creates a framebuffer with one layer for the given render targets.
+	 *
+	 * The image views are attached in the order of renderTargets, the resolution is taken from
+	 * getWidthHeightFromRenderTargets. The caller is responsible for destroying the returned
+	 * framebuffer.
+	 *
+	 * @param renderTargets Image handles to attach
+	 * @param imageManager Image manager providing the image views
+	 * @param swapchain Swapchain, used to determine the resolution
+	 * @param renderpass Render pass the framebuffer is created for
+	 * @param device Device used to create the framebuffer
+	 * @return The created framebuffer
+	 */
+	vk::Framebuffer createFramebuffer(
+		const std::vector<ImageHandle>& renderTargets,
+		const ImageManager&             imageManager,
+		const Swapchain&                swapchain,
+		vk::RenderPass                  renderpass,
+		vk::Device                      device) {
 
 		std::vector<vk::ImageView> attachmentsViews;
-		for (const ImageHandle& handle : renderTargets) {
-			vk::ImageView targetHandle;
-			const auto cmdBuffer = m_CommandStreamManager->getStreamCommandBuffer(cmdStreamHandle);
+		for (const ImageHandle handle : renderTargets) {
+			vk::ImageView targetHandle = imageManager.getVulkanImageView(handle);
+			attachmentsViews.push_back(targetHandle);
+		}
+
+		const std::array<uint32_t, 2> widthHeight = getWidthHeightFromRenderTargets(renderTargets, swapchain, imageManager);
+
+		// arguments: flags, render pass, attachment count, attachments, width, height, layers
+		const vk::FramebufferCreateInfo createInfo(
+			{},
+			renderpass,
+			static_cast<uint32_t>(attachmentsViews.size()),
+			attachmentsViews.data(),
+			widthHeight[0],
+			widthHeight[1],
+			1);
+
+		return device.createFramebuffer(createInfo);
+	}
+
+	/**
+	 * @brief Records layout transitions which bring all render targets into an attachment layout.
+	 *
+	 * Images with a depth format are transitioned to eDepthStencilAttachmentOptimal, all other
+	 * images to eColorAttachmentOptimal.
+	 *
+	 * @param renderTargets Image handles of the render targets
+	 * @param imageManager Image manager that records the transitions
+	 * @param cmdBuffer Command buffer the transitions are recorded into
+	 */
+	void transitionRendertargetsToAttachmentLayout(
+		const std::vector<ImageHandle>& renderTargets,
+		ImageManager&                   imageManager,
+		const vk::CommandBuffer         cmdBuffer) {
 
-			targetHandle = m_ImageManager->getVulkanImageView(handle);
-			const bool isDepthImage = isDepthFormat(m_ImageManager->getImageFormat(handle));
-			const vk::ImageLayout targetLayout = 
+		for (const ImageHandle handle : renderTargets) {
+			// NOTE(review): targetHandle is not used in this loop
+			vk::ImageView targetHandle = imageManager.getVulkanImageView(handle);
+			const bool isDepthImage = isDepthFormat(imageManager.getImageFormat(handle));
+			const vk::ImageLayout targetLayout =
 				isDepthImage ? vk::ImageLayout::eDepthStencilAttachmentOptimal : vk::ImageLayout::eColorAttachmentOptimal;
-			m_ImageManager->recordImageLayoutTransition(handle, targetLayout, cmdBuffer);
-			attachmentsViews.push_back(targetHandle);
+			imageManager.recordImageLayoutTransition(handle, targetLayout, cmdBuffer);
 		}
-		
-        const vk::FramebufferCreateInfo createInfo(
-            {},
-            renderpass,
-            static_cast<uint32_t>(attachmentsViews.size()),
-            attachmentsViews.data(),
-            width,
-            height,
-            1
+	}
+
+	/**
+	 * @brief Creates the clear values used to begin a render pass.
+	 *
+	 * Vulkan indexes the clear value array by attachment number. Therefore the result contains
+	 * exactly one entry per attachment, in the order of the attachments. Entries that belong to
+	 * attachments which are not cleared on load only keep the indices aligned, Vulkan ignores
+	 * their content.
+	 *
+	 * @param attachments Attachment descriptions of the render pass, in attachment order
+	 * @return One clear value per attachment; the first three components are 1.0 for depth
+	 *         formats and 0.0 for all other formats, the fourth component is always 1.0
+	 */
+	std::vector<vk::ClearValue> createAttachmentClearValues(const std::vector<AttachmentDescription>& attachments) {
+		std::vector<vk::ClearValue> clearValues;
+		clearValues.reserve(attachments.size());
+
+		for (const auto& attachment : attachments) {
+			// depth attachments are cleared to 1.0, everything else to 0.0
+			const float clear = isDepthFormat(attachment.format) ? 1.0f : 0.0f;
+
+			// A value is added even if the load operation is not CLEAR: skipping the entry
+			// would shift the values of all following attachments to a wrong attachment index.
+			clearValues.emplace_back(std::array<float, 4>{
+				clear,
+				clear,
+				clear,
+				1.f
+			});
+		}
+		return clearValues;
+	}
+
+	/**
+	 * @brief Records a viewport and a scissor rectangle which both cover the area from (0, 0)
+	 * to (width, height). The viewport uses the depth range 0.0 to 1.0.
+	 *
+	 * @param cmdBuffer Command buffer to record into
+	 * @param width Width of the viewport and scissor rectangle
+	 * @param height Height of the viewport and scissor rectangle
+	 */
+	void recordDynamicViewport(vk::CommandBuffer cmdBuffer, uint32_t width, uint32_t height) {
+		vk::Viewport dynamicViewport(
+			0.0f, 0.0f,
+			static_cast<float>(width), static_cast<float>(height),
+			0.0f, 1.0f
 		);
-		
-		vk::Framebuffer framebuffer = m_Context.m_Device.createFramebuffer(createInfo);
-        
-        if (!framebuffer) {
+
+		vk::Rect2D dynamicScissor({ 0, 0 }, { width, height });
+
+		cmdBuffer.setViewport(0, 1, &dynamicViewport);
+		cmdBuffer.setScissor(0, 1, &dynamicScissor);
+	}
+
+	/**
+	 * Records a render pass containing the given drawcalls into a command stream.
+	 * The render targets are transitioned into attachment layouts and a temporary framebuffer
+	 * is created for them, which gets destroyed again by the finish function of the recording.
+	 * Nothing is recorded if there is no valid current swapchain image.
+	 *
+	 * cmdStreamHandle:  command stream to record into
+	 * renderpassHandle: render pass to begin
+	 * pipelineHandle:   graphics pipeline to bind
+	 * pushConstantData: push constants, recordDrawcall receives them together with the drawcall index
+	 * drawcalls:        drawcalls to record
+	 * renderTargets:    images used as framebuffer attachments
+	 */
+	void Core::recordDrawcallsToCmdStream(
+		const CommandStreamHandle       cmdStreamHandle,
+		const PassHandle                renderpassHandle, 
+		const PipelineHandle            pipelineHandle, 
+        const PushConstants             &pushConstantData,
+        const std::vector<DrawcallInfo> &drawcalls,
+		const std::vector<ImageHandle>  &renderTargets) {
+
+		// the maximum index value marks that no swapchain image is currently available
+		if (m_currentSwapchainImageIndex == std::numeric_limits<uint32_t>::max()) {
+			return;
+		}
+
+		const std::array<uint32_t, 2> widthHeight = getWidthHeightFromRenderTargets(renderTargets, m_swapchain, *m_ImageManager);
+		const auto width  = widthHeight[0];
+		const auto height = widthHeight[1];
+
+		const vk::RenderPass        renderpass      = m_PassManager->getVkPass(renderpassHandle);
+		const PassConfig            passConfig      = m_PassManager->getPassConfig(renderpassHandle);
+
+		const vk::Pipeline          pipeline        = m_PipelineManager->getVkPipeline(pipelineHandle);
+		const vk::PipelineLayout    pipelineLayout  = m_PipelineManager->getVkPipelineLayout(pipelineHandle);
+		const vk::Rect2D            renderArea(vk::Offset2D(0, 0), vk::Extent2D(width, height));
+
+		vk::CommandBuffer cmdBuffer = m_CommandStreamManager->getStreamCommandBuffer(cmdStreamHandle);
+		transitionRendertargetsToAttachmentLayout(renderTargets, *m_ImageManager, cmdBuffer);
+
+		const vk::Framebuffer framebuffer = createFramebuffer(renderTargets, *m_ImageManager, m_swapchain, renderpass, m_Context.m_Device);
+
+		if (!framebuffer) {
 			vkcv_log(LogLevel::ERROR, "Failed to create temporary framebuffer");
-            return;
-        }
+			return;
+		}
 
-        vk::Viewport dynamicViewport(
-        		0.0f, 0.0f,
-            	static_cast<float>(width), static_cast<float>(height),
-            0.0f, 1.0f
-		);
+		// NOTE(review): submitInfo is filled but not referenced again in this function
+		SubmitInfo submitInfo;
+		submitInfo.queueType = QueueType::Graphics;
+		submitInfo.signalSemaphores = { m_SyncResources.renderFinished };
+
+		// NOTE(review): the lambda captures the locals above by reference, which requires that
+		// recordCommandsToStream invokes it before this function returns - confirm
+		auto submitFunction = [&](const vk::CommandBuffer& cmdBuffer) {
+
+			const std::vector<vk::ClearValue> clearValues = createAttachmentClearValues(passConfig.attachments);
+
+			const vk::RenderPassBeginInfo beginInfo(renderpass, framebuffer, renderArea, clearValues.size(), clearValues.data());
+			cmdBuffer.beginRenderPass(beginInfo, {}, {});
+
+			cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, {});
+
+			// viewport and scissor are only recorded for pipelines configured with a dynamic viewport
+			const PipelineConfig &pipeConfig = m_PipelineManager->getPipelineConfig(pipelineHandle);
+			if(pipeConfig.m_UseDynamicViewport)
+			{
+				recordDynamicViewport(cmdBuffer, width, height);
+			}
+
+			for (int i = 0; i < drawcalls.size(); i++) {
+				recordDrawcall(drawcalls[i], cmdBuffer, pipelineLayout, pushConstantData, i);
+			}
 
         vk::Rect2D dynamicScissor({0, 0}, {width, height});
+			cmdBuffer.endRenderPass();
+		};
+
+		// releases the temporary framebuffer
+		// NOTE(review): presumably invoked after the command stream finished executing - confirm
+		auto finishFunction = [framebuffer, this]()
+		{
+			m_Context.m_Device.destroy(framebuffer);
+		};
+
+		recordCommandsToStream(cmdStreamHandle, submitFunction, finishFunction);
+	}
+
+	/**
+	 * Records a render pass containing the given mesh shader drawcalls into a command stream.
+	 * The render targets are transitioned into attachment layouts and a temporary framebuffer
+	 * is created for them, which gets destroyed again by the finish function of the recording.
+	 * Nothing is recorded if there is no valid current swapchain image.
+	 *
+	 * cmdStreamHandle:  command stream to record into
+	 * renderpassHandle: render pass to begin
+	 * pipelineHandle:   graphics pipeline to bind
+	 * pushConstantData: push constants, the offset passed for drawcall i is i times the size per drawcall
+	 * drawcalls:        mesh shader drawcalls to record
+	 * renderTargets:    images used as framebuffer attachments
+	 */
+	void Core::recordMeshShaderDrawcalls(
+		const CommandStreamHandle                           cmdStreamHandle,
+		const PassHandle                                    renderpassHandle,
+		const PipelineHandle                                pipelineHandle,
+		const PushConstants&                                pushConstantData,
+		const std::vector<MeshShaderDrawcall>&              drawcalls,
+		const std::vector<ImageHandle>&                     renderTargets) {
+
+		// the maximum index value marks that no swapchain image is currently available
+		if (m_currentSwapchainImageIndex == std::numeric_limits<uint32_t>::max()) {
+			return;
+		}
+
+		const std::array<uint32_t, 2> widthHeight = getWidthHeightFromRenderTargets(renderTargets, m_swapchain, *m_ImageManager);
+		const auto width  = widthHeight[0];
+		const auto height = widthHeight[1];
+
+		const vk::RenderPass        renderpass = m_PassManager->getVkPass(renderpassHandle);
+		const PassConfig            passConfig = m_PassManager->getPassConfig(renderpassHandle);
+
+		const vk::Pipeline          pipeline = m_PipelineManager->getVkPipeline(pipelineHandle);
+		const vk::PipelineLayout    pipelineLayout = m_PipelineManager->getVkPipelineLayout(pipelineHandle);
+		const vk::Rect2D            renderArea(vk::Offset2D(0, 0), vk::Extent2D(width, height));
+
+		vk::CommandBuffer cmdBuffer = m_CommandStreamManager->getStreamCommandBuffer(cmdStreamHandle);
+		transitionRendertargetsToAttachmentLayout(renderTargets, *m_ImageManager, cmdBuffer);
+
+		const vk::Framebuffer framebuffer = createFramebuffer(renderTargets, *m_ImageManager, m_swapchain, renderpass, m_Context.m_Device);
+
+		if (!framebuffer) {
+			vkcv_log(LogLevel::ERROR, "Failed to create temporary framebuffer");
+			return;
+		}
 
 		SubmitInfo submitInfo;
 		submitInfo.queueType = QueueType::Graphics;
 		submitInfo.signalSemaphores = { m_SyncResources.renderFinished };
 
 		auto submitFunction = [&](const vk::CommandBuffer& cmdBuffer) {
-            std::vector<vk::ClearValue> clearValues;
-
-            for (const auto& attachment : passConfig.attachments) {
-                if (attachment.load_operation == AttachmentOperation::CLEAR) {
-                    float clear = 0.0f;
-
-                    if (isDepthFormat(attachment.format)) {
-                        clear = 1.0f;
-                    }
-
-                    clearValues.emplace_back(std::array<float, 4>{
-                            clear,
-                            clear,
-                            clear,
-                            1.f
-                    });
-                }
-            }
-
-            const vk::RenderPassBeginInfo beginInfo(renderpass, framebuffer, renderArea, clearValues.size(), clearValues.data());
-            const vk::SubpassContents subpassContents = {};
-            cmdBuffer.beginRenderPass(beginInfo, subpassContents, {});
-
-            cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, {});
-
-            const PipelineConfig &pipeConfig = m_PipelineManager->getPipelineConfig(pipelineHandle);
-            if (pipeConfig.m_UseDynamicViewport) {
-                cmdBuffer.setViewport(0, 1, &dynamicViewport);
-                cmdBuffer.setScissor(0, 1, &dynamicScissor);
-            }
-
-            for (size_t i = 0; i < drawcalls.size(); i++) {
-                recordDrawcall(drawcalls[i], cmdBuffer, pipelineLayout, pushConstants, i);
-            }
-
-            cmdBuffer.endRenderPass();
-        };
-
-        auto finishFunction = [framebuffer, this]()
-        {
-            m_Context.m_Device.destroy(framebuffer);
-        };
+
+			const std::vector<vk::ClearValue> clearValues = createAttachmentClearValues(passConfig.attachments);
+
+			const vk::RenderPassBeginInfo beginInfo(renderpass, framebuffer, renderArea, clearValues.size(), clearValues.data());
+			cmdBuffer.beginRenderPass(beginInfo, {}, {});
+
+			cmdBuffer.bindPipeline(vk::PipelineBindPoint::eGraphics, pipeline, {});
+
+			// viewport and scissor are only recorded for pipelines configured with a dynamic viewport
+			const PipelineConfig& pipeConfig = m_PipelineManager->getPipelineConfig(pipelineHandle);
+			if (pipeConfig.m_UseDynamicViewport)
+			{
+				recordDynamicViewport(cmdBuffer, width, height);
+			}
+
+			for (int i = 0; i < drawcalls.size(); i++) {
+                // every drawcall reads its own slice of the push constant data
+                const uint32_t pushConstantOffset = i * pushConstantData.getSizePerDrawcall();
+                // the last argument is the first task, every drawcall starts at task 0
+                recordMeshShaderDrawcall(
+                    cmdBuffer,
+                    pipelineLayout,
+                    pushConstantData,
+                    pushConstantOffset,
+                    drawcalls[i],
+                    0);
+			}
+
+			cmdBuffer.endRenderPass();
+		};
+
+		// releases the temporary framebuffer
+		// NOTE(review): presumably invoked after the command stream finished executing - confirm
+		auto finishFunction = [framebuffer, this]()
+		{
+			m_Context.m_Device.destroy(framebuffer);
+		};
 
 		recordCommandsToStream(cmdStreamHandle, submitFunction, finishFunction);
 	}
diff --git a/src/vkcv/DrawcallRecording.cpp b/src/vkcv/DrawcallRecording.cpp
index 32ed00e98f7ef72f0c391f61924444c26844869b..d89ace3859717f753534402507a713a78bfb6876 100644
--- a/src/vkcv/DrawcallRecording.cpp
+++ b/src/vkcv/DrawcallRecording.cpp
@@ -1,7 +1,18 @@
 #include <vkcv/DrawcallRecording.hpp>
+#include <vkcv/Logger.hpp>
 
 namespace vkcv {
 
+    // Translates the index width of a mesh into the matching Vulkan index type.
+    // Values other than Bit16 and Bit32 are reported as an error and treated like Bit16.
+    vk::IndexType getIndexType(IndexBitCount indexByteCount){
+        if (indexByteCount == IndexBitCount::Bit32) {
+            return vk::IndexType::eUint32;
+        }
+
+        if (indexByteCount != IndexBitCount::Bit16) {
+            vkcv_log(LogLevel::ERROR, "unknown Enum");
+        }
+
+        // regular result for Bit16 and fallback for unknown values
+        return vk::IndexType::eUint16;
+    }
+
     void recordDrawcall(
         const DrawcallInfo      &drawcall,
         vk::CommandBuffer       cmdBuffer,
@@ -33,11 +44,59 @@ namespace vkcv {
         }
 
         if (drawcall.mesh.indexBuffer) {
-            cmdBuffer.bindIndexBuffer(drawcall.mesh.indexBuffer, 0, vk::IndexType::eUint16);	//FIXME: choose proper size
+            cmdBuffer.bindIndexBuffer(drawcall.mesh.indexBuffer, 0, getIndexType(drawcall.mesh.indexBitCount));
             cmdBuffer.drawIndexed(drawcall.mesh.indexCount, drawcall.instanceCount, 0, 0, {});
         }
         else {
-            cmdBuffer.draw(drawcall.mesh.indexCount, 1, 0, 0, {});
+            cmdBuffer.draw(drawcall.mesh.indexCount, drawcall.instanceCount, 0, 0, {});
         }
     }
+
+    // Entry points of the NV mesh shader draw commands. They start out as nullptr and are
+    // filled in by InitMeshShaderDrawFunctions, which has to run before any of them is called.
+    struct MeshShaderFunctions
+    {
+        PFN_vkCmdDrawMeshTasksNV cmdDrawMeshTasks                           = nullptr;
+        PFN_vkCmdDrawMeshTasksIndirectNV cmdDrawMeshTasksIndirect           = nullptr;
+        PFN_vkCmdDrawMeshTasksIndirectCountNV cmdDrawMeshTasksIndirectCount = nullptr;
+    } MeshShaderFunctions;
+
+    // Looks up the NV mesh shader draw commands on the device and stores them in MeshShaderFunctions.
+    void InitMeshShaderDrawFunctions(vk::Device device) {
+        MeshShaderFunctions.cmdDrawMeshTasks              = reinterpret_cast<PFN_vkCmdDrawMeshTasksNV>(device.getProcAddr("vkCmdDrawMeshTasksNV"));
+        MeshShaderFunctions.cmdDrawMeshTasksIndirect      = reinterpret_cast<PFN_vkCmdDrawMeshTasksIndirectNV>(device.getProcAddr("vkCmdDrawMeshTasksIndirectNV"));
+        MeshShaderFunctions.cmdDrawMeshTasksIndirectCount = reinterpret_cast<PFN_vkCmdDrawMeshTasksIndirectCountNV>(device.getProcAddr("vkCmdDrawMeshTasksIndirectCountNV"));
+    }
+
+    // Records one mesh shader drawcall: binds its descriptor sets, uploads its push constant slice, draws.
+    // pushConstantOffset is the byte offset of this drawcall's slice; firstTask is forwarded to the draw.
+    void recordMeshShaderDrawcall(
+        vk::CommandBuffer                       cmdBuffer,
+        vk::PipelineLayout                      pipelineLayout,
+        const PushConstants&                    pushConstantData,
+        const uint32_t                          pushConstantOffset,
+        const MeshShaderDrawcall&               drawcall,
+        const uint32_t                          firstTask) {
+        if (!MeshShaderFunctions.cmdDrawMeshTasks) {
+            // entry point was never loaded (InitMeshShaderDrawFunctions not called or lookup failed)
+            vkcv_log(LogLevel::ERROR, "Mesh shader draw function not loaded");
+            return;
+        }
+        for (const auto& descriptorUsage : drawcall.descriptorSets) {
+            cmdBuffer.bindDescriptorSets(
+                vk::PipelineBindPoint::eGraphics, pipelineLayout,
+                descriptorUsage.setLocation, descriptorUsage.vulkanHandle, nullptr);
+        }
+
+        const void* const pushConstantBase = pushConstantData.getData();
+        if (pushConstantBase) {
+            // offset in bytes; only computed once the base pointer is known to be non-null
+            const void* drawcallPushConstantData = static_cast<const char*>(pushConstantBase) + pushConstantOffset;
+            cmdBuffer.pushConstants(
+                pipelineLayout, vk::ShaderStageFlagBits::eAll, 0,
+                pushConstantData.getSizePerDrawcall(), drawcallPushConstantData);
+        }
+
+        MeshShaderFunctions.cmdDrawMeshTasks(VkCommandBuffer(cmdBuffer), drawcall.taskCount, firstTask);
+    }
 }
diff --git a/src/vkcv/PipelineManager.cpp b/src/vkcv/PipelineManager.cpp
index 8b1f0b68be3a72f60103ca0dd8136f2c923513a5..244f6723f70e5ea938c005b74b286e192d68443c 100644
--- a/src/vkcv/PipelineManager.cpp
+++ b/src/vkcv/PipelineManager.cpp
@@ -44,95 +44,190 @@ namespace vkcv
 
     vk::PrimitiveTopology primitiveTopologyToVulkanPrimitiveTopology(const PrimitiveTopology topology) {
         switch (topology) {
-        case(PrimitiveTopology::PointList):     return vk::PrimitiveTopology::ePointList;
-        case(PrimitiveTopology::LineList):      return vk::PrimitiveTopology::eLineList;
-        case(PrimitiveTopology::TriangleList):  return vk::PrimitiveTopology::eTriangleList;
-        default: std::cout << "Error: Unknown primitive topology type" << std::endl; return vk::PrimitiveTopology::eTriangleList;
+            case(PrimitiveTopology::PointList):     return vk::PrimitiveTopology::ePointList;
+            case(PrimitiveTopology::LineList):      return vk::PrimitiveTopology::eLineList;
+            case(PrimitiveTopology::TriangleList):  return vk::PrimitiveTopology::eTriangleList;
+            default: vkcv_log(vkcv::LogLevel::ERROR, "Unknown primitive topology type"); return vk::PrimitiveTopology::eTriangleList; // unknown value: report through the project logger, fall back to triangle lists
         }
     }
 
     vk::CompareOp depthTestToVkCompareOp(DepthTest depthTest) {
         switch (depthTest) {
-        case(DepthTest::None):          return vk::CompareOp::eAlways;
-        case(DepthTest::Less):          return vk::CompareOp::eLess;
-        case(DepthTest::LessEqual):     return vk::CompareOp::eLessOrEqual;
-        case(DepthTest::Greater):       return vk::CompareOp::eGreater;
-        case(DepthTest::GreatherEqual): return vk::CompareOp::eGreaterOrEqual;
-        case(DepthTest::Equal):         return vk::CompareOp::eEqual;
-        default: vkcv_log(vkcv::LogLevel::ERROR, "Unknown depth test enum"); return vk::CompareOp::eAlways;
+            case DepthTest::None:           return vk::CompareOp::eAlways;
+            case DepthTest::Less:           return vk::CompareOp::eLess;
+            case DepthTest::LessEqual:      return vk::CompareOp::eLessOrEqual;
+            case DepthTest::Greater:        return vk::CompareOp::eGreater;
+            case DepthTest::GreatherEqual:  return vk::CompareOp::eGreaterOrEqual;
+            case DepthTest::Equal:          return vk::CompareOp::eEqual;
+            default: vkcv_log(vkcv::LogLevel::ERROR, "Unknown depth test enum"); return vk::CompareOp::eAlways; // unknown value: log it, compare op falls back to eAlways
         }
     }
+	// Maps a vkcv shader stage onto the matching Vulkan stage flag; unknown stages are logged and yield eAll.
+	vk::ShaderStageFlagBits shaderStageToVkShaderStage(vkcv::ShaderStage stage) {
+		switch (stage) {
+			case ShaderStage::VERTEX:       return vk::ShaderStageFlagBits::eVertex;
+			case ShaderStage::FRAGMENT:     return vk::ShaderStageFlagBits::eFragment;
+			case ShaderStage::GEOMETRY:     return vk::ShaderStageFlagBits::eGeometry;
+			case ShaderStage::TESS_CONTROL: return vk::ShaderStageFlagBits::eTessellationControl;
+			case ShaderStage::TESS_EVAL:    return vk::ShaderStageFlagBits::eTessellationEvaluation;
+			case ShaderStage::COMPUTE:      return vk::ShaderStageFlagBits::eCompute;
+			case ShaderStage::TASK:         return vk::ShaderStageFlagBits::eTaskNV;
+			case ShaderStage::MESH:         return vk::ShaderStageFlagBits::eMeshNV;
+			default: vkcv_log(LogLevel::ERROR, "Unknown shader stage"); return vk::ShaderStageFlagBits::eAll;
+		}
+	}
+
+    // Creates the shader module for one stage of shaderProgram and describes it in *outCreateInfo.
+    // Returns false if module creation fails; on success the caller owns and must destroy the module.
+    bool createPipelineShaderStageCreateInfo(
+        const vkcv::ShaderProgram&          shaderProgram,
+        ShaderStage                         stage,
+        vk::Device                          device,
+        vk::PipelineShaderStageCreateInfo*  outCreateInfo) {
+        assert(outCreateInfo);
+        // bind by reference: the shader code bytes are only read here, no need to copy them
+        const auto& shader = shaderProgram.getShader(stage);
+        const vk::ShaderModuleCreateInfo moduleInfo(
+            {}, shader.shaderCode.size(), reinterpret_cast<const uint32_t*>(shader.shaderCode.data()));
+        vk::ShaderModule shaderModule;
+        if (device.createShaderModule(&moduleInfo, nullptr, &shaderModule) != vk::Result::eSuccess) {
+            vkcv_log(LogLevel::ERROR, "Failed to create shader module");
+            return false;
+        }
+
+        // string literal, so the entry name pointer stored in the create info stays valid after returning
+        *outCreateInfo = vk::PipelineShaderStageCreateInfo(
+            {}, shaderStageToVkShaderStage(stage), shaderModule, "main", nullptr);
+        return true;
+    }
 
     PipelineHandle PipelineManager::createPipeline(const PipelineConfig &config, PassManager& passManager)
     {
 		const vk::RenderPass &pass = passManager.getVkPass(config.m_PassHandle);
     	
+		const bool existsTaskShader = config.m_ShaderProgram.existsShader(ShaderStage::TASK);
+		const bool existsMeshShader = config.m_ShaderProgram.existsShader(ShaderStage::MESH);
+
         const bool existsVertexShader = config.m_ShaderProgram.existsShader(ShaderStage::VERTEX);
+
+        const bool validGeometryStages = existsVertexShader || (existsTaskShader && existsMeshShader);
+
         const bool existsFragmentShader = config.m_ShaderProgram.existsShader(ShaderStage::FRAGMENT);
-        if (!(existsVertexShader && existsFragmentShader))
+        if (!validGeometryStages)
         {
-			vkcv_log(LogLevel::ERROR, "Requires vertex and fragment shader code");
+			vkcv_log(LogLevel::ERROR, "Requires vertex or task and mesh shader");
             return PipelineHandle();
         }
-
-        // vertex shader stage
-        std::vector<char> vertexCode = config.m_ShaderProgram.getShader(ShaderStage::VERTEX).shaderCode;
-        vk::ShaderModuleCreateInfo vertexModuleInfo({}, vertexCode.size(), reinterpret_cast<uint32_t*>(vertexCode.data()));
-        vk::ShaderModule vertexModule{};
-        if (m_Device.createShaderModule(&vertexModuleInfo, nullptr, &vertexModule) != vk::Result::eSuccess)
+        if (!existsFragmentShader) {
+            vkcv_log(LogLevel::ERROR, "Requires fragment shader code");
             return PipelineHandle();
+        }
 
-        vk::PipelineShaderStageCreateInfo pipelineVertexShaderStageInfo(
-                {},
-                vk::ShaderStageFlagBits::eVertex,
-                vertexModule,
-                "main",
-                nullptr
-        );
+        std::vector<vk::PipelineShaderStageCreateInfo> shaderStages;
+        auto destroyShaderModules = [&shaderStages, this] {
+            for (auto stage : shaderStages) {
+                m_Device.destroyShaderModule(stage.module);
+            }
+            shaderStages.clear();
+        };
+
+        if (existsVertexShader) {
+            vk::PipelineShaderStageCreateInfo createInfo;
+            const bool success = createPipelineShaderStageCreateInfo(
+                config.m_ShaderProgram, 
+                vkcv::ShaderStage::VERTEX, 
+                m_Device, 
+                &createInfo);
+
+            if (success) {
+                shaderStages.push_back(createInfo);
+            }
+            else {
+                destroyShaderModules();
+                return PipelineHandle();
+            }
+        }
+
+        if (existsTaskShader) {
+            vk::PipelineShaderStageCreateInfo createInfo;
+            const bool success = createPipelineShaderStageCreateInfo(
+                config.m_ShaderProgram,
+                vkcv::ShaderStage::TASK,
+                m_Device,
+                &createInfo);
+
+            if (success) {
+                shaderStages.push_back(createInfo);
+            }
+            else {
+                destroyShaderModules();
+                return PipelineHandle();
+            }
+        }
+
+        if (existsMeshShader) {
+            vk::PipelineShaderStageCreateInfo createInfo;
+            const bool success = createPipelineShaderStageCreateInfo(
+                config.m_ShaderProgram,
+                vkcv::ShaderStage::MESH,
+                m_Device,
+                &createInfo);
+
+            if (success) {
+                shaderStages.push_back(createInfo);
+            }
+            else {
+                destroyShaderModules();
+                return PipelineHandle();
+            }
+        }
 
         // fragment shader stage
-        std::vector<char> fragCode = config.m_ShaderProgram.getShader(ShaderStage::FRAGMENT).shaderCode;
-        vk::ShaderModuleCreateInfo fragmentModuleInfo({}, fragCode.size(), reinterpret_cast<uint32_t*>(fragCode.data()));
-        vk::ShaderModule fragmentModule{};
-        if (m_Device.createShaderModule(&fragmentModuleInfo, nullptr, &fragmentModule) != vk::Result::eSuccess)
         {
-            m_Device.destroy(vertexModule);
-            return PipelineHandle();
+            vk::PipelineShaderStageCreateInfo createInfo;
+            const bool success = createPipelineShaderStageCreateInfo(
+                config.m_ShaderProgram,
+                vkcv::ShaderStage::FRAGMENT,
+                m_Device,
+                &createInfo);
+
+            if (success) {
+                shaderStages.push_back(createInfo);
+            }
+            else {
+                destroyShaderModules();
+                return PipelineHandle();
+            }
         }
 
-        vk::PipelineShaderStageCreateInfo pipelineFragmentShaderStageInfo(
-                {},
-                vk::ShaderStageFlagBits::eFragment,
-                fragmentModule,
-                "main",
-                nullptr
-        );
-
         // vertex input state
 
         // Fill up VertexInputBindingDescription and VertexInputAttributeDescription Containers
         std::vector<vk::VertexInputAttributeDescription>	vertexAttributeDescriptions;
 		std::vector<vk::VertexInputBindingDescription>		vertexBindingDescriptions;
 
-        const VertexLayout &layout = config.m_VertexLayout;
-
-        // iterate over the layout's specified, mutually exclusive buffer bindings that make up a vertex buffer
-        for (const auto &vertexBinding : layout.vertexBindings)
-        {
-            vertexBindingDescriptions.emplace_back(vertexBinding.bindingLocation,
-                                                   vertexBinding.stride,
-                                                   vk::VertexInputRate::eVertex);
-
-            // iterate over the bindings' specified, mutually exclusive vertex input attachments that make up a vertex
-            for(const auto &vertexAttachment: vertexBinding.vertexAttachments)
-            {
-                vertexAttributeDescriptions.emplace_back(vertexAttachment.inputLocation,
-                                                         vertexBinding.bindingLocation,
-                                                         vertexFormatToVulkanFormat(vertexAttachment.format),
-                                                         vertexAttachment.offset % vertexBinding.stride);
+		if (existsVertexShader) {
+			const VertexLayout& layout = config.m_VertexLayout;
+
+			// iterate over the layout's specified, mutually exclusive buffer bindings that make up a vertex buffer
+			for (const auto& vertexBinding : layout.vertexBindings)
+			{
+				vertexBindingDescriptions.emplace_back(vertexBinding.bindingLocation,
+					vertexBinding.stride,
+					vk::VertexInputRate::eVertex);
+
+				// iterate over the bindings' specified, mutually exclusive vertex input attachments that make up a vertex
+				for (const auto& vertexAttachment : vertexBinding.vertexAttachments)
+				{
+					vertexAttributeDescriptions.emplace_back(vertexAttachment.inputLocation,
+						vertexBinding.bindingLocation,
+						vertexFormatToVulkanFormat(vertexAttachment.format),
+						vertexAttachment.offset % vertexBinding.stride);
+
+				}
+			}
 
-            }
-        }
+		}
 
         // Handover Containers to PipelineVertexInputStateCreateIngo Struct
         vk::PipelineVertexInputStateCreateInfo pipelineVertexInputStateCreateInfo(
@@ -240,8 +335,7 @@ namespace vkcv
         vk::PipelineLayout vkPipelineLayout{};
         if (m_Device.createPipelineLayout(&pipelineLayoutCreateInfo, nullptr, &vkPipelineLayout) != vk::Result::eSuccess)
         {
-            m_Device.destroy(vertexModule);
-            m_Device.destroy(fragmentModule);
+            destroyShaderModules();
             return PipelineHandle();
         }
 	
@@ -276,25 +370,28 @@ namespace vkcv
 		    dynamicStates.push_back(vk::DynamicState::eScissor);
         }
 
-        vk::PipelineDynamicStateCreateInfo dynamicStateCreateInfo({},
-                                                            static_cast<uint32_t>(dynamicStates.size()),
-                                                            dynamicStates.data());
-
-        // graphics pipeline create
-        std::vector<vk::PipelineShaderStageCreateInfo> shaderStages = { pipelineVertexShaderStageInfo, pipelineFragmentShaderStageInfo };
-
-		const char *geometryShaderName = "main";	// outside of if to make sure it stays in scope
-		vk::ShaderModule geometryModule;
-		if (config.m_ShaderProgram.existsShader(ShaderStage::GEOMETRY)) {
-			const vkcv::Shader geometryShader = config.m_ShaderProgram.getShader(ShaderStage::GEOMETRY);
-			const auto& geometryCode = geometryShader.shaderCode;
-			const vk::ShaderModuleCreateInfo geometryModuleInfo({}, geometryCode.size(), reinterpret_cast<const uint32_t*>(geometryCode.data()));
-			if (m_Device.createShaderModule(&geometryModuleInfo, nullptr, &geometryModule) != vk::Result::eSuccess) {
-				return PipelineHandle();
-			}
-			vk::PipelineShaderStageCreateInfo geometryStage({}, vk::ShaderStageFlagBits::eGeometry, geometryModule, geometryShaderName);
-			shaderStages.push_back(geometryStage);
-		}
+        vk::PipelineDynamicStateCreateInfo dynamicStateCreateInfo(
+            {},
+            static_cast<uint32_t>(dynamicStates.size()),
+            dynamicStates.data());
+
+        const bool existsGeometryShader = config.m_ShaderProgram.existsShader(vkcv::ShaderStage::GEOMETRY);
+        if (existsGeometryShader) {
+            vk::PipelineShaderStageCreateInfo createInfo;
+            const bool success = createPipelineShaderStageCreateInfo(
+                config.m_ShaderProgram,
+                vkcv::ShaderStage::GEOMETRY,
+                m_Device,
+                &createInfo);
+
+            if (success) {
+                shaderStages.push_back(createInfo);
+            }
+            else {
+                destroyShaderModules();
+                return PipelineHandle();
+            }
+        }
 
         const vk::GraphicsPipelineCreateInfo graphicsPipelineCreateInfo(
                 {},
@@ -319,20 +416,11 @@ namespace vkcv
         vk::Pipeline vkPipeline{};
         if (m_Device.createGraphicsPipelines(nullptr, 1, &graphicsPipelineCreateInfo, nullptr, &vkPipeline) != vk::Result::eSuccess)
         {
-            m_Device.destroy(vertexModule);
-            m_Device.destroy(fragmentModule);
-            if (geometryModule) {
-                m_Device.destroy(geometryModule);
-            }
-            m_Device.destroy();
+            destroyShaderModules();
             return PipelineHandle();
         }
 
-        m_Device.destroy(vertexModule);
-        m_Device.destroy(fragmentModule);
-        if (geometryModule) {
-            m_Device.destroy(geometryModule);
-        }
+        destroyShaderModules();
         
         const uint64_t id = m_Pipelines.size();
         m_Pipelines.push_back({ vkPipeline, vkPipelineLayout, config });
@@ -457,4 +545,4 @@ namespace vkcv
         vk::ShaderModuleCreateInfo moduleInfo({}, code.size(), reinterpret_cast<uint32_t*>(code.data()));
         return m_Device.createShaderModule(&moduleInfo, nullptr, &module);
     }
-}
\ No newline at end of file
+}