diff --git a/projects/mesh_shader/resources/monke.glb b/projects/mesh_shader/resources/monke.glb
new file mode 100644
index 0000000000000000000000000000000000000000..47d0b9131f15a8f0697318d0a47302c71cad1db8
--- /dev/null
+++ b/projects/mesh_shader/resources/monke.glb
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:597584db90a3f51088beea6652d8320e82cb025f9d3d036b89e54ad72c732a06
+size 98612
diff --git a/projects/mesh_shader/resources/shaders/shader.frag b/projects/mesh_shader/resources/shaders/shader.frag
index f4f6982f2089e6c8e102027f3b8763bb38f8e59c..17bf8f960bb6996f039c4e73e547db802f1ceab4 100644
--- a/projects/mesh_shader/resources/shaders/shader.frag
+++ b/projects/mesh_shader/resources/shaders/shader.frag
@@ -28,5 +28,5 @@ vec3 colorFromIndex(uint i){
 
 void main() {
 	outColor = normalize(passNormal) * 0.5 + 0.5;
-    outColor = colorFromIndex(passTaskIndex);
+    //outColor = colorFromIndex(passTaskIndex); // debug view (one color per task), off: shader.mesh does not write passTaskIndex
 }
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.mesh b/projects/mesh_shader/resources/shaders/shader.mesh
index 848fca2ccbcdf94eb9551bfd3aa101710daf29a8..f26cef3d850835b0c6a57c4b25a4279c2906570f 100644
--- a/projects/mesh_shader/resources/shaders/shader.mesh
+++ b/projects/mesh_shader/resources/shaders/shader.mesh
@@ -2,10 +2,10 @@
 #extension GL_ARB_separate_shader_objects   : enable
 #extension GL_NV_mesh_shader                : require
 
-layout(local_size_x=30) in;
+layout(local_size_x=32) in;
 
 layout(triangles) out;
-layout(max_vertices=30, max_primitives=10) out;
+layout(max_vertices=64, max_primitives=126) out;
 
 layout( push_constant ) uniform constants{
     mat4 mvp;
@@ -27,31 +27,58 @@ layout(std430, binding = 0) readonly buffer vertexBuffer
 
 layout(std430, binding = 1) readonly buffer indexBuffer
 {
-    uint indices[]; // breaks for 16 bit indices
+    uint localIndices[]; // meshlet-local vertex indices, uploaded by main.cpp as 32 bit uints
+};
+
+struct Meshlet{ // same four uint fields as the Meshlet struct in main.cpp
+    uint vertexOffset; // first vertex of the meshlet in vertices[]
+    uint vertexCount;  // number of vertices main() transforms for the meshlet
+    uint indexOffset;  // first entry of the meshlet in localIndices[]
+    uint indexCount;   // number of local indices, three per triangle
+};
+
+layout(std430, binding = 2) readonly buffer meshletBuffer
+{
+    Meshlet meshlets[]; // indexed with the meshletIndex received from the task shader
 };
 
 taskNV in Task {
-  uint baseID;
+  uint meshletIndex;
 } IN;
 
 void main()	{
     
-    uint workIndex                      = gl_LocalInvocationID.x;      
-    gl_PrimitiveIndicesNV[workIndex]    = workIndex;
+    Meshlet meshlet = meshlets[IN.meshletIndex]; // meshlet chosen by the task shader
     
-    const uint verticesPerMeshTask  = 30;
-    uint previousMeshGroupCount     = IN.baseID;
-    uint indexBufferIndex           = previousMeshGroupCount * verticesPerMeshTask + workIndex;
-    uint index                      = indices[indexBufferIndex];
+    // set vertices: 2 passes of 32 invocations (local_size_x) cover max_vertices = 64
+    for(uint i = 0; i < 2; i++){
     
-    vec3 inPos      = vertices[index].position;
-    vec3 inNormal   = vertices[index].normal;
+        uint workIndex = gl_LocalInvocationID.x + 32 * i;
+        if(workIndex >= meshlet.vertexCount){ // this invocation has no vertex left to process
+            break;
+        }
     
-    gl_MeshVerticesNV[workIndex].gl_Position    = mvp * vec4(inPos, 1);
-    passNormal[workIndex]                       = inNormal;
-    passTaskIndex[workIndex]                    = previousMeshGroupCount;
+        uint vertexIndex    = meshlet.vertexOffset + workIndex;
+        Vertex vertex       = vertices[vertexIndex];
+    
+        gl_MeshVerticesNV[workIndex].gl_Position    = mvp * vec4(vertex.position, 1);
+        passNormal[workIndex]                       = vertex.normal;
+        // passTaskIndex[workIndex]                    = IN.meshletIndex;
+    }
+    
+    // set local indices: 12 passes of 32 invocations cover 384 >= max_primitives * 3 = 378 indices
+    for(uint i = 0; i < 12; i++){
+    
+        uint workIndex = gl_LocalInvocationID.x + i * 32;
+        if(workIndex >= meshlet.indexCount){ // this invocation has no index left to copy
+            break;
+        }    
+        
+        uint indexBufferIndex               = meshlet.indexOffset + workIndex;
+        gl_PrimitiveIndicesNV[workIndex]    = localIndices[indexBufferIndex]; // copied unchanged
+    }
     
     if(gl_LocalInvocationID.x == 0){
-        gl_PrimitiveCountNV = 10;
+        gl_PrimitiveCountNV = meshlet.indexCount / 3; // three indices per triangle
     }
 }
\ No newline at end of file
diff --git a/projects/mesh_shader/resources/shaders/shader.task b/projects/mesh_shader/resources/shaders/shader.task
index c2c143a8130da4d52567d1bee28e82ea480e5939..aedeba1505e21c63cfa89b04fb73e97955cd9b8c 100644
--- a/projects/mesh_shader/resources/shaders/shader.task
+++ b/projects/mesh_shader/resources/shaders/shader.task
@@ -5,14 +5,10 @@
 layout(local_size_x=1) in;
 
 taskNV out Task {
-  uint baseID;
+  uint meshletIndex;
 } OUT;
 
-layout( push_constant ) uniform constants{
-    mat4 mvp;
-};
-
 void main() {
-    gl_TaskCountNV  = 1;
-    OUT.baseID      = gl_GlobalInvocationID.x;
+    gl_TaskCountNV      = 1;                        // launch a single mesh workgroup for this task workgroup
+    OUT.meshletIndex    = gl_GlobalInvocationID.x;  // local_size_x is 1, so this is one meshlet per task workgroup
 }
\ No newline at end of file
diff --git a/projects/mesh_shader/src/main.cpp b/projects/mesh_shader/src/main.cpp
index a4993454a91f0569b4fe75c11d41385329982e6f..699c40263ffb9a9d3e1e07ce95b9a636fa564ebd 100644
--- a/projects/mesh_shader/src/main.cpp
+++ b/projects/mesh_shader/src/main.cpp
@@ -8,6 +8,7 @@
 #include <vkcv/gui/GUI.hpp>
 #include <vkcv/asset/asset_loader.hpp>
 #include "MeshStruct.hpp"
+#include <map>
 
 struct Vertex {
 	glm::vec3   position;
@@ -16,6 +17,13 @@ struct Vertex {
 	float       padding1;
 };
 
+struct Meshlet { // same four 32 bit fields as the Meshlet struct in shader.mesh
+    uint32_t vertexOffset; // first entry of the meshlet in MeshShaderModelData::vertices
+    uint32_t vertexCount;  // number of vertices belonging to the meshlet
+    uint32_t indexOffset;  // first entry of the meshlet in MeshShaderModelData::localIndices
+    uint32_t indexCount;   // number of local indices, three per triangle
+};
+
 std::vector<Vertex> convertToVertices(
 	const std::vector<uint8_t>&         vertexData,
 	const uint64_t                      vertexCount,
@@ -45,6 +53,117 @@ std::vector<Vertex> convertToVertices(
 	return vertices;
 }
 
+struct MeshShaderModelData { // result of createMeshShaderModelData, uploaded to the mesh shader storage buffers
+	std::vector<Vertex>     vertices;       // vertices grouped per meshlet (bound at binding 0)
+	std::vector<uint32_t>   localIndices;   // meshlet-local triangle indices (bound at binding 1)
+	std::vector<Meshlet>    meshlets;       // offsets and counts into the two vectors above (bound at binding 2)
+};
+
+// Splits an indexed triangle list into meshlets that fit the output limits of
+// shader.mesh (max_vertices = 64, max_primitives = 126).
+// inVertices: vertices addressed by the global indices in inIndices (not range checked)
+// inIndices:  triangle list, three global indices per triangle; indices that do
+//             not form a complete triangle at the end are ignored
+// Returns per-meshlet vertex ranges, meshlet-local indices and the meshlet table.
+// A local index k of a meshlet refers to vertices[meshlet.vertexOffset + k], which
+// is how shader.mesh addresses its vertices. No meshlet is emitted for empty input.
+MeshShaderModelData createMeshShaderModelData(
+	const std::vector<Vertex>&      inVertices,
+	const std::vector<uint32_t>&    inIndices) {
+
+	// keep in sync with the layout qualifiers in shader.mesh
+	const size_t maxVerticesPerMeshlet = 64;
+	const size_t maxIndicesPerMeshlet  = 126 * 3;
+
+	MeshShaderModelData data;
+
+	// only whole triangles are consumed, so reading three indices never overruns
+	const size_t usableIndexCount = inIndices.size() - inIndices.size() % 3;
+	size_t currentIndex = 0;
+
+	while (currentIndex < usableIndexCount) {
+		Meshlet meshlet;
+
+		meshlet.indexCount  = 0;
+		meshlet.vertexCount = 0;
+
+		meshlet.indexOffset  = static_cast<uint32_t>(data.localIndices.size());
+		meshlet.vertexOffset = static_cast<uint32_t>(data.vertices.size());
+
+		// global index -> position of that vertex inside the current meshlet
+		std::map<uint32_t, uint32_t> globalToLocalIndexMap;
+
+		while (currentIndex < usableIndexCount) {
+
+			// the limit is inclusive: a meshlet may hold exactly 126 triangles
+			const bool enoughSpaceForIndices = meshlet.indexCount + 3 <= maxIndicesPerMeshlet;
+			if (!enoughSpaceForIndices) {
+				break;
+			}
+
+			// upper bound of new vertices: an index repeated inside the triangle
+			// is counted more than once, which only closes the meshlet a bit early
+			size_t vertexCountToAdd = 0;
+			for (int i = 0; i < 3; i++) {
+				const uint32_t globalIndex = inIndices[currentIndex + i];
+				const bool containsVertex  = globalToLocalIndexMap.find(globalIndex) != globalToLocalIndexMap.end();
+				if (!containsVertex) {
+					vertexCountToAdd++;
+				}
+			}
+
+			// the vertex limit is inclusive as well: exactly 64 vertices are allowed
+			const bool enoughSpaceForVertices = meshlet.vertexCount + vertexCountToAdd <= maxVerticesPerMeshlet;
+			if (!enoughSpaceForVertices) {
+				break;
+			}
+
+			for (int i = 0; i < 3; i++) {
+				const uint32_t globalIndex = inIndices[currentIndex + i];
+
+				// local indices are handed out in first-use order and the vertex is
+				// appended right away, so data.vertices is stored in that same order
+				// (iterating the map afterwards would yield ascending global order)
+				const auto insertion = globalToLocalIndexMap.emplace(globalIndex, meshlet.vertexCount);
+				if (insertion.second) {
+					data.vertices.push_back(inVertices[globalIndex]);
+					meshlet.vertexCount++;
+				}
+
+				data.localIndices.push_back(insertion.first->second);
+			}
+
+			meshlet.indexCount += 3;
+			currentIndex       += 3;
+		}
+
+		data.meshlets.push_back(meshlet);
+	}
+
+	return data;
+}
+
+// Widens raw asset loader index bytes (assembled as little endian) to 32 bit indices, incomplete
+// trailing bytes are dropped. Unsupported index types log an error and yield an empty vector.
+std::vector<uint32_t> assetLoaderIndicesTo32BitIndices(const std::vector<uint8_t>& indexData, vkcv::asset::IndexType indexType) {
+	size_t bytesPerIndex = 0;
+	if (indexType == vkcv::asset::IndexType::UINT16) {
+		bytesPerIndex = 2;
+	}
+	else if (indexType == vkcv::asset::IndexType::UINT32) {
+		bytesPerIndex = 4;
+	}
+	else {
+		vkcv_log(vkcv::LogLevel::ERROR, "Unsupported index type");
+		return {};
+	}
+	std::vector<uint32_t> indices(indexData.size() / bytesPerIndex, 0); // sized once, no regrowth
+	for (size_t i = 0; i < indices.size() * bytesPerIndex; i++) {
+		indices[i / bytesPerIndex] |= static_cast<uint32_t>(indexData[i]) << (8 * (i % bytesPerIndex)); // no unaligned or aliasing read
+	}
+	return indices;
+}
+
 int main(int argc, const char** argv) {
 	const char* applicationName = "Mesh shader";
 
@@ -106,18 +225,29 @@ int main(int argc, const char** argv) {
 			vkcv::VertexBufferBinding(static_cast<vk::DeviceSize>(attributes[2].offset), vertexBuffer.getVulkanHandle()) };
 
 	const auto& bunny = mesh.vertexGroups[0];
-	const auto interleavedVertices = convertToVertices(bunny.vertexBuffer.data, bunny.numVertices, attributes[0], attributes[1]);
+	const std::vector<Vertex> interleavedVertices = convertToVertices(bunny.vertexBuffer.data, bunny.numVertices, attributes[0], attributes[1]);
 
 	// mesh shader buffers
-	auto meshBuffer = core.createBuffer<Vertex>(
+	const auto& assetLoaderIndexBuffer              = mesh.vertexGroups[0].indexBuffer;
+	const std::vector<uint32_t> indexBuffer32Bit    = assetLoaderIndicesTo32BitIndices(assetLoaderIndexBuffer.data, assetLoaderIndexBuffer.type);
+	const auto meshShaderModelData                  = createMeshShaderModelData(interleavedVertices, indexBuffer32Bit);
+
+	auto meshShaderVertexBuffer = core.createBuffer<Vertex>(
 		vkcv::BufferType::STORAGE,
-		interleavedVertices.size());
-	meshBuffer.fill(interleavedVertices);
+		meshShaderModelData.vertices.size());
+	meshShaderVertexBuffer.fill(meshShaderModelData.vertices);
 
-	auto meshShaderIndexBuffer = core.createBuffer<uint8_t>(
+	auto meshShaderIndexBuffer = core.createBuffer<uint32_t>(
 		vkcv::BufferType::STORAGE,
-		mesh.vertexGroups[0].indexBuffer.data.size());
-	meshShaderIndexBuffer.fill(mesh.vertexGroups[0].indexBuffer.data);
+		meshShaderModelData.localIndices.size());
+	meshShaderIndexBuffer.fill(meshShaderModelData.localIndices);
+
+	auto meshletBuffer = core.createBuffer<Meshlet>(
+		vkcv::BufferType::STORAGE,
+		meshShaderModelData.meshlets.size(),
+		vkcv::BufferMemoryType::DEVICE_LOCAL
+		);
+	meshletBuffer.fill(meshShaderModelData.meshlets);
 
 	// attachments
 	const vkcv::AttachmentDescription present_color_attachment(
@@ -219,8 +349,10 @@ int main(int argc, const char** argv) {
 
     vkcv::DescriptorWrites meshShaderWrites;
 	meshShaderWrites.storageBufferWrites = {
-		vkcv::StorageBufferDescriptorWrite(0, meshBuffer.getHandle()), 
-		vkcv::StorageBufferDescriptorWrite(1, meshShaderIndexBuffer.getHandle()) };
+		vkcv::StorageBufferDescriptorWrite(0, meshShaderVertexBuffer.getHandle()), 
+		vkcv::StorageBufferDescriptorWrite(1, meshShaderIndexBuffer.getHandle()),
+		vkcv::StorageBufferDescriptorWrite(2, meshletBuffer.getHandle()) };
+
     core.writeDescriptorSet( meshShaderDescriptorSet, meshShaderWrites);
 
     vkcv::ImageHandle depthBuffer = core.createImage(vk::Format::eD32Sfloat, windowWidth, windowHeight, 1, false).getHandle();
@@ -267,14 +399,13 @@ int main(int argc, const char** argv) {
 		if (useMeshShader) {
 
 			vkcv::DescriptorSetUsage descriptorUsage(0, core.getDescriptorSet(meshShaderDescriptorSet).vulkanHandle);
-			const uint32_t verticesPerTask = 30;
 
 			core.recordMeshShaderDrawcalls(
 				cmdStream,
 				renderPass,
 				meshShaderPipeline,
 				pushConstantData,
-				{ vkcv::MeshShaderDrawcall({descriptorUsage}, glm::ceil(bunny.numIndices / float(verticesPerTask))) },
+				{ vkcv::MeshShaderDrawcall({descriptorUsage}, meshShaderModelData.meshlets.size()) },
 				{ renderTargets });
 		}
 		else {