#version 450
#extension GL_GOOGLE_include_directive : enable

// Workgroup of 4 x 4 x 4 = 64 invocations. main() uses gl_GlobalInvocationID
// directly as the 3D grid-cell coordinate, i.e. one invocation per cell.
layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;

#include "particle.inc"

// Grid input. main() loads one texel per invocation and interprets
// .xyz as the cell velocity and .w as the cell mass.
layout(set=0, binding=0, rgba32f) restrict readonly uniform image3D gridImage;
// Grid output. main() stores vec4(velocity, mass) for the same cell here.
// Note: despite the name, the value written by this shader is the updated
// velocity and the unchanged mass, not a force.
layout(set=0, binding=1, rgba32f) restrict writeonly uniform image3D gridForceImage;
// Read-only particle storage buffer. It is not accessed by main() in this
// file. `Particle` is not declared here; presumably it comes from
// particle.inc -- TODO confirm.
layout(set=0, binding=2, std430) readonly buffer particleBuffer {
    Particle particles [];
};

// Push constants. main() in this file reads only `dt`; the other members are
// declared but not referenced here (presumably the block is shared with other
// simulation passes -- TODO confirm). Member order defines the block layout,
// so it has to stay in sync with whatever the host pushes.
layout( push_constant ) uniform constants {
    float lame1;  // presumably the first Lame parameter -- TODO confirm; unused here
    float lame2;  // presumably the second Lame parameter -- TODO confirm; unused here
    float alpha;  // meaning not evident from this file; unused here
    float beta;   // meaning not evident from this file; unused here
    float t;      // presumably the current simulation time -- TODO confirm; unused here
    float dt;     // scales the -9.81 gravity term added to velocity.y in main()
};

// Number of elements in the shared_particles array below.
#define SHARED_PARTICLES_BATCH_SIZE 64

// Workgroup-shared array of particles. NOTE(review): it is never read or
// written by main() in this file -- confirm whether it is still needed.
shared Particle shared_particles [SHARED_PARTICLES_BATCH_SIZE];

// Grid update pass, one invocation per grid cell.
//
// For the cell addressed by gl_GlobalInvocationID this:
//   1. loads (velocity.xyz, mass.w) from gridImage,
//   2. adds gravity (-9.81 * dt on the y axis) if the cell carries mass,
//   3. on cells that touch a face of the grid, negates every velocity
//      component that points out of the grid,
//   4. stores vec4(velocity, mass) to gridForceImage.
//
// No barriers are required: each invocation reads only its own texel from the
// readonly input image and writes only its own texel of the writeonly output
// image, and neither shared memory nor the particle buffer is touched.
void main() {
    const uvec3 cell = gl_GlobalInvocationID;
    const uvec3 gridSize = uvec3(imageSize(gridForceImage));

    // A dispatch may be rounded up to a whole number of workgroups, in which
    // case some invocations lie outside the image. Skip them explicitly
    // instead of relying on out-of-bounds image access behaviour. The early
    // return is safe because this shader contains no barrier().
    if (any(greaterThanEqual(cell, gridSize))) {
        return;
    }

    const vec4 gridSample = imageLoad(gridImage, ivec3(cell));

    vec3 velocity = gridSample.xyz;
    const float mass = gridSample.w;

    // Gravity only acts on cells that actually contain mass.
    const float gravityY = -9.81f;
    if (mass > 0.0f) {
        velocity += vec3(0.0f, gravityY * dt, 0.0f);
    }

    // Cells on the lowest / highest index along each axis.
    // `cell` is unsigned, so "<= 0" is simply "== 0".
    const bvec3 atLowerFace = equal(cell, uvec3(0u));
    const bvec3 atUpperFace = greaterThanEqual(cell + uvec3(1u), gridSize);

    const bvec3 movingDown = lessThan(velocity, vec3(0.0f));
    const bvec3 movingUp = greaterThan(velocity, vec3(0.0f));

    // A component is reflected when the cell sits on a face and the velocity
    // along that axis points through that face.
    const bvec3 reflectAxis = bvec3(
        (atLowerFace.x && movingDown.x) || (atUpperFace.x && movingUp.x),
        (atLowerFace.y && movingDown.y) || (atUpperFace.y && movingUp.y),
        (atLowerFace.z && movingDown.z) || (atUpperFace.z && movingUp.z)
    );

    velocity = mix(velocity, -velocity, reflectAxis);

    imageStore(gridForceImage, ivec3(cell), vec4(velocity, mass));
}