Sphere Generation System With CUDA-OpenGL Interop

Question

This is a follow-up to my previous question; this one focuses on the actual tessellation pipeline.

What I changed from previous question

Implemented asynchronous sphere generation in a separate CPU thread
Improved separation of concerns in my code
Implemented an RAII class for managing the active OpenGL context in each CPU thread

Here Is The Code:

include/sphere_generation/fwd.h

//
// Created by progamers on 8/26/25.
//

#ifndef SPACE_EXPLORER_SPHERE_GENERATION_FWD_H
#define SPACE_EXPLORER_SPHERE_GENERATION_FWD_H
#include <cmath>
#include <cstdint>

#ifdef __CUDACC__
#define HOST_DEVICE __host__ __device__
#else
#define HOST_DEVICE
#endif

namespace raw::sphere_generation {
// Tunable generation parameters and the buffer capacity limits derived from them.
namespace predef {
// Oh and btw, turns out, even after 4 steps our sphere gets nearly perfect (even on 2k monitor,
// well maybe on 4k it would be nice to have 6, but 4 is pretty much enough)
static constexpr auto BASIC_RADIUS = 1.0f;
static constexpr auto BASIC_STEPS  = 7U;
static constexpr auto MAX_STEPS    = 8U;
// Things below you can't change, all things above you can
static constexpr auto BASIC_AMOUNT_OF_TRIANGLES = 20U;

// The largest derived value is the index count, 3 * 20 * 4^MAX_STEPS. With 32-bit arithmetic
// that silently wraps for MAX_STEPS >= 14, so refuse to compile instead of under-allocating.
static_assert(MAX_STEPS <= 13U,
              "MAX_STEPS is too large: the derived index count no longer fits in uint32_t");

// Triangle capacity: 20 * 4^MAX_STEPS.
static constexpr uint32_t MAXIMUM_AMOUNT_OF_TRIANGLES =
    BASIC_AMOUNT_OF_TRIANGLES * (1u << (2u * MAX_STEPS));
// Index capacity: three indices per triangle.
static constexpr uint32_t MAXIMUM_AMOUNT_OF_INDICES = MAXIMUM_AMOUNT_OF_TRIANGLES * 3u;
// Vertex capacity: 10 * 4^MAX_STEPS + 2.
static constexpr uint32_t MAXIMUM_AMOUNT_OF_VERTICES = 10u * (1u << (2u * MAX_STEPS)) + 2u;
} // namespace predef

class icosahedron_data_manager;
class sphere_generator;
class generation_context;
// An edge of the sphere mesh, stored as the indices of its two end vertices.
// Comparison operators are usable from both host and device code.
struct edge {
    uint32_t v0;
    uint32_t v1;

    // Lexicographic ordering: compare v0 first, fall back to v1 on a tie.
    HOST_DEVICE bool operator<(const edge& rhs) const {
        return v0 != rhs.v0 ? v0 < rhs.v0 : v1 < rhs.v1;
    }

    // Two edges are equal only when both end indices match in the same order.
    HOST_DEVICE bool operator==(const edge& rhs) const {
        return !(v0 != rhs.v0 || v1 != rhs.v1);
    }

    HOST_DEVICE bool operator!=(const edge& rhs) const {
        return !(*this == rhs);
    }
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_SPHERE_GENERATION_FWD_H

include/graphics/gl_context_lock.h

//
// Created by progamers on 9/1/25.
//

#ifndef SPACE_EXPLORER_GL_CONTEXT_LOCK_H
#define SPACE_EXPLORER_GL_CONTEXT_LOCK_H
#include <format>
#include <mutex>
#include <stdexcept>

#include "window/fwd.h"

namespace raw::graphics {
// Selects which mutex/context pair of graphics_data a gl_context_lock operates on.
enum class context_type {
    MAIN,    // main_mutex / main_context
    TESS,    // tessellation_mutex / tessellation_context
    TEX_GEN, // texture_gen_mutex / texture_gen_context
};

// Bundles the SDL window with one OpenGL context per usage, each paired with the mutex that
// gl_context_lock holds while that context is current.
//
// All handles are value-initialized so that a partially filled instance fails in a defined
// way (SDL reports an error) instead of reading indeterminate pointers.
struct graphics_data {
    SDL_Window*   window {};
    std::mutex    main_mutex;
    SDL_GLContext main_context {};
    std::mutex    tessellation_mutex;
    SDL_GLContext tessellation_context {};
    std::mutex    texture_gen_mutex;
    SDL_GLContext texture_gen_context {};
};
/**
 * Scoped guard that locks the mutex belonging to `ctx_type` and makes the matching OpenGL
 * context current via SDL_GL_MakeCurrent for as long as the guard is alive.
 *
 * On destruction the context is released (SDL_GL_MakeCurrent(window, nullptr)) and then the
 * mutex is unlocked. The guard is neither copyable nor movable: it holds a std::lock_guard,
 * and a second owner would release the context twice.
 *
 * @tparam ctx_type which mutex/context pair of graphics_data to use.
 * @throws std::runtime_error from the constructor if SDL_GL_MakeCurrent fails; the mutex is
 *         released again in that case because the lock member is destroyed.
 */
template<context_type ctx_type>
class gl_context_lock {
private:
    static_assert(ctx_type == context_type::MAIN || ctx_type == context_type::TESS ||
                      ctx_type == context_type::TEX_GEN,
                  "gl_context_lock instantiated with an unknown context_type");

    std::lock_guard<std::mutex> lock;
    SDL_Window*                 window;

    // Picks the mutex that guards the context selected by ctx_type.
    static std::mutex& mutex_for(graphics_data& data) noexcept {
        using enum context_type;
        if constexpr (ctx_type == MAIN) {
            return data.main_mutex;
        } else if constexpr (ctx_type == TESS) {
            return data.tessellation_mutex;
        } else {
            return data.texture_gen_mutex;
        }
    }

    // Picks the OpenGL context selected by ctx_type.
    static SDL_GLContext context_for(graphics_data& data) noexcept {
        using enum context_type;
        if constexpr (ctx_type == MAIN) {
            return data.main_context;
        } else if constexpr (ctx_type == TESS) {
            return data.tessellation_context;
        } else {
            return data.texture_gen_context;
        }
    }

public:
    explicit gl_context_lock(graphics_data& data)
        : lock(mutex_for(data)), window(data.window) {
        if (!SDL_GL_MakeCurrent(window, context_for(data))) {
            throw std::runtime_error(std::format(
                "Failed to Set Current Context, IDK what to do, bye bye! {}\n", SDL_GetError()));
        }
    }

    ~gl_context_lock() {
        // Detach the context before the mutex is released so the next holder can bind it.
        SDL_GL_MakeCurrent(window, nullptr);
    }

    gl_context_lock(const gl_context_lock&)            = delete;
    gl_context_lock(gl_context_lock&&)                 = delete;
    gl_context_lock& operator=(const gl_context_lock&) = delete;
    gl_context_lock& operator=(gl_context_lock&&)      = delete;
};
} // namespace raw::graphics

#endif // SPACE_EXPLORER_GL_CONTEXT_LOCK_H

include/sphere_generation/icosahedron_data_manager.h

//
// Created by progamers on 7/7/25.
//

#ifndef SPACE_EXPLORER_MESH_GENERATOR_H
#define SPACE_EXPLORER_MESH_GENERATOR_H
#include <raw_memory.h>

#include <array>
#include <glm/glm.hpp>

#include "cuda_types/buffer.h"
#include "cuda_types/cuda_from_gl_data.h"
#include "graphics/vertex.h"
#include "sphere_generation/fwd.h"
#include "sphere_generation/generation_context.h"

namespace raw::sphere_generation {

// Owns the GPU-side buffers used while tessellating an icosahedron into a sphere.
// The per-generation setup/teardown (`prepare` / `cleanup`) is private and driven by
// `generation_context`, which is a friend; obtain one through `create_context()`.
//
// NOTE: the member declaration order below is the initialization order the constructors
// depend on (`stream` is initialized before the buffers that receive it).
class icosahedron_data_manager {
private:
    // Handles constructed from the OpenGL VBO / EBO ids in `init`; mapped and unmapped
    // around each generation by `prepare` / `cleanup`.
    cuda_types::cuda_from_gl_data<raw::graphics::vertex> vertices_handle;
    cuda_types::cuda_from_gl_data<uint32_t>              indices_handle;
    // Stream handed to every buffer below and used for the asynchronous copies in `prepare`.
    std::shared_ptr<cuda_types::cuda_stream>             stream;

    // OpenGL buffer ids remembered from construction / `init`; passed on by `create_context`.
    uint32_t _vbo;
    uint32_t _ebo;

    // Secondary vertex / index buffers, sized like the GL-backed ones
    // (presumably the ping-pong targets of the tessellation kernels — TODO confirm).
    cuda_types::cuda_buffer<raw::graphics::vertex> vertices_second;
    cuda_types::cuda_buffer<uint32_t>              indices_second;

    // Single-uint32_t device buffers (allocated with sizeof(uint32_t) in the constructors).
    cuda_types::cuda_buffer<uint32_t> amount_of_triangles;
    cuda_types::cuda_buffer<uint32_t> amount_of_vertices;
    cuda_types::cuda_buffer<uint32_t> amount_of_edges;

    // Edge work buffers, each sized for MAXIMUM_AMOUNT_OF_TRIANGLES * 3 entries.
    cuda_types::cuda_buffer<edge>     all_edges;
    cuda_types::cuda_buffer<edge>     d_unique_edges;
    cuda_types::cuda_buffer<uint32_t> edge_to_vertex;

    // Byte sizes written through the pointers passed to cuda_from_gl_data in `init`;
    // reused as the allocation sizes of the secondary buffers.
    size_t vertices_bytes = 0;
    size_t indices_bytes  = 0;

    // Host-side element counts of the base icosahedron; reset by `cleanup`.
    uint32_t num_vertices_cpu  = 12;
    uint32_t num_triangles_cpu = predef::BASIC_AMOUNT_OF_TRIANGLES;

    // Set by `init`; makes `prepare` run `init` only the first time.
    bool inited = false;

    friend class generation_context;
    // Called every time after `generate` function
    // (invoked from generation_context's destructor).
    void cleanup();

    // Called once when the object is created (or generate function called first time)
    void init(uint32_t vbo, uint32_t ebo);

    // Called every time `generate` function
    // (invoked from generation_context's constructor).
    void prepare(uint32_t vbo, uint32_t ebo);

public:
    // Creates only the stream and the three counter buffers; the GL buffers are bound later,
    // on the first `prepare`.
    icosahedron_data_manager();

    // Binds to the given GL buffers immediately by calling `init`.
    icosahedron_data_manager(uint32_t vbo, uint32_t ebo, std::shared_ptr<cuda_types::cuda_stream> stream);

    // Returns a scope guard: `prepare` runs on construction, `cleanup` on destruction.
    generation_context create_context();

    // I am thinking about moving these functions into the tessellation process itself so this class just manages resources lifetimes
    // The 12 vertices of the base icosahedron.
    static constexpr std::array<graphics::vertex, 12> generate_icosahedron_vertices();

    // The 60 indices (20 triangles) of the base icosahedron.
    static constexpr std::array<uint32_t, 60> generate_icosahedron_indices();

    // Returns the raw pointers of all buffers as one tuple, in the order listed below.
    // sphere_generator::generate appends the stream and step count and forwards the result
    // to launch_tessellation, so the order must match that function's parameters.
    auto get_data() const{
        return std::make_tuple(vertices_handle.get_data(), indices_handle.get_data(),
                               all_edges.get(), vertices_second.get(), indices_second.get(),
                               d_unique_edges.get(), edge_to_vertex.get(), amount_of_vertices.get(),
                               amount_of_triangles.get(), amount_of_edges.get());
    }

};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_MESH_GENERATOR_H

src/sphere_generation/icosahedron_data_manager.cpp

//
// Created by progamers on 7/7/25.
//

#include "sphere_generation/icosahedron_data_manager.h"

#include <numbers>

#include "core/clock.h"
#include "cuda_types/buffer.h"
#include "sphere_generation/generation_context.h"
#include "sphere_generation/kernel_launcher.h"

namespace raw::sphere_generation {
// Golden ratio; used for the coordinates of the base icosahedron's vertices.
inline constexpr float GOLDEN_RATIO = std::numbers::phi_v<float>;
// Pi; used when converting vertex normals into texture coordinates.
inline constexpr float PI           = std::numbers::pi_v<float>;

icosahedron_data_manager::icosahedron_data_manager()
    : stream(std::make_shared<cuda_types::cuda_stream>()),
      _vbo(0),
      _ebo(0),
      amount_of_triangles(sizeof(uint32_t), stream),
      amount_of_vertices(sizeof(uint32_t), stream),
      amount_of_edges(sizeof(uint32_t), stream) {}

// Constructs the manager on an existing stream and binds it to the given GL buffers at once.
// `this->stream` is spelled out in the buffer initializers because the parameter shadows the
// member of the same name; both refer to the same stream object.
icosahedron_data_manager::icosahedron_data_manager(
    uint32_t vbo, uint32_t ebo, std::shared_ptr<cuda_types::cuda_stream> stream)
    : stream(stream),
      amount_of_triangles(sizeof(uint32_t), this->stream),
      amount_of_vertices(sizeof(uint32_t), this->stream),
      amount_of_edges(sizeof(uint32_t), this->stream) {
    // Stores the ids, creates every remaining buffer and uploads the base icosahedron.
    init(vbo, ebo);
}

// One-time setup of this manager: remembers the GL buffer ids, creates the GL-backed handles
// and all work buffers, and uploads the base icosahedron with blocking copies.
//
// @param vbo OpenGL vertex buffer id to bind to.
// @param ebo OpenGL element (index) buffer id to bind to.
//
// Must be called at most once per object. The guard uses the per-object `inited` flag; a
// function-local static counter would be shared by every instance and would trip the assert
// as soon as a second manager is initialised.
void icosahedron_data_manager::init(uint32_t vbo, uint32_t ebo) {
    assert(!inited);
    _vbo = vbo;
    _ebo = ebo;

    // The handles receive pointers to the byte-size members, which are then reused as the
    // sizes of the secondary buffers (presumably filled in while the GL buffer is registered
    // or mapped — TODO confirm against cuda_from_gl_data).
    vertices_handle =
        cuda_types::cuda_from_gl_data<raw::graphics::vertex>(&vertices_bytes, vbo, stream);
    indices_handle = cuda_types::cuda_from_gl_data<uint32_t>(&indices_bytes, ebo, stream);

    vertices_second = cuda_types::cuda_buffer<raw::graphics::vertex>(vertices_bytes, stream);
    indices_second  = cuda_types::cuda_buffer<uint32_t>(indices_bytes, stream);
    all_edges       = cuda_types::cuda_buffer<edge>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge), stream);
    edge_to_vertex = cuda_types::cuda_buffer<uint32_t>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(uint32_t), stream);
    d_unique_edges = cuda_types::cuda_buffer<edge>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge), stream);
    amount_of_edges.zero_data(sizeof(uint32_t));

    inited = true;

    // Named locals keep the host data alive for the whole copy; cudaMemcpy takes a
    // `const void*` source, so no cast is needed.
    const auto base_vertices = generate_icosahedron_vertices();
    const auto base_indices  = generate_icosahedron_indices();
    cudaMemcpy(vertices_handle.get_data(), base_vertices.data(),
               num_vertices_cpu * sizeof(graphics::vertex), cudaMemcpyHostToDevice);
    cudaMemcpy(indices_handle.get_data(), base_indices.data(),
               num_triangles_cpu * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice);
}

// Per-generation setup, run by generation_context's constructor.
//
// On the first call the manager is not initialised yet, so everything is delegated to `init`.
// On later calls the GL-backed handles are mapped again, the work buffers are re-allocated
// and the base icosahedron is uploaded asynchronously on the manager's stream.
//
// @param vbo OpenGL vertex buffer id; only used on the first call.
// @param ebo OpenGL element buffer id; only used on the first call.
void icosahedron_data_manager::prepare(uint32_t vbo, uint32_t ebo) {
    if (!inited) {
        init(vbo, ebo);
        return;
    }
    vertices_handle.map();
    indices_handle.map();
    vertices_second.allocate(vertices_bytes);
    indices_second.allocate(indices_bytes);
    all_edges.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge));
    edge_to_vertex.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(uint32_t));
    d_unique_edges.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge));

    // The source of an asynchronous copy has to stay valid after the call returns. Static
    // storage guarantees that (a temporary would be destroyed at the end of the statement)
    // and also avoids rebuilding the base mesh on every generation.
    static const auto base_vertices = generate_icosahedron_vertices();
    static const auto base_indices  = generate_icosahedron_indices();

    cudaMemcpyAsync(vertices_handle.get_data(), base_vertices.data(),
                    num_vertices_cpu * sizeof(graphics::vertex), cudaMemcpyHostToDevice,
                    stream->stream());
    cudaMemcpyAsync(indices_handle.get_data(), base_indices.data(),
                    num_triangles_cpu * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice,
                    stream->stream());
}
// Hands out the scope guard for one generation run: constructing it calls `prepare` with the
// stored buffer ids, destroying it calls `cleanup`.
generation_context icosahedron_data_manager::create_context() {
    return generation_context(*this, _vbo, _ebo);
}

// Per-generation teardown, run by generation_context's destructor: frees the work buffers,
// unmaps the GL-backed handles and resets the host-side counts to the base icosahedron.
// NOTE(review): the free/unmap order is kept exactly as written; whether the cuda_types
// wrappers depend on it cannot be seen from here.
void icosahedron_data_manager::cleanup() {
    vertices_second.free();
    indices_second.free();
    vertices_handle.unmap();
    indices_handle.unmap();
    all_edges.free();
    d_unique_edges.free();
    edge_to_vertex.free();
    // Back to the untessellated icosahedron: 12 vertices, 20 triangles.
    num_vertices_cpu  = 12;
    num_triangles_cpu = predef::BASIC_AMOUNT_OF_TRIANGLES;
}

constexpr std::array<graphics::vertex, 12>
icosahedron_data_manager::generate_icosahedron_vertices() {
    std::array<graphics::vertex, 12> vertices;
    int                              vertex_index = 0;

    const float unscaled_dist = std::sqrt(1.0f + GOLDEN_RATIO * GOLDEN_RATIO);
    const float scale         = 1 / unscaled_dist;
    const float a             = 1.0f * scale;
    const float b             = GOLDEN_RATIO * scale;

    for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 4; ++j) {
            const auto sign1 = (j & 2) ? -1.0f : 1.0f;
            const auto sign2 = (j & 1) ? -1.0f : 1.0f;

            glm::vec3 point(1.0f);
            if (i == 0) {
                point = {sign1 * a, sign2 * b, 0.0f};
            } else if (i == 1) {
                point = {0.0f, sign1 * a, sign2 * b};
            } else {
                point = {sign1 * b, 0.0f, sign2 * a};
            }

            auto &v = vertices[vertex_index++];

            v.position = glm::normalize(point);
            v.normal   = v.position;

            constexpr glm::vec3 up = {0.0f, 1.0f, 0.0f};
            v.tangent              = glm::normalize(glm::cross(up, v.normal));
            v.bitangent            = glm::normalize(glm::cross(v.normal, v.tangent));
            v.tex_coord.x          = 0.5f + std::atan2(v.normal.z, v.normal.x) / (2.0f * PI);
            v.tex_coord.y          = 0.5f - std::asin(v.normal.y) / PI;
        }
    }
    return vertices;
}

// Index list of the base icosahedron: 20 triangles, written one triangle per row. The values
// refer to the vertices produced by generate_icosahedron_vertices().
constexpr std::array<uint32_t, 60> icosahedron_data_manager::generate_icosahedron_indices() {
    constexpr std::array<uint32_t, 60> triangles = {
        2,  10, 4,   //
        2,  4,  0,   //
        2,  0,  5,   //
        2,  5,  11,  //
        2,  11, 10,  //
        0,  4,  8,   //
        4,  10, 6,   //
        10, 11, 3,   //
        11, 5,  7,   //
        5,  0,  9,   //
        1,  8,  6,   //
        1,  6,  3,   //
        1,  3,  7,   //
        1,  7,  9,   //
        1,  9,  8,   //
        6,  8,  4,   //
        3,  6,  10,  //
        7,  3,  11,  //
        9,  7,  5,   //
        8,  9,  0,   //
    };
    return triangles;
}

} // namespace raw::sphere_generation

include/sphere_generation/generation_context.h

//
// Created by progamers on 8/28/25.
//

#ifndef SPACE_EXPLORER_GENERATION_CONTEST_H
#define SPACE_EXPLORER_GENERATION_CONTEST_H
#include "sphere_generation/fwd.h"

namespace raw::sphere_generation {
// Scope guard for one sphere generation run on an icosahedron_data_manager: the constructor
// calls `prepare` on the manager and the destructor calls `cleanup`.
//
// Instances are created only through icosahedron_data_manager::create_context(); returning
// the guard by value relies on guaranteed copy elision, so no copy or move is needed.
//
// The guard is neither copyable nor movable. It holds a reference to the manager, so a
// defaulted move would leave two guards pointing at the same manager and `cleanup` would
// run twice.
class generation_context {
private:
    icosahedron_data_manager& manager;
    friend class icosahedron_data_manager;

protected:
    // Runs manager.prepare(vbo, ebo); reachable only from the friend manager class.
    generation_context(icosahedron_data_manager& mgr, uint32_t vbo, uint32_t ebo);

public:
    // Runs manager.cleanup().
    ~generation_context();
    generation_context(const generation_context& other)            = delete;
    generation_context(generation_context&& other)                 = delete;
    generation_context& operator=(const generation_context& other) = delete;
    generation_context& operator=(generation_context&& other)      = delete;
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_GENERATION_CONTEST_H

src/sphere_generation/generate_context.cpp

//
// Created by progamers on 8/28/25.
//
#include "sphere_generation/generation_context.h"
#include "sphere_generation/icosahedron_data_manager.h"

namespace raw::sphere_generation {
// Binds the guard to `mgr` and immediately prepares the manager for a generation run.
generation_context::generation_context(icosahedron_data_manager& mgr, uint32_t vbo,
                                       uint32_t ebo)
    : manager(mgr) {
    // Forward the buffer ids; the manager decides whether this is a first-time init.
    manager.prepare(vbo, ebo);
}
// Ends the generation run: the manager releases what `prepare` acquired.
generation_context::~generation_context() {
    manager.cleanup();
}

} // namespace raw::sphere_generation

include/sphere_generation/sphere_generator.h

//
// Created by progamers on 8/28/25.
//

#ifndef SPACE_EXPLORER_SPHERE_GENERATOR_H
#define SPACE_EXPLORER_SPHERE_GENERATOR_H
#include <thread>

#include "cuda_types/stream.h"
#include "graphics/gl_context_lock.h"
#include "sphere_generation/fwd.h"
namespace raw::sphere_generation {
// Runs sphere tessellation on a background thread.
// `generate` starts a run (after waiting for any previous one); `sync` waits for it to end.
class sphere_generator {
public:
    sphere_generator() = default;

    // Starts tessellating `source` with `steps` subdivision steps on a worker thread.
    // The referenced stream, manager and graphics data are used from that thread.
    void generate(uint32_t                  steps,
                  cuda_types::cuda_stream&  stream,
                  icosahedron_data_manager& source,
                  graphics::graphics_data&  data);

    // Blocks until the worker thread, if any, has finished.
    void sync();

private:
    std::jthread worker_thread;
};
} // namespace raw::sphere_generation

#endif // SPACE_EXPLORER_SPHERE_GENERATOR_H

src/sphere_generation/sphere_generator.cpp

//
// Created by progamers on 8/28/25.
//
#include "sphere_generation/sphere_generator.h"

#include "sphere_generation/icosahedron_data_manager.h"
#include "sphere_generation/kernel_launcher.h"

namespace raw::sphere_generation {
// Starts an asynchronous tessellation run.
//
// @param steps         number of subdivision steps; must be strictly less than
//                      predef::MAX_STEPS.
// @param stream        CUDA stream wrapper whose raw stream is passed to the kernels.
// @param source        buffer manager to tessellate into.
// @param graphics_data window/context bundle; the TESS context is made current on the worker.
// @throws std::runtime_error if `steps` is out of range (thrown on the calling thread).
//
// Any previous run is joined first. `stream`, `source` and `graphics_data` are captured by
// reference, so the caller must keep them alive until `sync()` returns or this object is
// destroyed.
//
// NOTE(review): an exception thrown inside the worker (for example by gl_context_lock's
// constructor) is not caught and therefore terminates the program.
void sphere_generator::generate(uint32_t steps, cuda_types::cuda_stream& stream,
                                icosahedron_data_manager& source,
                                graphics::graphics_data&  graphics_data) {
    if (steps >= predef::MAX_STEPS) {
        // The wording matches the check: the maximum itself is rejected as well.
        throw std::runtime_error(std::format(
            "[Error] Amount of steps should be less than maximum, which is {}, while was given {}",
            predef::MAX_STEPS, steps));
    }
    sync();
    // No `mutable`: nothing captured by value is modified, and a lambda with specifiers but
    // without a parameter list is only valid from C++23 on.
    worker_thread = std::jthread([&stream, steps, &source, &graphics_data] {
        cudaStream_t local_stream = stream.stream();
        // Declaration order matters: the generation context is destroyed (cleanup) before
        // the GL context lock is released.
        graphics::gl_context_lock<graphics::context_type::TESS> lock(graphics_data);
        auto context         = source.create_context();
        auto data_for_thread = source.get_data();
        // `launch_tessellation` syncs the stream at the end of function so we don't exit the
        // thread before everything was completed and can just call `sync` to wait for sphere
        // tessellation process
        std::apply(sphere_generation::launch_tessellation,
                   std::tuple_cat(std::move(data_for_thread),
                                  std::make_tuple(std::ref(local_stream), steps)));
    });
}
// Waits for the current worker thread to finish; does nothing when no run is pending.
void sphere_generator::sync() {
    if (!worker_thread.joinable()) {
        return;
    }
    worker_thread.join();
}

} // namespace raw::sphere_generation

Also, if you want me to show a class/struct I haven't shown here, just ask for it in the comments and I will edit the question, but I think the code is pretty easy to understand even without them.

Is there anything I might still need to improve before moving on to the next task? I wanted to implement the sphere generation the best way I possibly could and never touch it again, so feel free to share any of your thoughts!

My project stack

C++20
OpenGL 4.6
CUDA 13 C++ 20
SDL 3.0
And some basic stuff like glad, glm, thrust

Environment Info

Arch Linux 6.16
GCC 15.2.1

Would love to hear any suggestions!

Well, for CUDA I started from the Nvidia docs; with the info from them you can already program very cool things, but when I start to need more (for example streams, OpenGL interop), I learn it by doing. And don't think I am some kind of pro at GPGPU, I am actually not :) I've been programming in CUDA for only 4-5 months I think, and I still haven't had any use case for advanced features like graphs, events, etc. — NeKon
– NeKon, Commented Sep 13 at 19:09

Stack Exchange Network

Sphere Generation System With CUDA-OpenGL Interop

What I changed from previous question

My project stack

Environment Info

0

You must log in to answer this question.

Linked

Hot Network Questions

Sphere Generation System With CUDA-OpenGL Interop

What I changed from previous question

My project stack

Environment Info

0

You must log in to answer this question.

Linked

Related

Hot Network Questions