This is a follow-up to my previous question; this one focuses on the actual tessellation pipeline.
What I changed since the previous question:
- Implemented asynchronous sphere generation in a separate CPU thread
- Improved separation of concerns in my code
- Implemented an RAII class for managing the active OpenGL context in each CPU thread
Here is the code:
include/sphere_generation/fwd.h
//
// Created by progamers on 8/26/25.
//
#ifndef SPACE_EXPLORER_SPHERE_GENERATION_FWD_H
#define SPACE_EXPLORER_SPHERE_GENERATION_FWD_H
#include <cmath>
#include <cstdint>
// HOST_DEVICE marks functions callable from both host and device code when compiled by nvcc and
// expands to nothing for a plain host compiler.
#ifdef __CUDACC__
#define HOST_DEVICE __host__ __device__
#else
#define HOST_DEVICE
#endif
namespace raw::sphere_generation {
namespace predef {
// Author's note: about 4 subdivision steps already look nearly perfect on a 2K monitor (maybe 6
// would be nicer on 4K). BASIC_STEPS is currently set higher than that.
static constexpr auto BASIC_RADIUS = 1.0f;
static constexpr auto BASIC_STEPS  = 7U;
static constexpr auto MAX_STEPS    = 8U;

// Things below you can't change, all things above you can
static constexpr auto BASIC_AMOUNT_OF_TRIANGLES = 20U;

// Every subdivision step multiplies the triangle count by 4. The largest derived value is the index
// count, 60 * 4^MAX_STEPS, which fits in uint32_t only up to MAX_STEPS == 13. Unsigned overflow
// wraps silently, so refuse to compile instead of producing undersized buffers.
static_assert(MAX_STEPS <= 13U,
              "MAX_STEPS is too large: MAXIMUM_AMOUNT_OF_INDICES would overflow uint32_t");

// 20 * 4^MAX_STEPS
static constexpr uint32_t MAXIMUM_AMOUNT_OF_TRIANGLES =
    BASIC_AMOUNT_OF_TRIANGLES * (1u << (2u * MAX_STEPS));
// Three indices per triangle.
static constexpr uint32_t MAXIMUM_AMOUNT_OF_INDICES = MAXIMUM_AMOUNT_OF_TRIANGLES * 3u;
// 10 * 4^MAX_STEPS + 2
static constexpr uint32_t MAXIMUM_AMOUNT_OF_VERTICES = 10u * (1u << (2u * MAX_STEPS)) + 2u;
} // namespace predef

class icosahedron_data_manager;
class sphere_generator;
class generation_context;

// Stores 2 indices of vertices in the sphere.
// Ordering is lexicographic on (v0, v1); equality requires both indices to match.
struct edge {
    uint32_t v0;
    uint32_t v1;

    // Lexicographic comparison: v0 decides, v1 breaks ties.
    HOST_DEVICE bool operator<(const edge& other) const {
        if (v0 != other.v0) {
            return v0 < other.v0;
        }
        return v1 < other.v1;
    }

    HOST_DEVICE bool operator==(const edge& other) const {
        return v0 == other.v0 && v1 == other.v1;
    }

    HOST_DEVICE bool operator!=(const edge& other) const {
        return !(*this == other);
    }
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_SPHERE_GENERATION_FWD_H
include/graphics/gl_context_lock.h
//
// Created by progamers on 9/1/25.
//
#ifndef SPACE_EXPLORER_GL_CONTEXT_LOCK_H
#define SPACE_EXPLORER_GL_CONTEXT_LOCK_H
#include <format>
#include <mutex>
#include <stdexcept>
#include "window/fwd.h"
namespace raw::graphics {
// Selects which OpenGL context stored in graphics_data a gl_context_lock binds:
// MAIN -> main_context, TESS -> tessellation_context, TEX_GEN -> texture_gen_context.
enum class context_type { MAIN, TESS, TEX_GEN };
// The window plus one OpenGL context per role, each paired with the mutex that gl_context_lock
// holds while that context is current.
// All handles default to nullptr so a context that has not been created yet is never an
// indeterminate pointer.
struct graphics_data {
    SDL_Window* window = nullptr;

    std::mutex    main_mutex;
    SDL_GLContext main_context = nullptr;

    std::mutex    tessellation_mutex;
    SDL_GLContext tessellation_context = nullptr;

    std::mutex    texture_gen_mutex;
    SDL_GLContext texture_gen_context = nullptr;
};
// Scoped owner of "this thread has the ctx_type OpenGL context current".
//
// Construction locks the mutex that belongs to ctx_type and then makes the matching context
// current on the calling thread; destruction releases the context first and the mutex second.
// The object is pinned to the scope (and thread) that created it: it can be neither copied nor
// moved.
//
// Throws std::runtime_error if SDL cannot make the context current. In that case the mutex is
// released again (the lock member is destroyed) and the destructor does not run.
template<context_type ctx_type>
class gl_context_lock {
private:
    std::lock_guard<std::mutex> lock;
    SDL_Window*                 window;

    // Mutex paired with ctx_type.
    static std::mutex& mutex_for(graphics_data& data) noexcept {
        using enum context_type;
        if constexpr (ctx_type == MAIN) {
            return data.main_mutex;
        } else if constexpr (ctx_type == TESS) {
            return data.tessellation_mutex;
        } else {
            static_assert(ctx_type == TEX_GEN, "unhandled context_type");
            return data.texture_gen_mutex;
        }
    }

    // OpenGL context paired with ctx_type.
    static SDL_GLContext context_for(graphics_data& data) noexcept {
        using enum context_type;
        if constexpr (ctx_type == MAIN) {
            return data.main_context;
        } else if constexpr (ctx_type == TESS) {
            return data.tessellation_context;
        } else {
            static_assert(ctx_type == TEX_GEN, "unhandled context_type");
            return data.texture_gen_context;
        }
    }

public:
    explicit gl_context_lock(graphics_data& data)
        : lock(mutex_for(data)), window(data.window) {
        if (!SDL_GL_MakeCurrent(window, context_for(data))) {
            throw std::runtime_error(std::format(
                "Failed to Set Current Context, IDK what to do, bye bye! {}\n", SDL_GetError()));
        }
    }

    // Detaches the context from this thread; the mutex is released afterwards, when the `lock`
    // member is destroyed.
    ~gl_context_lock() {
        SDL_GL_MakeCurrent(window, nullptr);
    }

    gl_context_lock(const gl_context_lock&)            = delete;
    gl_context_lock& operator=(const gl_context_lock&) = delete;
    // std::lock_guard is immovable, so defaulted move operations would be implicitly deleted
    // anyway; spelling it out keeps the declaration honest. A moved-from lock would also release
    // the context a second time.
    gl_context_lock(gl_context_lock&&)            = delete;
    gl_context_lock& operator=(gl_context_lock&&) = delete;
};
} // namespace raw::graphics
#endif // SPACE_EXPLORER_GL_CONTEXT_LOCK_H
include/sphere_generation/icosahedron_data_manager.h
//
// Created by progamers on 7/7/25.
//
#ifndef SPACE_EXPLORER_MESH_GENERATOR_H
#define SPACE_EXPLORER_MESH_GENERATOR_H
#include <raw_memory.h>
#include <array>
#include <glm/glm.hpp>
#include "cuda_types/buffer.h"
#include "cuda_types/cuda_from_gl_data.h"
#include "graphics/vertex.h"
#include "sphere_generation/fwd.h"
#include "sphere_generation/generation_context.h"
namespace raw::sphere_generation {
// Owns the GPU-side resources used to tessellate an icosahedron into a sphere: handles built from
// the OpenGL vertex/index buffers, device scratch buffers and single-value device counters.
// prepare()/cleanup() are private and are driven by generation_context (a friend), which calls
// them from its constructor and destructor.
//
// NOTE: data members are initialised in declaration order and the constructors build the counter
// buffers from `stream`, so `stream` has to stay declared before them.
class icosahedron_data_manager {
private:
// Handles constructed from the GL buffer ids (vbo / ebo) in init(); mapped in prepare() and
// unmapped in cleanup().
cuda_types::cuda_from_gl_data<raw::graphics::vertex> vertices_handle;
cuda_types::cuda_from_gl_data<uint32_t> indices_handle;
// Stream handed to every buffer and handle owned by this object.
std::shared_ptr<cuda_types::cuda_stream> stream;
// GL buffer ids remembered so create_context() can pass them on to prepare().
uint32_t _vbo;
uint32_t _ebo;
// Secondary vertex/index buffers, sized like the GL buffers (vertices_bytes / indices_bytes).
// Presumably the ping-pong targets of the subdivision kernels; confirm in kernel_launcher.
cuda_types::cuda_buffer<raw::graphics::vertex> vertices_second;
cuda_types::cuda_buffer<uint32_t> indices_second;
// Device buffers of sizeof(uint32_t) bytes each.
cuda_types::cuda_buffer<uint32_t> amount_of_triangles;
cuda_types::cuda_buffer<uint32_t> amount_of_vertices;
cuda_types::cuda_buffer<uint32_t> amount_of_edges;
// Edge work buffers, each sized for MAXIMUM_AMOUNT_OF_TRIANGLES * 3 entries.
cuda_types::cuda_buffer<edge> all_edges;
cuda_types::cuda_buffer<edge> d_unique_edges;
cuda_types::cuda_buffer<uint32_t> edge_to_vertex;
// Passed by address to the cuda_from_gl_data constructors in init(); presumably they receive the
// byte sizes of the GL buffers (confirm in cuda_from_gl_data).
size_t vertices_bytes = 0;
size_t indices_bytes = 0;
// Element counts of the base icosahedron; they size the initial host-to-device copies.
uint32_t num_vertices_cpu = 12;
uint32_t num_triangles_cpu = predef::BASIC_AMOUNT_OF_TRIANGLES;
// Set by init(); prepare() uses it to decide whether init() still has to run.
bool inited = false;
friend class generation_context;
// Called every time after `generate` function
void cleanup();
// Called once when the object is created (or generate function called first time)
void init(uint32_t vbo, uint32_t ebo);
// Called every time `generate` function
void prepare(uint32_t vbo, uint32_t ebo);
public:
// Creates the stream and the three counters only; the remaining resources are created by the
// first prepare() call.
icosahedron_data_manager();
// Uses the given stream and runs init(vbo, ebo) immediately.
icosahedron_data_manager(uint32_t vbo, uint32_t ebo, std::shared_ptr<cuda_types::cuda_stream> stream);
// Returns a scope guard: prepare() runs in its constructor, cleanup() in its destructor.
generation_context create_context();
// I am thinking about moving these functions into the tessellation process itself so this class just manages resources lifetimes
// NOTE(review): both functions are constexpr (hence implicitly inline) but are defined in the
// .cpp file, so they can only be called from that translation unit.
static constexpr std::array<graphics::vertex, 12> generate_icosahedron_vertices();
static constexpr std::array<uint32_t, 60> generate_icosahedron_indices();
// Raw device pointers bundled as a tuple. sphere_generator::generate forwards this tuple with
// std::apply to launch_tessellation, so the element order has to match that function's
// parameter order.
auto get_data() const{
return std::make_tuple(vertices_handle.get_data(), indices_handle.get_data(),
all_edges.get(), vertices_second.get(), indices_second.get(),
d_unique_edges.get(), edge_to_vertex.get(), amount_of_vertices.get(),
amount_of_triangles.get(), amount_of_edges.get());
}
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_MESH_GENERATOR_H
src/sphere_generation/icosahedron_data_manager.cpp
//
// Created by progamers on 7/7/25.
//
#include "sphere_generation/icosahedron_data_manager.h"
#include <numbers>
#include "core/clock.h"
#include "cuda_types/buffer.h"
#include "sphere_generation/generation_context.h"
#include "sphere_generation/kernel_launcher.h"
namespace raw::sphere_generation {
// Single-precision math constants used by generate_icosahedron_vertices().
inline constexpr float GOLDEN_RATIO = std::numbers::phi_v<float>;
inline constexpr float PI = std::numbers::pi_v<float>;
// Default construction: creates a fresh stream and the three sizeof(uint32_t) counters on it.
// The GL buffer ids stay 0 and `inited` stays false, so the first prepare() call runs init().
// The counters read the `stream` member, which is safe because it is declared before them.
icosahedron_data_manager::icosahedron_data_manager()
: stream(std::make_shared<cuda_types::cuda_stream>()),
_vbo(0),
_ebo(0),
amount_of_triangles(sizeof(uint32_t), stream),
amount_of_vertices(sizeof(uint32_t), stream),
amount_of_edges(sizeof(uint32_t), stream) {}
// Construction from existing GL buffers: shares the caller's stream, creates the counters and
// runs init(vbo, ebo) right away (init assigns _vbo and _ebo).
// Inside the initializer list the name `stream` refers to the parameter, which shadows the
// member. The parameter is reused for every counter, so it must not be moved from here.
icosahedron_data_manager::icosahedron_data_manager(uint32_t vbo, uint32_t ebo,
std::shared_ptr<cuda_types::cuda_stream> stream)
: stream(stream),
amount_of_triangles(sizeof(uint32_t), stream),
amount_of_vertices(sizeof(uint32_t), stream),
amount_of_edges(sizeof(uint32_t), stream) {
init(vbo, ebo);
}
// One-time setup of this manager.
//
// Builds the handles for the GL vertex buffer (vbo) and index buffer (ebo), allocates the scratch
// and edge buffers on `stream`, zeroes the edge counter and uploads the base icosahedron into the
// GL buffers with blocking copies.
//
// Must run at most once per object; this is asserted in debug builds.
void icosahedron_data_manager::init(uint32_t vbo, uint32_t ebo) {
    // Per-instance guard. A function-local static counter would be shared by every manager in the
    // process and would fire as soon as a second manager got initialised.
    assert(!inited);

    _vbo = vbo;
    _ebo = ebo;

    vertices_handle =
        cuda_types::cuda_from_gl_data<raw::graphics::vertex>(&vertices_bytes, vbo, stream);
    indices_handle = cuda_types::cuda_from_gl_data<uint32_t>(&indices_bytes, ebo, stream);

    // The secondary buffers mirror the byte sizes reported for the GL buffers above.
    vertices_second = cuda_types::cuda_buffer<raw::graphics::vertex>(vertices_bytes, stream);
    indices_second  = cuda_types::cuda_buffer<uint32_t>(indices_bytes, stream);

    // Edge work buffers hold up to three entries per triangle at the maximum subdivision level.
    all_edges = cuda_types::cuda_buffer<edge>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge), stream);
    edge_to_vertex = cuda_types::cuda_buffer<uint32_t>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(uint32_t), stream);
    d_unique_edges = cuda_types::cuda_buffer<edge>(
        predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge), stream);

    amount_of_edges.zero_data(sizeof(uint32_t));
    inited = true;

    // Upload the base mesh. Named locals make the lifetime of the host arrays explicit for the
    // duration of the copies.
    // NOTE(review): the cudaMemcpy return codes are not checked.
    const auto base_vertices = generate_icosahedron_vertices();
    const auto base_indices  = generate_icosahedron_indices();
    cudaMemcpy(vertices_handle.get_data(), base_vertices.data(),
               num_vertices_cpu * sizeof(graphics::vertex), cudaMemcpyHostToDevice);
    cudaMemcpy(indices_handle.get_data(), base_indices.data(),
               num_triangles_cpu * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice);
}
// Gets the manager ready for one tessellation run.
//
// First call: delegates to init(vbo, ebo) and returns, since init already uploads the base mesh.
// Later calls: re-map the GL handles, re-allocate the buffers that cleanup() freed and enqueue
// the upload of the base icosahedron on `stream`.
void icosahedron_data_manager::prepare(uint32_t vbo, uint32_t ebo) {
    if (!inited) {
        init(vbo, ebo);
        return;
    }

    // The uploads below are asynchronous, so their host source must stay valid after this
    // function's statements finish. Static storage guarantees that without depending on how the
    // runtime stages pageable memory, and the base mesh is built only once.
    static const auto base_vertices = generate_icosahedron_vertices();
    static const auto base_indices  = generate_icosahedron_indices();

    vertices_handle.map();
    indices_handle.map();

    vertices_second.allocate(vertices_bytes);
    indices_second.allocate(indices_bytes);
    all_edges.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge));
    edge_to_vertex.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(uint32_t));
    d_unique_edges.allocate(predef::MAXIMUM_AMOUNT_OF_TRIANGLES * 3 * sizeof(edge));

    // NOTE(review): the cudaMemcpyAsync return codes are not checked.
    cudaMemcpyAsync(vertices_handle.get_data(), base_vertices.data(),
                    num_vertices_cpu * sizeof(graphics::vertex), cudaMemcpyHostToDevice,
                    stream->stream());
    cudaMemcpyAsync(indices_handle.get_data(), base_indices.data(),
                    num_triangles_cpu * 3 * sizeof(uint32_t), cudaMemcpyHostToDevice,
                    stream->stream());
}
// Hands out a scope guard bound to this manager and its stored GL buffer ids. The guard's
// constructor calls prepare() and its destructor calls cleanup().
generation_context icosahedron_data_manager::create_context() {
    return generation_context(*this, _vbo, _ebo);
}
// Releases everything prepare()/init() set up for one run: frees the scratch and edge buffers,
// unmaps the GL handles and resets the CPU-side counts to those of the base icosahedron.
// The three counter buffers (amount_of_*) are not freed here.
void icosahedron_data_manager::cleanup() {
vertices_second.free();
indices_second.free();
vertices_handle.unmap();
indices_handle.unmap();
all_edges.free();
d_unique_edges.free();
edge_to_vertex.free();
// Back to the base icosahedron: 12 vertices, BASIC_AMOUNT_OF_TRIANGLES triangles.
num_vertices_cpu = 12;
num_triangles_cpu = predef::BASIC_AMOUNT_OF_TRIANGLES;
}
constexpr std::array<graphics::vertex, 12>
icosahedron_data_manager::generate_icosahedron_vertices() {
std::array<graphics::vertex, 12> vertices;
int vertex_index = 0;
const float unscaled_dist = std::sqrt(1.0f + GOLDEN_RATIO * GOLDEN_RATIO);
const float scale = 1 / unscaled_dist;
const float a = 1.0f * scale;
const float b = GOLDEN_RATIO * scale;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 4; ++j) {
const auto sign1 = (j & 2) ? -1.0f : 1.0f;
const auto sign2 = (j & 1) ? -1.0f : 1.0f;
glm::vec3 point(1.0f);
if (i == 0) {
point = {sign1 * a, sign2 * b, 0.0f};
} else if (i == 1) {
point = {0.0f, sign1 * a, sign2 * b};
} else {
point = {sign1 * b, 0.0f, sign2 * a};
}
auto &v = vertices[vertex_index++];
v.position = glm::normalize(point);
v.normal = v.position;
constexpr glm::vec3 up = {0.0f, 1.0f, 0.0f};
v.tangent = glm::normalize(glm::cross(up, v.normal));
v.bitangent = glm::normalize(glm::cross(v.normal, v.tangent));
v.tex_coord.x = 0.5f + std::atan2(v.normal.z, v.normal.x) / (2.0f * PI);
v.tex_coord.y = 0.5f - std::asin(v.normal.y) / PI;
}
}
return vertices;
}
// Index list of the base icosahedron: 20 triangles, three vertex indices each, referring to the
// vertices produced by generate_icosahedron_vertices().
constexpr std::array<uint32_t, 60> icosahedron_data_manager::generate_icosahedron_indices() {
    return {
        2,  10, 4,    2,  4,  0,    2,  0,  5,    2,  5,  11,
        2,  11, 10,   0,  4,  8,    4,  10, 6,    10, 11, 3,
        11, 5,  7,    5,  0,  9,    1,  8,  6,    1,  6,  3,
        1,  3,  7,    1,  7,  9,    1,  9,  8,    6,  8,  4,
        3,  6,  10,   7,  3,  11,   9,  7,  5,    8,  9,  0,
    };
}
} // namespace raw::sphere_generation
include/sphere_generation/generation_context.h
//
// Created by progamers on 8/28/25.
//
#ifndef SPACE_EXPLORER_GENERATION_CONTEST_H
#define SPACE_EXPLORER_GENERATION_CONTEST_H
#include "sphere_generation/fwd.h"
namespace raw::sphere_generation {
// Scope guard for one tessellation run on an icosahedron_data_manager.
//
// The constructor calls manager.prepare(vbo, ebo) and the destructor calls manager.cleanup().
// Only icosahedron_data_manager (a friend) can create one, via create_context().
//
// The guard is neither copyable nor movable. Because it holds a plain reference, a defaulted move
// would leave two live objects that both call cleanup() on the same manager. Returning the guard
// by value from create_context() still works thanks to guaranteed copy elision (C++17).
class generation_context {
private:
    icosahedron_data_manager& manager;
    friend class icosahedron_data_manager;

protected:
    generation_context(icosahedron_data_manager& mgr, uint32_t vbo, uint32_t ebo);

public:
    ~generation_context();

    generation_context(const generation_context& other)            = delete;
    generation_context& operator=(const generation_context& other) = delete;
    generation_context(generation_context&& other)                 = delete;
    generation_context& operator=(generation_context&& other)      = delete;
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_GENERATION_CONTEST_H
src/sphere_generation/generate_context.cpp
//
// Created by progamers on 8/28/25.
//
#include "sphere_generation/generation_context.h"
#include "sphere_generation/icosahedron_data_manager.h"
namespace raw::sphere_generation {
generation_context::generation_context(icosahedron_data_manager& mgr, uint32_t vbo, uint32_t ebo) : manager(mgr) {
manager.prepare(vbo, ebo);
}
// Ends the run: hands the manager back in its cleaned-up state by calling cleanup().
generation_context::~generation_context() {
manager.cleanup();
}
} // namespace raw::sphere_generation
include/sphere_generation/sphere_generator.h
//
// Created by progamers on 8/28/25.
//
#ifndef SPACE_EXPLORER_SPHERE_GENERATOR_H
#define SPACE_EXPLORER_SPHERE_GENERATOR_H
#include <thread>
#include "cuda_types/stream.h"
#include "graphics/gl_context_lock.h"
#include "sphere_generation/fwd.h"
namespace raw::sphere_generation {
// Runs sphere tessellation on a background thread.
//
// generate() waits for any run still in flight and then starts a new worker; sync() blocks until
// the current worker, if any, has finished.
class sphere_generator {
public:
    sphere_generator() = default;

    // Starts an asynchronous tessellation with `steps` subdivision steps. The worker thread keeps
    // references to `stream`, `source` and `data`, so all three must stay alive until sync()
    // returns.
    void generate(uint32_t steps, cuda_types::cuda_stream& stream,
                  icosahedron_data_manager& source, graphics::graphics_data& data);

    // Joins the worker thread if one is running.
    void sync();

private:
    std::jthread worker_thread;
};
} // namespace raw::sphere_generation
#endif // SPACE_EXPLORER_SPHERE_GENERATOR_H
src/sphere_generation/sphere_generator.cpp
//
// Created by progamers on 8/28/25.
//
#include "sphere_generation/sphere_generator.h"
#include "sphere_generation/icosahedron_data_manager.h"
#include "sphere_generation/kernel_launcher.h"
namespace raw::sphere_generation {
// Starts an asynchronous tessellation run.
//
// steps          number of subdivision steps; must be below predef::MAX_STEPS
// stream         CUDA stream the tessellation is launched on
// source         manager that owns the GPU buffers
// graphics_data  window and GL contexts; the worker takes the TESS context
//
// Throws std::runtime_error when `steps` is out of range. Waits for a previous run before starting
// the new one. The worker keeps references to `stream`, `source` and `graphics_data`, so they must
// outlive the run (call sync() before destroying them).
void sphere_generator::generate(uint32_t steps, cuda_types::cuda_stream& stream,
                                icosahedron_data_manager& source,
                                graphics::graphics_data& graphics_data) {
    // NOTE(review): this rejects steps == MAX_STEPS, while the message only says the value
    // "should not exceed" the maximum. Confirm which bound is intended.
    if (steps >= predef::MAX_STEPS) {
        throw std::runtime_error(std::format(
            "[Error] Amount of steps should not exceed maximum, which is {}, while was given {}",
            predef::MAX_STEPS, steps));
    }

    sync();

    // The explicit `()` keeps the lambda valid C++20 (omitting it before a specifier is a C++23
    // feature). `mutable` was dropped because no captured value is modified.
    //
    // NOTE(review): an exception leaving this lambda terminates the process, and gl_context_lock
    // throws std::runtime_error when the context cannot be made current. Consider capturing the
    // exception here and rethrowing it from sync().
    worker_thread = std::jthread([&stream, &source, &graphics_data, steps]() {
        cudaStream_t local_stream = stream.stream();

        // Declaration order matters: `context` is destroyed before `lock`, so the manager's
        // cleanup runs while the TESS context is still current on this thread.
        graphics::gl_context_lock<graphics::context_type::TESS> lock(graphics_data);
        auto context = source.create_context();

        auto data_for_thread = source.get_data();
        // `launch_tessellation` syncs the stream at the end of function so we don't exit the
        // thread before everything was completed and can just call `sync` to wait for sphere
        // tessellation process
        std::apply(sphere_generation::launch_tessellation,
                   std::tuple_cat(std::move(data_for_thread),
                                  std::make_tuple(std::ref(local_stream), steps)));
    });
}
// Blocks until the worker thread has finished; returns immediately when there is nothing to join.
void sphere_generator::sync() {
    if (!worker_thread.joinable()) {
        return;
    }
    worker_thread.join();
}
} // namespace raw::sphere_generation
If you would like to see a class or struct that I haven't shown here, just ask in the comments and I will edit the question, but I think the code is fairly easy to understand even without them.
Is there anything I still need to improve before moving on to the next task? I want to implement the sphere generation as well as I possibly can and never touch it again, so feel free to share any of your thoughts!
My project stack
- C++20
- OpenGL 4.6
- CUDA 13 (C++20)
- SDL 3.0
- And some basic stuff like glad, glm, thrust
Environment Info
- Arch Linux 6.16
- GCC 15.2.1
Would love to hear any suggestions!