Skip to content
This repository was archived by the owner on Mar 1, 2026. It is now read-only.

Commit 4065472

Browse files
yizhang82Herman Lee
authored andcommitted
Add rocksdb_wsenv_path for initial WSEnvironment support
Summary: Added rocksdb_wsenv_path to support WSEnvironment. At this point you'll need both specify rocksdb_wsenv_path and also builds with --wsenv to enable this feature since it requires linking with WSEnvironment in a separate repo. Note under WSEnvironment, there are a few quirks: * SyncWAL isn't supported yet so for now I'm pretending they just succeed (otherwise simple things like log rotation would fail) and only do Flush(false) as needed for rdb_dict_manager * There is no local .rocksdb directory - so for now I'm disabling the bulk loading temp SST cleanup code * WSEnvironment for some reason doesn't like '#' in the file names, for now I'm working around it by replacing '#' with '$' in SST file name for partitioned table during bulk loading (mostly working, though some tests are timing out) * We should not rely on errno and instead should always stick to rocksdb::Status for error checking (addressed in earlier commit) * We should make sure DBOptions/etc are cleaned up Differential Revision: D22531061 ---------------------------------------------------------------- Delete garbage temp sst files using RocksDB Env API Summary: In MyRocksOnWS scenarios, MyRocks writes bulk load tmp file into WS (through SSTFileWriter) but the cleanup code in Rdb_sst_info::init still assumes local directory and will enumerate from local directory to delete garbage temp files that weren't cleaned up during a crash. This would leave files in WS around and end up not cleaning anything. This involves using RocksDB Env API to create / access the file instead of using MySQL API, and should work regardless whether it is WS or local disk. Reviewed By: yizhang82 Differential Revision: D28680122
1 parent 7f9616e commit 4065472

8 files changed

Lines changed: 119 additions & 23 deletions

File tree

‎mysql-test/r/mysqld--help-notwin.result‎

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2122,6 +2122,8 @@ The following options may be given as the first argument:
21222122
WriteOptions::ignore_missing_column_families for RocksDB
21232123
--rocksdb-write-policy=name
21242124
DBOptions::write_policy for RocksDB
2125+
--rocksdb-wsenv-path=name
2126+
Path for RocksDB WSEnv
21252127
--rpl-read-size=# The size for reads done from the binlog and relay log. It
21262128
must be a multiple of 4kb. Making it larger might help
21272129
with IO stalls while reading these files when they are
@@ -3145,6 +3147,7 @@ rocksdb-write-batch-max-bytes 0
31453147
rocksdb-write-disable-wal FALSE
31463148
rocksdb-write-ignore-missing-column-families FALSE
31473149
rocksdb-write-policy write_committed
3150+
rocksdb-wsenv-path
31483151
rpl-read-size 8192
31493152
rpl-receive-buffer-size 2097152
31503153
rpl-send-buffer-size 2097152
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
SET @start_global_value = @@global.ROCKSDB_WSENV_PATH;
2+
SELECT @start_global_value;
3+
@start_global_value
4+
5+
"Trying to set variable @@global.ROCKSDB_WSENV_PATH to 444. It should fail because it is readonly."
6+
SET @@global.ROCKSDB_WSENV_PATH = 444;
7+
ERROR HY000: Variable 'rocksdb_wsenv_path' is a read only variable
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
--source include/have_rocksdb.inc
2+
3+
--let $sys_var=ROCKSDB_WSENV_PATH
4+
--let $read_only=1
5+
--let $session=0
6+
--source ../include/rocksdb_sys_var.inc

‎storage/rocksdb/CMakeLists.txt‎

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,13 @@ IF(CMAKE_COMPILER_IS_GNUCXX AND NOT CMAKE_CXX_COMPILER_VERSION VERSION_LESS 12)
141141
ENDIF()
142142
ENDIF()
143143

144+
IF(FB_WITH_WSENV)
145+
ADD_DEFINITIONS(-DFB_HAVE_WSENV=1)
146+
INCLUDE_DIRECTORIES(
147+
${FB_WITH_WSENV}
148+
)
149+
ENDIF()
150+
144151
INCLUDE_DIRECTORIES(
145152
${ROCKSDB_ROOT}
146153
${ROCKSDB_ROOT}/include

‎storage/rocksdb/ha_rocksdb.cc‎

Lines changed: 62 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343

4444
/* MySQL includes */
4545
#include <mysql/components/services/log_builtins.h>
46+
#include <mysql/psi/mysql_file.h>
4647
#include <mysql/psi/mysql_table.h>
4748
#include <mysql/thread_pool_priv.h>
4849
#include <mysys_err.h>
@@ -724,6 +725,7 @@ static unsigned long // NOLINT(runtime/int)
724725
static ulong rocksdb_info_log_level;
725726
static char *rocksdb_wal_dir;
726727
static char *rocksdb_persistent_cache_path;
728+
static char *rocksdb_wsenv_path;
727729
static ulong rocksdb_index_type;
728730
static uint32_t rocksdb_flush_log_at_trx_commit;
729731
static uint32_t rocksdb_debug_optimizer_n_rows;
@@ -1596,6 +1598,10 @@ static MYSQL_SYSVAR_ULONG(
15961598
nullptr, nullptr, rocksdb_persistent_cache_size_mb,
15971599
/* min */ 0L, /* max */ ULONG_MAX, 0);
15981600

1601+
static MYSQL_SYSVAR_STR(wsenv_path, rocksdb_wsenv_path,
1602+
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
1603+
"Path for RocksDB WSEnv", nullptr, nullptr, "");
1604+
15991605
static MYSQL_SYSVAR_STR(fault_injection_options,
16001606
opt_rocksdb_fault_injection_options,
16011607
PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -2496,6 +2502,7 @@ static struct SYS_VAR *rocksdb_system_variables[] = {
24962502
MYSQL_SYSVAR(persistent_cache_path),
24972503
MYSQL_SYSVAR(persistent_cache_size_mb),
24982504
MYSQL_SYSVAR(fault_injection_options),
2505+
MYSQL_SYSVAR(wsenv_path),
24992506
MYSQL_SYSVAR(delete_obsolete_files_period_micros),
25002507
MYSQL_SYSVAR(max_background_jobs),
25012508
MYSQL_SYSVAR(max_background_flushes),
@@ -3998,7 +4005,8 @@ class Rdb_transaction_impl : public Rdb_transaction {
39984005
tx_opts.write_batch_flush_threshold =
39994006
THDVAR(m_thd, write_batch_flush_threshold);
40004007

4001-
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
4008+
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC) &&
4009+
rdb_sync_wal_supported();
40024010
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
40034011
write_opts.ignore_missing_column_families =
40044012
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -4291,7 +4299,8 @@ class Rdb_writebatch_impl : public Rdb_transaction {
42914299

42924300
void start_tx() override {
42934301
reset();
4294-
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
4302+
write_opts.sync = (rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC) &&
4303+
rdb_sync_wal_supported();
42954304
write_opts.disableWAL = THDVAR(m_thd, write_disable_wal);
42964305
write_opts.ignore_missing_column_families =
42974306
THDVAR(m_thd, write_ignore_missing_column_families);
@@ -4566,14 +4575,17 @@ static bool rocksdb_flush_wal(handlerton *const hton MY_ATTRIBUTE((__unused__)),
45664575
if ((!binlog_group_flush && !rocksdb_db_options->allow_mmap_writes) ||
45674576
rocksdb_flush_log_at_trx_commit != FLUSH_LOG_NEVER) {
45684577
rocksdb_wal_group_syncs++;
4569-
s = rdb->FlushWAL(!binlog_group_flush ||
4570-
rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
4578+
bool sync = rdb_sync_wal_supported() &&
4579+
(!binlog_group_flush ||
4580+
rocksdb_flush_log_at_trx_commit == FLUSH_LOG_SYNC);
4581+
s = rdb->FlushWAL(sync);
45714582
}
45724583

45734584
if (!s.ok()) {
45744585
rdb_log_status_error(s);
45754586
return HA_EXIT_FAILURE;
45764587
}
4588+
45774589
return HA_EXIT_SUCCESS;
45784590
}
45794591

@@ -5929,6 +5941,23 @@ static rocksdb::Status check_rocksdb_options_compatibility(
59295941

59305942
static uint rocksdb_partition_flags() { return (HA_CANNOT_PARTITION_FK); }
59315943

5944+
bool rdb_has_wsenv() {
5945+
#if FB_HAVE_WSENV
5946+
return rocksdb_wsenv_path != nullptr && *rocksdb_wsenv_path;
5947+
#else
5948+
return false;
5949+
#endif
5950+
}
5951+
5952+
bool rdb_sync_wal_supported() {
5953+
#if FB_HAVE_WSENV
5954+
// wsenv doesn't support SyncWAL=true yet
5955+
return !rdb_has_wsenv();
5956+
#else
5957+
return true;
5958+
#endif
5959+
}
5960+
59325961
/*
59335962
Storage Engine initialization function, invoked when plugin is loaded.
59345963
*/
@@ -5965,6 +5994,33 @@ static int rocksdb_init_internal(void *const p) {
59655994
DBUG_RETURN(HA_EXIT_FAILURE);
59665995
});
59675996

5997+
#ifdef FB_HAVE_WSENV
5998+
// Initialize WSEnv with rocksdb_ws_env_path
5999+
if (rdb_has_wsenv()) {
6000+
// NO_LINT_DEBUG
6001+
sql_print_information(
6002+
"RocksDB: Initializing WSEnvironment: rocksdb_wsenv_path = %s",
6003+
rocksdb_wsenv_path);
6004+
6005+
RegisterCustomObjectsSimple();
6006+
rocksdb::Env *ws_env = nullptr;
6007+
auto s = rocksdb::Env::LoadEnv(rocksdb_wsenv_path, &ws_env);
6008+
if (s.ok()) {
6009+
rocksdb_db_options->env = ws_env;
6010+
} else {
6011+
rdb_log_status_error(s, "Can't initialize WSEnvironment");
6012+
DBUG_RETURN(HA_EXIT_FAILURE);
6013+
}
6014+
}
6015+
#else
6016+
if (rocksdb_wsenv_path != nullptr && *rocksdb_wsenv_path) {
6017+
// We've turned on WSEnv in the wrong build
6018+
// NO_LINT_DEBUG
6019+
sql_print_error("RocksDB: WSEnvironment not supported. ");
6020+
DBUG_RETURN(HA_EXIT_FAILURE);
6021+
}
6022+
#endif
6023+
59686024
if (opt_rocksdb_fault_injection_options != nullptr &&
59696025
*opt_rocksdb_fault_injection_options != '\0') {
59706026
bool retryable = false;
@@ -14773,7 +14829,8 @@ void Rdb_background_thread::run() {
1477314829
// background thread.
1477414830
if (rdb && (rocksdb_flush_log_at_trx_commit != FLUSH_LOG_SYNC) &&
1477514831
!rocksdb_db_options->allow_mmap_writes) {
14776-
const rocksdb::Status s = rdb->FlushWAL(true);
14832+
bool sync = rdb_sync_wal_supported();
14833+
const rocksdb::Status s = rdb->FlushWAL(sync);
1477714834
if (!s.ok()) {
1477814835
rdb_handle_io_error(s, RDB_IO_ERROR_BG_THREAD);
1477914836
}

‎storage/rocksdb/ha_rocksdb_proto.h‎

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,12 @@ int rdb_dbug_set_ttl_read_filter_ts();
8888
bool rdb_dbug_set_ttl_ignore_pk();
8989
#endif
9090

91+
/* Whether WSEnvironment is enabled */
92+
bool rdb_has_wsenv();
93+
94+
/* Whether SyncWAL is supported in current scenario */
95+
bool rdb_sync_wal_supported();
96+
9197
enum operation_type : int;
9298
void rdb_update_global_stats(const operation_type &type, uint count,
9399
bool is_system_table = false);

‎storage/rocksdb/rdb_datadic.cc‎

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5266,14 +5266,19 @@ int Rdb_dict_manager::commit(rocksdb::WriteBatch *const batch,
52665266
if (!batch) return HA_ERR_ROCKSDB_COMMIT_FAILED;
52675267
int res = HA_EXIT_SUCCESS;
52685268
rocksdb::WriteOptions options;
5269-
options.sync = sync;
5269+
options.sync = (sync && rdb_sync_wal_supported());
52705270
rocksdb::TransactionDBWriteOptimizations optimize;
52715271
optimize.skip_concurrency_control = true;
52725272
rocksdb::Status s = m_db->Write(options, optimize, batch);
52735273
res = !s.ok(); // we return true when something failed
52745274
if (res) {
52755275
rdb_handle_io_error(s, RDB_IO_ERROR_DICT_COMMIT);
52765276
}
5277+
if (!rdb_sync_wal_supported()) {
5278+
// If we don't support SyncWAL, do a flush at least
5279+
m_db->FlushWAL(false);
5280+
}
5281+
52775282
batch->Clear();
52785283
return res;
52795284
}

‎storage/rocksdb/rdb_sst_info.cc‎

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@
3131

3232
/* RocksDB header files */
3333
#include "rocksdb/db.h"
34+
#include "rocksdb/file_system.h"
35+
#include "rocksdb/io_status.h"
3436
#include "rocksdb/options.h"
3537

3638
/* MyRocks header files */
@@ -333,6 +335,11 @@ Rdb_sst_info::Rdb_sst_info(rocksdb::DB *const db, const std::string &tablename,
333335
// is loaded in parallel
334336
m_prefix += std::to_string(m_prefix_counter.fetch_add(1)) + "_";
335337

338+
if (rdb_has_wsenv()) {
339+
// WSEnv doesn't like '#'
340+
std::replace(m_prefix.begin(), m_prefix.end(), '#', '$');
341+
}
342+
336343
rocksdb::ColumnFamilyDescriptor cf_descr;
337344
const rocksdb::Status s = m_cf->GetDescriptor(&cf_descr);
338345
if (!s.ok()) {
@@ -519,32 +526,30 @@ void Rdb_sst_info::report_error_msg(const rocksdb::Status &s,
519526
}
520527

521528
void Rdb_sst_info::init(const rocksdb::DB *const db) {
522-
const std::string path = db->GetName() + FN_DIRSEP;
523-
struct MY_DIR *const dir_info = my_dir(path.c_str(), MYF(MY_DONT_SORT));
529+
const std::string dir = db->GetName();
530+
const auto &fs = db->GetEnv()->GetFileSystem();
531+
std::vector<std::string> files_in_dir;
524532

525-
// Access the directory
526-
if (dir_info == nullptr) {
533+
// Get the files in the specified directory
534+
rocksdb::IOStatus s =
535+
fs->GetChildren(dir, rocksdb::IOOptions(), &files_in_dir, nullptr);
536+
if (!s.ok()) {
527537
// NO_LINT_DEBUG
528538
sql_print_warning("RocksDB: Could not access database directory: %s",
529-
path.c_str());
539+
dir.c_str());
530540
return;
531541
}
532542

533543
// Scan through the files in the directory
534-
const struct fileinfo *file_info = dir_info->dir_entry;
535-
for (uint ii = 0; ii < dir_info->number_off_files; ii++, file_info++) {
536-
// find any files ending with m_suffix ...
537-
const std::string name = file_info->name;
538-
const size_t pos = name.find(m_suffix);
539-
if (pos != std::string::npos && name.size() - pos == m_suffix.size()) {
540-
// ... and remove them
541-
const std::string fullname = path + name;
542-
my_delete(fullname.c_str(), MYF(0));
544+
for (const auto &file : files_in_dir) {
545+
// Find any files ending with m_suffix ...
546+
const size_t pos = file.find(m_suffix);
547+
if (pos != std::string::npos && file.size() - pos == m_suffix.size()) {
548+
// Remove
549+
const std::string fullname = dir + FN_DIRSEP + file;
550+
fs->DeleteFile(fullname, rocksdb::IOOptions(), nullptr);
543551
}
544552
}
545-
546-
// Release the directory entry
547-
my_dirend(dir_info);
548553
}
549554

550555
std::atomic<uint64_t> Rdb_sst_info::m_prefix_counter(0);

0 commit comments

Comments
 (0)