Skip to content
This repository was archived by the owner on Mar 1, 2026. It is now read-only.

Commit 88f90ef

Browse files
yizhang82Herman Lee
authored andcommitted
Fix CommitTimeWriteBatch without prepare error in MyRocks DDL
Summary: This is discovered with rqg_runtime test - it issues a DDL `alter table t1 add column a int` operation that implicit commits current transaction and starts a new transaction for the table copy operation with temp tables. In this case, the following event happens: 1. Start of every command we reset binlog position in THD to NULL/0 2. DDL calls trans_commit_implicit to implicitly commit current on-going transaction. The transaction go through 2pc as usual, updates binlog position in THD, and writes it to RocksDB as part of CommitTimeWriteBatch during commit 3. DDL starts a new transaction for the in-memory temp table copy operation that doesn't go through binlog (which makes sense since it is purely in memory). 4. DDL commits this transaction and go to ha_commit_low directly without going through ordered_commit 5. As a result, rocksdb_prepare is skippped, since this isn't a MySQL multi-engine 2pc transaction 6. As part of rocksdb_commit, we get the binlog position from earlier transaction, and tries to update with CommitTimeWriteBatch. 7. When RocksDB commits, it notices that this isn't a RocksDB 2pc (prepare isn't called) and yet CommitTimeWriteBatch has non-0 items, and returns a failure I can see 3 issues here: 1. We should not leak binlog position in THD when a statement implicitly commits current transaction. Usually this is OK if the next transaction is a MySQL 2pc transaction since it'll get overwritten 2. In a non MySQL 2pc transaction where binlog isn't updated and prepare haven't been called, we should never return and write to a CommitTimeWriteBatch. 3. We should not update binlog position in a non-2pc operation with leaked binlog position This fixes 1) & 3) by clearing binlog position in implicit commit case. 2) should be always correct once 1) is fixed, and if ever there is an issue RocksDB would report an error. Reviewed By: lloyd Differential Revision: D19895639
1 parent 0881538 commit 88f90ef

7 files changed

Lines changed: 71 additions & 3 deletions

File tree

‎mysql-test/collections/disabled_rocksdb.def‎

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,4 @@ rocksdb.com_rpc_tx : BUG#0000 detached session is missing in 8.0
4040
# Misc bugs
4141
rocksdb.persistent_cache : BUG#0000 only repros with other tests running
4242
rocksdb.index_merge_rocksdb2 : BUG#0000 query plan is different between 8.0 and 5.6
43-
rocksdb.rqg_runtime: BUG#0000 commit without prepare
44-
rocksdb.rqg_transactions : BUG#0000 commit without prepare
4543
rocksdb.add_index_inplace : BUG#0000 : ALTER TABLE ALGORITHM=INPLACE fails
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
create table t1 (pk int primary key, a int);
2+
create table t2 (pk int primary key, a int);
3+
insert into t1 values (1, 1), (2, 2);
4+
start transaction;
5+
insert into t2 values (1, 1);
6+
alter table t1 add column b int;
7+
select * from t1;
8+
pk a b
9+
1 1 NULL
10+
2 2 NULL
11+
drop table t1;
12+
drop table t2;
13+
create table t1 (pk int primary key);
14+
insert into t1 values (1);
15+
start transaction;
16+
insert into t1 values (2), (3), (1);
17+
ERROR 23000: Duplicate entry '1' for key 't1.PRIMARY'
18+
savepoint a;
19+
commit;
20+
drop table t1;

‎mysql-test/suite/rocksdb/r/rqg_runtime.result‎

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ call mtr.add_suppression("Did not write failed ");
22
call mtr.add_suppression("Can't open and lock privilege tables");
33
call mtr.add_suppression("Attempt to delete the trigger file");
44
SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
5+
SET @ORIG_INNODB_FILE_PER_TABLE = @@INNODB_FILE_PER_TABLE;
56
CREATE TABLE user_temp SELECT * FROM mysql.user LIMIT 0;
67
INSERT user_temp SELECT * FROM mysql.user;
78
CREATE TABLE tables_priv_temp SELECT * FROM mysql.tables_priv LIMIT 0;
@@ -27,4 +28,5 @@ DROP TABLE tables_priv_temp;
2728
DROP TABLE IF EXISTS test.executors;
2829
DROP DATABASE IF EXISTS testdb_N;
2930
DROP DATABASE IF EXISTS testdb_S;
31+
SET GLOBAL INNODB_FILE_PER_TABLE = @ORIG_INNODB_FILE_PER_TABLE;
3032
SET GLOBAL EVENT_SCHEDULER = @ORIG_EVENT_SCHEDULER;
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
--source include/have_rocksdb.inc
2+
3+
# Scenario #1
4+
# ALTER TABLE implicit commit
5+
create table t1 (pk int primary key, a int);
6+
create table t2 (pk int primary key, a int);
7+
insert into t1 values (1, 1), (2, 2);
8+
start transaction;
9+
insert into t2 values (1, 1);
10+
11+
# Bug: triggers commit without prepare error in RocksDB
12+
alter table t1 add column b int;
13+
--sorted_result
14+
select * from t1;
15+
16+
drop table t1;
17+
drop table t2;
18+
19+
# Scenario #2
20+
# Incorrect write count in statement rollback in transaction
21+
create table t1 (pk int primary key);
22+
insert into t1 values (1);
23+
24+
start transaction;
25+
26+
--error ER_DUP_ENTRY
27+
insert into t1 values (2), (3), (1);
28+
29+
savepoint a;
30+
31+
# Bug: triggers commit without prepare error in RocksDB
32+
commit;
33+
34+
drop table t1;
35+

‎mysql-test/suite/rocksdb/t/rqg_runtime.test‎

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ call mtr.add_suppression("Attempt to delete the trigger file");
1010

1111
SET @ORIG_EVENT_SCHEDULER = @@EVENT_SCHEDULER;
1212

13+
# Following tests may update INNODB_FILE_PER_TABLE
14+
SET @ORIG_INNODB_FILE_PER_TABLE = @@INNODB_FILE_PER_TABLE;
15+
1316
# mysql.user and mysql.tables_priv are modified by the
1417
# tests, so they need to be restored to the original
1518
# state.
@@ -62,4 +65,5 @@ DROP DATABASE IF EXISTS testdb_N;
6265
DROP DATABASE IF EXISTS testdb_S;
6366
--enable_warnings
6467

68+
SET GLOBAL INNODB_FILE_PER_TABLE = @ORIG_INNODB_FILE_PER_TABLE;
6569
SET GLOBAL EVENT_SCHEDULER = @ORIG_EVENT_SCHEDULER;

‎sql/binlog.cc‎

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9250,6 +9250,15 @@ TC_LOG::enum_result MYSQL_BIN_LOG::commit(THD *thd, bool all) {
92509250
(void)RUN_HOOK(transaction, after_commit, (thd, all));
92519251
}
92529252
} else if (!skip_commit) {
9253+
/*
9254+
We only set engine binlog position in ordered_commit path flush phase
9255+
and not all transactions go through them (such as table copy in DDL).
9256+
So in cases where a DDL statement implicitly commits earlier transaction
9257+
and starting a new one, the new transaction could be "leaking" the
9258+
engine binlog pos. In order to avoid that and accidentally overwrite
9259+
binlog position with previous location, we reset it here.
9260+
*/
9261+
thd->set_trans_pos(nullptr, 0);
92539262
if (trx_coordinator::commit_in_engines(thd, all))
92549263
return RESULT_INCONSISTENT;
92559264
}

‎storage/rocksdb/ha_rocksdb.cc‎

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3614,7 +3614,7 @@ class Rdb_transaction {
36143614
statement start) because setting a savepoint is cheap.
36153615
*/
36163616
do_set_savepoint();
3617-
m_writes_at_last_savepoint = m_write_count;
3617+
m_write_count = m_writes_at_last_savepoint;
36183618
}
36193619
}
36203620

0 commit comments

Comments
 (0)