Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 78 additions & 0 deletions db/blob/db_blob_index_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,84 @@ TEST_F(DBBlobIndexTest, EmbeddedBlobIteratorWithFirstKeyIndex) {
EXPECT_EQ(iter->value(), big2);
}

// Backward iteration (Prev / SeekForPrev) over an embedded-blob SST must work,
// including for wide-column entities whose same-file blob columns are resolved
// into an iterator-owned buffer. DBIter requires the underlying iterator's
// value to be pinned for backward iteration; EmbeddedBlobResolvingIterator
// therefore pins resolved values -- both whole-value blob payloads and rebuilt
// wide-column values -- and hands their cleanup to the PinnedIteratorsManager
// across repositioning. Before that, backward iteration over the wide-column
// entity returned NotSupported (or tripped the IsValuePinned() assertion).
TEST_F(DBBlobIndexTest, EmbeddedBlobBackwardIteration) {
Options options = GetTestOptions();
options.create_if_missing = true;
DestroyAndReopen(options);

const std::string sst_path = dbname_ + "/embedded_backward.sst";
SstFileWriterEmbeddedBlobOptions embedded_blob_options;
embedded_blob_options.min_blob_size = 8;

// Whole-value same-file blobs.
const std::string big1(1024, 'x');
const std::string big3(1024, 'z');
// Wide-column entity: a small (inline) default column plus a large
// (embedded, same-file blob) non-default column -- the mixed wide-column
// path whose resolved value lands in an iterator-owned buffer.
const std::string k2_default(2, 'd');
const std::string k2_big_col(1024, 'c');
const WideColumns k2_columns{{kDefaultWideColumnName, k2_default},
{"big", k2_big_col}};

SstFileWriter writer(EnvOptions(), options);
ASSERT_OK(writer.OpenWithEmbeddedBlobs(sst_path, embedded_blob_options));
ASSERT_OK(writer.Put("k1", big1));
ASSERT_OK(writer.PutEntity("k2", k2_columns));
ASSERT_OK(writer.Put("k3", big3));
ASSERT_OK(writer.Finish());

ASSERT_OK(db_->IngestExternalFile({sst_path}, IngestExternalFileOptions()));

std::unique_ptr<Iterator> iter(db_->NewIterator(ReadOptions()));

// Backward scan via SeekToLast + Prev.
iter->SeekToLast();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
EXPECT_EQ(iter->key(), "k3");
EXPECT_EQ(iter->value(), big3);

iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
EXPECT_EQ(iter->key(), "k2");
EXPECT_EQ(iter->value(), k2_default);
EXPECT_EQ(iter->columns(), k2_columns);

iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
EXPECT_EQ(iter->key(), "k1");
EXPECT_EQ(iter->value(), big1);

iter->Prev();
ASSERT_FALSE(iter->Valid());
ASSERT_OK(iter->status());

// SeekForPrev directly onto the wide-column entity, then step back.
iter->SeekForPrev("k2");
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
EXPECT_EQ(iter->key(), "k2");
EXPECT_EQ(iter->value(), k2_default);
EXPECT_EQ(iter->columns(), k2_columns);

iter->Prev();
ASSERT_TRUE(iter->Valid());
ASSERT_OK(iter->status());
EXPECT_EQ(iter->key(), "k1");
EXPECT_EQ(iter->value(), big1);
}

// When a blob cache is configured, same-file ("embedded") blob reads should go
// through BlobSource: the first read misses + inserts + does a disk read, and
// the second read of the same blob is served from the blob cache. This holds on
Expand Down
14 changes: 0 additions & 14 deletions db_stress_tool/db_stress_tool.cc
Original file line number Diff line number Diff line change
Expand Up @@ -488,20 +488,6 @@ int db_stress_tool(int argc, char** argv) {
"all be 0 when using compaction filter or inplace update support\n");
exit(1);
}
if (FLAGS_ingest_external_file_with_embedded_blobs &&
FLAGS_test_backward_scan) {
// Embedded blob SSTs are read through EmbeddedBlobResolvingIterator, which
// resolves same-file blob references (and wide-column entities with same-
// file blob columns) into an iterator-owned buffer. Such resolved values
// are not pinnable, but DBIter requires pinned values for backward
// iteration (Prev/SeekForPrev) and otherwise returns NotSupported (or hits
// the IsValuePinned() assertion in FindValueForCurrentKeyUsingSeek).
// Disable backward scan automatically.
fprintf(stdout,
"Disabling test_backward_scan because "
"ingest_external_file_with_embedded_blobs is set\n");
FLAGS_test_backward_scan = false;
}
if (FLAGS_test_multi_ops_txns) {
CheckAndSetOptionsForMultiOpsTxnStressTest();
}
Expand Down
65 changes: 38 additions & 27 deletions table/block_based/embedded_blob_resolving_iterator.h
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,11 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
Slice value() const override {
assert(Valid());
if (MaterializeValue() && value_resolved_) {
if (value_is_pinned_) {
// Whole-value blob payload pinned in the blob cache (or an owned
// buffer); returned without a copy.
return Slice(resolved_pinned_value_);
}
return Slice(resolved_value_);
// The resolved value -- a whole-value blob payload (pinned in the blob
// cache or an owned buffer) or a rebuilt wide-column value (an owned
// buffer) -- lives in resolved_pinned_value_ and is returned without a
// copy.
return Slice(resolved_pinned_value_);
}
// MaterializeValue() may have set an error (corruption or blob-region I/O).
// Fall back to the raw value; the error is surfaced via status()/Valid().
Expand Down Expand Up @@ -169,13 +168,13 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
}
bool IsValuePinned() const override {
if (value_resolved_) {
// A resolved whole-value blob payload is pinned in the blob cache (or an
// owned buffer). The IsValuePinned() contract requires the value to stay
// valid until the iterator is deleted / ReleasePinnedData is called, so
// only advertise it as pinned when a PinnedIteratorsManager is active to
// take over the pin's cleanup across repositioning (see ResetState). A
// built (wide-column) value lives in `resolved_value_` and is never
// pinned.
// The resolved value -- a whole-value blob payload (pinned in the blob
// cache or an owned buffer) or a rebuilt wide-column value (an owned
// buffer) -- lives in resolved_pinned_value_. The IsValuePinned()
// contract requires the value to stay valid until the iterator is deleted
// / ReleasePinnedData is called, so only advertise it as pinned when a
// PinnedIteratorsManager is active to take over the pin's cleanup across
// repositioning (see ResetState).
return value_is_pinned_ && pinned_iters_mgr_ != nullptr &&
pinned_iters_mgr_->PinningEnabled();
}
Expand Down Expand Up @@ -215,6 +214,12 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
}

private:
// Cleanup for a heap-allocated std::string holding a rebuilt (wide-column)
// value pinned into resolved_pinned_value_.
static void ReleaseResolvedValueBuffer(void* arg1, void* /*arg2*/) {
delete static_cast<std::string*>(arg1);
}

void ResetState() {
status_.PermitUncheckedError();
status_ = Status::OK();
Expand All @@ -223,12 +228,11 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
key_resolved_ = false;
value_resolved_ = false;
resolved_internal_key_.clear();
resolved_value_.clear();
if (value_is_pinned_) {
// If a PinnedIteratorsManager is active it has been told (via
// IsValuePinned) that the previous entry's pinned blob stays valid until
// ReleasePinnedData; hand the cache pin's cleanup to it so the slice
// outlives this reposition. Otherwise just release the pin now.
// IsValuePinned) that the previous entry's pinned value stays valid until
// ReleasePinnedData; hand the pin's cleanup to it so the slice outlives
// this reposition. Otherwise just release the pin now.
if (pinned_iters_mgr_ != nullptr && pinned_iters_mgr_->PinningEnabled()) {
resolved_pinned_value_.DelegateCleanupsTo(pinned_iters_mgr_);
}
Expand Down Expand Up @@ -330,13 +334,20 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
resolved_internal_key_ = std::move(resolved_key);
key_resolved_ = true;
}
if (value_pinned) {
// Whole-value blob: payload pinned into resolved_pinned_value_ (no copy).
value_is_pinned_ = true;
} else {
// Built (wide-column) value: owned by this wrapper.
resolved_value_ = std::move(resolved_value);
if (!value_pinned) {
// Built (wide-column) value: move it into a heap buffer and pin it into
// resolved_pinned_value_ so the value is pinnable like a whole-value
// blob. DBIter requires a pinned value to back up over an entry
// (Prev/SeekForPrev); the buffer's cleanup is handed to the
// PinnedIteratorsManager across repositioning (see ResetState).
auto* owned_value = new std::string(std::move(resolved_value));
resolved_pinned_value_.PinSlice(Slice(*owned_value),
&ReleaseResolvedValueBuffer, owned_value,
nullptr);
}
// Either way the resolved value now lives, pinned, in
// resolved_pinned_value_.
value_is_pinned_ = true;
value_resolved_ = true;
return true;
}
Expand All @@ -352,10 +363,10 @@ class EmbeddedBlobResolvingIterator : public InternalIterator {
// accessors.
mutable Status status_;
mutable std::string resolved_internal_key_;
mutable std::string resolved_value_;
// Holds a whole-value same-file blob payload pinned in the blob cache (or an
// owned buffer when no cache is configured), avoiding a copy. Used only when
// value_is_pinned_ is true; wide-column values use resolved_value_ instead.
// Holds the resolved value for the current entry without a copy: a
// whole-value same-file blob payload (pinned in the blob cache, or an owned
// buffer when no cache is configured) or a rebuilt wide-column value (an
// owned heap buffer). Populated whenever value_is_pinned_ is true.
mutable PinnableSlice resolved_pinned_value_;
mutable bool key_prepared_ = false;
mutable bool value_prepared_ = false;
Expand Down
Loading