|
| 1 | +/* |
| 2 | + * Thrift RPC Latency Test - Comprehensive Backend Comparison |
| 3 | + * |
| 4 | + * This test demonstrates the performance difference between various EventBase |
| 5 | + * backends for Thrift RPC calls. It reproduces the multi-second latency issue |
| 6 | + * seen in OSS builds with libevent 2.x and validates the fixes. |
| 7 | + * |
| 8 | + * Usage: |
| 9 | + * # Internal build (Buck2): |
| 10 | + * buck2 run fbcode//fboss/cli/fboss2/test:thrift_latency_test |
| 11 | + * |
| 12 | + * # OSS build (CMake): |
| 13 | + * ./thrift_latency_test |
| 14 | + */ |
| 15 | + |
| 16 | +#include <algorithm> |
| 17 | +#include <chrono> |
| 18 | +#include <iomanip> |
| 19 | +#include <iostream> |
| 20 | +#include <memory> |
| 21 | +#include <numeric> |
| 22 | +#include <thread> |
| 23 | +#include <vector> |
| 24 | + |
| 25 | +#include <folly/executors/IOThreadPoolExecutor.h> |
| 26 | +#include <folly/init/Init.h> |
| 27 | +#include <folly/io/async/AsyncSocket.h> |
| 28 | +#include <folly/io/async/EventBase.h> |
| 29 | +#include <folly/io/async/EventBaseManager.h> |
| 30 | +#include <thrift/lib/cpp2/async/RocketClientChannel.h> |
| 31 | +#include <thrift/lib/cpp2/server/ThriftServer.h> |
| 32 | +#include <thrift/lib/cpp2/util/ScopedServerInterfaceThread.h> |
| 33 | + |
| 34 | +// Conditionally include EpollBackend if available |
| 35 | +#if __has_include(<folly/io/async/EpollBackend.h>) |
| 36 | +#include <folly/io/async/EpollBackend.h> |
| 37 | +#define HAS_EPOLL_BACKEND 1 |
| 38 | +#else |
| 39 | +#define HAS_EPOLL_BACKEND 0 |
| 40 | +#endif |
| 41 | + |
| 42 | +#include "fboss/agent/if/gen-cpp2/FbossCtrl.h" |
| 43 | + |
| 44 | +using namespace facebook::fboss; |
| 45 | +using namespace apache::thrift; |
| 46 | + |
| 47 | +constexpr int kNumIterations = 10; |
| 48 | +constexpr int64_t kLatencyThresholdMs = 100; |
| 49 | + |
| 50 | +#if HAS_EPOLL_BACKEND |
| 51 | +// Get EventBase::Options configured to use EpollBackend |
| 52 | +folly::EventBase::Options getEpollEventBaseOptions() { |
| 53 | + return folly::EventBase::Options{}.setBackendFactory( |
| 54 | + []() -> std::unique_ptr<folly::EventBaseBackendBase> { |
| 55 | + return std::make_unique<folly::EpollBackend>( |
| 56 | + folly::EpollBackend::Options{}); |
| 57 | + }); |
| 58 | +} |
| 59 | + |
| 60 | +// EventBaseManager that creates EventBases with EpollBackend |
| 61 | +folly::EventBaseManager& getEpollEventBaseManager() { |
| 62 | + static folly::EventBaseManager manager(getEpollEventBaseOptions()); |
| 63 | + return manager; |
| 64 | +} |
| 65 | +#endif |
| 66 | + |
| 67 | +// Simple handler that returns a fixed timestamp |
| 68 | +class SimpleHandler : public apache::thrift::ServiceHandler<FbossCtrl> { |
| 69 | + public: |
| 70 | + void getConfigAppliedInfo(ConfigAppliedInfo& info) override { |
| 71 | + info.lastAppliedInMs() = 12345; |
| 72 | + info.lastColdbootAppliedInMs() = 0; |
| 73 | + } |
| 74 | +}; |
| 75 | + |
| 76 | +// Result of running a backend test |
| 77 | +struct BackendResult { |
| 78 | + std::string name; |
| 79 | + bool success; |
| 80 | + std::vector<double> latenciesMs; |
| 81 | + double avgLatencyMs; |
| 82 | + double maxLatencyMs; |
| 83 | + std::string errorMsg; |
| 84 | +}; |
| 85 | + |
| 86 | +// Run latency test for a specific backend configuration |
| 87 | +BackendResult runBackendTest( |
| 88 | + const std::string& backendName, |
| 89 | + std::function<void(ThriftServer&)> configureServer) { |
| 90 | + BackendResult result; |
| 91 | + result.name = backendName; |
| 92 | + result.success = true; |
| 93 | + |
| 94 | + try { |
| 95 | + auto handler = std::make_shared<SimpleHandler>(); |
| 96 | + |
| 97 | + // Keep executor alive for the lifetime of the server |
| 98 | + std::shared_ptr<folly::IOThreadPoolExecutor> executor; |
| 99 | + |
| 100 | + ScopedServerInterfaceThread::ServerConfigCb serverConfigCb = |
| 101 | + [&configureServer, &executor](ThriftServer& server) { |
| 102 | + configureServer(server); |
| 103 | + }; |
| 104 | + |
| 105 | + ScopedServerInterfaceThread server( |
| 106 | + handler, "::1", 0, std::move(serverConfigCb)); |
| 107 | + |
| 108 | + auto port = server.getAddress().getPort(); |
| 109 | + std::cout << " Server started on port " << port << std::endl; |
| 110 | + |
| 111 | + // Create client |
| 112 | + auto client = server.newClient<apache::thrift::Client<FbossCtrl>>( |
| 113 | + nullptr, RocketClientChannel::newChannel); |
| 114 | + |
| 115 | + // Run multiple iterations |
| 116 | + for (int i = 0; i < kNumIterations; ++i) { |
| 117 | + auto start = std::chrono::steady_clock::now(); |
| 118 | + |
| 119 | + ConfigAppliedInfo info; |
| 120 | + client->sync_getConfigAppliedInfo(info); |
| 121 | + |
| 122 | + auto end = std::chrono::steady_clock::now(); |
| 123 | + // Use microseconds for sub-millisecond precision, convert to double ms |
| 124 | + auto durationUs = |
| 125 | + std::chrono::duration_cast<std::chrono::microseconds>(end - start) |
| 126 | + .count(); |
| 127 | + double durationMs = durationUs / 1000.0; |
| 128 | + |
| 129 | + result.latenciesMs.push_back(durationMs); |
| 130 | + std::cout << std::fixed << std::setprecision(3); |
| 131 | + std::cout << " Iteration " << (i + 1) << ": " << durationMs << " ms" |
| 132 | + << std::endl; |
| 133 | + } |
| 134 | + |
| 135 | + // Calculate statistics |
| 136 | + double sum = std::accumulate( |
| 137 | + result.latenciesMs.begin(), result.latenciesMs.end(), 0.0); |
| 138 | + result.avgLatencyMs = sum / result.latenciesMs.size(); |
| 139 | + result.maxLatencyMs = |
| 140 | + *std::max_element(result.latenciesMs.begin(), result.latenciesMs.end()); |
| 141 | + |
| 142 | + } catch (const std::exception& e) { |
| 143 | + result.success = false; |
| 144 | + result.errorMsg = e.what(); |
| 145 | + result.avgLatencyMs = 0; |
| 146 | + result.maxLatencyMs = 0; |
| 147 | + } |
| 148 | + |
| 149 | + return result; |
| 150 | +} |
| 151 | + |
| 152 | +void printSeparator() { |
| 153 | + std::cout << std::string(60, '-') << std::endl; |
| 154 | +} |
| 155 | + |
| 156 | +void printResult(const BackendResult& result) { |
| 157 | + std::cout << std::endl; |
| 158 | + std::cout << " Results:" << std::endl; |
| 159 | + if (result.success) { |
| 160 | + std::cout << std::fixed << std::setprecision(2); |
| 161 | + std::cout << " Average latency: " << result.avgLatencyMs << " ms" |
| 162 | + << std::endl; |
| 163 | + std::cout << " Max latency: " << result.maxLatencyMs << " ms" |
| 164 | + << std::endl; |
| 165 | + |
| 166 | + if (result.avgLatencyMs > kLatencyThresholdMs) { |
| 167 | + std::cout << " Status: SLOW (avg > " << kLatencyThresholdMs << " ms)" |
| 168 | + << std::endl; |
| 169 | + } else { |
| 170 | + std::cout << " Status: OK (avg < " << kLatencyThresholdMs << " ms)" |
| 171 | + << std::endl; |
| 172 | + } |
| 173 | + } else { |
| 174 | + std::cout << " Status: FAILED - " << result.errorMsg << std::endl; |
| 175 | + } |
| 176 | +} |
| 177 | + |
| 178 | +int main(int argc, char** argv) { |
| 179 | + folly::Init init(&argc, &argv); |
| 180 | + |
| 181 | + std::cout << "========================================" << std::endl; |
| 182 | + std::cout << " Thrift RPC Latency Test" << std::endl; |
| 183 | + std::cout << " Iterations per backend: " << kNumIterations << std::endl; |
| 184 | + std::cout << "========================================" << std::endl; |
| 185 | + std::cout << std::endl; |
| 186 | + |
| 187 | +#if HAS_EPOLL_BACKEND |
| 188 | + std::cout << "EpollBackend: available" << std::endl; |
| 189 | +#else |
| 190 | + std::cout << "EpollBackend: NOT available" << std::endl; |
| 191 | +#endif |
| 192 | + std::cout << std::endl; |
| 193 | + |
| 194 | + std::vector<BackendResult> results; |
| 195 | + |
| 196 | + // Test 1: LibEvent (default) backend |
| 197 | + printSeparator(); |
| 198 | + std::cout << "[1/3] Testing LibEvent backend (default)..." << std::endl; |
| 199 | + auto libeventResult = |
| 200 | + runBackendTest("LibEvent", [](ThriftServer& /* server */) { |
| 201 | + // No configuration needed - uses default libevent backend |
| 202 | + }); |
| 203 | + printResult(libeventResult); |
| 204 | + results.push_back(libeventResult); |
| 205 | + |
| 206 | + // Test 2: Epoll backend |
| 207 | + printSeparator(); |
| 208 | + std::cout << "[2/3] Testing Epoll backend..." << std::endl; |
| 209 | +#if HAS_EPOLL_BACKEND |
| 210 | + auto epollResult = runBackendTest("Epoll", [](ThriftServer& server) { |
| 211 | + auto& epollManager = getEpollEventBaseManager(); |
| 212 | + auto executor = std::make_shared<folly::IOThreadPoolExecutor>( |
| 213 | + std::thread::hardware_concurrency(), |
| 214 | + std::make_shared<folly::NamedThreadFactory>("ThriftIO"), |
| 215 | + &epollManager, |
| 216 | + folly::IOThreadPoolExecutor::Options().setEnableThreadIdCollection( |
| 217 | + true)); |
| 218 | + server.setIOThreadPool(executor); |
| 219 | + }); |
| 220 | + printResult(epollResult); |
| 221 | + results.push_back(epollResult); |
| 222 | +#else |
| 223 | + std::cout << " SKIPPED - EpollBackend not available" << std::endl; |
| 224 | + BackendResult epollSkipped; |
| 225 | + epollSkipped.name = "Epoll"; |
| 226 | + epollSkipped.success = false; |
| 227 | + epollSkipped.errorMsg = "EpollBackend not available"; |
| 228 | + results.push_back(epollSkipped); |
| 229 | +#endif |
| 230 | + |
| 231 | + // Test 3: IoUring backend |
| 232 | + printSeparator(); |
| 233 | + std::cout << "[3/3] Testing IoUring backend..." << std::endl; |
| 234 | + auto iouringResult = runBackendTest("IoUring", [](ThriftServer& server) { |
| 235 | + server.setPreferIoUring(true); |
| 236 | + server.setUseDefaultIoUringExecutor(true); |
| 237 | + }); |
| 238 | + printResult(iouringResult); |
| 239 | + results.push_back(iouringResult); |
| 240 | + |
| 241 | + // Print summary |
| 242 | + printSeparator(); |
| 243 | + std::cout << std::endl; |
| 244 | + std::cout << "========================================" << std::endl; |
| 245 | + std::cout << " Summary" << std::endl; |
| 246 | + std::cout << "========================================" << std::endl; |
| 247 | + std::cout << std::endl; |
| 248 | + |
| 249 | + std::cout << std::left << std::setw(12) << "Backend" << std::right |
| 250 | + << std::setw(12) << "Avg (ms)" << std::setw(12) << "Max (ms)" |
| 251 | + << std::setw(12) << "Status" << std::endl; |
| 252 | + std::cout << std::string(48, '-') << std::endl; |
| 253 | + |
| 254 | + bool hasFailure = false; |
| 255 | + for (const auto& r : results) { |
| 256 | + std::cout << std::left << std::setw(12) << r.name; |
| 257 | + if (r.success) { |
| 258 | + std::cout << std::right << std::fixed << std::setprecision(2) |
| 259 | + << std::setw(12) << r.avgLatencyMs << std::setw(12) |
| 260 | + << r.maxLatencyMs; |
| 261 | + if (r.avgLatencyMs > kLatencyThresholdMs) { |
| 262 | + std::cout << std::setw(12) << "SLOW"; |
| 263 | + // LibEvent being slow is expected in OSS builds |
| 264 | + if (r.name != "LibEvent") { |
| 265 | + hasFailure = true; |
| 266 | + } |
| 267 | + } else { |
| 268 | + std::cout << std::setw(12) << "OK"; |
| 269 | + } |
| 270 | + } else { |
| 271 | + std::cout << std::right << std::setw(12) << "N/A" << std::setw(12) |
| 272 | + << "N/A" << std::setw(12) << "SKIP/FAIL"; |
| 273 | + } |
| 274 | + std::cout << std::endl; |
| 275 | + } |
| 276 | + |
| 277 | + std::cout << std::endl; |
| 278 | + |
| 279 | + if (hasFailure) { |
| 280 | + std::cout << "ERROR: Non-libevent backend(s) showed high latency!" |
| 281 | + << std::endl; |
| 282 | + return 1; |
| 283 | + } |
| 284 | + |
| 285 | + // Check if libevent is slow but alternatives are fast |
| 286 | + bool libeventSlow = libeventResult.success && |
| 287 | + libeventResult.avgLatencyMs > kLatencyThresholdMs; |
| 288 | + bool hasWorkingAlternative = false; |
| 289 | + for (const auto& r : results) { |
| 290 | + if (r.name != "LibEvent" && r.success && |
| 291 | + r.avgLatencyMs <= kLatencyThresholdMs) { |
| 292 | + hasWorkingAlternative = true; |
| 293 | + break; |
| 294 | + } |
| 295 | + } |
| 296 | + |
| 297 | + if (libeventSlow && hasWorkingAlternative) { |
| 298 | + std::cout |
| 299 | + << "Note: LibEvent backend is slow (expected in OSS builds with libevent 2.x)" |
| 300 | + << std::endl; |
| 301 | + std::cout |
| 302 | + << " Alternative backends (Epoll/IoUring) are working correctly." |
| 303 | + << std::endl; |
| 304 | + } |
| 305 | + |
| 306 | + std::cout << "Test completed successfully." << std::endl; |
| 307 | + return 0; |
| 308 | +} |
0 commit comments