diff --git a/flow/Knobs.cpp b/flow/Knobs.cpp index c3d6b919a4b..2f9f1393278 100644 --- a/flow/Knobs.cpp +++ b/flow/Knobs.cpp @@ -135,6 +135,7 @@ void FlowKnobs::initialize(Randomize randomize, IsSimulated isSimulated) { init( TLS_HANDSHAKE_THREAD_STACKSIZE, 64 * 1024 ); init( TLS_MALLOC_ARENA_MAX, 6 ); init( TLS_HANDSHAKE_LIMIT, 1000 ); + init( TLS_HANDSHAKE_FLOWLOCK_HIGH_PRIORITY, false ); init( NETWORK_TEST_CLIENT_COUNT, 30 ); init( NETWORK_TEST_REPLY_SIZE, 600e3 ); diff --git a/flow/Net2.actor.cpp b/flow/Net2.actor.cpp index c58a36abf43..f8e8028a245 100644 --- a/flow/Net2.actor.cpp +++ b/flow/Net2.actor.cpp @@ -363,13 +363,19 @@ class BindPromise { std::variant errContext; UID errID; NetworkAddress peerAddr; + double createTime = 0.0; + double startTime = 0.0; + double startHandshakeTime = 0.0; public: BindPromise(const char* errContext, UID errID) : errContext(errContext), errID(errID) {} BindPromise(AuditedEvent auditedEvent, UID errID) : errContext(auditedEvent), errID(errID) {} - BindPromise(BindPromise const& r) : p(r.p), errContext(r.errContext), errID(r.errID), peerAddr(r.peerAddr) {} + BindPromise(BindPromise const& r) + : p(r.p), errContext(r.errContext), errID(r.errID), peerAddr(r.peerAddr), createTime(r.createTime), + startTime(r.startTime), startHandshakeTime(r.startHandshakeTime) {} BindPromise(BindPromise&& r) noexcept - : p(std::move(r.p)), errContext(r.errContext), errID(r.errID), peerAddr(r.peerAddr) {} + : p(std::move(r.p)), errContext(r.errContext), errID(r.errID), peerAddr(r.peerAddr), createTime(r.createTime), + startTime(r.startTime), startHandshakeTime(r.startHandshakeTime) {} Future getFuture() const { return p.getFuture(); } @@ -377,11 +383,18 @@ class BindPromise { void setPeerAddr(const NetworkAddress& addr) { peerAddr = addr; } + void setTimeMetrics(double inputCreateTime, double inputStartTime, double inputStartHandshakeTime) { + createTime = inputCreateTime; + startTime = inputStartTime; + startHandshakeTime = inputStartHandshakeTime; + } + void operator()(const boost::system::error_code& error, size_t bytesWritten = 0) { try { if (error) { // Log the error... { + double currentTime = now(); std::optional traceEvent; if (std::holds_alternative(errContext)) traceEvent.emplace(SevWarn, std::get(errContext), errID); @@ -400,6 +413,15 @@ class BindPromise { evt.detail("PeerAddr", peerAddr); evt.detail("PeerAddress", peerAddr); } + if (createTime > 0) { + evt.detail("Duration", currentTime - createTime); + } + if (startTime > 0) { + evt.detail("QueuingTime", currentTime - startTime); + } + if (startHandshakeTime > 0) { + evt.detail("ServiceTime", currentTime - startHandshakeTime); + } } p.sendError(connection_failed()); @@ -832,14 +854,24 @@ struct SSLHandshakerThread final : IThreadPoolReceiver { return std::move(o).str(); } + void setTimeMetrics(double inputCreateTime, double inputStartTime, double inputStartHandshakeTime) { + createTime = inputCreateTime; + startTime = inputStartTime; + startHandshakeTime = inputStartHandshakeTime; + } + ThreadReturnPromise done; ssl_socket& socket; ssl_socket::handshake_type type; boost::system::error_code err; + double createTime = 0.0; + double startTime = 0.0; + double startHandshakeTime = 0.0; }; void action(Handshake& h) { try { + double currentTime = now(); h.socket.next_layer().non_blocking(false, h.err); if (!h.err.failed()) { h.socket.handshake(h.type, h.err); @@ -848,14 +880,23 @@ struct SSLHandshakerThread final : IThreadPoolReceiver { h.socket.next_layer().non_blocking(true, h.err); } if (h.err.failed()) { - TraceEvent(SevWarn, - h.type == ssl_socket::handshake_type::client ? "N2_ConnectHandshakeError"_audit - : "N2_AcceptHandshakeError"_audit) - .detail("PeerAddr", h.getPeerAddress()) - .detail("PeerAddress", h.getPeerAddress()) - .detail("ErrorCode", h.err.value()) - .detail("ErrorMsg", h.err.message().c_str()) - .detail("BackgroundThread", true); + TraceEvent evt(SevWarn, + h.type == ssl_socket::handshake_type::client ? "N2_ConnectHandshakeError"_audit + : "N2_AcceptHandshakeError"_audit); + evt.detail("PeerAddr", h.getPeerAddress()); + evt.detail("PeerAddress", h.getPeerAddress()); + evt.detail("ErrorCode", h.err.value()); + evt.detail("ErrorMsg", h.err.message().c_str()); + evt.detail("BackgroundThread", true); + if (h.createTime > 0) { + evt.detail("Duration", currentTime - h.createTime); + } + if (h.startTime > 0) { + evt.detail("QueuingTime", currentTime - h.startTime); + } + if (h.startHandshakeTime > 0) { + evt.detail("ServiceTime", currentTime - h.startHandshakeTime); + } h.done.sendError(connection_failed()); } else { h.done.send(Void()); @@ -882,11 +923,11 @@ class SSLConnection final : public IConnection, ReferenceCounted explicit SSLConnection(boost::asio::io_service& io_service, Reference> context) : id(nondeterministicRandom()->randomUniqueID()), socket(io_service), ssl_sock(socket, context->mutate()), - sslContext(context), has_trusted_peer(false) {} + sslContext(context), has_trusted_peer(false), createTime(now()) {} explicit SSLConnection(Reference> context, tcp::socket* existingSocket) : id(nondeterministicRandom()->randomUniqueID()), socket(std::move(*existingSocket)), - ssl_sock(socket, context->mutate()), sslContext(context) {} + ssl_sock(socket, context->mutate()), sslContext(context), createTime(now()) {} // This is not part of the IConnection interface, because it is wrapped by INetwork::connect() ACTOR static Future> connect(boost::asio::io_service* ios, @@ -924,6 +965,7 @@ class SSLConnection final : public IConnection, ReferenceCounted wait(onConnected); self->init(); + self->startTime = now(); return self; } catch (Error&) { // Either the connection failed, or was cancelled by the caller @@ -940,6 +982,8 @@ class SSLConnection final : public IConnection, ReferenceCounted ACTOR static void doAcceptHandshake(Reference self, Promise connected) { state Hold holder; + state double startHandshakeTime = now(); + state bool doBackgroundHandshake = false; try { Future onHandshook; @@ -955,18 +999,31 @@ class SSLConnection final : public IConnection, ReferenceCounted holder = Hold(&N2::g_net2->sslPoolHandshakesInProgress); auto handshake = new SSLHandshakerThread::Handshake(self->ssl_sock, boost::asio::ssl::stream_base::server); + handshake->setTimeMetrics(self->createTime, self->startTime, startHandshakeTime); onHandshook = handshake->done.getFuture(); + doBackgroundHandshake = true; N2::g_net2->sslHandshakerPool->post(handshake); } else { // Otherwise use flow network thread g_net2->countServerTLSHandshakesOnMainThread++; BindPromise p("N2_AcceptHandshakeError"_audit, self->id); p.setPeerAddr(self->getPeerAddress()); + p.setTimeMetrics(self->createTime, self->startTime, startHandshakeTime); onHandshook = p.getFuture(); + doBackgroundHandshake = false; self->ssl_sock.async_handshake(boost::asio::ssl::stream_base::server, std::move(p)); } wait(onHandshook); wait(delay(0, TaskPriority::Handshake)); + double currentTime = now(); + TraceEvent(SevInfo, "N2_AcceptHandshakeComplete"_audit) + .suppressFor(1.0) + .detail("PeerAddr", self->getPeerAddress()) + .detail("PeerAddress", self->getPeerAddress()) + .detail("BackgroundThread", doBackgroundHandshake) + .detail("Duration", currentTime - self->createTime) + .detail("QueuingTime", currentTime - self->startTime) + .detail("ServiceTime", currentTime - startHandshakeTime); connected.send(Void()); } catch (...) { self->closeSocket(); @@ -1028,6 +1085,8 @@ class SSLConnection final : public IConnection, ReferenceCounted ACTOR static void doConnectHandshake(Reference self, Promise connected) { state Hold holder; + state double startHandshakeTime = now(); + state bool doBackgroundHandshake = false; try { Future onHandshook; @@ -1050,18 +1109,31 @@ class SSLConnection final : public IConnection, ReferenceCounted holder = Hold(&N2::g_net2->sslPoolHandshakesInProgress); auto handshake = new SSLHandshakerThread::Handshake(self->ssl_sock, boost::asio::ssl::stream_base::client); + handshake->setTimeMetrics(self->createTime, self->startTime, startHandshakeTime); onHandshook = handshake->done.getFuture(); + doBackgroundHandshake = true; N2::g_net2->sslHandshakerPool->post(handshake); } else { // Otherwise use flow network thread g_net2->countClientTLSHandshakesOnMainThread++; BindPromise p("N2_ConnectHandshakeError"_audit, self->id); p.setPeerAddr(self->getPeerAddress()); + p.setTimeMetrics(self->createTime, self->startTime, startHandshakeTime); onHandshook = p.getFuture(); + doBackgroundHandshake = false; self->ssl_sock.async_handshake(boost::asio::ssl::stream_base::client, std::move(p)); } wait(onHandshook); wait(delay(0, TaskPriority::Handshake)); + double currentTime = now(); + TraceEvent(SevInfo, "N2_ConnectHandshakeComplete"_audit) + .suppressFor(1.0) + .detail("PeerAddr", self->getPeerAddress()) + .detail("PeerAddress", self->getPeerAddress()) + .detail("BackgroundThread", doBackgroundHandshake) + .detail("Duration", currentTime - self->createTime) + .detail("QueuingTime", currentTime - self->startTime) + .detail("ServiceTime", currentTime - startHandshakeTime); connected.send(Void()); } catch (...) { self->closeSocket(); @@ -1070,7 +1142,8 @@ class SSLConnection final : public IConnection, ReferenceCounted } ACTOR static Future connectHandshakeWrapper(Reference self) { - wait(g_network->networkInfo.handshakeLock->take()); + wait(g_network->networkInfo.handshakeLock->take( + FLOW_KNOBS->TLS_HANDSHAKE_FLOWLOCK_HIGH_PRIORITY ? TaskPriority::Handshake : TaskPriority::DefaultYield)); state FlowLock::Releaser releaser(*g_network->networkInfo.handshakeLock); Promise connected; @@ -1193,6 +1266,9 @@ class SSLConnection final : public IConnection, ReferenceCounted ssl_socket& getSSLSocket() { return ssl_sock; } + double createTime = 0.0; + double startTime = 0.0; + private: UID id; tcp::socket socket; @@ -1283,7 +1359,7 @@ class SSLListener final : public IListener, ReferenceCounted { : IPAddress(peer_endpoint.address().to_v4().to_ulong()); conn->accept(NetworkAddress(peer_address, peer_endpoint.port(), false, true)); - + conn->startTime = now(); return conn; } catch (...) { conn->close(); diff --git a/flow/include/flow/Knobs.h b/flow/include/flow/Knobs.h index 8ae4a9aae20..5f14cdddadb 100644 --- a/flow/include/flow/Knobs.h +++ b/flow/include/flow/Knobs.h @@ -201,6 +201,7 @@ class FlowKnobs : public KnobsImpl { int TLS_HANDSHAKE_THREAD_STACKSIZE; int TLS_MALLOC_ARENA_MAX; int TLS_HANDSHAKE_LIMIT; + bool TLS_HANDSHAKE_FLOWLOCK_HIGH_PRIORITY; int NETWORK_TEST_CLIENT_COUNT; int NETWORK_TEST_REPLY_SIZE;