From 755f1372209913a25543f530a68c270aeb9826be Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Tue, 12 Nov 2024 11:51:12 +0200 Subject: [PATCH 1/2] chore: add logs to help debugging tls issues in prod --- util/tls/tls_socket.cc | 37 ++++++++++++++++++++++++++++++------- util/tls/tls_socket.h | 5 +++-- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/util/tls/tls_socket.cc b/util/tls/tls_socket.cc index 1d4c4892..f01ded54 100644 --- a/util/tls/tls_socket.cc +++ b/util/tls/tls_socket.cc @@ -13,6 +13,11 @@ #include "util/fibers/proactor_base.h" #include "util/tls/tls_engine.h" +#define VSOCK(verbosity) \ + VLOG(verbosity) << "sock[" << native_handle() << "], state " << int(state_) \ + << ", write_total:" << upstream_write_ << " " << " pending output: " \ + << engine_->OutputPending() << " " + namespace util { namespace tls { @@ -128,6 +133,7 @@ auto TlsSocket::Accept() -> AcceptResult { // it is important to send output (protocol errors) before we return from this function. error_code ec = MaybeSendOutput(); if (ec) { + VSOCK(1) << "MaybeSendOutput failed " << ec; return make_unexpected(ec); } @@ -411,16 +417,17 @@ auto TlsSocket::MaybeSendOutput() -> error_code { return error_code{}; } - return HandleSocketWrite(); + return HandleUpstreamWrite(); } -auto TlsSocket::HandleSocketRead() -> error_code { +auto TlsSocket::HandleUpstreamRead() -> error_code { RETURN_ON_ERROR(MaybeSendOutput()); if (state_ & READ_IN_PROGRESS) { // We need to Yield because otherwise we might end up in an infinite loop. // See also comments in MaybeSendOutput. ThisFiber::Yield(); + VSOCK(1) << "HandleUpstreamRead: Yielded"; return error_code{}; } @@ -429,24 +436,29 @@ auto TlsSocket::HandleSocketRead() -> error_code { io::Result esz = next_sock_->Recv(mut_buf, 0); state_ &= ~READ_IN_PROGRESS; if (!esz) { + // log any errors as well as situations where we have unflushed output. + if (esz.error() != errc::connection_aborted || engine_->OutputPending() > 0) { + VSOCK(1) << "HandleUpstreamRead failed " << esz.error(); + return esz.error(); + } return esz.error(); } - DVLOG(1) << "TlsSocket:Read " << *esz << " bytes"; + DVLOG(1) << "HandleUpstreamRead " << *esz << " bytes"; engine_->CommitInput(*esz); return error_code{}; } -error_code TlsSocket::HandleSocketWrite() { +error_code TlsSocket::HandleUpstreamWrite() { Engine::Buffer buffer = engine_->PeekOutputBuf(); DCHECK(!buffer.empty()); if (buffer.empty()) return {}; - DVLOG(2) << "HandleSocketWrite " << buffer.size(); + DVLOG(2) << "HandleUpstreamWrite " << buffer.size(); // we do not allow concurrent writes from multiple fibers. state_ |= WRITE_IN_PROGRESS; while (!buffer.empty()) { @@ -456,13 +468,23 @@ error_code TlsSocket::HandleSocketWrite() { if (!write_result) { state_ &= ~WRITE_IN_PROGRESS; + // broken_pipe - happens when the other side closes the connection. do not log this. + if (write_result.error() != errc::broken_pipe) { + VSOCK(1) << "HandleUpstreamWrite failed " << write_result.error(); + } + return write_result.error(); } CHECK_GT(*write_result, 0u); + upstream_write_ += *write_result; engine_->ConsumeOutputBuf(*write_result); buffer.remove_prefix(*write_result); } - DCHECK_EQ(engine_->OutputPending(), 0u); + + if (engine_->OutputPending() > 0) { + LOG(INFO) << "ssl buffer is not empty with " << engine_->OutputPending() + << " bytes. short write detected"; + } state_ &= ~WRITE_IN_PROGRESS; @@ -472,9 +494,10 @@ error_code TlsSocket::HandleSocketWrite() { error_code TlsSocket::HandleOp(int op_val) { switch (op_val) { case Engine::EOF_STREAM: + VLOG(1) << "EOF_STREAM received " << next_sock_->native_handle(); return make_error_code(errc::connection_reset); case Engine::NEED_READ_AND_MAYBE_WRITE: - return HandleSocketRead(); + return HandleUpstreamRead(); case Engine::NEED_WRITE: return MaybeSendOutput(); default: diff --git a/util/tls/tls_socket.h b/util/tls/tls_socket.h index e969fc00..16627551 100644 --- a/util/tls/tls_socket.h +++ b/util/tls/tls_socket.h @@ -95,13 +95,14 @@ class TlsSocket final : public FiberSocketBase { error_code MaybeSendOutput(); /// Read encrypted data from the network socket and feed it into the TLS engine. - error_code HandleSocketRead(); + error_code HandleUpstreamRead(); - error_code HandleSocketWrite(); + error_code HandleUpstreamWrite(); error_code HandleOp(int op); std::unique_ptr next_sock_; std::unique_ptr engine_; + size_t upstream_write_ = 0; enum { WRITE_IN_PROGRESS = 1, READ_IN_PROGRESS = 2, SHUTDOWN_IN_PROGRESS = 4, SHUTDOWN_DONE = 8 }; uint8_t state_{0}; From 7a27014a0fe8851fa7807e569f3cc5b0e3d6f47b Mon Sep 17 00:00:00 2001 From: Roman Gershman Date: Tue, 12 Nov 2024 14:09:58 +0200 Subject: [PATCH 2/2] chore: add logs for ssl errors as well Signed-off-by: Roman Gershman --- util/tls/tls_socket.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/util/tls/tls_socket.cc b/util/tls/tls_socket.cc index f01ded54..f476958c 100644 --- a/util/tls/tls_socket.cc +++ b/util/tls/tls_socket.cc @@ -57,8 +57,9 @@ error_condition error_category::default_error_condition(int ev) const noexcept { error_category tls_category; -inline error_code SSL2Error(unsigned long err) { +inline error_code SSL2Error(unsigned lineno, unsigned long err) { CHECK_LT(err, unsigned(INT_MAX)); + VLOG(1) << "SSL2Error at line " << lineno << ": " << error_category{}.message(int(err)); return error_code{int(err), tls_category}; } @@ -139,7 +140,7 @@ auto TlsSocket::Accept() -> AcceptResult { // now check the result of the handshake. if (!op_result) { - return make_unexpected(SSL2Error(op_result.error())); + return make_unexpected(SSL2Error(__LINE__, op_result.error())); } int op_val = *op_result; @@ -249,7 +250,7 @@ io::Result TlsSocket::RecvMsg(const msghdr& msg, int flags) { Engine::OpResult op_result = engine_->Read(dest.data(), read_len); if (!op_result) { - return make_unexpected(SSL2Error(op_result.error())); + return make_unexpected(SSL2Error(__LINE__, op_result.error())); } int op_val = *op_result; @@ -346,7 +347,7 @@ io::Result TlsSocket::SendBuffer(Engine::Buffer buf) { while (true) { Engine::OpResult op_result = engine_->Write(buf); if (!op_result) { - return make_unexpected(SSL2Error(op_result.error())); + return make_unexpected(SSL2Error(__LINE__, op_result.error())); } int op_val = *op_result; @@ -439,7 +440,6 @@ auto TlsSocket::HandleUpstreamRead() -> error_code { // log any errors as well as situations where we have unflushed output. if (esz.error() != errc::connection_aborted || engine_->OutputPending() > 0) { VSOCK(1) << "HandleUpstreamRead failed " << esz.error(); - return esz.error(); } return esz.error(); }