Skip to content

Commit

Permalink
Attempt parallel connections to all addrs when connecting (#1068)
Browse files Browse the repository at this point in the history
* Attempt parallel connections to all addrs when connecting

Part of the "happy eyeballs" recommendations.

Note I attempted to preserve the exception throwing behavior we had
previously. i.e. if I'm the last task running and errors, I'll close
the `Channel{TCPSocket}` with my exception which will then propagate up.
We could possibly accumulate all the exceptions into a CompositeException,
but meh, I'm not sure if that adds much value.

In addition to the new parallel connecting, we're also adjusting how
the `connect_timeout` is implemented. Instead of only applying to the
tcp connection, we now wrap the entire `newconnection` call, which means
any ssl handshake timing will also count towards the timeout. We saw a
case at RelationalAI where we had a reasonable connect_timeout (10s),
yet then saw long requests (>50s) where all the time was reported in
the connection layer. It would seem to suggest that the ssl layer is
somehow getting stuck or slow, so the proposal here is that if the ssl
operations also count, then we'll more aggressively cancel/restart
the connection process in the case of slow ssl handshaking.

* Ensure we don't have hanging tasks once our channel has been filled w/ connection
  • Loading branch information
quinnj authored Jun 29, 2023
1 parent ba68375 commit bb12854
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 25 deletions.
43 changes: 20 additions & 23 deletions src/Connections.jl
Original file line number Diff line number Diff line change
Expand Up @@ -501,40 +501,37 @@ function getconnection(::Type{TCPSocket},
# alive in the face of heavy workloads where Julia's task scheduler might take a while to
# keep up with midflight requests
keepalive::Bool=true,
connect_timeout::Int=10,
readtimeout::Int=0,
kw...)::TCPSocket

p::UInt = isempty(port) ? UInt(80) : parse(UInt, port)
@debugv 2 "TCP connect: $host:$p..."
addrs = Sockets.getalladdrinfo(host)
connect_timeout = connect_timeout == 0 && readtimeout > 0 ? readtimeout : connect_timeout
lasterr = ErrorException("unknown connection error")
ch = Channel{TCPSocket}(1)
n = Ref(length(addrs))
for addr in addrs
try
if connect_timeout > 0
tcp = Sockets.TCPSocket()
Sockets.connect!(tcp, addr, p)
try
try_with_timeout(connect_timeout) do _
Sockets.wait_connected(tcp)
keepalive && keepalive!(tcp)
end
catch
close(tcp)
rethrow()
end
else
tcp = Sockets.connect(addr, p)
Threads.@spawn begin
try
isready(ch) && return
tcp = Sockets.connect($addr, p)
isready(ch) && return
keepalive && keepalive!(tcp)
Base.@lock ch begin
isready(ch) && return
put!(ch, tcp)
end
catch e
Base.@lock ch begin
# if we're the last task to fail, and assuming
# all other tasks also failed, then we close
# the channel w/ our exception so the fetch call throws and
# our error propagates
(n[] -= 1) == 0 && close(ch, e)
end
end
return tcp
catch e
lasterr = e isa ConcurrentUtilities.TimeoutException ? ConnectTimeout(host, port) : e
end
end
# If no connetion could be set up, to any address, throw last error
throw(lasterr)
return fetch(ch)
end

const nosslconfig = SSLConfig()
Expand Down
11 changes: 9 additions & 2 deletions src/clientlayers/ConnectionRequest.jl
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ Close the connection if the request throws an exception.
Otherwise leave it open so that it can be reused.
"""
function connectionlayer(handler)
return function connections(req; proxy=getproxy(req.url.scheme, req.url.host), socket_type::Type=TCPSocket, socket_type_tls::Type=SOCKET_TYPE_TLS[], readtimeout::Int=0, logerrors::Bool=false, logtag=nothing, kw...)
return function connections(req; proxy=getproxy(req.url.scheme, req.url.host), socket_type::Type=TCPSocket, socket_type_tls::Type=SOCKET_TYPE_TLS[], readtimeout::Int=0, connect_timeout::Int=10, logerrors::Bool=false, logtag=nothing, kw...)
local io, stream
if proxy !== nothing
target_url = req.url
Expand All @@ -73,10 +73,17 @@ function connectionlayer(handler)
url = target_url = req.url
end

connect_timeout = connect_timeout == 0 && readtimeout > 0 ? readtimeout : connect_timeout
IOType = sockettype(url, socket_type, socket_type_tls)
start_time = time()
try
io = newconnection(IOType, url.host, url.port; readtimeout=readtimeout, kw...)
io = if connect_timeout > 0
try_with_timeout(connect_timeout) do _
newconnection(IOType, url.host, url.port; readtimeout=readtimeout, kw...)
end
else
newconnection(IOType, url.host, url.port; readtimeout=readtimeout, kw...)
end
catch e
if logerrors
err = current_exceptions_to_string()
Expand Down

0 comments on commit bb12854

Please sign in to comment.