diff --git a/src/main/java/hudson/remoting/Engine.java b/src/main/java/hudson/remoting/Engine.java index 11627ef71..77822d7c1 100644 --- a/src/main/java/hudson/remoting/Engine.java +++ b/src/main/java/hudson/remoting/Engine.java @@ -113,6 +113,13 @@ public class Engine extends Thread { */ public static final String WEBSOCKET_COOKIE_HEADER = "Connection-Cookie"; + static boolean nonFatalJnlpAgentEndpointResolutionExceptions = Boolean.getBoolean(Engine.class.getName() + ".nonFatalJnlpAgentEndpointResolutionExceptions"); + + // defaulting to 0 retries in keeping with the behaviour for other consumers such as the swarm plugin + static int nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries = Integer.getInteger(Engine.class.getName() + ".nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries", 0); + + static int nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis = Integer.getInteger(Engine.class.getName() + ".nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis", 5000); + /** * Thread pool that sets {@link #CURRENT}. */ @@ -740,22 +747,32 @@ private void innerRun(IOHub hub, SSLContext context, ExecutorService service) { JnlpEndpointResolver resolver = createEndpointResolver(jenkinsUrls); try { - boolean first = true; + int connectionAttempts = 0; while(true) { - if(first) { - first = false; - } else { - if(noReconnect) - return; // exit + if (connectionAttempts > 0 && noReconnect) { + return; // exit } + connectionAttempts++; events.status("Locating server among " + candidateUrls); final JnlpAgentEndpoint endpoint; try { endpoint = resolver.resolve(); } catch (Exception e) { - if (Boolean.getBoolean(Engine.class.getName() + ".nonFatalJnlpAgentEndpointResolutionExceptions")) { - events.status("Could not resolve JNLP agent endpoint", e); + if (nonFatalJnlpAgentEndpointResolutionExceptions) { + if (connectionAttempts > nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries) { + events.status("Could not resolve JNLP agent endpoint. Max number of retries reached. Attempt #" + connectionAttempts, e); + } else { + events.status("Could not resolve JNLP agent endpoint. Attempt #" + connectionAttempts, e); + try { + if (nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis > 0) { + Thread.sleep(nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis); + } + } catch (InterruptedException ignored) { + // Not much to do if we can't sleep. Run through the tries more quickly. + } + continue; + } } else { events.error(e); } diff --git a/src/test/java/hudson/remoting/EngineTest.java b/src/test/java/hudson/remoting/EngineTest.java index 243dee111..cbeae98d2 100644 --- a/src/test/java/hudson/remoting/EngineTest.java +++ b/src/test/java/hudson/remoting/EngineTest.java @@ -39,6 +39,7 @@ import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; +import static org.junit.Assert.assertThrows; import static org.junit.Assert.assertTrue; /** @@ -126,6 +127,90 @@ public void getAgentName() { assertThat(engine.getAgentName(), is(AGENT_NAME)); } + @Test + public void shouldNotReconnect() { + EngineListener l = new TestEngineListener() { + @Override + public void error(Throwable t) { + throw new NoReconnectException(); + } + }; + Engine engine = new Engine(l, jenkinsUrls, SECRET_KEY, AGENT_NAME); + assertThrows(NoReconnectException.class, () -> engine.run()); + } + + private static class NoReconnectException extends RuntimeException {} + + @Test + public void shouldNotReconnectOnJnlpAgentEndpointResolutionExceptionsButWithStatus() { + EngineListener l = new TestEngineListener() { + @Override + public void status(String msg, Throwable t) { + System.err.println("Status: " + msg); + if (msg.startsWith("Could not resolve JNLP agent endpoint")) { + throw new ExpectedException(); + } + } + + @Override + public void error(Throwable t) { + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + } + }; + Engine.nonFatalJnlpAgentEndpointResolutionExceptions = true; + try { + Engine engine = new Engine(l, jenkinsUrls, SECRET_KEY, AGENT_NAME); + assertThrows("Message should have started with 'Could not resolve...'", ExpectedException.class, () -> engine.run()); + } finally { + // reinstate the static value + Engine.nonFatalJnlpAgentEndpointResolutionExceptions = false; + } + } + + @Test + public void shouldReconnectOnJnlpAgentEndpointResolutionExceptionsMaxRetries() { + EngineListener l = new TestEngineListener() { + private int count; + + @Override + public void status(String msg, Throwable t) { + System.err.println("Status: " + msg); + if (msg.startsWith("Could not resolve JNLP agent endpoint")) { + count++; + } + if (count == 5) { + throw new ExpectedException(); + } + } + + @Override + public void error(Throwable t) { + if (t instanceof RuntimeException) { + throw (RuntimeException) t; + } + } + }; + + int currentMaxRetries = Engine.nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries; + int currentIntervalInSeconds = Engine.nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis; + try { + Engine.nonFatalJnlpAgentEndpointResolutionExceptions = true; + Engine.nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries = 5; + Engine.nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis = 100; + Engine engine = new Engine(l, jenkinsUrls, SECRET_KEY, AGENT_NAME); + assertThrows("Should have tried at least five times", ExpectedException.class, () -> engine.run()); + } finally { + // reinstate the static values + Engine.nonFatalJnlpAgentEndpointResolutionExceptions = false; + Engine.nonFatalJnlpAgentEndpointResolutionExceptionsMaxRetries = currentMaxRetries; + Engine.nonFatalJnlpAgentEndpointResolutionExceptionsIntervalInMillis = currentIntervalInSeconds; + } + } + + private static class ExpectedException extends RuntimeException {} + private static class TestEngineListener implements EngineListener { @Override