Skip to content

Commit

Permalink
[JENKINS-20272] - Disconnected nodes should not be disconnected repea…
Browse files Browse the repository at this point in the history
…tedly (#3453)

* [JENKINS-20272] Reproduce in test

* [FIX JENKINS-20272] Do not disconnect skipped computers

* [JENKINS-20272] Clean the API a bit

(cherry picked from commit 8872b44)
  • Loading branch information
olivergondza committed Jun 7, 2018
1 parent 3e3a413 commit 527141e
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 7 deletions.
Expand Up @@ -6,10 +6,16 @@
import jenkins.model.Jenkins;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.TimeoutException;
Expand Down Expand Up @@ -60,10 +66,22 @@ protected T monitor(Computer c) throws IOException, InterruptedException {

/**
* Performs all monitoring concurrently.
*
* @return Mapping from computer to monitored value. The map values can be null for several reasons, see {@link Result}
* for more details.
*/
@Override
protected Map<Computer, T> monitor() throws InterruptedException {
// Bridge method to offer original constrained interface.
return monitorDetailed().getMonitoringData();
}

/**
* Perform monitoring with detailed reporting.
*/
protected final @Nonnull Result<T> monitorDetailed() throws InterruptedException {
Map<Computer,Future<T>> futures = new HashMap<Computer,Future<T>>();
Set<Computer> skipped = new HashSet<>();

for (Computer c : Jenkins.getInstance().getComputers()) {
try {
Expand Down Expand Up @@ -101,11 +119,53 @@ protected Map<Computer, T> monitor() throws InterruptedException {
} catch (TimeoutException x) {
LOGGER.log(WARNING, "Failed to monitor " + c.getDisplayName() + " for " + getDisplayName(), x);
}
} else {
skipped.add(c);
}
}

return data;
return new Result<>(data, skipped);
}

private static final Logger LOGGER = Logger.getLogger(AbstractAsyncNodeMonitorDescriptor.class.getName());

/**
* Result object for {@link AbstractAsyncNodeMonitorDescriptor#monitorDetailed()} to facilitate extending information
* returned in the future.
*
* The {@link #getMonitoringData()} provides the results of the monitoring as {@link #monitor()} does. Note the value
* in the map can be <tt>null</tt> for several reasons:
* <ul>
* <li>The monitoring {@link Callable} returned <tt>null</tt> as a provisioning result.</li>
* <li>Creating or evaluating that callable has thrown an exception.</li>
* <li>The computer was not monitored as it was offline.</li>
* <li>The {@link AbstractAsyncNodeMonitorDescriptor#createCallable} has returned null.</li>
* </ul>
*
* Clients can distinguishing among these states based on the additional data attached to this object. {@link #getSkipped()}
* returns computers that was not monitored as they ware either offline or monitor produced <tt>null</tt> {@link Callable}.
*/
protected static final class Result<T> {
private static final long serialVersionUID = -7671448355804481216L;

private final @Nonnull Map<Computer, T> data;
private final @Nonnull ArrayList<Computer> skipped;

private Result(@Nonnull Map<Computer, T> data, @Nonnull Collection<Computer> skipped) {
this.data = new HashMap<>(data);
this.skipped = new ArrayList<>(skipped);
}

protected @Nonnull Map<Computer, T> getMonitoringData() {
return data;
}

/**
* Computers that ware skipped during monitoring as they either do not have a a channel (offline) or the monitor
* have not produced the Callable. Computers that caused monitor to throw exception are not returned here.
*/
protected @Nonnull List<Computer> getSkipped() {
return skipped;
}
}
}
16 changes: 10 additions & 6 deletions core/src/main/java/hudson/node_monitors/ResponseTimeMonitor.java
Expand Up @@ -46,20 +46,24 @@
public class ResponseTimeMonitor extends NodeMonitor {
@Extension
public static final AbstractNodeMonitorDescriptor<Data> DESCRIPTOR = new AbstractAsyncNodeMonitorDescriptor<Data>() {

@Override
protected Callable<Data,IOException> createCallable(Computer c) {
if (c.getChannel() == null) {
return null;
}
return new Step1(get(c));
}

@Override
protected Map<Computer, Data> monitor() throws InterruptedException {
Map<Computer, Data> base = super.monitor();
for (Entry<Computer, Data> e : base.entrySet()) {
Result<Data> base = monitorDetailed();
Map<Computer, Data> monitoringData = base.getMonitoringData();
for (Entry<Computer, Data> e : monitoringData.entrySet()) {
Computer c = e.getKey();
Data d = e.getValue();
if (base.getSkipped().contains(c)) {
assert d == null;
continue;
}

if (d ==null) {
// if we failed to monitor, put in the special value that indicates a failure
e.setValue(d=new Data(get(c),-1L));
Expand All @@ -74,7 +78,7 @@ protected Map<Computer, Data> monitor() throws InterruptedException {
LOGGER.warning(Messages.ResponseTimeMonitor_MarkedOffline(c.getName()));
}
}
return base;
return monitoringData;
}

public String getDisplayName() {
Expand Down
@@ -1,14 +1,22 @@
package hudson.node_monitors;

import hudson.model.Computer;
import hudson.model.ComputerSet;
import hudson.model.Node;
import hudson.model.User;
import hudson.slaves.DumbSlave;
import hudson.slaves.JNLPLauncher;
import hudson.slaves.OfflineCause;
import hudson.slaves.RetentionStrategy;
import hudson.slaves.SlaveComputer;
import org.junit.Rule;
import org.junit.Test;
import org.jvnet.hudson.test.Issue;
import org.jvnet.hudson.test.JenkinsRule;

import java.util.Collections;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;

Expand Down Expand Up @@ -45,4 +53,19 @@ public void skipOfflineAgent() throws Exception {
assertNotNull(ResponseTimeMonitor.DESCRIPTOR.monitor(c));
}

@Test
public void doNotDisconnectBeforeLaunched() throws Exception {
DumbSlave slave = new DumbSlave("dummy", "dummy", j.createTmpDir().getPath(), "1", Node.Mode.NORMAL, "", new JNLPLauncher(), RetentionStrategy.NOOP, Collections.EMPTY_LIST);
j.jenkins.addNode(slave);
Computer c = slave.toComputer();
assertNotNull(c);
OfflineCause originalOfflineCause = c.getOfflineCause();

ResponseTimeMonitor rtm = ComputerSet.getMonitors().get(ResponseTimeMonitor.class);
for (int i = 0; i < 10; i++) {
rtm.triggerUpdate().join();
System.out.println(rtm.getDescriptor().get(c));
assertEquals(originalOfflineCause, c.getOfflineCause());
}
}
}

0 comments on commit 527141e

Please sign in to comment.