Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
[FIXED JENKINS-7291] Permit flyweight tasks to run on master even whe…
…n it has zero configured executors.

Always adding Computer for master as a fallback

The original proposed fix for JENKINS-7291 creates a Computer object
transitively. This seems unwise as it violates the design of Computer
as stated in the javadoc, and for example we can end up creating two
Computers for the master.

I think a better fix is to create a Computer for the master all the
time, even if there's no executors configured. The discrimination in
Queue.makeBuildable would ensure that such phantom Computer is only used
as a last resort (statistically speaking).

I've also tweaked executors.jelly a bit. I simplified it somewhat
based on the idea that "if there's only one computer to show, the
context is likely making it obvious".

(I must be missing the intricacy in the current code.)

Originally developed in a branch at
2c5b57f then squashed for clarity.
  • Loading branch information
kohsuke committed Mar 25, 2013
1 parent b185eb5 commit a114c69
Show file tree
Hide file tree
Showing 8 changed files with 88 additions and 29 deletions.
4 changes: 4 additions & 0 deletions changelog.html
Expand Up @@ -55,6 +55,10 @@
<!-- Record your changes in the trunk here. -->
<div id="trunk" style="display:none"><!--=TRUNK-BEGIN=-->
<ul class=image>
<li class=bug>
Flyweight tasks should execute on the master if there's no static
executors available.
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-7291">issue 7291</a>)
<li class=rfe>
Promote the use of 'H' in cron.
(<a href="https://issues.jenkins-ci.org/browse/JENKINS-17311">issue 17311</a>)
Expand Down
4 changes: 3 additions & 1 deletion core/src/main/java/hudson/model/AbstractCIBase.java
Expand Up @@ -30,6 +30,7 @@
import hudson.security.AccessControlled;
import hudson.slaves.ComputerListener;
import hudson.slaves.RetentionStrategy;
import jenkins.model.Jenkins;
import org.kohsuke.stapler.StaplerFallback;
import org.kohsuke.stapler.StaplerProxy;

Expand Down Expand Up @@ -116,7 +117,8 @@ private void updateComputer(Node n, Map<String,Computer> byNameMap, Set<Computer
if (c!=null) {
c.setNode(n); // reuse
} else {
if(n.getNumExecutors()>0) {
// we always need Computer for the master as a fallback in case there's no other Computer.
if(n.getNumExecutors()>0 || n==Jenkins.getInstance()) {
computers.put(n, c = n.createComputer());
if (!n.isHoldOffLaunchUntilSave() && automaticSlaveLaunch) {
RetentionStrategy retentionStrategy = c.getRetentionStrategy();
Expand Down
10 changes: 7 additions & 3 deletions core/src/main/java/hudson/model/Computer.java
Expand Up @@ -32,6 +32,7 @@
import hudson.console.AnnotatedLargeText;
import hudson.init.Initializer;
import hudson.model.Descriptor.FormException;
import hudson.model.Queue.FlyweightTask;
import hudson.model.labels.LabelAtom;
import hudson.model.queue.WorkUnit;
import hudson.node_monitors.NodeMonitor;
Expand Down Expand Up @@ -111,7 +112,9 @@
* This object is related to {@link Node} but they have some significant difference.
* {@link Computer} primarily works as a holder of {@link Executor}s, so
* if a {@link Node} is configured (probably temporarily) with 0 executors,
* you won't have a {@link Computer} object for it.
* you won't have a {@link Computer} object for it (except for the master node,
* which always get its {@link Computer} in case we have no static executors and
* we need to run a {@link FlyweightTask} - see JENKINS-7291 for more discussion.)
*
* Also, even if you remove a {@link Node}, it takes time for the corresponding
* {@link Computer} to be removed, if some builds are already in progress on that
Expand Down Expand Up @@ -164,7 +167,6 @@
protected final Object statusChangeLock = new Object();

public Computer(Node node) {
assert node.getNumExecutors()!=0 : "Computer created with 0 executors";
setNode(node);
}

Expand Down Expand Up @@ -817,8 +819,10 @@ public final long getDemandStartMilliseconds() {
*
* Note that if an executor dies, we'll leave it in {@link #executors} until
* the administrator yanks it out, so that we can see why it died.
*
* @since 1.509
*/
private boolean isAlive() {
protected boolean isAlive() {
for (Executor e : executors)
if (e.isAlive())
return true;
Expand Down
7 changes: 4 additions & 3 deletions core/src/main/java/hudson/model/Queue.java
Expand Up @@ -1058,9 +1058,10 @@ public String hash(Node node) {
}
});
Jenkins h = Jenkins.getInstance();
hash.add(h, h.getNumExecutors()*100);
// Even if master is configured with zero executors, we may need to run a flyweight task like MatrixProject on it.
hash.add(h, Math.max(h.getNumExecutors()*100, 1));
for (Node n : h.getNodes())
hash.add(n,n.getNumExecutors()*100);
hash.add(n, n.getNumExecutors()*100);

Label lbl = p.getAssignedLabel();
for (Node n : hash.list(p.task.getFullDisplayName())) {
Expand Down Expand Up @@ -1457,7 +1458,7 @@ private Object readResolve() {

@Override
public String toString() {
return getClass().getName()+':'+task.toString();
return getClass().getName() + ':' + task + ':' + getWhy();
}
}

Expand Down
1 change: 1 addition & 0 deletions core/src/main/java/hudson/slaves/SlaveComputer.java
Expand Up @@ -134,6 +134,7 @@ public SlaveComputer(Slave slave) {
super(slave);
this.log = new ReopenableRotatingFileOutputStream(getLogFile(),10);
this.taskListener = new StreamTaskListener(log);
assert slave.getNumExecutors()!=0 : "Computer created with 0 executors";
}

/**
Expand Down
10 changes: 10 additions & 0 deletions core/src/main/java/jenkins/model/Jenkins.java
Expand Up @@ -66,6 +66,7 @@
import hudson.model.NoFingerprintMatch;
import hudson.model.OverallLoadStatistics;
import hudson.model.Project;
import hudson.model.Queue.FlyweightTask;
import hudson.model.RestartListener;
import hudson.model.RootAction;
import hudson.model.Slave;
Expand Down Expand Up @@ -3759,6 +3760,15 @@ public RetentionStrategy getRetentionStrategy() {
return RetentionStrategy.NOOP;
}

/**
* Will always keep this guy alive so that it can function as a fallback to
* execute {@link FlyweightTask}s. See JENKINS-7291.
*/
@Override
protected boolean isAlive() {
return true;
}

/**
* Report an error.
*/
Expand Down
37 changes: 15 additions & 22 deletions core/src/main/resources/lib/hudson/executors.jelly
Expand Up @@ -105,30 +105,23 @@ THE SOFTWARE.
<l:pane width="3" id="executors"
title="&lt;a href='${rootURL}/computer/'>${%Build Executor Status}&lt;/a>">
<colgroup><col width="1*"/><col width="200*"/><col width="24"/></colgroup>

<tr>
<th class="pane">#</th>
<th class="pane" colspan="2">
<div style="margin-right:19px">
${%Status}
</div>
</th>
</tr>

<j:forEach var="c" items="${computers}">
<tr>
<j:choose>
<j:when test="${c.node==app or computers.size()==1}">
<th class="pane">#</th>
<th class="pane" colspan="2">
<div style="margin-right:19px">
<j:choose>
<j:when test="${empty(app.slaves) or computers.size()==1}">
${%Status}
</j:when>
<j:otherwise>
<local:computerCaption title="${%Master}" />
</j:otherwise>
</j:choose>
</div>
</th>
</j:when>
<j:otherwise>
<th class="pane" colspan="3">
<local:computerCaption title="${c.displayName}" />
</th>
</j:otherwise>
</j:choose>
<j:if test="${computers.size() gt 1 and (c.executors.size()!=0 or c.oneOffExecutors.size()!=0)}">
<th class="pane" colspan="3">
<local:computerCaption title="${c.displayName}" />
</th>
</j:if>
</tr>
<j:forEach var="e" items="${c.executors}" varStatus="eloop">
<local:executor name="${eloop.index+1}" url="executors/${eloop.index}" />
Expand Down
44 changes: 44 additions & 0 deletions test/src/test/java/hudson/model/QueueTest.java
Expand Up @@ -39,6 +39,10 @@
import hudson.util.XStream2;
import hudson.util.OneShotEvent;
import hudson.Launcher;
import hudson.matrix.LabelAxis;
import hudson.matrix.MatrixRun;
import hudson.slaves.DummyCloudImpl;
import hudson.slaves.NodeProvisioner;
import org.apache.commons.fileupload.FileUploadException;
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
import org.apache.commons.fileupload.servlet.ServletFileUpload;
Expand All @@ -59,8 +63,12 @@
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;

/**
* @author Kohsuke Kawaguchi
Expand Down Expand Up @@ -289,6 +297,42 @@ public void testFlyweightTasks() throws Exception {
assertBuildStatusSuccess(f);
}

private int INITIALDELAY;
private int RECURRENCEPERIOD;
@Override protected void setUp() throws Exception {
INITIALDELAY = NodeProvisioner.NodeProvisionerInvoker.INITIALDELAY;
NodeProvisioner.NodeProvisionerInvoker.INITIALDELAY = 0;
RECURRENCEPERIOD = NodeProvisioner.NodeProvisionerInvoker.RECURRENCEPERIOD;
NodeProvisioner.NodeProvisionerInvoker.RECURRENCEPERIOD = 10;
super.setUp();
}
@Override protected void tearDown() throws Exception {
super.tearDown();
NodeProvisioner.NodeProvisionerInvoker.INITIALDELAY = INITIALDELAY;
NodeProvisioner.NodeProvisionerInvoker.RECURRENCEPERIOD = RECURRENCEPERIOD;
}
@Bug(7291)
public void testFlyweightTasksWithoutMasterExecutors() throws Exception {
DummyCloudImpl cloud = new DummyCloudImpl(this, 0);
cloud.label = jenkins.getLabel("remote");
jenkins.clouds.add(cloud);
jenkins.setNumExecutors(0);
jenkins.setNodes(Collections.<Node>emptyList());
MatrixProject m = createMatrixProject();
m.setAxes(new AxisList(new LabelAxis("label", Arrays.asList("remote"))));
MatrixBuild build;
try {
build = m.scheduleBuild2(0).get(60, TimeUnit.SECONDS);
} catch (TimeoutException x) {
throw new AssertionError(jenkins.getQueue().getApproximateItemsQuickly().toString(), x);
}
assertBuildStatusSuccess(build);
assertEquals("", build.getBuiltOnStr());
List<MatrixRun> runs = build.getRuns();
assertEquals(1, runs.size());
assertEquals("slave0", runs.get(0).getBuiltOnStr());
}

public void testWaitForStart() throws Exception {
final OneShotEvent ev = new OneShotEvent();
FreeStyleProject p = createFreeStyleProject();
Expand Down

0 comments on commit a114c69

Please sign in to comment.