Skip to content

Commit

Permalink
Merge pull request #1596 from stephenc/threadsafe-node-queue
Browse files Browse the repository at this point in the history
[JENKINS-27565] Fix threading issues with Nodes and Queue
  • Loading branch information
stephenc committed Mar 26, 2015
2 parents 1c78152 + 17d6d41 commit ecac963
Show file tree
Hide file tree
Showing 27 changed files with 1,742 additions and 674 deletions.
21 changes: 20 additions & 1 deletion core/src/main/java/hudson/Functions.java
Expand Up @@ -28,6 +28,7 @@
import hudson.cli.CLICommand;
import hudson.console.ConsoleAnnotationDescriptor;
import hudson.console.ConsoleAnnotatorFactory;
import hudson.init.InitMilestone;
import hudson.model.AbstractProject;
import hudson.model.Action;
import hudson.model.Describable;
Expand Down Expand Up @@ -201,7 +202,25 @@ public static String xsDate(Calendar cal) {
public static String rfc822Date(Calendar cal) {
return Util.RFC822_DATETIME_FORMATTER.format(cal.getTime());
}


/**
* During Jenkins start-up, before {@link InitMilestone#PLUGINS_STARTED} the extensions lists will be empty
* and they are not guaranteed to be fully populated until after {@link InitMilestone#EXTENSIONS_AUGMENTED}.
* If you attempt to access the extensions list from a UI thread while the extensions are being loaded you will
* hit a big honking great monitor lock that will block until the effective extension list has been determined
* (as if a plugin fails to start, all of the failed plugin's extensions and any dependent plugins' extensions
* will have to be evicted from the list of extensions. In practical terms this only affects the
* "Jenkins is loading" screen, but as that screen uses the generic layouts we provide this utility method
* so that the generic layouts can avoid iterating extension lists while Jenkins is starting up.
*
* @return {@code true} if the extensions lists have been populated.
* @since 1.FIXME
*/
public static boolean isExtensionsAvailable() {
final Jenkins jenkins = Jenkins.getInstance();
return jenkins != null && jenkins.getInitLevel().compareTo(InitMilestone.EXTENSIONS_AUGMENTED) >= 0;
}

public static void initPageVariables(JellyContext context) {
StaplerRequest currentRequest = Stapler.getCurrentRequest();
String rootURL = currentRequest.getContextPath();
Expand Down
86 changes: 49 additions & 37 deletions core/src/main/java/hudson/model/AbstractCIBase.java
Expand Up @@ -34,7 +34,6 @@
import org.kohsuke.stapler.StaplerFallback;
import org.kohsuke.stapler.StaplerProxy;

import java.io.IOException;
import java.util.*;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.logging.Logger;
Expand All @@ -48,8 +47,6 @@ public abstract class AbstractCIBase extends Node implements ItemGroup<TopLevelI

private static final Logger LOGGER = Logger.getLogger(AbstractCIBase.class.getName());

private final transient Object updateComputerLock = new Object();

/**
* If you are calling this on Hudson something is wrong.
*
Expand Down Expand Up @@ -137,15 +134,20 @@ private void updateComputer(Node n, Map<String,Computer> byNameMap, Set<Computer
used.add(c);
}

/*package*/ void removeComputer(Computer computer) {
Map<Node,Computer> computers = getComputerMap();
for (Map.Entry<Node, Computer> e : computers.entrySet()) {
if (e.getValue() == computer) {
computers.remove(e.getKey());
computer.onRemoved();
return;
/*package*/ void removeComputer(final Computer computer) {
Queue.withLock(new Runnable() {
@Override
public void run() {
Map<Node,Computer> computers = getComputerMap();
for (Map.Entry<Node, Computer> e : computers.entrySet()) {
if (e.getValue() == computer) {
computers.remove(e.getKey());
computer.onRemoved();
return;
}
}
}
}
});
}

/*package*/ @CheckForNull Computer getComputer(Node n) {
Expand All @@ -160,36 +162,46 @@ private void updateComputer(Node n, Map<String,Computer> byNameMap, Set<Computer
* This method tries to reuse existing {@link Computer} objects
* so that we won't upset {@link Executor}s running in it.
*/
protected void updateComputerList(boolean automaticSlaveLaunch) throws IOException {
Map<Node,Computer> computers = getComputerMap();
synchronized(updateComputerLock) {// just so that we don't have two code updating computer list at the same time
Map<String,Computer> byName = new HashMap<String,Computer>();
for (Computer c : computers.values()) {
Node node = c.getNode();
if (node == null)
continue; // this computer is gone
byName.put(node.getNodeName(),c);
}
protected void updateComputerList(final boolean automaticSlaveLaunch) {
final Map<Node,Computer> computers = getComputerMap();
final Set<Computer> old = new HashSet<Computer>(computers.size());
Queue.withLock(new Runnable() {
@Override
public void run() {
Map<String,Computer> byName = new HashMap<String,Computer>();
for (Computer c : computers.values()) {
old.add(c);
Node node = c.getNode();
if (node == null)
continue; // this computer is gone
byName.put(node.getNodeName(),c);
}

final Set<Computer> old = new HashSet<Computer>(computers.values());
Set<Computer> used = new HashSet<Computer>();
Set<Computer> used = new HashSet<Computer>(old.size());

updateComputer(this, byName, used, automaticSlaveLaunch);
for (Node s : getNodes()) {
long start = System.currentTimeMillis();
updateComputer(s, byName, used, automaticSlaveLaunch);
if(LOG_STARTUP_PERFORMANCE)
LOGGER.info(String.format("Took %dms to update node %s",
System.currentTimeMillis()-start, s.getNodeName()));
}
updateComputer(AbstractCIBase.this, byName, used, automaticSlaveLaunch);
for (Node s : getNodes()) {
long start = System.currentTimeMillis();
updateComputer(s, byName, used, automaticSlaveLaunch);
if(LOG_STARTUP_PERFORMANCE)
LOGGER.info(String.format("Took %dms to update node %s",
System.currentTimeMillis()-start, s.getNodeName()));
}

// find out what computers are removed, and kill off all executors.
// when all executors exit, it will be removed from the computers map.
// so don't remove too quickly
old.removeAll(used);
for (Computer c : old) {
killComputer(c);
// find out what computers are removed, and kill off all executors.
// when all executors exit, it will be removed from the computers map.
// so don't remove too quickly
old.removeAll(used);
// we need to start the process of reducing the executors on all computers as distinct
// from the killing action which should not excessively use the Queue lock.
for (Computer c : old) {
c.inflictMortalWound();
}
}
});
for (Computer c : old) {
// when we get to here, the number of executors should be zero so this call should not need the Queue.lock
killComputer(c);
}
getQueue().scheduleMaintenance();
for (ComputerListener cl : ComputerListener.all())
Expand Down

0 comments on commit ecac963

Please sign in to comment.