Skip to content

Commit

Permalink
Fixing JENKINS-28403 and JENKINS-27471 (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
felfert committed Nov 14, 2016
1 parent f141bc7 commit 3bc1200
Show file tree
Hide file tree
Showing 12 changed files with 263 additions and 34 deletions.
1 change: 0 additions & 1 deletion jclouds-plugin/pom.xml
Expand Up @@ -100,7 +100,6 @@
<version>3.0.4</version>
<configuration>
<excludeFilterFile>src/findbugs/excludesFilter.xml</excludeFilterFile>
<failOnError>true</failOnError>
</configuration>
<executions>
<execution>
Expand Down
Expand Up @@ -72,7 +72,7 @@ public ListBoxModel doFillTemplateNameItems(@QueryParameter("cloudName") String
JCloudsCloud c = JCloudsCloud.getByName(cname);
if (c != null) {
for (JCloudsSlaveTemplate t : c.getTemplates()) {
m.add(String.format("%s in cloud %s", t.name, cname), t.name);
m.add(t.name, t.name);
}
}
return m;
Expand Down
Expand Up @@ -7,14 +7,17 @@
import hudson.model.AbstractProject;
import hudson.model.BuildListener;
import hudson.model.Computer;
import hudson.slaves.Cloud;
import hudson.tasks.BuildWrapper;
import hudson.tasks.BuildWrapperDescriptor;

import java.io.IOException;
import java.io.PrintStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import jenkins.model.Jenkins;
import jenkins.plugins.jclouds.compute.internal.NodePlan;
import jenkins.plugins.jclouds.compute.internal.ProvisionPlannedInstancesAndDestroyAllOnError;
import jenkins.plugins.jclouds.compute.internal.RunningNode;
Expand Down Expand Up @@ -48,20 +51,51 @@ public List<InstancesToRun> getInstancesToRun() {
return instancesToRun;
}

/**
 * Tells whether starting additional instances would exceed the instance cap of a cloud.
 *
 * @param cloudName name of the cloud to look up in Jenkins' cloud list
 * @param numOfNewInstances number of instances that are about to be started in that cloud
 * @return {@code true} if the cloud is a {@link JCloudsCloud} and its running node count plus
 *         {@code numOfNewInstances} is greater than its {@code instanceCap}; {@code false}
 *         otherwise, including when the cloud is unknown or is not a JClouds cloud
 */
private boolean isBeyondInstanceCap(final String cloudName, int numOfNewInstances) {
    final Cloud c = Jenkins.getInstance().clouds.getByName(cloudName);
    // instanceof is already false for null, so no separate null check is needed.
    if (c instanceof JCloudsCloud) {
        final JCloudsCloud jc = (JCloudsCloud) c;
        // Strictly greater: a request that lands exactly on the cap is still allowed.
        // With ">=" a cap of N could never be fully used (e.g. a cap of 1 would reject
        // the very first instance).
        return jc.getRunningNodesCount() + numOfNewInstances > jc.instanceCap;
    }
    return false;
}

/**
 * Adds up the requested instance counts per cloud name and checks every total with
 * {@code isBeyondInstanceCap}.
 *
 * @return the name of the first cloud (in the map's iteration order) for which
 *         {@code isBeyondInstanceCap} reports {@code true}, or {@code null} if there is none
 * @throws IOException declared in the signature; no statement in this body throws it directly
 */
private String validateInstanceCaps() throws IOException {
    final Map<String, Integer> totals = new HashMap<>();
    for (final InstancesToRun toRun : instancesToRun) {
        final Integer requested = Integer.valueOf(toRun.count);
        final Integer soFar = totals.get(toRun.cloudName);
        // First request for this cloud: store it as is; otherwise accumulate.
        totals.put(toRun.cloudName, null == soFar ? requested : Integer.valueOf(soFar + requested));
    }
    for (final Map.Entry<String, Integer> total : totals.entrySet()) {
        final String cloud = total.getKey();
        final int wanted = total.getValue().intValue();
        if (isBeyondInstanceCap(cloud, wanted)) {
            return cloud;
        }
    }
    return null;
}

//
// convert Jenkins staticy stuff into pojos; performing as little critical stuff here as
// possible, as this method is very hard to test due to static usage, etc.
//
@Override
public Environment setUp(final AbstractBuild build, Launcher launcher, final BuildListener listener) {
// TODO: on shutdown, close all
public Environment setUp(final AbstractBuild build, Launcher launcher, final BuildListener listener) throws IOException {
final String failedCloud = validateInstanceCaps();
if (null != failedCloud) {
listener.fatalError("Unable to launch supplemental JClouds instances:");
throw new IOException(String.format("Instance cap for cloud %s reached.", failedCloud));
}

final LoadingCache<String, ComputeService> computeCache = CacheBuilder.newBuilder().build(new CacheLoader<String, ComputeService>() {

@Override
public ComputeService load(String arg0) throws Exception {
return JCloudsCloud.getByName(arg0).getCompute();
}

});

// eagerly lookup node supplier so that errors occur before we attempt to provision things
Expand All @@ -86,18 +120,18 @@ public NodePlan apply(InstancesToRun instance) {
ProvisionPlannedInstancesAndDestroyAllOnError provisioner = new ProvisionPlannedInstancesAndDestroyAllOnError(
MoreExecutors.listeningDecorator(Computer.threadPoolForRemoting), logger, terminateNodes);

final Iterable<RunningNode> runningNode = provisioner.apply(nodePlans);
final Iterable<RunningNode> runningNodes = provisioner.apply(nodePlans);

return new Environment() {
@Override
public void buildEnvVars(Map<String, String> env) {
List<String> ips = getInstanceIPs(runningNode, listener.getLogger());
List<String> ips = getInstanceIPs(runningNodes, listener.getLogger());
env.put("JCLOUDS_IPS", Util.join(ips, ","));
}

@Override
public boolean tearDown(AbstractBuild build, final BuildListener listener) throws IOException, InterruptedException {
terminateNodes.apply(runningNode);
terminateNodes.apply(runningNodes);
return true;
}

Expand All @@ -122,7 +156,7 @@ public List<String> getInstanceIPs(Iterable<RunningNode> runningNodes, PrintStre
public static final class DescriptorImpl extends BuildWrapperDescriptor {
@Override
public String getDisplayName() {
return "JClouds Instance Creation";
return "Create supplemental instances";
}

@Override
Expand Down
Expand Up @@ -117,7 +117,6 @@ public static List<String> getCloudNames() {
cloudNames.add(c.name);
}
}

return cloudNames;
}

Expand Down Expand Up @@ -412,7 +411,7 @@ public int getRunningNodesCount() {
if (getTemplate(nodeGroup) != null && !nm.getStatus().equals(NodeMetadata.Status.SUSPENDED)
&& !nm.getStatus().equals(NodeMetadata.Status.TERMINATED)) {
nodeCount++;
}
}
}
}
return nodeCount;
Expand Down
Expand Up @@ -53,7 +53,7 @@ public boolean tearDown(AbstractBuild build, final BuildListener listener) throw
public static final class DescriptorImpl extends BuildWrapperDescriptor {
@Override
public String getDisplayName() {
return "JClouds Single-Use Slave";
return "JClouds Single-use slave";
}

@Override
Expand Down
@@ -0,0 +1,118 @@
package jenkins.plugins.jclouds.compute;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.logging.Level;
import java.util.logging.Logger;

import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.DirectoryIteratorException;
import java.nio.file.DirectoryStream;

import org.jclouds.compute.ComputeService;
import org.jclouds.compute.domain.NodeMetadata;

import shaded.com.google.common.base.Predicate;
import shaded.com.google.common.collect.Multimap;

import hudson.Extension;
import hudson.model.listeners.ItemListener;
import jenkins.model.Jenkins;

import jenkins.plugins.jclouds.compute.internal.TerminateNodes.Persistent;

/**
 * Startup handler for JClouds.
 * The sole purpose of this handler is to delete/suspend stale supplemental
 * instances which might have been left running when jenkins was forcibly restarted or
 * shut down while running a job that uses JCloudsBuildWrapper.
 *
 * In order to do so, {@code TerminateNodes} persists a list of nodes to shut down before
 * it attempts the actual delete/suspend. After successfully handling all nodes, the
 * persisting xml file is removed. During a hard shutdown/restart, this process is usually
 * aborted prematurely and the xml file remains. This handler then picks those files up at
 * the next jenkins startup and completes the operation.
 */
@Extension
public class JCloudsStartupHandler extends ItemListener {
    private static final Logger LOGGER = Logger.getLogger(JCloudsStartupHandler.class.getName());

    /** Glob used to find leftover node lists in the Jenkins root directory. */
    private static final String STALE_PATTERN = "jenkins.plugins.jclouds.compute.internal.TerminateNodes@*.xml";

    /** Guards the cleanup so that it runs only on the first {@link #onLoaded()} call. */
    private final AtomicBoolean initial = new AtomicBoolean(true);

    /**
     * On the first invocation, processes every stale node list found in the Jenkins root
     * directory: suspends and destroys the recorded nodes, then removes the list file.
     * Later invocations do nothing.
     */
    @Override
    public void onLoaded() {
        if (!initial.compareAndSet(true, false)) {
            return;
        }
        for (Path path : listStaleNodeLists()) {
            final Persistent p = new Persistent(path.toFile());
            // Both phases handle their own failures, so a problem while suspending
            // no longer prevents the destroy phase from running.
            processStaleNodes(p.getNodesToSuspend(), false);
            processStaleNodes(p.getNodesToDestroy(), true);
            p.remove();
        }
    }

    /**
     * Suspends or destroys the given nodes, cloud by cloud. Failures are logged and do not
     * stop the processing of the remaining clouds.
     *
     * @param work node ids keyed by cloud name; {@code null} is treated as "nothing to do"
     * @param destroy {@code true} to destroy the nodes, {@code false} to suspend them
     */
    private void processStaleNodes(final Multimap<String, String> work, final boolean destroy) {
        if (null == work) {
            return;
        }
        final String verb = destroy ? "Destroying" : "Suspending";
        for (final String cloud : work.keySet()) {
            try {
                final JCloudsCloud c = JCloudsCloud.getByName(cloud);
                if (null == c) {
                    continue;
                }
                final Collection<String> nodes = work.get(cloud);
                final ComputeService cs = c.newCompute();
                if (null == cs) {
                    continue;
                }
                final Predicate<NodeMetadata> isStale = new Predicate<NodeMetadata>() {
                    public boolean apply(final NodeMetadata input) {
                        return nodes.contains(input.getId());
                    }
                };
                try {
                    LOGGER.info(verb + " stale nodes in cloud " + cloud + ": " + nodes);
                    if (destroy) {
                        cs.destroyNodesMatching(isStale);
                    } else {
                        cs.suspendNodesMatching(isStale);
                    }
                } catch (Exception e) {
                    // Pass the exception itself so the stack trace is not lost.
                    LOGGER.log(Level.WARNING, verb + " on cloud: " + cloud + "; nodes: " + nodes, e);
                } finally {
                    // Always release the compute context created for this cleanup.
                    cs.getContext().close();
                }
            } catch (Exception e) {
                LOGGER.log(Level.WARNING, "Stale node cleanup", e);
            }
        }
    }

    /**
     * Lists the files in the Jenkins root directory whose names match {@link #STALE_PATTERN}.
     *
     * @return the matching paths; empty (or partial) if the directory could not be iterated
     */
    private List<Path> listStaleNodeLists() {
        final List<Path> ret = new ArrayList<>();
        try {
            final Path jroot = Jenkins.getInstance().getRootDir().toPath();
            try (DirectoryStream<Path> ds = Files.newDirectoryStream(jroot, STALE_PATTERN)) {
                for (Path entry : ds) {
                    ret.add(entry);
                }
            }
        } catch (DirectoryIteratorException ex) {
            // The iterator wraps the underlying I/O failure; report that one.
            LOGGER.warning("Could not iterate jenkins root: " + ex.getCause());
        } catch (Exception ex) {
            LOGGER.warning("Could not iterate jenkins root: " + ex);
        }
        return ret;
    }
}
@@ -1,7 +1,13 @@
package jenkins.plugins.jclouds.compute.internal;

import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.Collection;

import jenkins.model.Jenkins;
import hudson.XmlFile;

import org.jclouds.compute.ComputeService;
import org.jclouds.compute.domain.NodeMetadata;
import org.jclouds.logging.Logger;
Expand All @@ -11,12 +17,65 @@
import shaded.com.google.common.cache.LoadingCache;
import shaded.com.google.common.collect.ImmutableMultimap;
import shaded.com.google.common.collect.Multimap;
import shaded.com.google.common.collect.ArrayListMultimap;
import shaded.com.google.common.collect.ImmutableMultimap.Builder;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

public class TerminateNodes implements Function<Iterable<RunningNode>, Void> {

private final Logger logger;
private final LoadingCache<String, ComputeService> computeCache;

/**
 * XML-backed record of the nodes that are about to be suspended and destroyed.
 * One constructor writes a new record below the Jenkins root directory, the other one
 * reads an existing record back from a file.
 */
@SuppressFBWarnings("SE_TRANSIENT_FIELD_NOT_RESTORED")
public static class Persistent implements Serializable {
    private static final long serialVersionUID = 3970810124738772984L;
    private static final java.util.logging.Logger LOGGER = java.util.logging.Logger.getLogger(TerminateNodes.class.getName());

    /** Backing file; transient, so it is not part of the persisted data. */
    private final transient File f;
    private Multimap<String, String> nodesToSuspend;
    private Multimap<String, String> nodesToDestroy;

    /**
     * Creates a record and immediately writes it to {@code <jenkins root>/<name>.xml}.
     * A write failure is logged and otherwise ignored.
     *
     * @param name base name of the xml file (without extension)
     * @param toSuspend node ids to suspend, keyed by cloud name
     * @param toDestroy node ids to destroy, keyed by cloud name
     */
    public Persistent(final String name, final Multimap<String, String> toSuspend, final Multimap<String, String> toDestroy) {
        nodesToSuspend = toSuspend;
        nodesToDestroy = toDestroy;
        f = new File(Jenkins.getInstance().getRootDir(), name + ".xml");
        XmlFile xf = new XmlFile(f);
        try {
            xf.write(this);
        } catch (IOException x) {
            // Include the file and the cause; a bare "Failed to persist" is not actionable.
            LOGGER.log(java.util.logging.Level.WARNING, "Failed to persist " + f.getPath(), x);
        }
    }

    /**
     * Loads a record from an existing xml file. If the file cannot be read, both node
     * lists are set to empty multimaps and the failure is logged.
     *
     * @param src the xml file to read; also the file deleted by {@link #remove()}
     */
    public Persistent(final File src) {
        f = src;
        XmlFile xf = new XmlFile(f);
        try {
            xf.unmarshal(this);
        } catch (IOException x) {
            nodesToSuspend = ArrayListMultimap.create();
            nodesToDestroy = ArrayListMultimap.create();
            LOGGER.log(java.util.logging.Level.WARNING, "Failed to unmarshal " + f.getPath(), x);
        }
    }

    /** Deletes the backing file; logs a warning if the deletion fails. */
    public void remove() {
        if (!f.delete()) {
            LOGGER.warning("Could not delete " + f.getPath());
        }
    }

    /**
     * @return node ids to suspend, keyed by cloud name; an empty multimap if none were set
     */
    public Multimap<String, String> getNodesToSuspend() {
        // The field can still be null after reading a file that lacks the element.
        if (null == nodesToSuspend) {
            return ArrayListMultimap.<String, String>create();
        }
        return nodesToSuspend;
    }

    /**
     * @return node ids to destroy, keyed by cloud name; an empty multimap if none were set
     */
    public Multimap<String, String> getNodesToDestroy() {
        if (null == nodesToDestroy) {
            return ArrayListMultimap.<String, String>create();
        }
        return nodesToDestroy;
    }
}


public TerminateNodes(Logger logger, LoadingCache<String, ComputeService> computeCache) {
this.logger = logger;
this.computeCache = computeCache;
Expand All @@ -32,24 +91,24 @@ public Void apply(Iterable<RunningNode> runningNode) {
cloudNodesToDestroyBuilder.put(cloudTemplateNode.getCloudName(), cloudTemplateNode.getNode().getId());
}
}
Multimap<String, String> cloudNodesToSuspend = cloudNodesToSuspendBuilder.build();
Multimap<String, String> cloudNodesToDestroy = cloudNodesToDestroyBuilder.build();
Multimap<String, String> toSuspend = cloudNodesToSuspendBuilder.build();
Multimap<String, String> toDestroy = cloudNodesToDestroyBuilder.build();

suspendIfSupported(cloudNodesToSuspend);
destroy(cloudNodesToDestroy);
Persistent p = new Persistent(this.toString(), toSuspend, toDestroy);
suspendIfSupported(toSuspend);
destroy(toDestroy);
p.remove();
return null;
}

private void destroy(Multimap<String, String> cloudNodesToDestroy) {
for (String cloudToDestroy : cloudNodesToDestroy.keySet()) {
for (final String cloudToDestroy : cloudNodesToDestroy.keySet()) {
final Collection<String> nodesToDestroy = cloudNodesToDestroy.get(cloudToDestroy);
logger.info("Destroying nodes: " + nodesToDestroy);
computeCache.getUnchecked(cloudToDestroy).destroyNodesMatching(new Predicate<NodeMetadata>() {

public boolean apply(NodeMetadata input) {
return nodesToDestroy.contains(input.getId());
}

});
}
}
Expand Down
Expand Up @@ -19,7 +19,7 @@
</f:radioBlock>


<f:entry title="${%Number of Executors}" field="count">
<f:entry title="${%Number of instances}" field="count">
<f:number clazz="positive-number" min="1" step="1" default="1"/>
</f:entry>

Expand Down

This file was deleted.

0 comments on commit 3bc1200

Please sign in to comment.