Skip to content

Commit

Permalink
JENKINS-32915 (#193)
Browse files Browse the repository at this point in the history
* JENKINS-32915: Corrected horizontal scaling when the cloud/job label is null, i.e. not defined in the configuration. Better handling of improper Jenkins core management of excessWorkload: if an instance takes 5 minutes to wake up, Jenkins does not take it into consideration and tries to provision more until capacity is reached or the queue is finally picked up. Added a log message stating that provisioning of a spot instance will not be possible if the label is not configured. It should be made explicit to the user in the configuration that a label is needed.

* JENKINS-32915: Added missing implementation of method

* JENKINS-32915: Amendments to match tested bottom-up code to minimize the risk of improper behaviour.

* JENKINS-32915: Negation fix

* JENKINS-32915: Refactored code to make it more readable and perform better

* JENKINS-32915: Refactored code to scale out properly. Added warning messages, information messages, amended future task to hold for timeout time

* Cleaning up PR as requested by @francisu
  • Loading branch information
jjudd authored and Francis Upton IV committed May 4, 2016
1 parent a6d2c87 commit ac5574e
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 14 deletions.
3 changes: 2 additions & 1 deletion src/main/java/hudson/plugins/ec2/EC2AbstractSlave.java
Expand Up @@ -92,6 +92,7 @@ public abstract class EC2AbstractSlave extends Slave {
public final String idleTerminationMinutes;
public final boolean usePrivateDnsName;
public final boolean useDedicatedTenancy;
public boolean isConnected = false;
public List<EC2Tag> tags;
public final String cloudName;
public AMITypeData amiType;
Expand Down Expand Up @@ -388,7 +389,7 @@ public boolean getStopOnTerminate() {
* Called when the slave is connected to Jenkins
*/
public void onConnected() {
    // Record on this slave that the connection to Jenkins has been established.
    this.isConnected = true;
}

protected boolean isAlive(boolean force) {
Expand Down
38 changes: 25 additions & 13 deletions src/main/java/hudson/plugins/ec2/EC2Cloud.java
Expand Up @@ -428,29 +428,27 @@ public Collection<PlannedNode> provision(Label label, int excessWorkload) {
try {
List<PlannedNode> r = new ArrayList<PlannedNode>();
final SlaveTemplate t = getTemplate(label);

LOGGER.log(Level.INFO, "Attempting provision, excess workload: " + excessWorkload);
if (label == null) {
LOGGER.log(Level.WARNING, String.format("Label is null - can't caculate how many executors slave will have. Using %s number of executors", t.getNumExecutors()));
}
while (excessWorkload > 0) {
LOGGER.log(Level.FINE, "Attempting provision, excess workload: " + excessWorkload);

final EC2AbstractSlave slave = provisionSlaveIfPossible(t);
// Returned null if a new node could not be created
if (slave == null)
break;
LOGGER.log(Level.INFO, String.format("We have now %s computers", Jenkins.getInstance().getComputers().length));
Jenkins.getInstance().addNode(slave);
LOGGER.log(Level.INFO, String.format("Added node named: %s, We have now %s computers", slave.getNodeName(), Jenkins.getInstance().getComputers().length));
r.add(new PlannedNode(t.getDisplayName(), Computer.threadPoolForRemoting.submit(new Callable<Node>() {

public Node call() throws Exception {
try {
slave.toComputer().connect(false).get();
} catch (Exception e) {
if (t.spotConfig != null) {
LOGGER.log(Level.INFO, "Expected - Spot instance " + slave.getInstanceId()
+ " failed to connect on initial provision");
return slave;
}
throw e;
long startTime = System.currentTimeMillis(); // fetch starting time
while ((System.currentTimeMillis() - startTime) < slave.launchTimeout * 1000) {
return tryToCallSlave(slave, t);
}
return slave;
LOGGER.log(Level.WARNING, "Expected - Instance - failed to connect within launch timeout");
return tryToCallSlave(slave, t);
}
}), t.getNumExecutors()));

Expand All @@ -467,6 +465,20 @@ public Node call() throws Exception {
}
}

/**
 * Asks the slave's computer to connect and blocks until that attempt completes.
 *
 * <p>This is a best-effort call: a failed connection attempt is never propagated to
 * the caller. Every failure is logged, and the slave is returned regardless of the
 * outcome.
 *
 * @param slave the slave whose computer should be connected
 * @param template the template the slave belongs to; only its {@code spotConfig} is
 *        consulted, to decide how a failure is reported
 * @return the {@code slave} argument, whether or not the connection succeeded
 */
private EC2AbstractSlave tryToCallSlave(EC2AbstractSlave slave, SlaveTemplate template) {
    try {
        slave.toComputer().connect(false).get();
    } catch (Exception e) {
        if (e instanceof InterruptedException) {
            // Do not lose the interruption request: restore the flag for code further up the stack.
            Thread.currentThread().interrupt();
        }
        final boolean spot = template.spotConfig != null;
        if (spot && StringUtils.isNotEmpty(slave.getInstanceId()) && slave.isConnected) {
            // The connect call failed, but the slave has already been flagged as connected.
            LOGGER.log(Level.INFO, String.format("Instance id: %s for node: %s is connected now.", slave.getInstanceId(), slave.getNodeName()));
        } else {
            // Previously swallowed silently; report the cause so failed launches can be diagnosed.
            LOGGER.log(spot ? Level.INFO : Level.WARNING,
                    String.format("Node: %s failed to connect", slave.getNodeName()), e);
        }
    }
    return slave;
}

@Override
public boolean canProvision(Label label) {
return getTemplate(label) != null;
Expand Down

0 comments on commit ac5574e

Please sign in to comment.