Skip to content

Commit

Permalink
- add a new mechanism to help count the total number of EC2 instances…
Browse files Browse the repository at this point in the history
… for a particular AMI. As an EC2Slave

is being provisioned, a temporary count is placed in a HashMap, which is used in addition to the
count reported by Amazon itself for a particular AMI. This is so that the count of total
nodes provisioned takes into account those which Amazon doesn't report yet. The count returned
may be too high if Amazon reports an instance that is also still in the "provision" count, but it is
better to err on the side of not spawning a node too soon; it will get spawned on the next go-around.
Tries to fix [JENKINS-6691].
  • Loading branch information
zzzeek committed Oct 10, 2012
1 parent e3e579a commit 8d0a9fa
Showing 1 changed file with 101 additions and 29 deletions.
130 changes: 101 additions & 29 deletions src/main/java/hudson/plugins/ec2/EC2Cloud.java
Expand Up @@ -11,6 +11,7 @@
import hudson.util.Secret;
import hudson.util.StreamTaskListener;

import java.lang.Math;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.StringReader;
Expand All @@ -22,6 +23,7 @@
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.HashMap;
import java.util.concurrent.Callable;
import java.util.logging.Level;
import java.util.logging.Logger;
Expand Down Expand Up @@ -50,15 +52,15 @@


/**
* Hudson's view of EC2.
* Hudson's view of EC2.
*
* @author Kohsuke Kawaguchi
*/
public abstract class EC2Cloud extends Cloud {

public static final String DEFAULT_EC2_HOST = "us-east-1";
public static final String EC2_URL_HOST = "ec2.amazonaws.com";

private final String accessId;
private final Secret secretKey;
private final EC2PrivateKey privateKey;
Expand All @@ -71,9 +73,14 @@ public abstract class EC2Cloud extends Cloud {
private transient KeyPair usableKeyPair;

private transient AmazonEC2 connection;

private static AWSCredentials awsCredentials;


/* Tracks, per AMI identifier, the number of slaves that are currently being
* provisioned but are not necessarily reported yet by Amazon.
*/
private static HashMap<String, Integer> provisioningAmis = new HashMap<String, Integer>();

protected EC2Cloud(String id, String accessId, String secretKey, String privateKey, String instanceCapStr, List<SlaveTemplate> templates) {
super(id);
this.accessId = accessId.trim();
Expand Down Expand Up @@ -214,44 +221,109 @@ public void doProvision(StaplerRequest req, StaplerResponse rsp, @QueryParameter
}
}


/**
 * Check for the count of EC2 slaves and determine if a new slave can be added.
 * Takes into account both what Amazon reports as well as an internal count
 * of slaves currently being "provisioned".
 *
 * <p>When the slave may be added, the in-flight provisioning count for
 * {@code ami} is incremented before returning {@code true}; the caller is
 * expected to balance that with {@link #decrementAmiSlaveProvision(String)}.
 *
 * @param ami    identifier of the AMI a slave is about to be provisioned for
 * @param amiCap maximum number of slaves allowed for {@code ami}
 * @return {@code true} if neither the total instance cap nor the per-AMI cap
 *         has been reached and the provisioning count was incremented;
 *         {@code false} otherwise
 * @throws AmazonClientException propagated from {@code countCurrentEC2Slaves}
 */
private boolean addProvisionedSlave(String ami, int amiCap) throws AmazonClientException {
    // Query the reported counts before taking the lock so that the calls to
    // countCurrentEC2Slaves are not made while holding the monitor.
    int estimatedTotalSlaves = countCurrentEC2Slaves(null);
    int estimatedAmiSlaves = countCurrentEC2Slaves(ami);

    synchronized (provisioningAmis) {
        // Add every in-flight provisioning request to the overall estimate.
        for (int amiCount : provisioningAmis.values()) {
            estimatedTotalSlaves += amiCount;
        }

        // A missing entry simply means nothing is being provisioned for this
        // AMI; test for null explicitly instead of relying on the
        // NullPointerException thrown by auto-unboxing.
        Integer pending = provisioningAmis.get(ami);
        int currentProvisioning = (pending == null) ? 0 : pending.intValue();

        estimatedAmiSlaves += currentProvisioning;

        if (estimatedTotalSlaves >= instanceCap) {
            LOGGER.log(Level.INFO, "Total instance cap of " + instanceCap +
                    " reached, not provisioning.");
            return false; // maxed out
        }

        if (estimatedAmiSlaves >= amiCap) {
            LOGGER.log(Level.INFO, "AMI Instance cap of " + amiCap +
                    " reached for ami " + ami +
                    ", not provisioning.");
            return false; // maxed out
        }

        LOGGER.log(Level.INFO,
                "Provisioning for AMI " + ami + "; " +
                "Estimated number of total slaves: "
                + estimatedTotalSlaves + "; " +
                "Estimated number of slaves for ami "
                + ami + ": "
                + estimatedAmiSlaves);

        provisioningAmis.put(ami, currentProvisioning + 1);
        return true;
    }
}

/**
 * Decrease the count of slaves being "provisioned".
 *
 * <p>Does nothing if no provisioning count is recorded for {@code ami}.
 * The count never drops below zero; once it reaches zero the entry is
 * removed so the static map does not accumulate stale AMI keys.
 *
 * @param ami identifier of the AMI whose in-flight provisioning count
 *            should be reduced by one
 */
private void decrementAmiSlaveProvision(String ami) {
    synchronized (provisioningAmis) {
        // Explicit null check instead of catching the NullPointerException
        // raised by auto-unboxing a missing entry.
        Integer pending = provisioningAmis.get(ami);
        if (pending == null) {
            return;
        }

        int remaining = Math.max(pending.intValue() - 1, 0);
        if (remaining == 0) {
            // An absent entry is treated as zero by the code that reads this map.
            provisioningAmis.remove(ami);
        } else {
            provisioningAmis.put(ami, remaining);
        }
    }
}

@Override
public Collection<PlannedNode> provision(Label label, int excessWorkload) {
try {
List<PlannedNode> r = new ArrayList<PlannedNode>();

final SlaveTemplate t = getTemplate(label);
int amiCap = t.getInstanceCap();

final SlaveTemplate t = getTemplate(label);

List<PlannedNode> r = new ArrayList<PlannedNode>();
for( ; excessWorkload>0; excessWorkload-- ) {
if(countCurrentEC2Slaves(null)>=instanceCap) {
LOGGER.log(Level.INFO, "Instance cap reached, not provisioning.");
break; // maxed out
}

int amiCap = t.getInstanceCap();
if (amiCap < countCurrentEC2Slaves(t.ami)) {
LOGGER.log(Level.INFO, "AMI Instance cap reached, not provisioning.");
break; // maxed out
if (!addProvisionedSlave(t.ami, amiCap)) {
break;
}


r.add(new PlannedNode(t.getDisplayName(),
Computer.threadPoolForRemoting.submit(new Callable<Node>() {
public Node call() throws Exception {
// TODO: record the output somewhere
EC2Slave s = t.provision(new StreamTaskListener(System.out));
Hudson.getInstance().addNode(s);
// EC2 instances may have a long init script. If we declare
// the provisioning complete by returning without the connect
// operation, NodeProvisioner may decide that it still wants
// one more instance, because it sees that (1) all the slaves
// are offline (because it's still being launched) and
// (2) there's no capacity provisioned yet.
//
// deferring the completion of provisioning until the launch
// goes successful prevents this problem.
s.toComputer().connect(false).get();
return s;
try {
EC2Slave s = t.provision(new StreamTaskListener(System.out));
Hudson.getInstance().addNode(s);
// EC2 instances may have a long init script. If we declare
// the provisioning complete by returning without the connect
// operation, NodeProvisioner may decide that it still wants
// one more instance, because it sees that (1) all the slaves
// are offline (because it's still being launched) and
// (2) there's no capacity provisioned yet.
//
// deferring the completion of provisioning until the launch
// goes successful prevents this problem.
s.toComputer().connect(false).get();
return s;
}
finally {
decrementAmiSlaveProvision(t.ami);
}
}
})
,t.getNumExecutors()));
Expand Down

0 comments on commit 8d0a9fa

Please sign in to comment.