Skip to content

Commit

Permalink
Tag deployed resources and remove them if they leak [FIXED JENKINS-41…
Browse files Browse the repository at this point in the history
…330]
  • Loading branch information
clguiman authored and arroyc committed Feb 7, 2017
1 parent ec4d96c commit 2f70f8d
Show file tree
Hide file tree
Showing 12 changed files with 598 additions and 61 deletions.
Expand Up @@ -15,6 +15,7 @@
*/
package com.microsoft.azure.vmagent;

import com.microsoft.azure.PagedList;
import com.microsoft.azure.vmagent.Messages;
import java.io.IOException;
import java.util.concurrent.Callable;
Expand All @@ -23,19 +24,30 @@
import com.microsoft.azure.vmagent.exceptions.AzureCloudException;
import com.microsoft.azure.management.Azure;
import com.microsoft.azure.management.resources.Deployment;
import com.microsoft.azure.management.resources.GenericResource;
import com.microsoft.azure.util.AzureCredentials.ServicePrincipal;
import com.microsoft.azure.vmagent.retry.DefaultRetryStrategy;
import com.microsoft.azure.vmagent.util.AzureUtil;
import com.microsoft.azure.vmagent.util.ExecutionEngine;
import com.microsoft.azure.vmagent.util.CleanUpAction;
import com.microsoft.azure.vmagent.util.Constants;
import com.microsoft.azure.vmagent.util.TokenCache;

import jenkins.model.Jenkins;
import hudson.Extension;
import hudson.model.AsyncPeriodicWork;
import hudson.model.TaskListener;
import hudson.model.Computer;
import java.net.URI;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.logging.Level;
import org.apache.commons.lang.StringUtils;
import org.joda.time.DateTime;

@Extension
Expand Down Expand Up @@ -93,6 +105,10 @@ public void registerDeployment(String cloudName, String resourceGroupName, Strin
DeploymentInfo newDeploymentToClean = new DeploymentInfo(cloudName, resourceGroupName, deploymentName, maxDeleteAttempts);
deploymentsToClean.add(newDeploymentToClean);
}

/**
 * Creates a fresh deployment tag.
 *
 * @return a new {@link AzureUtil.DeploymentTag} built with its no-argument constructor
 */
public AzureUtil.DeploymentTag getDeploymentTag() {
    final AzureUtil.DeploymentTag freshTag = new AzureUtil.DeploymentTag();
    return freshTag;
}
}

public void cleanDeployments() {
Expand Down Expand Up @@ -185,7 +201,121 @@ else if(state.equalsIgnoreCase("succeeded") && diffTimeInMinutes > successTimeou
}
LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanDeployments: Done cleaning deployments");
}


/* There are some edge cases where we might lose track of the provisioned resources:
1. the process stops right after we start provisioning
2. some Azure error blocks us from deleting the resource
This method will look into the resource group and remove all resources that have our tag and are not accounted for.
*/
public void cleanLeakedResources() {
    final Jenkins jenkins = Jenkins.getInstance();
    // Without a Jenkins instance there are no clouds to inspect, so there is nothing to do.
    if (jenkins != null) {
        // Sweep every configured Azure VM cloud, each against its own resource group.
        for (AzureVMCloud azureCloud : jenkins.clouds.getAll(AzureVMCloud.class)) {
            cleanLeakedResources(
                    azureCloud.getResourceGroupName(),
                    azureCloud.getServicePrincipal(),
                    azureCloud.name,
                    new DeploymentRegistrar());
        }
    }
}

/**
 * Collects the names of the Azure agents Jenkins currently knows about for one cloud.
 *
 * @param cloudName name of the cloud whose agents should be listed
 * @return names of all {@link AzureVMComputer} instances whose node reports {@code cloudName};
 *         empty when there is no Jenkins instance or no such computer
 */
public List<String> getValidVMs(final String cloudName) {
    final List<String> knownVmNames = new ArrayList<>();
    final Jenkins jenkins = Jenkins.getInstance();
    if (jenkins == null) {
        return knownVmNames;
    }
    for (Computer candidate : jenkins.getComputers()) {
        // Only computers backed by this plugin are of interest.
        if (!(candidate instanceof AzureVMComputer)) {
            continue;
        }
        final AzureVMAgent node = ((AzureVMComputer) candidate).getNode();
        // Skip computers without a node and nodes that belong to another cloud.
        if (node == null || !node.getCloudName().equals(cloudName)) {
            continue;
        }
        knownVmNames.add(candidate.getName());
    }
    return knownVmNames;
}

/**
 * Removes leaked resources from a single resource group.
 * <p>
 * A resource is deleted when all of the following hold: it carries the
 * {@code Constants.AZURE_RESOURCES_TAG_NAME} tag, the tag produced by {@code deploymentRegistrar}
 * {@code matches} that tag value, its name does not contain the name of any VM returned by
 * {@link #getValidVMs(String)}, and it is neither a storage account nor a virtual network.
 * Resources are deleted in the order: virtual machines, network interfaces, IP addresses, everything else.
 * For a virtual machine the OS disk blob is removed after the VM itself has been deleted.
 * <p>
 * Failures are logged and never propagated; a failure on one resource does not stop the remaining deletions.
 *
 * @param resourceGroup       name of the resource group to inspect
 * @param servicePrincipal    credentials used to obtain the Azure client
 * @param cloudName           name of the cloud whose known agents must be preserved
 * @param deploymentRegistrar source of the deployment tag that resource tags are compared against
 */
public void cleanLeakedResources(
        final String resourceGroup,
        final ServicePrincipal servicePrincipal,
        final String cloudName,
        final DeploymentRegistrar deploymentRegistrar) {
    try {
        final List<String> validVMs = getValidVMs(cloudName);
        final Azure azureClient = TokenCache.getInstance(servicePrincipal).getAzureClient();
        // can't use listByTag because for some reason that method strips all the tags from the outputted resources (https://github.com/Azure/azure-sdk-for-java/issues/1436)
        final PagedList<GenericResource> resources = azureClient.genericResources().listByGroup(resourceGroup);

        // PriorityQueue rejects an initial capacity below 1, so an empty resource group must not be passed through as 0.
        final int initialCapacity = Math.max(1, resources.size());
        final PriorityQueue<GenericResource> resourcesMarkedForDeletion = new PriorityQueue<>(initialCapacity, new Comparator<GenericResource>() {
            @Override
            public int compare(GenericResource o1, GenericResource o2) {
                return Integer.compare(getPriority(o1), getPriority(o2));
            }

            // Lower value = deleted earlier: VMs first, then their NICs, then IP addresses, then anything else.
            private int getPriority(final GenericResource resource) {
                final String type = resource.type();
                if (StringUtils.containsIgnoreCase(type, "virtualMachine")) {
                    return 1;
                }
                if (StringUtils.containsIgnoreCase(type, "networkInterface")) {
                    return 2;
                }
                if (StringUtils.containsIgnoreCase(type, "IPAddress")) {
                    return 3;
                }
                return 4;
            }
        });

        for (GenericResource resource : resources) {
            final Map<String, String> tags = resource.tags();
            // Defensive: a resource without a tag map is treated like a resource without our tag.
            if (tags == null
                    || !tags.containsKey(Constants.AZURE_RESOURCES_TAG_NAME)
                    || !deploymentRegistrar.getDeploymentTag().matches(new AzureUtil.DeploymentTag(tags.get(Constants.AZURE_RESOURCES_TAG_NAME)))) {
                continue;
            }
            // Anything whose name contains the name of an agent Jenkins still knows about is kept.
            boolean shouldSkipDeletion = false;
            for (String validVM : validVMs) {
                if (resource.name().contains(validVM)) {
                    shouldSkipDeletion = true;
                    break;
                }
            }
            // we're not removing storage accounts or networks - someone else might be using them
            if (shouldSkipDeletion
                    || StringUtils.containsIgnoreCase(resource.type(), "StorageAccounts")
                    || StringUtils.containsIgnoreCase(resource.type(), "virtualNetworks")) {
                continue;
            }
            resourcesMarkedForDeletion.add(resource);
        }

        while (!resourcesMarkedForDeletion.isEmpty()) {
            try {
                final GenericResource resource = resourcesMarkedForDeletion.poll();
                if (resource == null) {
                    continue;
                }

                URI osDiskURI = null;
                if (StringUtils.containsIgnoreCase(resource.type(), "virtualMachine")) {
                    // The VHD location has to be read before the VM is deleted.
                    final String osDiskVhdUri = azureClient.virtualMachines().getById(resource.id()).osDiskVhdUri();
                    // If no VHD URI is reported there is no blob to remove afterwards, but the VM itself
                    // must still be deleted; new URI(null) would throw and leave the VM behind forever.
                    if (StringUtils.isNotBlank(osDiskVhdUri)) {
                        osDiskURI = new URI(osDiskVhdUri);
                    }
                }

                LOGGER.log(Level.INFO, "cleanLeakedResources: deleting {0} from resource group {1}", new Object[]{resource.name(), resourceGroup});
                azureClient.genericResources().deleteById(resource.id());
                if (osDiskURI != null) {
                    AzureVMManagementServiceDelegate.removeStorageBlob(azureClient, osDiskURI, resourceGroup);
                }
            } catch (Exception e) {
                // Best effort: log and carry on with the next resource.
                LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanLeakedResources: failed to clean resource ", e);
            }
        }
    } catch (Exception e) {
        // No need to throw exception back, just log and move on.
        LOGGER.log(Level.INFO, "AzureVMAgentCleanUpTask: cleanLeakedResources: failed to clean leaked resources ", e);
    }
}

/** Runs the VM clean-up with a newly created {@link ExecutionEngine}. */
private void cleanVMs() {
    final ExecutionEngine engine = new ExecutionEngine();
    cleanVMs(engine);
}
Expand Down Expand Up @@ -293,6 +423,8 @@ public void execute(TaskListener arg0) throws InterruptedException {
cleanVMs();
// Clean up the deployments
cleanDeployments();

cleanLeakedResources();
}

@Override
Expand Down
Expand Up @@ -203,6 +203,8 @@ public static AzureVMDeploymentInfo createDeployment(final AzureVMAgentTemplate

ObjectNode.class.cast(tmp.get("variables")).put("vmName", vmBaseName);
ObjectNode.class.cast(tmp.get("variables")).put("location", locationName);
ObjectNode.class.cast(tmp.get("variables")).put("jenkinsTag", Constants.AZURE_JENKINS_TAG_VALUE);
ObjectNode.class.cast(tmp.get("variables")).put("resourceTag", deploymentRegistrar.getDeploymentTag().get());

if (StringUtils.isNotBlank(template.getImagePublisher())) {
ObjectNode.class.cast(tmp.get("variables")).put("imagePublisher", template.getImagePublisher());
Expand Down Expand Up @@ -836,25 +838,7 @@ public static void terminateVirtualMachine(

// Now remove the disks
for (URI diskUri : diskUrisToRemove) {
// Obtain container, storage account, and blob name
String storageAccountName = diskUri.getHost().split("\\.")[0];
String containerName = PathUtility.getContainerNameFromUri(diskUri, false);
String blobName = PathUtility.getBlobNameFromURI(diskUri, false);

LOGGER.log(Level.INFO, "AzureVMManagementServiceDelegate: terminateVirtualMachine: Removing disk blob {0}, in container {1} of storage account {2}",
new Object[]{blobName, containerName, storageAccountName});

List<StorageAccountKey> storageKeys = azureClient.storageAccounts()
.getByGroup(resourceGroupName, storageAccountName)
.getKeys();
if (!storageKeys.isEmpty()) {
String storageAccountKey = storageKeys.get(0).value();
CloudStorageAccount account = new CloudStorageAccount(new StorageCredentialsAccountAndKey(storageAccountName, storageAccountKey));
CloudBlobClient blobClient = account.createCloudBlobClient();
blobClient.getContainerReference(containerName)
.getBlockBlobReference(blobName)
.deleteIfExists();
}
AzureVMManagementServiceDelegate.removeStorageBlob(azureClient, diskUri, resourceGroupName);
}
}
} catch (Exception e) {
Expand Down Expand Up @@ -882,6 +866,28 @@ public Void call() throws Exception {
}
}

/**
 * Deletes the blob addressed by {@code blobURI} if it exists.
 * <p>
 * The storage account name is taken from the first dot-separated label of the URI host; the container
 * and blob names are extracted from the URI via {@code PathUtility}. The first key returned for the
 * storage account is used to authenticate. When the account returns no keys nothing is deleted.
 *
 * @param azureClient       client used to look up the storage account keys
 * @param blobURI           URI of the blob to remove
 * @param resourceGroupName resource group that holds the storage account
 * @throws Exception if the key lookup or the storage operation fails
 */
public static void removeStorageBlob(final Azure azureClient, final URI blobURI, final String resourceGroupName) throws Exception {
    final String accountName = blobURI.getHost().split("\\.")[0];
    final String container = PathUtility.getContainerNameFromUri(blobURI, false);
    final String blob = PathUtility.getBlobNameFromURI(blobURI, false);

    LOGGER.log(Level.INFO, "removeStorageBlob: Removing disk blob {0}, in container {1} of storage account {2}",
            new Object[]{blob, container, accountName});

    final List<StorageAccountKey> accountKeys = azureClient.storageAccounts()
            .getByGroup(resourceGroupName, accountName)
            .getKeys();
    // Without a key there is no way to authenticate against the storage account.
    if (accountKeys.isEmpty()) {
        return;
    }

    final String primaryKey = accountKeys.get(0).value();
    final StorageCredentialsAccountAndKey credentials = new StorageCredentialsAccountAndKey(accountName, primaryKey);
    final CloudBlobClient blobClient = new CloudStorageAccount(credentials).createCloudBlobClient();
    blobClient.getContainerReference(container)
            .getBlockBlobReference(blob)
            .deleteIfExists();
}

/**
* Remove the IP name
*
Expand Down
61 changes: 61 additions & 0 deletions src/main/java/com/microsoft/azure/vmagent/util/AzureUtil.java
Expand Up @@ -396,4 +396,65 @@ public static boolean isValidTimeOut(String deploymentTimeout) {
return false;
return true;
}

/**
 * Immutable tag made of an instance id and a timestamp, rendered as {@code "<id>/<timestamp>"}.
 */
public static class DeploymentTag {

    private final String instanceId;
    private final long timestamp;

    /** Builds a tag for the current Jenkins instance, stamped with the current time in seconds. */
    public DeploymentTag() {
        this(System.currentTimeMillis() / 1000);
    }

    /**
     * Parses a tag from its string form.
     * <p>
     * Expects a string in this format: {@code "<id>/<timestamp>"}.
     * If the id is omitted it is replaced with an empty string.
     * If the timestamp is omitted, is not a number, or is negative, it is replaced with 0.
     *
     * @param tag the string form of the tag; may be {@code null} or empty
     */
    public DeploymentTag(final String tag) {
        String parsedId = "";
        long parsedTimestamp = 0;

        final boolean hasContent = tag != null && !tag.isEmpty();
        // split can return an empty array (e.g. for "/"), hence the length checks below.
        final String[] segments = hasContent ? tag.split("/") : new String[0];
        if (segments.length > 0) {
            parsedId = segments[0];
        }
        if (segments.length > 1) {
            try {
                parsedTimestamp = Math.max(0L, Long.parseLong(segments[1]));
            } catch (Exception e) {
                parsedTimestamp = 0;
            }
        }

        this.instanceId = parsedId;
        this.timestamp = parsedTimestamp;
    }

    /**
     * Builds a tag for the current Jenkins instance with an explicit timestamp.
     *
     * @param timestamp the timestamp to store in the tag
     */
    protected DeploymentTag(long timestamp) {
        this.instanceId = currentInstanceId();
        this.timestamp = timestamp;
    }

    /** @return the tag rendered as {@code "<id>/<timestamp>"} */
    public String get() {
        return instanceId + "/" + timestamp;
    }

    /**
     * Two tags match if they have the same instance id and the timestamp difference is greater
     * than {@code Constants.AZURE_DEPLOYMENT_TIMEOUT}.
     *
     * @param rhs the tag to compare with
     * @return {@code true} when the tags match
     */
    public boolean matches(final DeploymentTag rhs) {
        return matches(rhs, Constants.AZURE_DEPLOYMENT_TIMEOUT);
    }

    /**
     * Two tags match if they have the same instance id and the absolute timestamp difference is
     * strictly greater than {@code timeout}.
     *
     * @param rhs     the tag to compare with
     * @param timeout minimum timestamp distance, in the same unit as the timestamps
     * @return {@code true} when the tags match
     */
    public boolean matches(final DeploymentTag rhs, long timeout) {
        final boolean sameInstance = instanceId.equals(rhs.instanceId);
        return sameInstance && Math.abs(timestamp - rhs.timestamp) > timeout;
    }

    // Reads the legacy instance id from Jenkins; any failure falls back to a fixed placeholder id.
    private static String currentInstanceId() {
        try {
            return Jenkins.getInstance().getLegacyInstanceId();
        } catch (Exception e) {
            return "AzureJenkins000";
        }
    }
}
}
8 changes: 8 additions & 0 deletions src/main/java/com/microsoft/azure/vmagent/util/Constants.java
Expand Up @@ -139,4 +139,12 @@ public class Constants {
public static final String DEFAULT_RESOURCE_GROUP_PATTERN = "^[a-zA-Z0-9][a-zA-Z\\-_0-9]{0,62}[a-zA-Z0-9]$";

public static final HttpLoggingInterceptor.Level DEFAULT_AZURE_SDK_LOGGING_LEVEL = HttpLoggingInterceptor.Level.NONE;

// NOTE(review): presumably the tag key paired with AZURE_JENKINS_TAG_VALUE in the deployment template - confirm against the template.
public static final String AZURE_JENKINS_TAG_NAME = "JenkinsManagedTag";

// Value passed to the deployment template as the "jenkinsTag" variable.
public static final String AZURE_JENKINS_TAG_VALUE = "ManagedByAzureVMAgents";

// Tag key under which the "<instance id>/<timestamp>" deployment tag is looked up when cleaning leaked resources.
public static final String AZURE_RESOURCES_TAG_NAME = "JenkinsResourceTag";

// Default timestamp distance used by AzureUtil.DeploymentTag.matches(DeploymentTag).
public static final long AZURE_DEPLOYMENT_TIMEOUT = 2 * 60 * 60;//in seconds
}

0 comments on commit 2f70f8d

Please sign in to comment.