Skip to content

Commit

Permalink
[FIXED JENKINS-35160] - Job deletion: Wait up to 15 seconds for inter…
Browse files Browse the repository at this point in the history
…rupted builds to complete (#2789)

* [FIXED JENKINS-35160] Wait up to 15 seconds for interrupted builds to complete

- Also now aware of concurrent builds

* [JENKINS-35160] Tests are good, they catch bugs

* [JENKINS-35160] We should do the interrupt for any Item not just Jobs

* [JENKINS-35160] s/DeleteBlocker/ItemDeletion/g

Left over references before I settled on a better name

* [JENKINS-35160] Switch to Failure for better HTML rendering

* [JENKINS-35160] Align the i18n key with owning class
  • Loading branch information
stephenc authored and oleg-nenashev committed Apr 14, 2017
1 parent d48e1d6 commit 52a1a10
Show file tree
Hide file tree
Showing 9 changed files with 355 additions and 29 deletions.
107 changes: 104 additions & 3 deletions core/src/main/java/hudson/model/AbstractItem.java
Expand Up @@ -33,6 +33,8 @@
import hudson.cli.declarative.CLIResolver;
import hudson.model.listeners.ItemListener;
import hudson.model.listeners.SaveableListener;
import hudson.model.queue.Tasks;
import hudson.model.queue.WorkUnit;
import hudson.security.AccessControlled;
import hudson.security.Permission;
import hudson.security.ACL;
Expand All @@ -41,8 +43,13 @@
import hudson.util.AtomicFileWriter;
import hudson.util.IOUtils;
import hudson.util.Secret;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import jenkins.model.DirectlyModifiableTopLevelItemGroup;
import jenkins.model.Jenkins;
import jenkins.model.queue.ItemDeletion;
import jenkins.security.NotReallyRoleSensitiveCallable;
import jenkins.util.xml.XMLUtils;

Expand Down Expand Up @@ -79,6 +86,7 @@
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;

import static hudson.model.queue.Executables.getParentOf;
import static javax.servlet.http.HttpServletResponse.SC_BAD_REQUEST;
import org.apache.commons.io.FileUtils;
import org.kohsuke.accmod.Restricted;
Expand Down Expand Up @@ -577,9 +585,102 @@ public void delete( StaplerRequest req, StaplerResponse rsp ) throws IOException
*/
public void delete() throws IOException, InterruptedException {
checkPermission(DELETE);
synchronized (this) { // could just make performDelete synchronized but overriders might not honor that
performDelete();
} // JENKINS-19446: leave synch block, but JENKINS-22001: still notify synchronously
boolean responsibleForAbortingBuilds = !ItemDeletion.contains(this);
boolean ownsRegistration = ItemDeletion.register(this);
if (!ownsRegistration && ItemDeletion.isRegistered(this)) {
// we are not the owning thread and somebody else is concurrently deleting this exact item
throw new Failure(Messages.AbstractItem_BeingDeleted(getPronoun()));
}
try {
// if a build is in progress. Cancel it.
if (responsibleForAbortingBuilds || ownsRegistration) {
Queue queue = Queue.getInstance();
if (this instanceof Queue.Task) {
// clear any items in the queue so they do not get picked up
queue.cancel((Queue.Task) this);
}
// now cancel any child items - this happens after ItemDeletion registration, so we can use a snapshot
for (Queue.Item i : queue.getItems()) {
Item item = Tasks.getItemOf(i.task);
while (item != null) {
if (item == this) {
queue.cancel(i);
break;
}
if (item.getParent() instanceof Item) {
item = (Item) item.getParent();
} else {
break;
}
}
}
// interrupt any builds in progress (and this should be a recursive test so that folders do not pay
// the 15 second delay for every child item). This happens after queue cancellation, so will be
// a complete set of builds in flight
Map<Executor, Queue.Executable> buildsInProgress = new LinkedHashMap<>();
for (Computer c : Jenkins.getInstance().getComputers()) {
for (Executor e : c.getAllExecutors()) {
WorkUnit workUnit = e.getCurrentWorkUnit();
if (workUnit != null) {
Item item = Tasks.getItemOf(getParentOf(workUnit.getExecutable()));
if (item != null) {
while (item != null) {
if (item == this) {
buildsInProgress.put(e, e.getCurrentExecutable());
e.interrupt(Result.ABORTED);
break;
}
if (item.getParent() instanceof Item) {
item = (Item) item.getParent();
} else {
break;
}
}
}
}
}
}
if (!buildsInProgress.isEmpty()) {
// give them 15 seconds or so to respond to the interrupt
long expiration = System.nanoTime() + TimeUnit.SECONDS.toNanos(15);
// comparison with executor.getCurrentExecutable() == computation currently should always be true
// as we no longer recycle Executors, but safer to future-proof in case we ever revisit recycling
while (!buildsInProgress.isEmpty() && expiration - System.nanoTime() > 0L) {
// we know that ItemDeletion will prevent any new builds in the queue
// ItemDeletion happens-before Queue.cancel so we know that the Queue will stay clear
// Queue.cancel happens-before collecting the buildsInProgress list
// thus buildsInProgress contains the complete set we need to interrupt and wait for
for (Iterator<Map.Entry<Executor, Queue.Executable>> iterator =
buildsInProgress.entrySet().iterator();
iterator.hasNext(); ) {
Map.Entry<Executor, Queue.Executable> entry = iterator.next();
// comparison with executor.getCurrentExecutable() == executable currently should always be
// true as we no longer recycle Executors, but safer to future-proof in case we ever
// revisit recycling.
if (!entry.getKey().isAlive()
|| entry.getValue() != entry.getKey().getCurrentExecutable()) {
iterator.remove();
}
// I don't know why, but we have to keep interrupting
entry.getKey().interrupt(Result.ABORTED);
}
Thread.sleep(50L);
}
if (!buildsInProgress.isEmpty()) {
throw new Failure(Messages.AbstractItem_FailureToStopBuilds(
buildsInProgress.size(), getFullDisplayName()
));
}
}
}
synchronized (this) { // could just make performDelete synchronized but overriders might not honor that
performDelete();
} // JENKINS-19446: leave synch block, but JENKINS-22001: still notify synchronously
} finally {
if (ownsRegistration) {
ItemDeletion.deregister(this);
}
}
getParent().onDeleted(AbstractItem.this);
Jenkins.getInstance().rebuildDependencyGraphAsync();
}
Expand Down
13 changes: 13 additions & 0 deletions core/src/main/java/hudson/model/Computer.java
Expand Up @@ -971,6 +971,19 @@ public List<OneOffExecutor> getOneOffExecutors() {
return new ArrayList<OneOffExecutor>(oneOffExecutors);
}

/**
* Gets the read-only snapshot view of all {@link Executor} instances including {@linkplain OneOffExecutor}s.
*
* @return the read-only snapshot view of all {@link Executor} instances including {@linkplain OneOffExecutor}s.
* @since TODO
*/
public List<Executor> getAllExecutors() {
List<Executor> result = new ArrayList<>(executors.size() + oneOffExecutors.size());
result.addAll(executors);
result.addAll(oneOffExecutors);
return result;
}

/**
* Used to render the list of executors.
* @return a snapshot of the executor display information
Expand Down
7 changes: 1 addition & 6 deletions core/src/main/java/hudson/model/Executor.java
Expand Up @@ -941,12 +941,7 @@ public static Executor of(Executable executable) {
return null;
}
for (Computer computer : jenkins.getComputers()) {
for (Executor executor : computer.getExecutors()) {
if (executor.getCurrentExecutable() == executable) {
return executor;
}
}
for (Executor executor : computer.getOneOffExecutors()) {
for (Executor executor : computer.getAllExecutors()) {
if (executor.getCurrentExecutable() == executable) {
return executor;
}
Expand Down
14 changes: 0 additions & 14 deletions core/src/main/java/hudson/model/Job.java
Expand Up @@ -262,20 +262,6 @@ public void onCopied(Item src, Item item) {
}
}

@Override
protected void performDelete() throws IOException, InterruptedException {
// if a build is in progress. Cancel it.
RunT lb = getLastBuild();
if (lb != null) {
Executor e = lb.getExecutor();
if (e != null) {
e.interrupt();
// should we block until the build is cancelled?
}
}
super.performDelete();
}

/*package*/ TextFile getNextBuildNumberFile() {
return new TextFile(new File(this.getRootDir(), "nextBuildNumber"));
}
Expand Down
7 changes: 1 addition & 6 deletions core/src/main/java/hudson/model/RestartListener.java
Expand Up @@ -55,12 +55,7 @@ public static class Default extends RestartListener {
public boolean isReadyToRestart() throws IOException, InterruptedException {
for (Computer c : Jenkins.getInstance().getComputers()) {
if (c.isOnline()) {
for (Executor e : c.getExecutors()) {
if (blocksRestart(e)) {
return false;
}
}
for (Executor e : c.getOneOffExecutors()) {
for (Executor e : c.getAllExecutors()) {
if (blocksRestart(e)) {
return false;
}
Expand Down
25 changes: 25 additions & 0 deletions core/src/main/java/hudson/model/queue/Tasks.java
Expand Up @@ -23,9 +23,11 @@
*/
package hudson.model.queue;

import hudson.model.Queue;
import hudson.model.Queue.Item;
import hudson.model.Queue.Task;
import hudson.security.ACL;
import javax.annotation.CheckForNull;
import org.acegisecurity.Authentication;

import java.util.Collection;
Expand All @@ -34,6 +36,8 @@
import jenkins.security.QueueItemAuthenticator;
import jenkins.security.QueueItemAuthenticatorConfiguration;

import static hudson.model.queue.Executables.getParentOf;

/**
* Convenience methods around {@link Task} and {@link SubTask}.
*
Expand Down Expand Up @@ -66,6 +70,27 @@ public static Object getSameNodeConstraintOf(SubTask t) {
}
}

/**
* Gets the {@link hudson.model.Item} most closely associated with the supplied {@link SubTask}.
* @param t the {@link SubTask}.
* @return the {@link hudson.model.Item} associated with the {@link SubTask} or {@code null} if this
* {@link SubTask} is not associated with an {@link hudson.model.Item}
* @since TODO
*/
@CheckForNull
public static hudson.model.Item getItemOf(@Nonnull SubTask t) {
// TODO move to default method on SubTask once code level is Java 8
Queue.Task p = getOwnerTaskOf(t);
while (!(p instanceof hudson.model.Item)) {
Queue.Task o = getOwnerTaskOf(p);
if (o == p) {
break;
}
p = o;
}
return p instanceof hudson.model.Item ? (hudson.model.Item)p : null;
}

/**
* Helper method to safely invoke {@link Task#getDefaultAuthentication()} on classes that may come
* from plugins compiled against an earlier version of Jenkins.
Expand Down

1 comment on commit 52a1a10

@oleg-nenashev
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.