Skip to content

Commit

Permalink
Merge pull request #2026 from pjdarton/fix_jenkins_15331
Browse files Browse the repository at this point in the history
[FIX JENKINS-15331] Windows file locking workaround
  • Loading branch information
daniel-beck committed May 3, 2016
2 parents 49a65c2 + 879f055 commit 240405d
Show file tree
Hide file tree
Showing 2 changed files with 355 additions and 14 deletions.
239 changes: 225 additions & 14 deletions core/src/main/java/hudson/Util.java
Expand Up @@ -43,6 +43,9 @@
import org.apache.tools.ant.taskdefs.Copy;
import org.apache.tools.ant.types.FileSet;

import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;

import jnr.posix.FileStat;
import jnr.posix.POSIX;

Expand Down Expand Up @@ -214,24 +217,54 @@ public static String loadFile(@Nonnull File logfile, @Nonnull Charset charset) t
/**
* Deletes the contents of the given directory (but not the directory itself)
* recursively.
* It does not take no for an answer - if necessary, it will have multiple
* attempts at deleting things.
*
* @throws IOException
* if the operation fails.
*/
public static void deleteContentsRecursive(@Nonnull File file) throws IOException {
File[] files = file.listFiles();
if(files==null)
return; // the directory didn't exist in the first place
for (File child : files)
deleteRecursive(child);
for( int numberOfAttempts=1 ; ; numberOfAttempts++ ) {
try {
tryOnceDeleteContentsRecursive(file);
break; // success
} catch (IOException ex) {
boolean threadWasInterrupted = pauseBetweenDeletes(numberOfAttempts);
if( numberOfAttempts>= DELETION_MAX || threadWasInterrupted)
throw new IOException(deleteFailExceptionMessage(file, numberOfAttempts, threadWasInterrupted), ex);
}
}
}

/**
* Deletes this file (and does not take no for an answer).
* If necessary, it will have multiple attempts at deleting things.
*
* @param f a file to delete
* @throws IOException if it exists but could not be successfully deleted
*/
public static void deleteFile(@Nonnull File f) throws IOException {
for( int numberOfAttempts=1 ; ; numberOfAttempts++ ) {
try {
tryOnceDeleteFile(f);
break; // success
} catch (IOException ex) {
boolean threadWasInterrupted = pauseBetweenDeletes(numberOfAttempts);
if( numberOfAttempts>= DELETION_MAX || threadWasInterrupted)
throw new IOException(deleteFailExceptionMessage(f, numberOfAttempts, threadWasInterrupted), ex);
}
}
}

/**
* Deletes this file, working around most problems which might make
* this difficult.
*
* @param f
* What to delete. If a directory, it'll need to be empty.
* @throws IOException if it exists but could not be successfully deleted
*/
private static void tryOnceDeleteFile(File f) throws IOException {
if (!f.delete()) {
if(!f.exists())
// we are trying to delete a file that no longer exists, so this is not an error
Expand Down Expand Up @@ -300,19 +333,144 @@ private static void makeWritable(@Nonnull File f) {

}

/**
* Deletes the given directory (including its contents) recursively.
* It does not take no for an answer - if necessary, it will have multiple
* attempts at deleting things.
*
* @throws IOException
* if the operation fails.
*/
public static void deleteRecursive(@Nonnull File dir) throws IOException {
for( int numberOfAttempts=1 ; ; numberOfAttempts++ ) {
try {
tryOnceDeleteRecursive(dir);
break; // success
} catch (IOException ex) {
boolean threadWasInterrupted = pauseBetweenDeletes(numberOfAttempts);
if( numberOfAttempts>= DELETION_MAX || threadWasInterrupted)
throw new IOException(deleteFailExceptionMessage(dir, numberOfAttempts, threadWasInterrupted), ex);
}
}
}

/**
* Deletes a file or folder, throwing the first exception encountered, but
* having a go at deleting everything. i.e. it does not <em>stop</em> on the
* first exception, but tries (to delete) everything once.
*
* @param dir
* What to delete. If a directory, the contents will be deleted
* too.
* @throws The first exception encountered.
*/
private static void tryOnceDeleteRecursive(File dir) throws IOException {
if(!isSymlink(dir))
deleteContentsRecursive(dir);
tryOnceDeleteContentsRecursive(dir);
tryOnceDeleteFile(dir);
}

/**
* Deletes a folder's contents, throwing the first exception encountered,
* but having a go at deleting everything. i.e. it does not <em>stop</em>
* on the first exception, but tries (to delete) everything once.
*
* @param directory
* The directory whose contents will be deleted.
* @throws The first exception encountered.
*/
private static void tryOnceDeleteContentsRecursive(File directory) throws IOException {
File[] directoryContents = directory.listFiles();
if(directoryContents==null)
return; // the directory didn't exist in the first place
IOException firstCaught = null;
for (File child : directoryContents) {
try {
tryOnceDeleteRecursive(child);
} catch (IOException justCaught) {
if( firstCaught==null) {
firstCaught = justCaught;
}
}
}
if( firstCaught!=null )
throw firstCaught;
}

/**
* Pauses between delete attempts, and says if it's ok to try again.
* This does not wait if the wait time is zero or if we have tried
* too many times already.
* <p>
* See {@link #WAIT_BETWEEN_DELETION_RETRIES} for details of
* the pause duration.<br/>
* See {@link #GC_AFTER_FAILED_DELETE} for when {@link System#gc()} is called.
*
* @return false if it is ok to continue trying to delete things, true if
* we were interrupted (and should stop now).
*/
private static boolean pauseBetweenDeletes(int numberOfAttemptsSoFar) {
long delayInMs;
if( numberOfAttemptsSoFar>=DELETION_MAX ) return false;
/* If the Jenkins process had the file open earlier, and it has not
* closed it then Windows won't let us delete it until the Java object
* with the open stream is Garbage Collected, which can result in builds
* failing due to "file in use" on Windows despite working perfectly
* well on other OSs. */
if (GC_AFTER_FAILED_DELETE) {
System.gc();
}
if (WAIT_BETWEEN_DELETION_RETRIES>=0) {
delayInMs = WAIT_BETWEEN_DELETION_RETRIES;
} else {
delayInMs = -numberOfAttemptsSoFar*WAIT_BETWEEN_DELETION_RETRIES;
}
if (delayInMs<=0)
return Thread.interrupted();
try {
deleteFile(dir);
} catch (IOException e) {
// if some of the child directories are big, it might take long enough to delete that
// it allows others to create new files, causing problemsl ike JENKINS-10113
// so give it one more attempt before we give up.
if(!isSymlink(dir))
deleteContentsRecursive(dir);
deleteFile(dir);
Thread.sleep(delayInMs);
return false;
} catch (InterruptedException e) {
return true;
}
}

/**
* Creates a "couldn't delete file" message that explains how hard we tried.
* See {@link #DELETION_MAX}, {@link #WAIT_BETWEEN_DELETION_RETRIES}
* and {@link #GC_AFTER_FAILED_DELETE} for more details.
*/
private static String deleteFailExceptionMessage(File whatWeWereTryingToRemove, int retryCount, boolean wasInterrupted) {
StringBuilder sb = new StringBuilder();
sb.append("Unable to delete '");
sb.append(whatWeWereTryingToRemove);
sb.append("'. Tried ");
sb.append(retryCount);
sb.append(" time");
if( retryCount!=1 ) sb.append('s');
if( DELETION_MAX>1 ) {
sb.append(" (of a maximum of ");
sb.append(DELETION_MAX);
sb.append(')');
if( GC_AFTER_FAILED_DELETE )
sb.append(" garbage-collecting");
if( WAIT_BETWEEN_DELETION_RETRIES!=0 && GC_AFTER_FAILED_DELETE )
sb.append(" and");
if( WAIT_BETWEEN_DELETION_RETRIES!=0 ) {
sb.append(" waiting ");
sb.append(getTimeSpanString(Math.abs(WAIT_BETWEEN_DELETION_RETRIES)));
if( WAIT_BETWEEN_DELETION_RETRIES<0 ) {
sb.append("-");
sb.append(getTimeSpanString(Math.abs(WAIT_BETWEEN_DELETION_RETRIES)*DELETION_MAX));
}
}
if( WAIT_BETWEEN_DELETION_RETRIES!=0 || GC_AFTER_FAILED_DELETE)
sb.append(" between attempts");
}
if( wasInterrupted )
sb.append(". The delete operation was interrupted before it completed successfully");
sb.append('.');
return sb.toString();
}

/*
Expand Down Expand Up @@ -1465,4 +1623,57 @@ public static Properties loadProperties(@Nonnull String properties) throws IOExc
public static boolean NO_SYMLINK = Boolean.getBoolean(Util.class.getName()+".noSymLink");

public static boolean SYMLINK_ESCAPEHATCH = Boolean.getBoolean(Util.class.getName()+".symlinkEscapeHatch");

/**
* The number of times we will attempt to delete files/directory trees
* before giving up and throwing an exception.<br/>
* Specifying a value less than 1 is invalid and will be treated as if
* a value of 1 (i.e. one attempt, no retries) was specified.
* <p>
* e.g. if some of the child directories are big, it might take long enough
* to delete that it allows others to create new files in the directory we
* are trying to empty, causing problems like JENKINS-10113.
* Or, if we're on Windows, then deletes can fail for transient reasons
* regardless of external activity; see JENKINS-15331.
* Whatever the reason, this allows us to do multiple attempts before we
* give up, thus improving build reliability.
*/
@Restricted(value = NoExternalUse.class)
static int DELETION_MAX = Math.max(1, Integer.getInteger(Util.class.getName() + ".maxFileDeletionRetries", 3).intValue());

/**
* The time (in milliseconds) that we will wait between attempts to
* delete files when retrying.<br>
* This has no effect unless {@link #DELETION_MAX} is non-zero.
* <p>
* If zero, we will not delay between attempts.<br>
* If negative, we will wait an (linearly) increasing multiple of this value
* between attempts.
*/
@Restricted(value = NoExternalUse.class)
static int WAIT_BETWEEN_DELETION_RETRIES = Integer.getInteger(Util.class.getName() + ".deletionRetryWait", 100).intValue();

/**
* If this flag is set to true then we will request a garbage collection
* after a deletion failure before we next retry the delete.<br>
* It defaults to <code>false</code> and is ignored unless
* {@link #DELETION_MAX} is greater than 1.
* <p>
* Setting this flag to true <i>may</i> resolve some problems on Windows,
* and also for directory trees residing on an NFS share, <b>but</b> it can
* have a negative impact on performance and may have no effect at all (GC
* behavior is JVM-specific).
* <p>
* Warning: This should only ever be used if you find that your builds are
* failing because Jenkins is unable to delete files, that this failure is
* because Jenkins itself has those files locked "open", and even then it
* should only be used on slaves with relatively few executors (because the
* garbage collection can impact the performance of all job executors on
* that slave).<br/>
* i.e. Setting this flag is a act of last resort - it is <em>not</em>
* recommended, and should not be used on the main Jenkins server
* unless you can tolerate the performance impact.
*/
@Restricted(value = NoExternalUse.class)
static boolean GC_AFTER_FAILED_DELETE = Boolean.getBoolean(Util.class.getName() + ".performGCOnFailedDelete");
}

0 comments on commit 240405d

Please sign in to comment.