Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #14 from jglick/ProcessLiveness-JENKINS-32264
[JENKINS-32264] Fix process liveness check for non-Linux platforms
  • Loading branch information
jglick committed Jan 19, 2016
2 parents b27495f + 68bee24 commit 89b0bb9
Show file tree
Hide file tree
Showing 2 changed files with 58 additions and 7 deletions.
Expand Up @@ -139,6 +139,7 @@ private synchronized int pid(FilePath ws) throws IOException, InterruptedExcepti
if (_pid > 0 && !ProcessLiveness.isAlive(workspace.getChannel(), _pid, launcher)) {
// it looks like the process has disappeared. one last check to make sure it's not a result of a race condition,
// then if we still don't have the exit code, use fake exit code to distinguish from 0 (success) and 1+ (observed failure)
// TODO would be better to have exitStatus accept a TaskListener so we could print an informative message
status = super.exitStatus(workspace, launcher);
if (status == null) {
status = -1;
Expand Down
Expand Up @@ -24,17 +24,27 @@

package org.jenkinsci.plugins.durabletask;

import com.sun.jna.Library;
import com.sun.jna.Native;
import hudson.Launcher;
import hudson.remoting.VirtualChannel;
import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import jenkins.security.MasterToSlaveCallable;

/**
* Utility class to track whether a given process is still alive.
*/
final class ProcessLiveness {

private static final Logger LOGGER = Logger.getLogger(ProcessLiveness.class.getName());

private static final Map<Launcher,Boolean> workingLaunchers = Collections.synchronizedMap(new WeakHashMap<Launcher,Boolean>());

/**
* Determines whether a process is believed to still be alive.
* @param channel a connection to the machine on which it would be running
Expand All @@ -43,8 +53,34 @@ final class ProcessLiveness {
* @return true if it is apparently still alive (or we cannot tell); false if it is believed to not be running
*/
public static boolean isAlive(VirtualChannel channel, int pid, Launcher launcher) throws IOException, InterruptedException {
    Boolean checkWorks = workingLaunchers.get(launcher);
    if (checkWorks == null) {
        // First time this launcher is seen: probe an unlikely PID (9999).
        // If the probe claims that PID is alive, the liveness logic is treated as unreliable here.
        checkWorks = !_isAlive(channel, 9999, launcher);
        workingLaunchers.put(launcher, checkWorks);
        Object[] logArgs = {launcher, channel};
        if (checkWorks) {
            LOGGER.log(Level.FINE, "{0} on {1} appears to be working", logArgs);
        } else {
            // TODO Channel.toString should report slave name, but would be nice to also report OS
            LOGGER.log(Level.WARNING, "{0} on {1} does not seem able to determine whether processes are alive or not", logArgs);
        }
    }
    // When the check is known not to work, report the process as alive ("cannot tell");
    // otherwise perform the real check for the requested PID.
    return !checkWorks || _isAlive(channel, pid, launcher);
}

private static boolean _isAlive(VirtualChannel channel, int pid, Launcher launcher) throws IOException, InterruptedException {
if (launcher instanceof Launcher.LocalLauncher || launcher instanceof Launcher.RemoteLauncher) {
return channel.call(new Liveness(pid));
try {
boolean alive = channel.call(new Liveness(pid));
LOGGER.log(Level.FINER, "{0} is alive? {1}", new Object[] {pid, alive});
return alive;
} catch (RuntimeException x) {
LOGGER.log(Level.WARNING, "cannot determine liveness of " + pid, x);
return true;
}
} else {
// Using a special launcher; let it decide how to do this.
// TODO perhaps this should be a method in Launcher, with the following fallback in DecoratedLauncher:
Expand All @@ -58,14 +94,28 @@ private static final class Liveness extends MasterToSlaveCallable<Boolean,Runtim
this.pid = pid;
}
@Override public Boolean call() throws RuntimeException {
File proc = new File("/proc");
if (!proc.isDirectory()) {
// procfs not in use here? Give up.
return true;
// JNR-POSIX does not seem to work on FreeBSD at least, so using JNA instead.
LibC libc = LibC.INSTANCE;
if (libc.getpgid(0) == -1) {
throw new IllegalStateException("getpgid does not seem to work on this platform");
}
return new File(proc, Integer.toString(pid)).isDirectory();
return libc.getpgid(pid) != -1;
}
}
/**
 * JNA mapping of the single C library function used for the liveness check.
 */
private interface LibC extends Library {

    /** Proxy for the native library named {@code "c"}, obtained from JNA. */
    LibC INSTANCE = (LibC) Native.loadLibrary("c", LibC.class);

    /**
     * Looks up the process group ID of a process.
     * As specified in <a href="http://pubs.opengroup.org/onlinepubs/9699919799/functions/getpgid.html">Open Group Base Specifications Issue 7</a>:
     * <blockquote>
     * The getpgid() function shall return the process group ID of the process whose process ID is equal to pid.
     * If pid is equal to 0, getpgid() shall return the process group ID of the calling process.
     * Upon successful completion, getpgid() shall return a process group ID. Otherwise, it shall return (pid_t)-1 and set errno to indicate the error.
     * The getpgid() function shall fail if: […] There is no process with a process ID equal to pid. […]
     * </blockquote>
     * @param pid the process ID to query, or 0 for the calling process
     * @return the process group ID, or -1 on failure (for example when no such process exists)
     */
    int getpgid(int pid);
}

private ProcessLiveness() {}

Expand Down

0 comments on commit 89b0bb9

Please sign in to comment.