Skip to content

Commit

Permalink
Merge pull request #17 from jglick/launch-failure-JENKINS-28400
Browse files Browse the repository at this point in the history
[JENKINS-28400] Better handle failure to start wrapper sh script
  • Loading branch information
jglick committed Feb 25, 2016
2 parents 7f14ad2 + 7b17a50 commit fa1959d
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 4 deletions.
Expand Up @@ -28,6 +28,7 @@
import hudson.Extension;
import hudson.FilePath;
import hudson.Launcher;
import hudson.LauncherDecorator;
import hudson.Platform;
import hudson.Util;
import hudson.model.TaskListener;
Expand All @@ -36,6 +37,8 @@
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import javax.annotation.Nonnull;
import jenkins.model.Jenkins;
import jenkins.security.MasterToSlaveCallable;
Expand All @@ -46,6 +49,11 @@
*/
public final class BourneShellScript extends FileMonitoringTask {

/** Number of times we will show launch diagnostics in a newly encountered workspace before going mute to save resources. */
private static /* not final */ int NOVEL_WORKSPACE_DIAGNOSTICS_COUNT = Integer.getInteger(BourneShellScript.class.getName() + ".NOVEL_WORKSPACE_DIAGNOSTICS_COUNT", 10);
/** Number of seconds we will wait for a controller script to be launched before assuming the launch failed. */
private static /* not final */ int LAUNCH_FAILURE_TIMEOUT = Integer.getInteger(BourneShellScript.class.getName() + ".LAUNCH_FAILURE_TIMEOUT", 15);

private final @Nonnull String script;

@DataBoundConstructor public BourneShellScript(String script) {
Expand All @@ -56,6 +64,17 @@ public String getScript() {
return script;
}

/**
* Set of workspaces which we have already run a process in.
* Copying output from the controller process consumes a Java thread, so we want to avoid it generally.
* But we do it the first few times we run a process in a new workspace, to assist in diagnosis.
* (For example, if we are unable to write to it due to permissions, we want to see that error message.)
* Ideally we would display output the first time a given {@link Launcher} was used in that workspace,
* but this seems impractical since {@link LauncherDecorator#decorate} may be called anew for each process,
* and forcing the resulting {@link Launcher}s to implement {@link Launcher#equals} seems onerous.
*/
private static final Map<FilePath,Integer> encounteredPaths = new WeakHashMap<FilePath,Integer>();

@Override protected FileMonitoringController launchWithCookie(FilePath ws, Launcher launcher, TaskListener listener, EnvVars envVars, String cookieVariable, String cookieValue) throws IOException, InterruptedException {
if (script.isEmpty()) {
listener.getLogger().println("Warning: was asked to run an empty script");
Expand Down Expand Up @@ -92,17 +111,30 @@ public String getScript() {
args.addAll(Arrays.asList("sh", "-c", cmd));
Launcher.ProcStarter ps = launcher.launch().cmds(args).envs(envVars).pwd(ws).quiet(true);
listener.getLogger().println("[" + ws.getRemote().replaceFirst("^.+/", "") + "] Running shell script"); // -x will give details
/* Uncomment for diagnosis (and comment following line) in case wrapper script fails. Otherwise skip since it consumes a thread:
ps.stdout(listener);
*/
ps.readStdout().readStderr(); // TODO RemoteLauncher.launch fails to check ps.stdout == NULL_OUTPUT_STREAM, so it creates a useless thread even if you never called stdout(…)
boolean novel;
synchronized (encounteredPaths) {
Integer cnt = encounteredPaths.get(ws);
if (cnt == null) {
cnt = 0;
}
novel = cnt < NOVEL_WORKSPACE_DIAGNOSTICS_COUNT;
encounteredPaths.put(ws, cnt + 1);
}
if (novel) {
// First time in this combination. Display any output from the wrapper script for diagnosis.
ps.stdout(listener);
} else {
// Second or subsequent time. Suppress output to save a thread.
ps.readStdout().readStderr(); // TODO RemoteLauncher.launch fails to check ps.stdout == NULL_OUTPUT_STREAM, so it creates a useless thread even if you never called stdout(…)
}
ps.start();
return c;
}

/*package*/ static final class ShellController extends FileMonitoringController {

private int pid;
private final long startTime = System.currentTimeMillis();

private ShellController(FilePath ws) throws IOException, InterruptedException {
super(ws);
Expand Down Expand Up @@ -145,6 +177,8 @@ private synchronized int pid(FilePath ws) throws IOException, InterruptedExcepti
status = -1;
}
return status;
} else if (_pid == 0 && /* compatibility */ startTime > 0 && System.currentTimeMillis() - startTime > 1000 * LAUNCH_FAILURE_TIMEOUT) {
return -2; // apparently never started
}
return null;
}
Expand Down
Expand Up @@ -63,6 +63,10 @@ public String getScript() {

Launcher.ProcStarter ps = launcher.launch().cmds("cmd", "/c", c.getBatchFile1(ws).getRemote()).envs(envVars).pwd(ws).quiet(true);
listener.getLogger().println("[" + ws.getRemote().replaceFirst("^.+\\\\", "") + "] Running batch script"); // details printed by cmd
/* Too noisy, and consumes a thread:
ps.stdout(listener);
*/
ps.readStdout().readStderr(); // TODO see BourneShellScript
ps.start();
return c;
}
Expand Down

0 comments on commit fa1959d

Please sign in to comment.