Skip to content

Commit

Permalink
[JENKINS-32695] Result from health checks are lost when more than 3 r…
Browse files Browse the repository at this point in the history
…un at the same time

If more than 3 checks are put in the pool before one finishes a RejectedExecutionException is thrown,
never logged anywhere, and nothing is returned from registry.runHealthChecks
  • Loading branch information
carlossg committed Jan 31, 2016
1 parent 1de26f6 commit 9553820
Show file tree
Hide file tree
Showing 4 changed files with 227 additions and 19 deletions.
37 changes: 18 additions & 19 deletions src/main/java/jenkins/metrics/api/Metrics.java
Expand Up @@ -45,12 +45,11 @@
import hudson.security.Permission;
import hudson.security.PermissionGroup;
import hudson.security.PermissionScope;
import hudson.util.DaemonThreadFactory;
import hudson.util.ExceptionCatchingThreadFactory;
import hudson.util.PluginServletFilter;
import hudson.util.StreamTaskListener;
import hudson.util.TimeUnit2;
import jenkins.model.Jenkins;
import jenkins.metrics.util.HealthChecksThreadPool;

import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;
Expand All @@ -67,11 +66,7 @@
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.SynchronousQueue;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.RejectedExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;

Expand Down Expand Up @@ -112,19 +107,9 @@ public class Metrics extends Plugin {
*/
private static final Logger LOGGER = Logger.getLogger(Metrics.class.getName());
/**
* Thread pool for running health checks. We set the pool upper limit to 4 and we keep threads around for 5 seconds
* as this is a bursty pool used once per minute.
* Thread pool for running health checks.
*/
private static final ExecutorService threadPoolForHealthChecks = new ThreadPoolExecutor(0, 4,
5L, TimeUnit.SECONDS,
new SynchronousQueue<Runnable>(),
new ExceptionCatchingThreadFactory(new DaemonThreadFactory(new ThreadFactory() {
private final AtomicInteger number = new AtomicInteger();

public Thread newThread(Runnable r) {
return new Thread(r, "Metrics-HealthChecks-" + number.incrementAndGet());
}
})));
private static ExecutorService threadPoolForHealthChecks;
/**
* The registry of metrics.
*/
Expand Down Expand Up @@ -316,6 +301,7 @@ public static void afterExtensionsAugmented() {
plugin.healthCheckRegistry.register(c.getKey(), c.getValue());
}
}
threadPoolForHealthChecks = new HealthChecksThreadPool(healthCheckRegistry());
LOGGER.log(Level.FINE, "Metric provider and health check provider extensions registered");
}

Expand Down Expand Up @@ -430,6 +416,10 @@ public final void doRun() {
HealthChecker.class.getName() + " thread is still running. Execution aborted.");
return;
}
if (threadPoolForHealthChecks == null) {
LOGGER.info("Health checks thread pool not yet initialized, skipping until next execution");
return;
}
future = threadPoolForHealthChecks.submit(new Runnable() {
public void run() {
logger.log(Level.FINE, "Started " + HealthChecker.class.getName());
Expand All @@ -451,6 +441,11 @@ public void run() {
}
} catch (InterruptedException e) {
e.printStackTrace(l.fatalError("aborted"));
} catch (Exception e) {
logger.log(Level.SEVERE, "Error running " + HealthChecker.class.getName(), e);
if (l != null) {
e.printStackTrace(l.fatalError(e.getMessage()));
}
} finally {
if (l != null) {
l.closeQuietly();
Expand Down Expand Up @@ -490,6 +485,10 @@ private void execute(TaskListener listener) throws IOException, InterruptedExcep
SortedMap<String, HealthCheck.Result> results;
try {
results = registry.runHealthChecks(threadPoolForHealthChecks);
} catch (RejectedExecutionException e) {
listener.error("Health checks execution is rejected due to: {0}", e.getMessage());
LOGGER.log(Level.INFO, "Health checks execution is rejected due to: {0}", e.getMessage());
return;
} finally {
context.stop();
}
Expand Down
103 changes: 103 additions & 0 deletions src/main/java/jenkins/metrics/util/HealthChecksThreadPool.java
@@ -0,0 +1,103 @@
/*
* The MIT License
*
* Copyright (c) 2016, CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.metrics.util;

import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.RejectedExecutionHandler;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.DoNotUse;

import com.codahale.metrics.health.HealthCheckRegistry;

import hudson.util.DaemonThreadFactory;
import hudson.util.ExceptionCatchingThreadFactory;
import jenkins.metrics.api.Metrics;

/**
* Thread pool for running health checks. We set the pool size to 4 (max) and we keep threads around for 5 seconds as
* this is a bursty pool used once per minute. The queue is set to the same size as the number of health checks,
* dropping oldest items in the queue as new ones come in, to avoid running more than one health check in each
* recurrence period.
*
* @since 3.1.2.3
*/
public class HealthChecksThreadPool extends ThreadPoolExecutor {

private static final Logger LOGGER = Logger.getLogger(Metrics.class.getName());

private static final int MAX_THREAD_POOL_SIZE = 4;
private static long rejectedExecutions;

public HealthChecksThreadPool(HealthCheckRegistry healthCheckRegistry) {
super(0, MAX_THREAD_POOL_SIZE, //
5L, TimeUnit.SECONDS, //
// 1 thread is taken by the executor itself
new ArrayBlockingQueue<Runnable>(
Math.max(0, 1 + healthCheckRegistry.getNames().size() - MAX_THREAD_POOL_SIZE)), //
new ExceptionCatchingThreadFactory(new DaemonThreadFactory(new ThreadFactory() {
private final AtomicInteger number = new AtomicInteger();

public Thread newThread(Runnable r) {
return new Thread(r, "Metrics-HealthChecks-" + number.incrementAndGet());
}
})), new MetricsRejectedExecutionHandler(healthCheckRegistry));
}

@Restricted(DoNotUse.class) // testing only
public static long getRejectedExecutions() {
return rejectedExecutions;
}

/**
* Log the rejection and execute the {@link DiscardOldestPolicy} handler, dropping the first item in the queue and
* retrying
*/
private static class MetricsRejectedExecutionHandler extends DiscardOldestPolicy
implements RejectedExecutionHandler {

private HealthCheckRegistry healthCheckRegistry;

public MetricsRejectedExecutionHandler(HealthCheckRegistry healthCheckRegistry) {
this.healthCheckRegistry = healthCheckRegistry;
}

public void rejectedExecution(Runnable r, ThreadPoolExecutor executor) {
rejectedExecutions++;
LOGGER.log(Level.WARNING,
"Execution of health check was rejected, will drop oldest in queue, this may mean some health checks are taking too long to execute:"
+ " {0}, queue max size={1}, health checks={2} ({3})",
new Object[] { executor, executor.getQueue().size(), healthCheckRegistry.getNames(),
healthCheckRegistry.getNames().size() });
super.rejectedExecution(r, executor);
}
}

}
@@ -0,0 +1,55 @@
/*
* The MIT License
*
* Copyright (c) 2016, CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.metrics.api;

import java.util.Map;
import java.util.Map.Entry;

import com.codahale.metrics.health.HealthCheck;

import edu.umd.cs.findbugs.annotations.NonNull;
import hudson.Extension;

@Extension
public class HealthCheckProviderForTesting extends HealthCheckProvider {

public static int runs;

@NonNull
@Override
public Map<String, HealthCheck> getHealthChecks() {
return checks(check(1), check(2), check(3), check(4), check(5), check(6));
}

private Entry<String, HealthCheck> check(int i) {
return check("short-running-health-check-" + i, new HealthCheck() {
@Override
protected Result check() throws Exception {
runs++;
Thread.sleep(1 * 1000);
return Result.unhealthy("some error message");
}
});
}
}
51 changes: 51 additions & 0 deletions src/test/java/jenkins/metrics/api/HealthCheckerTest.java
@@ -0,0 +1,51 @@
/*
* The MIT License
*
* Copyright (c) 2016, CloudBees, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package jenkins.metrics.api;

import static org.junit.Assert.*;

import org.junit.Rule;
import org.junit.Test;
import org.jvnet.hudson.test.JenkinsRule;

import jenkins.metrics.api.Metrics.HealthChecker;
import jenkins.metrics.util.HealthChecksThreadPool;

/**
* Test the {@link HealthChecker} execution of health checks
*/
public class HealthCheckerTest {

@Rule
public JenkinsRule j = new JenkinsRule();

@Test
public void testHealthChecksAreNotRejected() throws Exception {
while (HealthCheckProviderForTesting.runs < 6 && HealthChecksThreadPool.getRejectedExecutions() == 0) {
Thread.sleep(1000);
}
assertEquals(0, HealthChecksThreadPool.getRejectedExecutions());
}

}

0 comments on commit 9553820

Please sign in to comment.