Skip to content

Commit

Permalink
Merge pull request #20 from jenkinsci/cached-results
Browse files Browse the repository at this point in the history
[JENKINS-32696] Healthcheck API endpoint causes a new run of health checks
  • Loading branch information
stephenc committed Feb 3, 2016
2 parents ef2d2f9 + 386cd49 commit b4ae1f4
Show file tree
Hide file tree
Showing 2 changed files with 893 additions and 258 deletions.
214 changes: 195 additions & 19 deletions src/main/java/jenkins/metrics/api/Metrics.java
Expand Up @@ -31,14 +31,15 @@
import com.codahale.metrics.MetricSet;
import com.codahale.metrics.Timer;
import com.codahale.metrics.health.HealthCheck;
import com.codahale.metrics.health.HealthCheck.Result;
import com.codahale.metrics.health.HealthCheckRegistry;
import hudson.init.InitMilestone;
import hudson.init.Initializer;
import jenkins.metrics.impl.MetricsFilter;
import com.infradna.tool.bridge_method_injector.WithBridgeMethods;
import edu.umd.cs.findbugs.annotations.CheckForNull;
import edu.umd.cs.findbugs.annotations.NonNull;
import hudson.Extension;
import hudson.Plugin;
import hudson.init.InitMilestone;
import hudson.init.Initializer;
import hudson.model.PeriodicWork;
import hudson.model.TaskListener;
import hudson.security.ACL;
Expand All @@ -48,27 +49,29 @@
import hudson.util.PluginServletFilter;
import hudson.util.StreamTaskListener;
import hudson.util.TimeUnit2;
import jenkins.model.Jenkins;
import jenkins.metrics.util.HealthChecksThreadPool;

import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;
import org.kohsuke.stapler.HttpResponse;

import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.logging.Level;
import java.util.logging.Logger;
import jenkins.metrics.impl.MetricsFilter;
import jenkins.metrics.util.HealthChecksThreadPool;
import jenkins.model.Jenkins;
import net.jcip.annotations.ThreadSafe;
import org.acegisecurity.context.SecurityContext;
import org.acegisecurity.context.SecurityContextHolder;
import org.kohsuke.accmod.Restricted;
import org.kohsuke.accmod.restrictions.NoExternalUse;
import org.kohsuke.stapler.HttpResponse;

import static com.codahale.metrics.MetricRegistry.name;

Expand Down Expand Up @@ -139,6 +142,37 @@ public static HealthCheckRegistry healthCheckRegistry() {
return plugin.healthCheckRegistry;
}

/**
 * Returns the results held by the current health check data.
 *
 * @return a sorted map keyed by health check name whose values are the corresponding results; a new, empty
 * map when {@link #getHealthCheckData()} has nothing to return.
 */
@NonNull
public static SortedMap<String, Result> getHealthCheckResults() {
    final HealthCheckData latest = getHealthCheckData();
    if (latest != null) {
        return latest.getResults();
    }
    // No data available: hand back an empty map rather than null.
    return new TreeMap<String, Result>();
}

/**
 * Looks up the {@link HealthChecker} extension of the current {@link Jenkins} and returns the health check
 * data it holds.
 *
 * @return the current health check data, or {@code null} if Jenkins or the {@link HealthChecker} is not
 * available (a warning is logged in both cases) or the health checks have not run yet.
 */
@CheckForNull
public static HealthCheckData getHealthCheckData() {
    final Jenkins instance = Jenkins.getInstance();
    if (instance != null) {
        final HealthChecker checker = instance.getExtensionList(PeriodicWork.class).get(HealthChecker.class);
        if (checker != null) {
            return checker.getHealthCheckData();
        }
        LOGGER.warning("Unable to get health check results, HealthChecker is not available");
        return null;
    }
    LOGGER.warning("Unable to get health check results, client master is not ready (startup or shutdown)");
    return null;
}

/**
* Returns the {@link MetricRegistry} for the current {@link Jenkins}.
*
Expand Down Expand Up @@ -363,54 +397,116 @@ protected Double transform(Double value) {
/**
* Performs the periodic running of health checks and re-indexing of access keys.
*/
// TODO switch to AsyncPeriodicWork once on a new enough Jenkins core
@Extension
public static class HealthChecker extends PeriodicWork {

/**
* Timer to track how long the health checks are taking to execute.
*/
private final Timer healthCheckDuration = new Timer();

private Map<String, HealthCheck.Result> healthCheckResults = new HashMap<String, HealthCheck.Result>();
/**
 * The most recent health check data, or {@code null} if the health checks have not run yet.
 * <p>
 * Declared {@code volatile} because the reference is replaced by the thread that executes the health checks
 * and is read by other threads through {@link #getHealthCheckData()} and {@link #getHealthCheckResults()};
 * without it a reader is not guaranteed to ever observe a newly published value.
 */
private volatile HealthCheckData healthCheckData = null;

/**
 * Gauge reporting how many names the health check registry currently holds.
 */
private final Gauge<Integer> healthCheckCount = new Gauge<Integer>() {
    /**
     * {@inheritDoc}
     */
    @Override
    public Integer getValue() {
        final int registered = healthCheckRegistry().getNames().size();
        return registered;
    }
};
/**
 * Gauge reporting the current value of the health check score field.
 */
private final Gauge<Double> healthCheckScore = new Gauge<Double>() {
    /**
     * {@inheritDoc}
     */
    @Override
    public Double getValue() {
        return Double.valueOf(score);
    }
};
/**
* Copy and paste from AsyncPeriodicWork
*/
private Future<?> future;
/**
* The current score.
*/
private volatile double score = 1.0;
/**
* The most recent unhealthy checks.
*/
private volatile Set<String> lastUnhealthy = null;

/**
* Default constructor.
*/
public HealthChecker() {
super();
}

/**
 * {@inheritDoc}
 * <p>
 * The period is {@code HEALTH_CHECK_INTERVAL_MINS} clamped to the range of one minute to one day, expressed
 * in milliseconds.
 */
@Override
public long getRecurrencePeriod() {
    final long atLeastOneMinute = Math.max(1, HEALTH_CHECK_INTERVAL_MINS);
    final long oneDayInMinutes = TimeUnit2.DAYS.toMinutes(1);
    final long clampedMinutes = Math.min(atLeastOneMinute, oneDayInMinutes);
    return TimeUnit2.MINUTES.toMillis(clampedMinutes);
}

/**
 * Exposes the timer that records how long the health checks take to execute.
 *
 * @return the health check duration {@link Timer}.
 */
public Timer getHealthCheckDuration() {
    return this.healthCheckDuration;
}

public Map<String, HealthCheck.Result> getHealthCheckResults() {
return healthCheckResults;
/**
 * Gets the most recent results.
 *
 * @return the results held by the most recent health check data, or a new empty map if the health checks
 * have not run yet.
 * @see #getHealthCheckData()
 */
@NonNull
@WithBridgeMethods(Map.class)
public SortedMap<String, HealthCheck.Result> getHealthCheckResults() {
    // Read the shared field exactly once: it is replaced by the health check thread, so the null test and
    // the dereference must operate on the same reference.
    final HealthCheckData snapshot = healthCheckData;
    if (snapshot == null) {
        return new TreeMap<String, Result>();
    }
    return snapshot.results;
}

/**
 * Exposes the most recent health check data, which carries
 * {@link HealthCheckData#getLastModified()} in addition to the results themselves.
 *
 * @return the most recent health check data, or {@code null} if the health checks have not run yet.
 */
@CheckForNull
public HealthCheckData getHealthCheckData() {
    return this.healthCheckData;
}

/**
 * Exposes the gauge that tracks the number of health checks.
 *
 * @return the health check count {@link Gauge}.
 */
public Gauge<Integer> getHealthCheckCount() {
    return this.healthCheckCount;
}

/**
 * Exposes the gauge that tracks the health check score.
 *
 * @return the health check score {@link Gauge}.
 */
public Gauge<Double> getHealthCheckScore() {
    return this.healthCheckScore;
}

/**
* Schedules this periodic work now in a new thread, if one isn't already running.
* Copy and paste from AsyncPeriodicWork
*/
public final void doRun() {
try {
Expand All @@ -429,11 +525,10 @@ public void run() {
long startTime = System.currentTimeMillis();

StreamTaskListener l = null;
SecurityContext oldContext = ACL.impersonate(ACL.SYSTEM);
try {
l = new StreamTaskListener(new File(Jenkins.getInstance().getRootDir(),
HealthChecker.class.getName() + ".log"));
ACL.impersonate(ACL.SYSTEM);

execute(l);
} catch (IOException e) {
if (l != null) {
Expand All @@ -453,8 +548,8 @@ public void run() {
if (l != null) {
l.closeQuietly();
}
SecurityContextHolder.setContext(oldContext); // required as we are running in a pool
}

logger.log(Level.FINE, "Finished " + HealthChecker.class.getName() + ". " +
(System.currentTimeMillis() - startTime) + " ms");
}
Expand All @@ -464,6 +559,13 @@ public void run() {
}
}

/**
* The actual periodic work to run asynchronously.
*
* @param listener the listener.
* @throws IOException if things go wrong.
* @throws InterruptedException if interrupted.
*/
private void execute(TaskListener listener) throws IOException, InterruptedException {
reindexAccessKeys();
HealthCheckRegistry registry = healthCheckRegistry();
Expand Down Expand Up @@ -496,6 +598,7 @@ private void execute(TaskListener listener) throws IOException, InterruptedExcep
} finally {
context.stop();
}
healthCheckData = new HealthCheckData(results, getRecurrencePeriod());
listener.getLogger().println("Health check results at " + new Date() + ":");
Set<String> unhealthy = null;
Set<String> unhealthyName = null;
Expand All @@ -504,7 +607,6 @@ private void execute(TaskListener listener) throws IOException, InterruptedExcep
for (Map.Entry<String, HealthCheck.Result> e : results.entrySet()) {
count++;
listener.getLogger().println(" * " + e.getKey() + ": " + e.getValue());
healthCheckResults.put(e.getKey(), e.getValue());
if (e.getValue().isHealthy()) {
total++;
} else {
Expand All @@ -517,7 +619,6 @@ private void execute(TaskListener listener) throws IOException, InterruptedExcep
}
}
// delete any result whose health check had been removed
healthCheckResults.keySet().retainAll(results.keySet());

score = total / ((double) count);
Set<String> lastUnhealthy = this.lastUnhealthy;
Expand All @@ -537,4 +638,79 @@ private void execute(TaskListener listener) throws IOException, InterruptedExcep
}
}

/**
 * Health check data: an immutable snapshot of a set of health check results together with the time the
 * snapshot was created and, optionally, the time at which it is expected to be replaced.
 * <p>
 * The results are copied on construction and exposed through an unmodifiable view, so an instance can be
 * shared between threads without the caller of the constructor or of {@link #getResults()} being able to
 * alter what other threads see.
 */
@ThreadSafe
public static class HealthCheckData {
    /**
     * When the health check data was created, in milliseconds since 1st January 1970 GMT.
     */
    private final long lastModified;
    /**
     * When the health check data is expected to be replaced with a newer result, or {@code null} if that is
     * not known.
     */
    @CheckForNull
    private final Long expires;
    /**
     * The results, as an unmodifiable private copy of the map supplied to the constructor.
     */
    @NonNull
    private final SortedMap<String, HealthCheck.Result> results;

    /**
     * Constructor for when you know how long before the next collection.
     *
     * @param results    the current results; copied, so later changes to the supplied map are not reflected.
     * @param nextMillis how long, in milliseconds, until the next results will be available.
     * @throws NullPointerException if {@code results} is {@code null}.
     */
    public HealthCheckData(@NonNull SortedMap<String, Result> results, long nextMillis) {
        this.results = snapshotOf(results);
        this.lastModified = System.currentTimeMillis();
        this.expires = lastModified + nextMillis;
    }

    /**
     * Constructor for when you do not know how long before the next collection.
     *
     * @param results the current results; copied, so later changes to the supplied map are not reflected.
     * @throws NullPointerException if {@code results} is {@code null}.
     */
    public HealthCheckData(@NonNull SortedMap<String, Result> results) {
        this.results = snapshotOf(results);
        this.lastModified = System.currentTimeMillis();
        this.expires = null;
    }

    /**
     * Takes an unmodifiable copy of the supplied results, keeping the ordering of the source map.
     *
     * @param source the results to copy.
     * @return an unmodifiable sorted map with the same entries and ordering as {@code source}.
     */
    private static SortedMap<String, Result> snapshotOf(SortedMap<String, Result> source) {
        // Fully qualified so that this class does not depend on an additional import.
        return java.util.Collections.unmodifiableSortedMap(new TreeMap<String, Result>(source));
    }

    /**
     * The number of milliseconds since 1st January 1970 GMT when the results were collected.
     *
     * @return The number of milliseconds since 1st January 1970 GMT when the results were collected.
     */
    public long getLastModified() {
        return lastModified;
    }

    /**
     * The number of milliseconds since 1st January 1970 GMT when the results are expected to be superseded
     * by a newer result.
     *
     * @return The number of milliseconds since 1st January 1970 GMT when the results are expected to be
     * superseded by a newer result, or {@code null} if that is not known.
     */
    @CheckForNull
    public Long getExpires() {
        return expires;
    }

    /**
     * The results.
     *
     * @return the results as an unmodifiable sorted map; attempts to modify it throw
     * {@link UnsupportedOperationException}.
     */
    @NonNull
    public SortedMap<String, Result> getResults() {
        return results;
    }
}

}

0 comments on commit b4ae1f4

Please sign in to comment.