Skip to content

Commit

Permalink
Merge pull request #122 from jenkinsci/JENKINS-39150
Browse files Browse the repository at this point in the history
[JENKINS-39150] expose diagnostics across all the channels
  • Loading branch information
kohsuke committed Oct 21, 2016
2 parents 0c7df25 + 07b99b0 commit f911511
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 20 deletions.
145 changes: 125 additions & 20 deletions src/main/java/hudson/remoting/Channel.java
Expand Up @@ -32,6 +32,7 @@
import hudson.remoting.forward.PortForwarder;
import org.jenkinsci.remoting.CallableDecorator;
import org.jenkinsci.remoting.RoleChecker;
import org.jenkinsci.remoting.nio.NioChannelHub;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
Expand All @@ -45,6 +46,7 @@
import java.lang.ref.WeakReference;
import java.net.URL;
import java.util.Collections;
import java.util.Date;
import java.util.Hashtable;
import java.util.Locale;
import java.util.Map;
Expand Down Expand Up @@ -204,8 +206,40 @@ public class Channel implements VirtualChannel, IChannel, Closeable {
*/
private final Vector<Listener> listeners = new Vector<Listener>();
private int gcCounter;

/**
* Number of {@link Command} objects sent to the other side.
*/
private int commandsSent;

/**
* Number of {@link Command} objects received from the other side.
*
* When a transport is functioning correctly, {@link #commandsSent} of one side
* and {@link #commandsReceived} of the other side should closely match.
*/
private int commandsReceived;

/**
* Timestamp of the last {@link Command} object sent/received, in
* {@link System#currentTimeMillis()} format.
* This can be used as a basis for detecting dead connections.
*
* <p>
* Note that {@link #lastCommandSentAt} doesn't mean
* anything in terms of whether the underlying network was able to send
* the data (for example, if the other end of a socket connection goes down
* without telling us anything, the {@link SocketOutputStream#write(int)} will
* return right away, and the socket only really times out after 10s of minutes.
*/
private volatile long lastCommandSentAt, lastCommandReceivedAt;

/**
* Timestamp of when this channel was connected/created, in
* {@link System#currentTimeMillis()} format.
*/
private final long createdAt = System.currentTimeMillis();

/**
* Total number of nanoseconds spent for remote class loading.
* <p>
Expand Down Expand Up @@ -266,19 +300,6 @@ public class Channel implements VirtualChannel, IChannel, Closeable {
*/
public final Capability remoteCapability;

/**
* When did we receive any data from this slave the last time?
* This can be used as a basis for detecting dead connections.
* <p>
* Note that this doesn't include our sender side of the operation,
* as successfully returning from {@link #send(Command)} doesn't mean
* anything in terms of whether the underlying network was able to send
* the data (for example, if the other end of a socket connection goes down
* without telling us anything, the {@link SocketOutputStream#write(int)} will
* return right away, and the socket only really times out after 10s of minutes.
*/
private volatile long lastHeard;

/**
* Single-thread executor for running pipe I/O operations.
*
Expand Down Expand Up @@ -494,7 +515,8 @@ protected Channel(ChannelBuilder settings, CommandTransport transport) throws IO

transport.setup(this, new CommandReceiver() {
public void handle(Command cmd) {
updateLastHeard();
commandsReceived++;
lastCommandReceivedAt = System.currentTimeMillis();
if (logger.isLoggable(Level.FINE))
logger.fine("Received " + cmd);
try {
Expand All @@ -509,6 +531,7 @@ public void terminate(IOException e) {
Channel.this.terminate(e);
}
});
ACTIVE_CHANNELS.put(this,ref());
}

/**
Expand Down Expand Up @@ -581,6 +604,7 @@ private ExecutorService createPipeWriterExecutor() {

transport.write(cmd, cmd instanceof CloseCommand);
commandsSent++;
lastCommandSentAt = System.currentTimeMillis();
}

/**
Expand Down Expand Up @@ -1148,6 +1172,73 @@ public void dumpPerformanceCounters(PrintWriter w) throws IOException {
w.printf(Locale.ENGLISH, "Resource loading time=%,dms%n", resourceLoadingTime.get() / (1000 * 1000));
}

/**
* Print the diagnostic information.
*
* <p>
* Here's how you interpret these metrics:
*
* <dl>
* <dt>Created
* <dd>
* When the channel was created, which is more or less the same thing as when the channel is connected.
*
* <dt>Commands sent
* <dd>
* Number of {@link Command} objects sent to the other side. More specifically, number of commands
* successfully passed to {@link #transport}, which means data was written to socket with
* {@link ClassicCommandTransport} but just buffered for write with {@link NioChannelHub}.
*
* If you have access to the remoting diagnostics
* of the other side of the channel, then you can compare this with "commandsReceived" metrics of the other
* side to see how many commands are in transit in transport. If {@code commandsSent==commandsReceived},
* then no command is in transit.
*
* <dt>Commands received
* <dd>
* Number of {@link Command} objects received from the other side. More precisely, number
* of commands reported from {@link #transport}. So for example, if data is still buffered somewhere
* in the networking stack, it won't be counted here.
*
* <dt>Last command sent
* <dd>
* The timestamp in which the last command was sent to the other side. The timestamp in which
* {@link #lastCommandSentAt} was updated.
*
* <dt>Last command received
* <dd>
* The timestamp in which the last command was sent to the other side. The timestamp in which
* {@link #lastCommandReceivedAt} was updated.
*
* <dt>Pending outgoing calls
* <dd>
* Number of RPC calls (e.g., method call through a {@linkplain RemoteInvocationHandler proxy})
* that are made but not returned yet. If you have the remoting diagnostics of the other side, you
* can compare this number with "pending incoming calls" on the other side to see how many RPC
* calls are executing vs in flight. "one side's incoming calls" < "the other side's outgoing calls"
* indicates some RPC requests or responses are passing through the network layer, and mismatch
* between "# of commands sent" vs "# of commands received" can give some indication of whether
* it is request or response that's in flight.
*
* <dt>Pending incoming calls
* <dd>
* The reverse of "pending outgoing calls".
* Number of RPC calls (e.g., method call through a {@linkplain RemoteInvocationHandler proxy})
* that the other side has made to us but not yet returned yet.
*
* @since 2.26.3
*/
public void dumpDiagnostics(PrintWriter w) throws IOException {
w.printf("Channel %s%n",name);
w.printf(" Created=%s%n", new Date(createdAt));
w.printf(" Commands sent=%d%n", commandsSent);
w.printf(" Commands received=%d%n", commandsReceived);
w.printf(" Last command sent=%s%n", new Date(lastCommandSentAt));
w.printf(" Last command received=%s%n", new Date(lastCommandReceivedAt));
w.printf(" Pending outgoing calls=%d%n", pendingCalls.size());
w.printf(" Pending incoming calls=%d%n", pendingCalls.size());
}

/**
* {@inheritDoc}
*/
Expand Down Expand Up @@ -1468,12 +1559,7 @@ public ExportTable.ExportList startExportRecording() {
*/
public long getLastHeard() {
// TODO - this is not safe against clock skew and is called from jenkins core (and potentially plugins)
return lastHeard;
}

private void updateLastHeard() {
// TODO - this is not safe against clock skew and is called from jenkins core (and potentially plugins)
lastHeard = System.currentTimeMillis();
return lastCommandReceivedAt;
}

/*package*/ static Channel setCurrent(Channel channel) {
Expand All @@ -1498,6 +1584,20 @@ public static Channel current() {
return CURRENT.get();
}

/**
* Calls {@link #dumpDiagnostics(PrintWriter)} across all the active channels in this system.
* Used for diagnostics.
*
* @since 2.26.3
*/
public static void dumpDiagnosticsForAll(PrintWriter w) throws IOException {
for (Ref ref : ACTIVE_CHANNELS.values().toArray(new Ref[0])) {
Channel ch = ref.channel();
if (ch!=null)
ch.dumpDiagnostics(w);
}
}

/**
* Remembers the current "channel" associated for this thread.
*/
Expand All @@ -1522,6 +1622,11 @@ public static Channel current() {
*/
public static final int PIPE_WINDOW_SIZE = Integer.getInteger(Channel.class.getName()+".pipeWindowSize",1024*1024);

/**
* Keep track of active channels in the system for diagnostics purposes.
*/
private static final Map<Channel,Ref> ACTIVE_CHANNELS = Collections.synchronizedMap(new WeakHashMap<Channel, Ref>());

static final Class jarLoaderProxy;

static {
Expand Down
2 changes: 2 additions & 0 deletions src/main/java/hudson/remoting/Command.java
Expand Up @@ -31,6 +31,8 @@

/**
* One-way command to be sent over to the remote system and executed there.
* This is the smallest unit of message in remoting from one side to another,
* such as "please execute this method" or "here's the return value from an earlier method call".
* This is layer 0, the lower most layer.
*
* <p>
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/hudson/remoting/ChannelTest.java
Expand Up @@ -5,7 +5,9 @@

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.PrintWriter;
import java.io.Serializable;
import java.io.StringWriter;
import java.net.URL;
import java.net.URLClassLoader;
import java.util.concurrent.TimeUnit;
Expand Down Expand Up @@ -163,4 +165,15 @@ public T call() throws RuntimeException {
return t;
}
}

@Bug(39150)
public void testDiagnostics() throws Exception {
StringWriter sw = new StringWriter();
Channel.dumpDiagnosticsForAll(new PrintWriter(sw));
System.out.println(sw);
assertTrue(sw.toString().contains("Channel north"));
assertTrue(sw.toString().contains("Channel south"));
assertTrue(sw.toString().contains("Commands sent=0"));
assertTrue(sw.toString().contains("Commands received=0"));
}
}

0 comments on commit f911511

Please sign in to comment.