Commit 5fb1be95 authored by Jason Yellick
Browse files

FAB-11863 Assorted Raft serviceability fixes



This CR bundles four small serviceability fixes for Raft.

1) It removes the newlines from a log message which made it difficult to
consume and appeared to create a truncated list like this:

 INFO 17155f Entering, channel: testorgschannel1, nodes: [ID: 3

2) It adds periods to all of the metric definitions in the cluster
metrics.

3) It converts the message send time in the cluster package to be
reported in seconds and clarifies the description with the unit 'seconds'.

4) It clarifies that the number of leader changes is what this process
has observed since start, and not the total number of leader changes for
the network.

Change-Id: Ic4ad6551af57497f174518188022bf4dfd04fc19
Signed-off-by: Jason Yellick <jyellick@us.ibm.com>
parent 3103a6ac
......@@ -48,26 +48,26 @@ The following metrics are currently exported for consumption by Prometheus.
| | | | channel |
| | | | chaincode |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_egress_queue_capacity | gauge | Capacity of the egress queue | host |
| cluster_comm_egress_queue_capacity | gauge | Capacity of the egress queue. | host |
| | | | msg_type |
| | | | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_egress_queue_length | gauge | Length of the egress queue | host |
| cluster_comm_egress_queue_length | gauge | Length of the egress queue. | host |
| | | | msg_type |
| | | | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_egress_queue_workers | gauge | Count of egress queue workers | channel |
| cluster_comm_egress_queue_workers | gauge | Count of egress queue workers. | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_egress_stream_count | gauge | Count of streams to other nodes | channel |
| cluster_comm_egress_stream_count | gauge | Count of streams to other nodes. | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_egress_tls_connection_count | gauge | Count of TLS connections to other nodes | |
| cluster_comm_egress_tls_connection_count | gauge | Count of TLS connections to other nodes. | |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_ingress_stream_count | gauge | Count of streams from other nodes | |
| cluster_comm_ingress_stream_count | gauge | Count of streams from other nodes. | |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_msg_dropped_count | counter | Count of messages dropped | host |
| cluster_comm_msg_dropped_count | counter | Count of messages dropped. | host |
| | | | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| cluster_comm_msg_send_time | histogram | Time it takes to send a message down the stream | host |
| cluster_comm_msg_send_time | histogram | The time it takes to send a message in seconds. | host |
| | | | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| consensus_etcdraft_cluster_size | gauge | Number of nodes in this channel. | channel |
......@@ -83,7 +83,7 @@ The following metrics are currently exported for consumption by Prometheus.
| consensus_etcdraft_is_leader | gauge | The leadership status of the current node: 1 if it is the | channel |
| | | leader else 0. | |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| consensus_etcdraft_leader_changes | counter | The number of leader changes. | channel |
| consensus_etcdraft_leader_changes | counter | The number of leader changes since process start. | channel |
+-----------------------------------------------------+-----------+------------------------------------------------------------+--------------------+
| consensus_etcdraft_normal_proposals_received | counter | The total number of proposals received for normal type | channel |
| | | transactions. | |
......@@ -256,21 +256,21 @@ associated with the metric.
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| chaincode.shim_requests_received.%{type}.%{channel}.%{chaincode} | counter | The number of chaincode shim requests received. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.egress_queue_capacity.%{host}.%{msg_type}.%{channel} | gauge | Capacity of the egress queue |
| cluster.comm.egress_queue_capacity.%{host}.%{msg_type}.%{channel} | gauge | Capacity of the egress queue. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.egress_queue_length.%{host}.%{msg_type}.%{channel} | gauge | Length of the egress queue |
| cluster.comm.egress_queue_length.%{host}.%{msg_type}.%{channel} | gauge | Length of the egress queue. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.egress_queue_workers.%{channel} | gauge | Count of egress queue workers |
| cluster.comm.egress_queue_workers.%{channel} | gauge | Count of egress queue workers. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.egress_stream_count.%{channel} | gauge | Count of streams to other nodes |
| cluster.comm.egress_stream_count.%{channel} | gauge | Count of streams to other nodes. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.egress_tls_connection_count | gauge | Count of TLS connections to other nodes |
| cluster.comm.egress_tls_connection_count | gauge | Count of TLS connections to other nodes. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.ingress_stream_count | gauge | Count of streams from other nodes |
| cluster.comm.ingress_stream_count | gauge | Count of streams from other nodes. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.msg_dropped_count.%{host}.%{channel} | counter | Count of messages dropped |
| cluster.comm.msg_dropped_count.%{host}.%{channel} | counter | Count of messages dropped. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| cluster.comm.msg_send_time.%{host}.%{channel} | histogram | Time it takes to send a message down the stream |
| cluster.comm.msg_send_time.%{host}.%{channel} | histogram | The time it takes to send a message in seconds. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| consensus.etcdraft.cluster_size.%{channel} | gauge | Number of nodes in this channel. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
......@@ -285,7 +285,7 @@ associated with the metric.
| consensus.etcdraft.is_leader.%{channel} | gauge | The leadership status of the current node: 1 if it is the |
| | | leader else 0. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| consensus.etcdraft.leader_changes.%{channel} | counter | The number of leader changes. |
| consensus.etcdraft.leader_changes.%{channel} | counter | The number of leader changes since process start. |
+-----------------------------------------------------------------------------------------+-----------+------------------------------------------------------------+
| consensus.etcdraft.normal_proposals_received.%{channel} | counter | The total number of proposals received for normal type |
| | | transactions. |
......
......@@ -58,7 +58,7 @@ type RemoteNode struct {
// String returns a string representation of this RemoteNode
func (rm RemoteNode) String() string {
return fmt.Sprintf("ID: %d\nEndpoint: %s\nServerTLSCert:%s ClientTLSCert:%s",
return fmt.Sprintf("ID: %d,\nEndpoint: %s,\nServerTLSCert:%s, ClientTLSCert:%s",
rm.ID, rm.Endpoint, DERtoPEM(rm.ServerTLSCert), DERtoPEM(rm.ClientTLSCert))
}
......
......@@ -17,7 +17,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "egress_queue_length",
Help: "Length of the egress queue",
Help: "Length of the egress queue.",
LabelNames: []string{"host", "msg_type", "channel"},
StatsdFormat: "%{#fqname}.%{host}.%{msg_type}.%{channel}",
}
......@@ -26,7 +26,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "egress_queue_capacity",
Help: "Capacity of the egress queue",
Help: "Capacity of the egress queue.",
LabelNames: []string{"host", "msg_type", "channel"},
StatsdFormat: "%{#fqname}.%{host}.%{msg_type}.%{channel}",
}
......@@ -35,7 +35,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "egress_queue_workers",
Help: "Count of egress queue workers",
Help: "Count of egress queue workers.",
LabelNames: []string{"channel"},
StatsdFormat: "%{#fqname}.%{channel}",
}
......@@ -44,7 +44,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "ingress_stream_count",
Help: "Count of streams from other nodes",
Help: "Count of streams from other nodes.",
StatsdFormat: "%{#fqname}",
}
......@@ -52,7 +52,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "egress_stream_count",
Help: "Count of streams to other nodes",
Help: "Count of streams to other nodes.",
LabelNames: []string{"channel"},
StatsdFormat: "%{#fqname}.%{channel}",
}
......@@ -61,7 +61,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "egress_tls_connection_count",
Help: "Count of TLS connections to other nodes",
Help: "Count of TLS connections to other nodes.",
StatsdFormat: "%{#fqname}",
}
......@@ -69,7 +69,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "msg_send_time",
Help: "Time it takes to send a message down the stream",
Help: "The time it takes to send a message in seconds.",
LabelNames: []string{"host", "channel"},
StatsdFormat: "%{#fqname}.%{host}.%{channel}",
}
......@@ -78,7 +78,7 @@ var (
Namespace: "cluster",
Subsystem: "comm",
Name: "msg_dropped_count",
Help: "Count of messages dropped",
Help: "Count of messages dropped.",
LabelNames: []string{"host", "channel"},
StatsdFormat: "%{#fqname}.%{host}.%{channel}",
}
......@@ -137,7 +137,7 @@ func (m *Metrics) reportWorkerCount(channel string, count uint32) {
}
func (m *Metrics) reportMsgSendTime(host string, channel string, duration time.Duration) {
m.MessageSendTime.With("host", host, "channel", channel).Observe(float64(duration))
m.MessageSendTime.With("host", host, "channel", channel).Observe(float64(duration.Seconds()))
}
func (m *Metrics) reportEgressStreamCount(channel string, count uint32) {
......
......@@ -45,7 +45,7 @@ var (
Namespace: "consensus",
Subsystem: "etcdraft",
Name: "leader_changes",
Help: "The number of leader changes.",
Help: "The number of leader changes since process start.",
LabelNames: []string{"channel"},
StatsdFormat: "%{#fqname}.%{channel}",
}
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment