Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

network-gossip: add metric for number of local messages #7871

Merged
8 commits merged into from
Jan 12, 2021
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion client/finality-grandpa/src/communication/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ impl<B: BlockT, N: Network<B>> NetworkBridge<B, N> {
let gossip_engine = Arc::new(Mutex::new(GossipEngine::new(
service.clone(),
GRANDPA_PROTOCOL_NAME,
validator.clone()
validator.clone(),
prometheus_registry,
)));

{
Expand Down
1 change: 1 addition & 0 deletions client/network-gossip/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ futures-timer = "3.0.1"
libp2p = { version = "0.33.0", default-features = false }
log = "0.4.8"
lru = "0.6.1"
prometheus-endpoint = { package = "substrate-prometheus-endpoint", version = "0.8.0", path = "../../utils/prometheus" }
sc-network = { version = "0.8.0", path = "../network" }
sp-runtime = { version = "2.0.0", path = "../../primitives/runtime" }
wasm-timer = "0.2"
Expand Down
19 changes: 13 additions & 6 deletions client/network-gossip/src/bridge.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ use futures::prelude::*;
use futures::channel::mpsc::{channel, Sender, Receiver};
use libp2p::PeerId;
use log::trace;
use prometheus_endpoint::Registry;
use sp_runtime::traits::Block as BlockT;
use std::{
borrow::Cow,
Expand Down Expand Up @@ -72,12 +73,13 @@ impl<B: BlockT> GossipEngine<B> {
network: N,
protocol: impl Into<Cow<'static, str>>,
validator: Arc<dyn Validator<B>>,
metrics_registry: Option<&Registry>,
) -> Self where B: 'static {
let protocol = protocol.into();
let network_event_stream = network.event_stream();

GossipEngine {
state_machine: ConsensusGossip::new(validator, protocol.clone()),
state_machine: ConsensusGossip::new(validator, protocol.clone(), metrics_registry),
network: Box::new(network),
periodic_maintenance_interval: futures_timer::Delay::new(PERIODIC_MAINTENANCE_INTERVAL),
protocol,
Expand Down Expand Up @@ -372,7 +374,8 @@ mod tests {
let mut gossip_engine = GossipEngine::<Block>::new(
network.clone(),
"/my_protocol",
Arc::new(AllowAll{}),
Arc::new(AllowAll {}),
None,
);

// Drop network event stream sender side.
Expand All @@ -399,7 +402,8 @@ mod tests {
let mut gossip_engine = GossipEngine::<Block>::new(
network.clone(),
protocol.clone(),
Arc::new(AllowAll{}),
Arc::new(AllowAll {}),
None,
);

let mut event_sender = network.inner.lock()
Expand Down Expand Up @@ -533,7 +537,8 @@ mod tests {
let mut gossip_engine = GossipEngine::<Block>::new(
network.clone(),
protocol.clone(),
Arc::new(TestValidator{}),
Arc::new(TestValidator {}),
None,
);

// Create channels.
Expand All @@ -549,8 +554,10 @@ mod tests {
// Insert sender sides into `gossip_engine`.
for (topic, tx) in txs {
match gossip_engine.message_sinks.get_mut(&topic) {
Some(entry) => entry.push(tx),
None => {gossip_engine.message_sinks.insert(topic, vec![tx]);},
Some(entry) => entry.push(tx),
None => {
gossip_engine.message_sinks.insert(topic, vec![tx]);
}
}
}

Expand Down
111 changes: 84 additions & 27 deletions client/network-gossip/src/state_machine.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,24 @@ use std::collections::{HashMap, HashSet};
use std::sync::Arc;
use std::iter;
use std::time;
use log::{error, trace};
use log::{debug, error, trace};
use lru::LruCache;
use libp2p::PeerId;
use prometheus_endpoint::{register, Counter, PrometheusError, Registry, U64};
use sp_runtime::traits::{Block as BlockT, Hash, HashFor};
use sc_network::ObservedRole;
use wasm_timer::Instant;

// FIXME: Add additional spam/DoS attack protection: https://github.com/paritytech/substrate/issues/1115
const KNOWN_MESSAGES_CACHE_SIZE: usize = 4096;
// NOTE: The current value is adjusted based on largest production network deployment (Kusama) and
// the current main gossip user (GRANDPA). Currently there are ~800 validators on Kusama, as such,
// each GRANDPA round should generate ~1600 messages, and we currently keep track of the last 2
// completed rounds and the current live one. That makes it so that at any point we will be holding
// ~4800 live messages.
//
// Assuming that each known message is tracked with a 32 byte hash (common for `Block::Hash`), then
// this cache should take about 256 KB of memory.
const KNOWN_MESSAGES_CACHE_SIZE: usize = 8192;
andresilva marked this conversation as resolved.
Show resolved Hide resolved

const REBROADCAST_INTERVAL: time::Duration = time::Duration::from_secs(30);

Expand Down Expand Up @@ -151,18 +160,33 @@ pub struct ConsensusGossip<B: BlockT> {
protocol: Cow<'static, str>,
validator: Arc<dyn Validator<B>>,
next_broadcast: Instant,
metrics: Option<Metrics>,
}

impl<B: BlockT> ConsensusGossip<B> {
/// Create a new instance using the given validator.
pub fn new(validator: Arc<dyn Validator<B>>, protocol: Cow<'static, str>) -> Self {
pub fn new(
validator: Arc<dyn Validator<B>>,
protocol: Cow<'static, str>,
metrics_registry: Option<&Registry>,
) -> Self {
let metrics = match metrics_registry.map(Metrics::register) {
Some(Ok(metrics)) => Some(metrics),
Some(Err(e)) => {
debug!(target: "gossip", "Failed to register metrics: {:?}", e);
None
}
None => None,
};

ConsensusGossip {
peers: HashMap::new(),
messages: Default::default(),
known_messages: LruCache::new(KNOWN_MESSAGES_CACHE_SIZE),
protocol,
validator,
next_broadcast: Instant::now() + REBROADCAST_INTERVAL,
metrics,
}
}

Expand Down Expand Up @@ -197,6 +221,10 @@ impl<B: BlockT> ConsensusGossip<B> {
message,
sender,
});

if let Some(ref metrics) = self.metrics {
metrics.registered_messages.inc();
}
}
}

Expand Down Expand Up @@ -264,10 +292,17 @@ impl<B: BlockT> ConsensusGossip<B> {
let before = self.messages.len();

let mut message_expired = self.validator.message_expired();
self.messages.retain(|entry| !message_expired(entry.topic, &entry.message));
self.messages
.retain(|entry| !message_expired(entry.topic, &entry.message));

let expired_messages = before - self.messages.len();

if let Some(ref metrics) = self.metrics {
metrics.expired_messages.inc_by(expired_messages as u64)
}

trace!(target: "gossip", "Cleaned up {} stale messages, {} left ({} known)",
before - self.messages.len(),
expired_messages,
self.messages.len(),
known_messages.len(),
);
Expand Down Expand Up @@ -429,6 +464,32 @@ impl<B: BlockT> ConsensusGossip<B> {
}
}

/// Prometheus counters maintained by the gossip state machine.
struct Metrics {
	/// Counter exposed as `network_gossip_registered_messages_total`.
	registered_messages: Counter<U64>,
	/// Counter exposed as `network_gossip_expired_messages_total`.
	expired_messages: Counter<U64>,
}

impl Metrics {
	/// Creates both counters and registers them with `registry`.
	///
	/// Returns the first error encountered while creating or registering a
	/// counter; the registered-messages counter is handled before the
	/// expired-messages counter.
	fn register(registry: &Registry) -> Result<Self, PrometheusError> {
		let registered_messages = register(
			Counter::new(
				"network_gossip_registered_messages_total",
				"Number of registered messages by the gossip service.",
			)?,
			registry,
		)?;

		let expired_messages = register(
			Counter::new(
				"network_gossip_expired_messages_total",
				"Number of expired messages by the gossip service.",
			)?,
			registry,
		)?;

		Ok(Self {
			registered_messages,
			expired_messages,
		})
	}
}

#[cfg(test)]
mod tests {
use futures::prelude::*;
Expand Down Expand Up @@ -538,7 +599,7 @@ mod tests {

let prev_hash = H256::random();
let best_hash = H256::random();
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into());
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into(), None);
let m1_hash = H256::random();
let m2_hash = H256::random();
let m1 = vec![1, 2, 3];
Expand All @@ -565,11 +626,11 @@ mod tests {

#[test]
fn message_stream_include_those_sent_before_asking() {
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into());
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into(), None);

// Register message.
let message = vec![4, 5, 6];
let topic = HashFor::<Block>::hash(&[1,2,3]);
let topic = HashFor::<Block>::hash(&[1, 2, 3]);
consensus.register_message(topic, message.clone());

assert_eq!(
Expand All @@ -580,7 +641,7 @@ mod tests {

#[test]
fn can_keep_multiple_messages_per_topic() {
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into());
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into(), None);

let topic = [1; 32].into();
let msg_a = vec![1, 2, 3];
Expand All @@ -594,7 +655,7 @@ mod tests {

#[test]
fn peer_is_removed_on_disconnect() {
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into());
let mut consensus = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into(), None);

let mut network = NoOpNetwork::default();

Expand All @@ -608,14 +669,12 @@ mod tests {

#[test]
fn on_incoming_ignores_discarded_messages() {
let to_forward = ConsensusGossip::<Block>::new(
Arc::new(DiscardAll),
"/foo".into(),
).on_incoming(
&mut NoOpNetwork::default(),
PeerId::random(),
vec![vec![1, 2, 3]],
);
let to_forward = ConsensusGossip::<Block>::new(Arc::new(DiscardAll), "/foo".into(), None)
.on_incoming(
&mut NoOpNetwork::default(),
PeerId::random(),
vec![vec![1, 2, 3]],
);

assert!(
to_forward.is_empty(),
Expand All @@ -628,15 +687,13 @@ mod tests {
let mut network = NoOpNetwork::default();
let remote = PeerId::random();

let to_forward = ConsensusGossip::<Block>::new(
Arc::new(AllowAll),
"/foo".into(),
).on_incoming(
&mut network,
// Unregistered peer.
remote.clone(),
vec![vec![1, 2, 3]],
);
let to_forward = ConsensusGossip::<Block>::new(Arc::new(AllowAll), "/foo".into(), None)
.on_incoming(
&mut network,
// Unregistered peer.
remote.clone(),
vec![vec![1, 2, 3]],
);

assert!(
to_forward.is_empty(),
Expand Down