117 lines
4.3 KiB
Rust
117 lines
4.3 KiB
Rust
//! M5 — Prometheus-format `/metrics` exporter installation and counter
//! pre-registration.
//!
//! Counters and histograms are emitted from the demux path
//! ([`crate::transport::server`]) and the world systems
//! ([`crate::world::ingest_system`], [`crate::world::simulation_system`],
//! [`crate::world::export_system`]). This module's only job is:
//!
//! 1. Install the global metrics recorder + HTTP listener on the existing
//!    tokio runtime, once at startup.
//! 2. Pre-register every counter at value 0 so panels render "0" rather than
//!    "No data" before the first event of a given kind fires.
//!
//! ## Runtime telemetry
//!
//! - `substrate_received_total{tier=t1|t2|t3}` — counter
//! - `substrate_dropped_total{tier=t1}` — counter (T1 lossy)
//! - `substrate_decode_errors_total{tier=t1|t2|t3}` — counter
//! - `substrate_t3_no_handler_total` — counter
//! - `substrate_latency_us{tier=t1|t2|t3}` — histogram
//! - `substrate_tick_hz` — gauge
//! - `substrate_entities` — gauge
//! - `substrate_channel_depth{tier=t1|t2|t3}` — gauge
//! - `substrate_channel_capacity{tier=t1|t2|t3}` — gauge
//! - `substrate_rss_bytes` — gauge
//!
//! ## Digital-twin surface (operator dashboard)
//!
//! - `sensor_aggregate{type=…, stat=count|mean|min|max}` — gauge
//! - `substrate_threshold_crossings_total{type, direction}` — counter
|
||
|
||
use std::net::SocketAddr;
|
||
|
||
use bevy::prelude::*;
|
||
use metrics::counter;
|
||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||
|
||
use crate::config::AppConfig;
|
||
use crate::transport::SensorType;
|
||
use crate::transport::ecs::TokioHandle;
|
||
|
||
/// Bevy plugin that, when metrics are enabled in [`AppConfig`], installs the
/// Prometheus exporter and pre-registers the zero-valued counters.
///
/// All work happens in [`Plugin::build`]; the plugin carries no state.
pub struct ObservabilityPlugin;
|
||
|
||
impl Plugin for ObservabilityPlugin {
|
||
fn build(&self, app: &mut App) {
|
||
let config = app
|
||
.world()
|
||
.get_resource::<AppConfig>()
|
||
.expect("AppConfig must be inserted before ObservabilityPlugin");
|
||
|
||
if !config.observability.metrics_enabled {
|
||
tracing::info!("metrics exporter disabled by config");
|
||
return;
|
||
}
|
||
|
||
let listen: SocketAddr = config
|
||
.observability
|
||
.metrics_listen
|
||
.parse()
|
||
.expect("invalid metrics_listen address in config");
|
||
|
||
let runtime_handle = app
|
||
.world()
|
||
.get_resource::<TokioHandle>()
|
||
.expect("TokioHandle must be inserted before ObservabilityPlugin (load order: transport plugin first)")
|
||
.0
|
||
.clone();
|
||
|
||
// PrometheusBuilder::install spawns the HTTP listener via tokio::spawn,
|
||
// which requires being inside a runtime context.
|
||
let _guard = runtime_handle.enter();
|
||
PrometheusBuilder::new()
|
||
.with_http_listener(listen)
|
||
.install()
|
||
.expect("install prometheus exporter");
|
||
drop(_guard);
|
||
|
||
tracing::info!(?listen, "metrics exporter installed");
|
||
|
||
pre_register_counters();
|
||
}
|
||
}
|
||
|
||
/// Pre-register every counter at value 0 so Grafana sees a series to plot
|
||
/// even before the first event of that kind. Without this, the Prometheus
|
||
/// exporter omits any counter that has never been incremented, and panels
|
||
/// render "No data" — confusing when the metric exists, the counter is just
|
||
/// genuinely zero (e.g., `substrate_t3_no_handler_total` in normal operation).
|
||
fn pre_register_counters() {
|
||
for tier in ["t1", "t2", "t3"] {
|
||
counter!("substrate_received_total", "tier" => tier).increment(0);
|
||
counter!("substrate_decode_errors_total", "tier" => tier).increment(0);
|
||
}
|
||
counter!("substrate_dropped_total", "tier" => "t1").increment(0);
|
||
counter!("substrate_t3_no_handler_total").increment(0);
|
||
|
||
// Threshold crossings — bounded `|SensorType| × 2` cardinality, all
|
||
// pre-registered so dashboard panels show "0" instead of "No data".
|
||
for t in [
|
||
SensorType::Generic,
|
||
SensorType::Temperature,
|
||
SensorType::Humidity,
|
||
SensorType::Pressure,
|
||
SensorType::Voltage,
|
||
SensorType::Current,
|
||
] {
|
||
for direction in ["up", "down"] {
|
||
counter!(
|
||
"substrate_threshold_crossings_total",
|
||
"type" => t.label_str(),
|
||
"direction" => direction
|
||
)
|
||
.increment(0);
|
||
}
|
||
}
|
||
}
|