Compare commits
3 Commits
42ab81899c
...
d3f09ee062
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d3f09ee062 | ||
|
|
cac6c9ac02 | ||
|
|
4ec5b98df4 |
3
.gitignore
vendored
3
.gitignore
vendored
@@ -21,6 +21,9 @@ analysis/.venv/
|
||||
# Data — raw CSVs committed, processed outputs not
|
||||
data/**/*_processed.csv
|
||||
|
||||
# Self-signed dev TLS material — regenerate with `make certs`
|
||||
certs/
|
||||
|
||||
# OS
|
||||
.DS_Store
|
||||
*.swp
|
||||
|
||||
10
.idea/.gitignore
generated
vendored
Normal file
10
.idea/.gitignore
generated
vendored
Normal file
@@ -0,0 +1,10 @@
|
||||
# Default ignored files
|
||||
/shelf/
|
||||
/workspace.xml
|
||||
# Ignored default folder with query files
|
||||
/queries/
|
||||
# Datasource local storage ignored files
|
||||
/dataSources/
|
||||
/dataSources.local.xml
|
||||
# Editor-based HTTP Client requests
|
||||
/httpRequests/
|
||||
4
.idea/encodings.xml
generated
Normal file
4
.idea/encodings.xml
generated
Normal file
@@ -0,0 +1,4 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="Encoding" addBOMForNewFiles="with BOM under Windows, with no BOM otherwise" />
|
||||
</project>
|
||||
6
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
6
.idea/inspectionProfiles/Project_Default.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<component name="InspectionProjectProfileManager">
|
||||
<profile version="1.0">
|
||||
<option name="myName" value="Project Default" />
|
||||
<inspection_tool class="Eslint" enabled="true" level="WARNING" enabled_by_default="true" />
|
||||
</profile>
|
||||
</component>
|
||||
8
.idea/modules.xml
generated
Normal file
8
.idea/modules.xml
generated
Normal file
@@ -0,0 +1,8 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectModuleManager">
|
||||
<modules>
|
||||
<module fileurl="file://$PROJECT_DIR$/.idea/quic_ecs_dt.iml" filepath="$PROJECT_DIR$/.idea/quic_ecs_dt.iml" />
|
||||
</modules>
|
||||
</component>
|
||||
</project>
|
||||
6
.idea/prettier.xml
generated
Normal file
6
.idea/prettier.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="PrettierConfiguration">
|
||||
<option name="myConfigurationMode" value="AUTOMATIC" />
|
||||
</component>
|
||||
</project>
|
||||
12
.idea/quic_ecs_dt.iml
generated
Normal file
12
.idea/quic_ecs_dt.iml
generated
Normal file
@@ -0,0 +1,12 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module type="EMPTY_MODULE" version="4">
|
||||
<component name="NewModuleRootManager">
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/simulator/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/substrate/src" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
6
.idea/vcs.xml
generated
Normal file
6
.idea/vcs.xml
generated
Normal file
@@ -0,0 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="VcsDirectoryMappings">
|
||||
<mapping directory="" vcs="Git" />
|
||||
</component>
|
||||
</project>
|
||||
163
CLAUDE.md
Normal file
163
CLAUDE.md
Normal file
@@ -0,0 +1,163 @@
|
||||
# quic_ecs_dt — Project Guide for Claude
|
||||
|
||||
## What & why
|
||||
|
||||
Source repo for **"QUIC + ECS as Complementary Transport and Runtime Substrates for Industrial Digital Twins"** — UCAmI 2026 (Plantevin & Francillette, UQAC). Third paper in a sequence; the first two are at IEEE SWC 2026:
|
||||
|
||||
- `plantevin2026ecs` — ECS as runtime substrate for industrial DT (200k assets @ 114 Hz on Pi 5).
|
||||
- `plantevin2026quic` — QUIC partial reliability for DT sensor streams (94% P99 reduction vs TCP at 5% loss).
|
||||
|
||||
**UCAmI hypothesis (the composition question):** prior work shows ECS and QUIC each work as substrates *independently*. Does integrating real QUIC traffic into a Bevy ECS ingest path introduce coupling that degrades either one's claimed properties? The paper argues no, and measures it.
|
||||
|
||||
## Architecture
|
||||
|
||||
Three-tier QUIC ↔ ECS bridge, headless Bevy runtime:
|
||||
|
||||
| Tier | QUIC primitive | Use case | Channel cap | Tx newtype |
|
||||
|------|----------------|----------|-------------|------------|
|
||||
| T1 | Unreliable datagrams (RFC 9221) | High-freq ephemeral telemetry; drops OK | 1024 | `T1Sender::send_lossy` (try_send, drop on full) |
|
||||
| T2 | Unidirectional streams | Ordered threshold events; reliable | 512 | `T2Sender::send` (await, backpressure) |
|
||||
| T3 | Bidirectional streams | Actuator commands w/ ACK; per-command oneshot reply | 256 | `T3Sender::send` of `T3Inbound { command, reply }` |
|
||||
|
||||
The QUIC server runs on a dedicated OS thread with a Tokio multi-thread runtime and pushes decoded `QuicMessage` values (UUID + sensor_id + f64 + ts + seq + `sensor_type: u8`, 39 B fixed LE since M5) into one `tokio::sync::mpsc` channel per tier via the `T1Sender / T2Sender / T3Sender` newtypes (in [substrate/src/transport/mod.rs](substrate/src/transport/mod.rs)) so misuse is a type error. Bevy `ingest_system` drains in `PreUpdate`, gated by `run_if(in_state(ServerState::Started))`. Pattern is in [substrate/src/transport/ecs.rs](substrate/src/transport/ecs.rs).
|
||||
|
||||
**T3 ack protocol.** A device opens a bi-stream and writes one `QuicMessage` (the command). The demux task reads it, builds a `T3Inbound { command, reply: oneshot::Sender<QuicMessage> }`, and sends it on the T3 mpsc. The ECS handler writes the ack into `reply`; the demux task awaits `reply_rx` and writes the resulting `QuicMessage` back on the bi-stream. Dropping the oneshot signals "no handler" and propagates to the client as a stream reset (`send.reset(0)`). This was the normal path while ingest was a placeholder; since M4 the `IngestSystem` handler replies with the device's most recent `raw_value`, so a dropped oneshot is now the exceptional path.
|
||||
|
||||
**Target hardware:** CM5 (BCM2712, Cortex-A76, 4 GB) as DT runtime; M4 Max as traffic generator; 1 Gbps direct Ethernet. Both rigs are in hand.
|
||||
|
||||
## Repo map
|
||||
|
||||
```
|
||||
quic_ecs_dt/
|
||||
├── paper/ Quarto + LNCS source — single index.qmd, refs in references.bib
|
||||
├── substrate/ Rust crate: Bevy 0.18 + Quinn 0.11 + rustls 0.23 + Tokio
|
||||
│ └── src/
|
||||
│ ├── main.rs App::new, MinimalPlugins, EcsQuicTransportPlugin
|
||||
│ ├── config.rs figment chain: defaults → config.toml → APP_* env
|
||||
│ └── transport/
|
||||
│ ├── mod.rs QuicMessage struct
|
||||
│ ├── ecs.rs Plugin: tokio thread + 3 mpsc + PreUpdate ingest
|
||||
│ └── server.rs PEM loader, TransportConfig, handle_incoming + T1/T2/T3 demux readers
|
||||
├── simulator/ Rust crate: Quinn client lib (SimulatorClient) + CLI driver; Bevy sensor generators still pending
|
||||
├── data/ (created by M6) loopback/, two_machine/ — raw CSVs committed, *_processed ignored
|
||||
├── Cargo.toml workspace
|
||||
└── Makefile render, preview, build, build-cm5, deploy-cm5
|
||||
```
|
||||
|
||||
## Status
|
||||
|
||||
| Area | State |
|
||||
|------|-------|
|
||||
| `AppConfig` figment loader (defaults → TOML → env) | Done — [substrate/src/config.rs:42](substrate/src/config.rs#L42) |
|
||||
| 3-tier MPSC bridge scaffolding (Tokio thread + Bevy plugin) | Done — [substrate/src/transport/ecs.rs](substrate/src/transport/ecs.rs) |
|
||||
| `QuicMessage` struct | Defined — [substrate/src/transport/mod.rs:4](substrate/src/transport/mod.rs#L4); codec status is tracked in the "Wire codec" row |
|
||||
| Quinn server lifecycle | Listener up — `ServerState{Starting,Started}` in [substrate/src/transport/state.rs](substrate/src/transport/state.rs); `OnEnter(Starting)` → bind + accept loop in [substrate/src/transport/ecs.rs](substrate/src/transport/ecs.rs). Explicit `TransportConfig` w/ tuned datagram recv buffer (256 KiB) in [substrate/src/transport/server.rs](substrate/src/transport/server.rs). Per-tier sender newtypes (`T1Sender::send_lossy`, `T2Sender::send`, `T3Sender::send`) in [substrate/src/transport/mod.rs](substrate/src/transport/mod.rs) |
|
||||
| T1 demux (datagrams → ECS) | Done — `handle_incoming` orchestrator + `read_datagrams` reader in [substrate/src/transport/server.rs](substrate/src/transport/server.rs); decode errors logged but non-fatal; channel-full drops silent at trace; received/dropped/decode_errors counters in the end-of-stream debug line |
|
||||
| T2 demux (uni streams → ECS) | Done — `read_uni_streams` accepts streams in [substrate/src/transport/server.rs](substrate/src/transport/server.rs), spawns one task per stream that reads 39 B chunks (one encoded `QuicMessage` each) until EOF; decode failure resets the stream via `recv.stop(0)` (one bad stream doesn't kill the connection); `t2.send().await` honours backpressure |
|
||||
| T3 demux (bi streams ↔ ECS) | Done — `accept_bi_streams` + `read_one_bi_stream` in [substrate/src/transport/server.rs](substrate/src/transport/server.rs); reads a 39 B command, ships `T3Inbound { command, reply: oneshot::Sender }` to the ECS, awaits the reply, writes a 39 B ack and finishes. If the ECS drops the oneshot without replying (no handler answered), `send.reset(0)` gives the client a clean signal instead of a half-open stream. `handle_incoming` joins all three readers on close |
|
||||
| TLS / self-signed cert | Done (M1) — `certs/server.{crt,key}` via `make certs`, gitignored. PEM loader in [substrate/src/transport/server.rs:15](substrate/src/transport/server.rs#L15); rustls `aws-lc-rs` default provider installed in [substrate/src/main.rs](substrate/src/main.rs) |
|
||||
| Wire codec for `QuicMessage` (39 B fixed LE, incl. `sensor_type: u8`) | Done — [substrate/src/transport/mod.rs](substrate/src/transport/mod.rs); 5 unit tests passing. `SensorType` enum: `Generic / Temperature / Humidity / Pressure / Voltage / Current` |
|
||||
| `tracing-subscriber` init w/ `RUST_LOG` | Done (M1) — [substrate/src/main.rs:8-12](substrate/src/main.rs#L8-L12) |
|
||||
| ECS components (`RawSensorData`, `SmoothedValue`) + 5 systems (Ingest/Sim/Export/FaultInjection/Diagnostics) | Done — entities = `(DeviceId, SensorId, SensorTypeTag, RawSensorData, SmoothedValue, Asset)` per (device, sensor); `SensorRegistry` upserts via `HashMap<(Uuid, u16), Entity>` in [substrate/src/world.rs](substrate/src/world.rs). `IngestSystem` drains all three tiers; T3 ack preserves command's `sensor_type` and returns the device's most recent `raw_value`. `SimulationSystem` maintains a 16-sample rolling mean per entity and emits `substrate_threshold_crossings_total{type, direction}` when the smoothed mean crosses a per-type threshold (`Changed<RawSensorData>` query so cost scales with ingress, not fleet size). `ExportSystem` samples `substrate_{entities,channel_depth,channel_capacity,rss_bytes}` + `sensor_aggregate{type, stat}` once per second. `FaultInjection` is still a stub awaiting M6. `Diagnostics` logs `tick_hz` once per second |
|
||||
| Schedule rate-gating | Done (M4) — `MinimalPlugins.set(ScheduleRunnerPlugin::run_loop(1/tick_rate_hz))` in [substrate/src/main.rs](substrate/src/main.rs); replaces the default busy-loop with the configured period |
|
||||
| Prometheus exporter + Grafana dashboards | Done (M5) — `ObservabilityPlugin` in [substrate/src/observability.rs](substrate/src/observability.rs) installs `metrics-exporter-prometheus` on the existing tokio runtime. **Runtime surface** (paper §Evaluation): counters `substrate_received_total{tier}`, `dropped_total{tier=t1}`, `decode_errors_total{tier}`, `t3_no_handler_total`; latency histograms `substrate_latency_us{tier}`; gauges `substrate_tick_hz`, `substrate_entities`, `substrate_channel_depth{tier}`, `substrate_channel_capacity{tier}`, `substrate_rss_bytes`. **Sensor data surface** (operator dashboard): per-type aggregates `sensor_aggregate{type, stat=count|mean|min|max}` computed once per second over the live world, cardinality bounded by `\|SensorType\| × 4` so it scales to thousands of sensors. Two dashboards: [dashboards/runtime.json](dashboards/runtime.json) and [dashboards/sensors.json](dashboards/sensors.json) (thermometer/gauge/stat panels per type) |
|
||||
| Simulator (Quinn client + sensor generators) | `SimulatorClient` lib in [simulator/src/client.rs](simulator/src/client.rs) — connects, trusts the substrate's PEM cert via custom `ServerCertVerifier` (sidesteps `CaUsedAsEndEntity`); `send_datagram(QuicMessage)` for T1, `send_uni_stream(&[QuicMessage])` for T2, `request(&QuicMessage) -> QuicMessage` for T3. CLI driver in [simulator/src/main.rs](simulator/src/main.rs) with clap flags (`--addr`, `--rate-hz`, `--t2-rate-hz`, `--t3-rate-hz`, `--t3-timeout-ms`, `--count`, `--devices`, `--sensor-id`, `--sensor-type`, `--profile`, `--cert`, `--server-name`); parallel T1+T2+T3 emitters, per-(device,sensor) sequence counters, type-appropriate waveform generators (sin/cos curves centred on realistic sensor ranges), 1-Hz combined progress logs, Ctrl-C drain. `--profile industrial` fans out to 5 sensors per device (Temperature/Humidity/Pressure/Voltage/Current). Bevy-driven sensor generator still pending |
|
||||
| End-to-end test harness | Six integration tests across [simulator/tests/end_to_end_t1.rs](simulator/tests/end_to_end_t1.rs), [simulator/tests/end_to_end_t2.rs](simulator/tests/end_to_end_t2.rs), [simulator/tests/end_to_end_t3.rs](simulator/tests/end_to_end_t3.rs): T1 single-datagram round-trip + 32-msg burst order; T2 single-stream order-preservation + 4-stream concurrent per-device ordering; T3 round-trip with fake-ECS handler + no-handler stream-reset. Each test calls `bind_endpoint` + `accept_loop` in-process with channels owned by the test |
|
||||
| `config.toml` at repo root | Done (M1) — [config.toml](config.toml); loaded by [substrate/src/main.rs:9](substrate/src/main.rs#L9) |
|
||||
| Benchmark harness (sweep + CSV writer) | Missing |
|
||||
| CM5 cross-compile / deploy | Wired in [Makefile:30](Makefile#L30); not exercised |
|
||||
|
||||
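`cargo run -p substrate` boots, prints the loaded config, brings up the Quinn listener, and then runs the ECS schedule at the configured `tick_rate_hz`. The schedule is rate-gated via `ScheduleRunnerPlugin::run_loop` since M4 (it no longer busy-loops); see "Known deferrals" for how closely the observed tick rate tracks the target.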
|
||||
|
||||
## Roadmap
|
||||
|
||||
Each milestone has one verification gate. Update Status here as we go.
|
||||
|
||||
- **M1 — Wire codec & root config.** ✅ Done 2026-05-04. Hand-rolled little-endian codec on `QuicMessage` (38 B fixed: 16 UUID + 2 stream_id + 8 f64 + 8 ts_us + 4 seq) with roundtrip + layout + length-error tests; `config.toml` at repo root; dev TLS via `make certs`; structured `tracing-subscriber` init reads `RUST_LOG` (default `info`).
|
||||
- **M2 — Quinn server + self-signed TLS.** ✅ Done 2026-05-06. Listener up under `ServerState::Starting/Started`; type-system tier semantics + T3 oneshot ack protocol; per-connection `handle_incoming` orchestrator joining T1 datagram, T2 uni-stream, and T3 bi-stream readers. T1 has dropped/decoded counters; T2 resets a stream on decode failure without killing the connection; T3 ships `T3Inbound { command, reply }` to the ECS and resets the stream when no handler answers. End-to-end coverage: 6 integration tests in [simulator/tests/](simulator/tests/) plus 4 codec unit tests, all green.
|
||||
- **M3 — Simulator client.** Replace [simulator/src/main.rs](simulator/src/main.rs) with a Bevy app: Quinn client, N synthetic devices, configurable per-tier rates. *Verify:* end-to-end loopback drains messages on all three tiers. **Status (2026-05-05):** simulator made into a lib + bin; `SimulatorClient::{connect,send_datagram,close}` plus a manual smoke runner in `simulator/src/main.rs`. Two integration tests in `simulator/tests/end_to_end_t1.rs` exercise the full T1 path against an in-process substrate. Bevy-driven generator + T2/T3 helpers + load profiles still pending.
|
||||
- **M4 — ECS world.** ✅ Done. `Asset` + `DeviceId` + `SensorId` + `SensorTypeTag` + `RawSensorData` + `SmoothedValue` components in [substrate/src/world.rs](substrate/src/world.rs); `SensorRegistry` resource for O(1) `(Uuid, u16) → Entity`. `IngestSystem` drains all three tiers (T1 batched, T2/T3 fully); T3 handler returns the latest sensor value as ack. `SimulationSystem` runs a per-entity 16-sample rolling mean and emits `substrate_threshold_crossings_total{type, direction}` on per-type threshold crossings — gives the ECS observable digital-twin work, not just write-through ingest. `ExportSystem` samples `substrate_{entities,channel_depth,channel_capacity,rss_bytes}` + `sensor_aggregate{type, stat}` once per second. `FaultInjection` still a stub (M6). `DiagnosticsSystem` logs tick rate once per second. Schedule rate-gated via `ScheduleRunnerPlugin::run_loop(1/tick_rate_hz)`. 8 unit tests passing (entity create, in-place update, T3 ack, SmoothedValue push/window/non-finite/full-roll, threshold-crossing transition).
|
||||
- **M5 — Observability (VictoriaMetrics + Grafana).** ✅ Done. Wire format extended to carry `sensor_type: u8` (38 → 39 B, decoded into `SensorType` enum). Two metric surfaces over `metrics-exporter-prometheus`:
|
||||
- **Runtime** (paper §Evaluation): `substrate_received_total{tier}`, `dropped_total{tier=t1}`, `decode_errors_total{tier}`, `t3_no_handler_total`, `latency_us{tier}` histograms, `tick_hz` / `entities` / `channel_depth{tier}` / `rss_bytes` gauges.
|
||||
- **Sensor data** (operator surface): `sensor_aggregate{type, stat=count|mean|min|max}` aggregated per second across the live ECS world. Cardinality bounded to `\|SensorType\| × 4` series independent of physical sensor count.
|
||||
- Dashboards: [dashboards/runtime.json](dashboards/runtime.json) + [dashboards/sensors.json](dashboards/sensors.json).
|
||||
- Verified: `--profile industrial --devices 2 --count 200` yields 10 entities and all 5 type aggregates with realistic values (T=20.5°C, RH=51%, P=1018 hPa, V=230.2 V, I=12 A).
|
||||
- **M6 — Benchmark harness.** Sweep `entity_count ∈ {10k, 50k, 100k, 200k}` × `loss_rate ∈ {0%, 1%, 5%}` with 2k warmup + 5k measurement ticks. Loss via `tc netem` or in-app injection. Writes `data/loopback/final_table.csv`. *Verify:* one full sweep on M4 Max produces a CSV the Quarto figures consume.
|
||||
- **M7 — CM5 cross-compile & deploy.** Exercise [Makefile:30](Makefile#L30) (`build-cm5`, `deploy-cm5`); set real `CM5_HOST`. *Verify:* binary runs on CM5 with a feed from M4 Max over 1 Gbps Ethernet.
|
||||
- **M8 — Two-machine run + paper render.** Sweep with simulator on M4 Max → substrate on CM5; populate `data/two_machine/final_table.csv`; `make render` produces a PDF. **Update §Evaluation prose to reflect actual numbers.** Current paper figures (241 Hz, 64 µs / 15.8 ms P99, 2.6 µs jitter, 1.02 MB/1k, R²=0.9999) are **aspirational placeholders** — they may move and the conclusions may shift; that's expected.
|
||||
|
||||
## Conventions
|
||||
|
||||
- **Rust:** edition 2024; workspace at root with `simulator` + `substrate`; `opt-level=1` dev, `opt-level=3` for deps.
|
||||
- **Pinned crates:** Bevy 0.18, Quinn 0.11, rustls 0.23, Tokio 1 (full), figment 0.10 (toml + env), uuid 1.23 (v4), serde 1.
|
||||
- **Config:** `figment` chain — defaults in [substrate/src/config.rs:25](substrate/src/config.rs#L25) → `config.toml` → env `APP_*` (double-underscore for nesting, e.g. `APP_NETWORK__SERVER_PORT=9000`).
|
||||
- **Bevy:** headless — `MinimalPlugins` only; do not pull rendering plugins.
|
||||
- **Tokio↔Bevy:** keep the dedicated-thread + mpsc pattern in [substrate/src/transport/ecs.rs:49](substrate/src/transport/ecs.rs#L49); do not block the ECS schedule on async work.
|
||||
- **Paper:** Quarto + LNCS template ([paper/_extensions/template.tex](paper/_extensions/template.tex), [paper/_quarto.yml](paper/_quarto.yml)). **Never commit `llncs.cls` or `splncs04.bst`** — CTAN licensing; download per [README.md:25-34](README.md#L25-L34).
|
||||
- **Data:** raw CSVs under `data/` are committed; `*_processed.csv` is gitignored. Paper figures consume `data/loopback/final_table.csv` and `data/two_machine/final_table.csv`.
|
||||
- **Build artifacts:** `target/`, `paper/_output/`, `paper/figures/`, `paper/.quarto/`, `paper/index.tex` all gitignored.
|
||||
- **Errors:** `anyhow` (with `.context()`) for internal startup paths where the error type is uninteresting; `thiserror` for boundary types we want to match against (e.g. `WireError` in the codec).
|
||||
- **Warnings:** let real warnings show. No `#[allow(dead_code)]`, `_var` blanket suppression, or `PhantomData` shims to silence the compiler — warnings are honest TODO markers and disappear when the consuming code lands. See [feedback memory](../../.claude/projects/-Users-vplantevin-Projects-Research-quic-ecs-dt/memory/feedback_no_warning_hacks.md).
|
||||
|
||||
## Known deferrals
|
||||
|
||||
- **Channel ownership is per-host, not per-connection.** All connections share the same three mpsc channels. Fairness under N-device load relies on tokio scheduling. Acceptable for the "one ECS world per host" model the paper describes; revisit if many-device benchmarks show starvation.
|
||||
- **No graceful shutdown.** The `quic-runtime` thread is parked on `pending()`; spawned tasks (accept loop, per-conn demux) are orphaned at process exit. Fine for research runs; we'll need an `OnExit(Started)` (or a `Stopping` state) when M5 observability needs clean drain or M8 wants finalised CSV writes.
|
||||
- **Bind failure is fatal.** `OnEnter(Starting)` panics if `bind_endpoint` fails. A `ServerState::Failed` variant joins when we wire proper error surfacing.
|
||||
- **T3 ack semantics are minimal.** The current handler echoes the device's most recent `raw_value` with a server timestamp — adequate for "read sensor" commands, not for actuator-write semantics. A future iteration may introduce an `ActuatorState` component and a setpoint-apply path; for now T3 is best framed as "reliable read/query RPC" in the paper.
|
||||
- **`FaultInjectionSystem` is still empty.** Runs on schedule but does nothing. M6 fills it with rate-controlled in-app drop so loss sweeps don't depend on external `tc netem`.
|
||||
- **Schedule rate-gating is approximate.** `ScheduleRunnerPlugin::run_loop(period)` honours `period` as a minimum; observed `tick_hz` runs ~85% of target on macOS dev (target 60 → ~50). Should be tighter on the CM5; revisit if M6 sweeps depend on a steady tick.
|
||||
|
||||
## Run / verify
|
||||
|
||||
```bash
|
||||
make certs # generate certs/server.{crt,key} (ECDSA P-256, SAN: localhost/cm5.local/127.0.0.1/::1)
|
||||
make build # cargo build --release (native, depends on certs)
|
||||
make build-cm5 # aarch64 cross-build for the CM5 (depends on certs)
|
||||
make deploy-cm5 # scp to $CM5_HOST (set in env or override Makefile var)
|
||||
make render # build the paper PDF
|
||||
make preview # live-reload paper preview at :4848
|
||||
make clean # cargo clean + drop generated paper outputs
|
||||
```
|
||||
|
||||
`certs/` is gitignored; `make build` regenerates the dev cert if missing. From the repo root: `cargo run -p substrate` boots, prints the loaded `AppConfig`, and idles. `config.toml` and cert paths are resolved relative to the cwd — always launch from the repo root.
|
||||
|
||||
**Tests.** `cargo test --workspace` runs the unit tests in `substrate` (wire codec, ECS world) plus the end-to-end integration tests in [simulator/tests/](simulator/tests/). Each integration test calls `bind_endpoint` + `accept_loop` in-process on `127.0.0.1:0` (OS-assigned port), connects a `SimulatorClient` against it, and asserts what arrives on the test-owned receiver for the tier under test (T1 datagrams, T2 uni streams, T3 bi-stream commands and acks). All three wire paths are covered today; add a new `simulator/tests/end_to_end_*.rs` whenever a new wire path is introduced.
|
||||
|
||||
**Metrics scrape.** With `metrics_enabled = true` (default), the substrate exposes a Prometheus-format endpoint:
|
||||
|
||||
```bash
|
||||
curl http://127.0.0.1:9100/metrics
|
||||
```
|
||||
|
||||
A docker-compose stack under [monitoring/](monitoring/) brings up VictoriaMetrics + Grafana auto-provisioned: `make monitoring-up` then Grafana at <http://localhost:3000> (admin / admin), both dashboards under the `quic_ecs_dt` folder. The compose mounts [dashboards/](dashboards/) directly so any edit to the JSON files re-imports within 10 s.
|
||||
|
||||
Two Grafana dashboards under [dashboards/](dashboards/):
|
||||
|
||||
- [`runtime.json`](dashboards/runtime.json) — tick rate, RSS, per-tier received/dropped/latency, channel depth (paper §Evaluation surface).
|
||||
- [`sensors.json`](dashboards/sensors.json) — thermometer + gauges + stat panels per `SensorType`, driven by `sensor_aggregate{type, stat}` (operator-facing surface).
|
||||
|
||||
Both use the `${datasource}` template variable so you can point them at any Prometheus-compatible source.
|
||||
|
||||
**Manual two-process run.** From the repo root, in two shells:
|
||||
|
||||
```bash
|
||||
# shell 1 — server (use RUST_LOG=substrate=debug to see the per-conn summary)
|
||||
cargo run -p substrate
|
||||
|
||||
# shell 2 — client; --help shows all flags
|
||||
cargo run -p simulator -- --rate-hz 100 --count 0 --devices 4
|
||||
```
|
||||
|
||||
Simulator flags (see `cargo run -p simulator -- --help`): `--addr`, `--server-name`, `--cert`, `--rate-hz` (T1 datagram rate; `0` disables T1), `--t2-rate-hz` / `--t3-rate-hz` (per-tier event rate; `0` disables), `--t3-timeout-ms` (T3 ack wait, default `2000`), `--count` (T1 count; `0` = until Ctrl-C), `--devices`, `--sensor-id`, `--sensor-type` (one of `generic|temperature|humidity|pressure|voltage|current`), `--profile` (`single` or `industrial` — 5 sensors per device on ids 0..4 covering all types). The client logs a one-second `progress` line with `t1_sent`/`t2_sent`/`t3_sent`/`t3_timeouts`/per-tier observed Hz, and a final `simulator done` line with elapsed time on exit.
|
||||
|
||||
## Key references
|
||||
|
||||
- Prior self-citations: `plantevin2026ecs`, `plantevin2026quic` (both IEEE SWC 2026, "to appear").
|
||||
- QUIC: RFC 9000 (core), RFC 9221 (unreliable datagrams).
|
||||
- DT foundations: Tao et al. 2019; Grieves & Vickers 2017; Minerva et al. 2020.
|
||||
- ECS: Nystrom 2014, *Game Programming Patterns*.
|
||||
- Mixed-reliability transport: Peeck et al. (W2RP for DDS).
|
||||
- DT sync metrics: Çakır et al. 2023 (Twin Alignment Ratio); Bellavista et al. 2023 (ODTE).
|
||||
- Industrial QUIC/IIoT: Fernández et al. 2021; Boeding et al. 2025.
|
||||
- Full bibliography: [paper/references.bib](paper/references.bib).
|
||||
27
Cargo.toml
27
Cargo.toml
@@ -1,3 +1,30 @@
|
||||
[workspace]
|
||||
resolver = "3"
|
||||
members = ["simulator", "substrate"]
|
||||
|
||||
# Enable a small amount of optimization in the dev profile.
|
||||
[profile.dev]
|
||||
opt-level = 1
|
||||
|
||||
# Enable a large amount of optimization in the dev profile for dependencies.
|
||||
[profile.dev.package."*"]
|
||||
opt-level = 3
|
||||
|
||||
# Enable more optimization in the release profile at the cost of compile time.
|
||||
[profile.release]
|
||||
# Compile the entire crate as one unit.
|
||||
# Slows compile times, marginal improvements.
|
||||
codegen-units = 1
|
||||
# Do a second optimization pass over the entire program, including dependencies.
|
||||
# Slows compile times, marginal improvements.
|
||||
lto = "thin"
|
||||
|
||||
# Optimize for size in the wasm-release profile to reduce load times and bandwidth usage on web.
|
||||
[profile.wasm-release]
|
||||
# Default to release profile values.
|
||||
inherits = "release"
|
||||
# Optimize with size in mind (also try "z", sometimes it is better).
|
||||
# Slightly slows compile times, great improvements to file size and runtime performance.
|
||||
opt-level = "s"
|
||||
# Strip all debugging information from the binary to slightly reduce file size.
|
||||
strip = "debuginfo"
|
||||
|
||||
58
Makefile
58
Makefile
@@ -1,14 +1,19 @@
|
||||
# ============================================================
|
||||
# quic_ecs_dt — top-level Makefile
|
||||
# Targets:
|
||||
# make render — build the paper PDF
|
||||
# make preview — live-reload preview in browser
|
||||
# make build — cargo build --release (native)
|
||||
# make build-cm5 — cargo build --release (aarch64 cross)
|
||||
# make clean — remove generated outputs
|
||||
# make demo — one-shot: certs → build → VM+Grafana →
|
||||
# substrate → simulator (Ctrl-C cleans up)
|
||||
# make render — build the paper PDF
|
||||
# make preview — live-reload preview in browser
|
||||
# make build — cargo build --release (native)
|
||||
# make build-cm5 — cargo build --release (aarch64 cross)
|
||||
# make monitoring-up — start VictoriaMetrics + Grafana (docker)
|
||||
# make monitoring-down — stop them
|
||||
# make monitoring-logs — tail the monitoring stack
|
||||
# make clean — remove generated outputs
|
||||
# ============================================================
|
||||
|
||||
.PHONY: render preview build build-cm5 clean
|
||||
.PHONY: render preview build build-cm5 clean certs monitoring-up monitoring-down monitoring-logs demo
|
||||
|
||||
VENV := $(HOME)/.venv/quic_ecs
|
||||
PYTHON := $(VENV)/bin/python
|
||||
@@ -16,6 +21,22 @@ CM5_HOST ?= 192.168.1.x
|
||||
CM5_USER ?= pi
|
||||
CM5_BIN_DIR ?= /home/pi/quic_ecs_dt
|
||||
|
||||
# Self-signed dev TLS for the QUIC server (regenerate with `make certs`).
|
||||
# SAN covers loopback, ::1, and cm5.local for the two-machine setup.
|
||||
CERT_DIR := certs
|
||||
CERT_FILE := $(CERT_DIR)/server.crt
|
||||
KEY_FILE := $(CERT_DIR)/server.key
|
||||
|
||||
certs: $(CERT_FILE)
|
||||
|
||||
$(CERT_FILE):
|
||||
mkdir -p $(CERT_DIR)
|
||||
openssl req -x509 -newkey ec -pkeyopt ec_paramgen_curve:P-256 \
|
||||
-keyout $(KEY_FILE) -out $(CERT_FILE) \
|
||||
-days 3650 -nodes \
|
||||
-subj "/CN=localhost/O=quic_ecs_dt-dev/OU=substrate" \
|
||||
-addext "subjectAltName=DNS:localhost,DNS:cm5.local,IP:127.0.0.1,IP:::1"
|
||||
|
||||
# Paper
|
||||
render:
|
||||
cd paper && quarto render index.qmd
|
||||
@@ -23,11 +44,11 @@ render:
|
||||
preview:
|
||||
cd paper && quarto preview index.qmd --port 4848 --no-browser
|
||||
|
||||
# Rust build
|
||||
build:
|
||||
# Rust build (depends on dev cert so `cargo run` boots out of the box)
|
||||
build: $(CERT_FILE)
|
||||
cargo build --release
|
||||
|
||||
build-cm5:
|
||||
build-cm5: $(CERT_FILE)
|
||||
CARGO_TARGET_AARCH64_UNKNOWN_LINUX_GNU_LINKER=aarch64-linux-gnu-gcc \
|
||||
cargo build --release --target aarch64-unknown-linux-gnu
|
||||
|
||||
@@ -37,6 +58,25 @@ deploy-cm5: build-cm5
|
||||
scp target/aarch64-unknown-linux-gnu/release/quic_ecs_dt \
|
||||
$(CM5_USER)@$(CM5_HOST):$(CM5_BIN_DIR)/
|
||||
|
||||
# One-shot demo runner — see scripts/demo.sh
|
||||
demo:
|
||||
@./scripts/demo.sh
|
||||
|
||||
# Monitoring (VictoriaMetrics + Grafana, auto-provisioned)
|
||||
monitoring-up:
|
||||
docker compose -f monitoring/docker-compose.yml up -d
|
||||
@echo ""
|
||||
@echo "Grafana: http://localhost:3000 (admin / admin, or anonymous Admin)"
|
||||
@echo " • runtime dashboard: quic_ecs_dt → quic_ecs_dt — substrate runtime"
|
||||
@echo " • sensors dashboard: quic_ecs_dt → quic_ecs_dt — sensors"
|
||||
@echo "VictoriaMetrics: http://localhost:8428"
|
||||
|
||||
monitoring-down:
|
||||
docker compose -f monitoring/docker-compose.yml down
|
||||
|
||||
monitoring-logs:
|
||||
docker compose -f monitoring/docker-compose.yml logs -f
|
||||
|
||||
# Clean
|
||||
clean:
|
||||
cargo clean
|
||||
|
||||
22
config.toml
Normal file
22
config.toml
Normal file
@@ -0,0 +1,22 @@
|
||||
# quic_ecs_dt — substrate runtime config
|
||||
#
|
||||
# Resolution order (figment chain in substrate/src/config.rs):
|
||||
# 1. compile-time defaults
|
||||
# 2. this file
|
||||
# 3. APP_* env vars (e.g. APP_NETWORK__SERVER_PORT=9001)
|
||||
#
|
||||
# All paths are resolved relative to the cwd at launch — run from the repo root.
|
||||
|
||||
[network]
|
||||
server_port = 9000
|
||||
server_interface = "0.0.0.0"
|
||||
server_cert = "certs/server.crt"
|
||||
server_key = "certs/server.key"
|
||||
|
||||
[simulation]
|
||||
tick_rate_hz = 60
|
||||
max_entities = 10000
|
||||
|
||||
[observability]
|
||||
metrics_enabled = true
|
||||
metrics_listen = "0.0.0.0:9100"
|
||||
148
dashboards/runtime.json
Normal file
148
dashboards/runtime.json
Normal file
@@ -0,0 +1,148 @@
|
||||
{
|
||||
"title": "quic_ecs_dt — substrate runtime",
|
||||
"uid": "quic-ecs-dt-runtime",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"timezone": "",
|
||||
"refresh": "5s",
|
||||
"time": { "from": "now-15m", "to": "now" },
|
||||
"tags": ["quic_ecs_dt", "ucami2026", "substrate"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "datasource",
|
||||
"label": "Data source",
|
||||
"type": "datasource",
|
||||
"query": "prometheus",
|
||||
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
|
||||
"hide": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Tick rate (Hz)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "hertz", "decimals": 1 } },
|
||||
"targets": [
|
||||
{ "expr": "substrate_tick_hz", "refId": "A", "legendFormat": "tick_hz" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Entities",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 6, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short" } },
|
||||
"targets": [
|
||||
{ "expr": "substrate_entities", "refId": "A", "legendFormat": "entities" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "RSS",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "bytes", "decimals": 1 } },
|
||||
"targets": [
|
||||
{ "expr": "substrate_rss_bytes", "refId": "A", "legendFormat": "rss" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "T3 — no handler events (cumulative)",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 4, "w": 6, "x": 18, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short" } },
|
||||
"targets": [
|
||||
{ "expr": "substrate_t3_no_handler_total", "refId": "A", "legendFormat": "no_handler" }
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Per-tier receive rate",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 4 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "cps" } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(substrate_received_total[1m])",
|
||||
"refId": "A",
|
||||
"legendFormat": "received {{tier}}"
|
||||
},
|
||||
{
|
||||
"expr": "rate(substrate_dropped_total[1m])",
|
||||
"refId": "B",
|
||||
"legendFormat": "dropped {{tier}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Per-tier latency (µs)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 4 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "µs", "decimals": 0 } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "substrate_latency_us{quantile=\"0.5\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "p50 {{tier}}"
|
||||
},
|
||||
{
|
||||
"expr": "substrate_latency_us{quantile=\"0.99\"}",
|
||||
"refId": "B",
|
||||
"legendFormat": "p99 {{tier}}"
|
||||
},
|
||||
{
|
||||
"expr": "substrate_latency_us{quantile=\"0.999\"}",
|
||||
"refId": "C",
|
||||
"legendFormat": "p999 {{tier}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "Channel depth (vs. capacity)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 12 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short" } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "substrate_channel_depth",
|
||||
"refId": "A",
|
||||
"legendFormat": "depth {{tier}}"
|
||||
},
|
||||
{
|
||||
"expr": "substrate_channel_capacity",
|
||||
"refId": "B",
|
||||
"legendFormat": "capacity {{tier}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "Decode errors (rate)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 12 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "cps" } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "rate(substrate_decode_errors_total[1m])",
|
||||
"refId": "A",
|
||||
"legendFormat": "decode_errors {{tier}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
272
dashboards/sensors.json
Normal file
272
dashboards/sensors.json
Normal file
@@ -0,0 +1,272 @@
|
||||
{
|
||||
"title": "quic_ecs_dt — sensors",
|
||||
"uid": "quic-ecs-dt-sensors",
|
||||
"schemaVersion": 39,
|
||||
"version": 1,
|
||||
"timezone": "",
|
||||
"refresh": "1s",
|
||||
"time": { "from": "now-5m", "to": "now" },
|
||||
"tags": ["quic_ecs_dt", "ucami2026", "sensors"],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"name": "datasource",
|
||||
"label": "Data source",
|
||||
"type": "datasource",
|
||||
"query": "prometheus",
|
||||
"current": { "selected": false, "text": "Prometheus", "value": "Prometheus" },
|
||||
"hide": 0
|
||||
}
|
||||
]
|
||||
},
|
||||
"panels": [
|
||||
{
|
||||
"id": 1,
|
||||
"title": "Temperature — mean (thermometer)",
|
||||
"type": "gauge",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 0, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"options": {
|
||||
"showThresholdLabels": false,
|
||||
"showThresholdMarkers": true,
|
||||
"orientation": "vertical"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "celsius",
|
||||
"decimals": 1,
|
||||
"min": -20,
|
||||
"max": 80,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "blue", "value": null },
|
||||
{ "color": "green", "value": 10 },
|
||||
{ "color": "yellow", "value": 30 },
|
||||
{ "color": "orange", "value": 50 },
|
||||
{ "color": "red", "value": 70 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"temperature\", stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "T mean"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"title": "Humidity — mean",
|
||||
"type": "gauge",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 6, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"options": { "showThresholdMarkers": true, "orientation": "vertical" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "percent",
|
||||
"decimals": 1,
|
||||
"min": 0,
|
||||
"max": 100,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "orange", "value": null },
|
||||
{ "color": "green", "value": 30 },
|
||||
{ "color": "blue", "value": 70 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"humidity\", stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "RH mean"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"title": "Pressure — mean",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 12, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"options": { "graphMode": "area", "colorMode": "value" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "pressurehpa",
|
||||
"decimals": 1,
|
||||
"min": 980,
|
||||
"max": 1040,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "blue", "value": null },
|
||||
{ "color": "green", "value": 1000 },
|
||||
{ "color": "yellow", "value": 1025 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"pressure\", stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "P mean"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"title": "Voltage — mean",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 18, "y": 0 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"options": { "graphMode": "area", "colorMode": "value" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "volt",
|
||||
"decimals": 2,
|
||||
"min": 220,
|
||||
"max": 240,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "yellow", "value": null },
|
||||
{ "color": "green", "value": 225 },
|
||||
{ "color": "yellow", "value": 235 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"voltage\", stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "V mean"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"title": "Current — mean",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 0, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"options": { "graphMode": "area", "colorMode": "value" },
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"unit": "amp",
|
||||
"decimals": 2,
|
||||
"min": 0,
|
||||
"max": 30,
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{ "color": "green", "value": null },
|
||||
{ "color": "yellow", "value": 20 },
|
||||
{ "color": "red", "value": 25 }
|
||||
]
|
||||
}
|
||||
}
|
||||
},
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"current\", stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "I mean"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"title": "Sensor count by type",
|
||||
"type": "stat",
|
||||
"gridPos": { "h": 8, "w": 6, "x": 6, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short" } },
|
||||
"options": { "colorMode": "value", "graphMode": "none" },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{stat=\"count\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{type}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"title": "Temperature — min / mean / max over time",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "celsius", "decimals": 1 } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"temperature\", stat=\"min\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "min"
|
||||
},
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"temperature\", stat=\"mean\"}",
|
||||
"refId": "B",
|
||||
"legendFormat": "mean"
|
||||
},
|
||||
{
|
||||
"expr": "sensor_aggregate{type=\"temperature\", stat=\"max\"}",
|
||||
"refId": "C",
|
||||
"legendFormat": "max"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"title": "All sensor types — mean over time",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 24, "x": 0, "y": 16 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short", "decimals": 2 } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "sensor_aggregate{stat=\"mean\"}",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{type}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"title": "Threshold crossings (cumulative) — per type / direction",
|
||||
"description": "Each time a sensor's smoothed mean crosses its per-type threshold, simulation_system increments the counter. up = rising through threshold; down = falling through. The counter being non-zero is the load-bearing evidence that the ECS runs the digital-twin transform — not just write-through ingest.",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 0, "y": 24 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "short" } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "substrate_threshold_crossings_total",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{type}} {{direction}}"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"title": "Threshold crossings — rate (events/min)",
|
||||
"type": "timeseries",
|
||||
"gridPos": { "h": 8, "w": 12, "x": 12, "y": 24 },
|
||||
"datasource": { "type": "prometheus", "uid": "${datasource}" },
|
||||
"fieldConfig": { "defaults": { "unit": "cpm" } },
|
||||
"targets": [
|
||||
{
|
||||
"expr": "60 * rate(substrate_threshold_crossings_total[1m])",
|
||||
"refId": "A",
|
||||
"legendFormat": "{{type}} {{direction}}"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
2
data/local/cross_tier.csv
Normal file
2
data/local/cross_tier.csv
Normal file
@@ -0,0 +1,2 @@
|
||||
rate_hz,t3_rate_hz,devices,tick_rate_hz,window_s,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t3_received,t3_no_handler,t3_p50_us,t3_p99_us,t3_p999_us,tick_hz,rss_mb,channel_depth_max
|
||||
100,100,100,0,25,2646,0,118.99720565324648,202.0065277946852,245.99224556720532,2646,0,120.98904580793433,199.99652925270829,238.0069829199846,15833.3,28.0,0
|
||||
|
10
data/local/scaling.csv
Normal file
10
data/local/scaling.csv
Normal file
@@ -0,0 +1,10 @@
|
||||
rate_hz,devices,tick_rate_hz,window_s,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,tick_hz,rss_mb,channel_depth_max
|
||||
100,100,0,25,2715,0,10287.656173771804,20683.6751522136,20899.90783549675,52.1,28.2,1
|
||||
500,100,0,25,13595,0,9945.744255905174,20441.042134756957,20879.018374063122,51.0,29.8,1
|
||||
1000,100,0,25,27324,0,9858.605678238058,20371.66060670275,20862.321838812768,51.6,30.3,2
|
||||
5000,100,0,25,136305,0,9700.182954474827,20144.770960915914,20803.98904149668,52.2,31.4,10
|
||||
10000,100,0,25,273443,0,9680.801975940145,20164.925807687836,20874.842987926906,51.9,31.9,10
|
||||
25000,100,0,25,685150,0,9466.362697231909,19813.128013911944,20766.575543347255,51.6,33.2,50
|
||||
50000,100,0,25,1371659,4515,9349.704574533685,19635.60989099387,20477.86914508828,51.5,33.3,100
|
||||
100000,100,0,25,2740689,1266351,13177.946960597013,20502.4573381096,28455.593524841766,53.0,35.2,200
|
||||
250000,100,0,25,6826035,5353528,16234.599694958577,20696.089081152582,22046.299162128806,53.2,35.6,747
|
||||
|
56
monitoring/docker-compose.yml
Normal file
56
monitoring/docker-compose.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
# VictoriaMetrics + Grafana for `quic_ecs_dt` local demos.
|
||||
#
|
||||
# Run from the repo root (or via `make monitoring-up`). The substrate runs on
|
||||
# the host and exposes /metrics on :9100; VM scrapes it via
|
||||
# `host.docker.internal`, which works on Docker Desktop (mac/Windows) and on
|
||||
# recent Docker Engine on Linux thanks to the `extra_hosts` mapping below.
|
||||
#
|
||||
# Grafana auto-provisions:
|
||||
# • a Prometheus-typed data source pointing at VM
|
||||
# • both dashboards from ../dashboards (runtime + sensors)
|
||||
#
|
||||
# Endpoints:
|
||||
# • Grafana http://localhost:3000 (anonymous Admin)
|
||||
# • VictoriaMetrics http://localhost:8428
|
||||
# • Substrate /metrics http://localhost:9100/metrics (on the host)
|
||||
|
||||
services:
|
||||
victoria-metrics:
|
||||
image: victoriametrics/victoria-metrics:v1.115.0
|
||||
container_name: quic_ecs_dt_vm
|
||||
ports:
|
||||
- "8428:8428"
|
||||
command:
|
||||
- "-promscrape.config=/etc/vm/scrape.yml"
|
||||
- "-retentionPeriod=1d"
|
||||
- "-storageDataPath=/storage"
|
||||
volumes:
|
||||
- ./victoria-metrics/scrape.yml:/etc/vm/scrape.yml:ro
|
||||
- vm-data:/storage
|
||||
extra_hosts:
|
||||
- "host.docker.internal:host-gateway"
|
||||
restart: unless-stopped
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:11.4.0
|
||||
container_name: quic_ecs_dt_grafana
|
||||
ports:
|
||||
- "3000:3000"
|
||||
environment:
|
||||
- GF_AUTH_ANONYMOUS_ENABLED=true
|
||||
- GF_AUTH_ANONYMOUS_ORG_ROLE=Admin
|
||||
- GF_AUTH_DISABLE_LOGIN_FORM=false
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
- GF_USERS_DEFAULT_THEME=dark
|
||||
volumes:
|
||||
- ./grafana/provisioning:/etc/grafana/provisioning:ro
|
||||
- ../dashboards:/var/lib/grafana/dashboards:ro
|
||||
- grafana-data:/var/lib/grafana
|
||||
depends_on:
|
||||
- victoria-metrics
|
||||
restart: unless-stopped
|
||||
|
||||
volumes:
|
||||
vm-data:
|
||||
grafana-data:
|
||||
13
monitoring/grafana/provisioning/dashboards/provider.yml
Normal file
13
monitoring/grafana/provisioning/dashboards/provider.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: 1
|
||||
|
||||
providers:
|
||||
- name: quic_ecs_dt
|
||||
orgId: 1
|
||||
folder: "quic_ecs_dt"
|
||||
type: file
|
||||
disableDeletion: false
|
||||
updateIntervalSeconds: 10
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
foldersFromFilesStructure: false
|
||||
13
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
13
monitoring/grafana/provisioning/datasources/datasource.yml
Normal file
@@ -0,0 +1,13 @@
|
||||
apiVersion: 1
|
||||
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
uid: prometheus
|
||||
access: proxy
|
||||
url: http://victoria-metrics:8428
|
||||
isDefault: true
|
||||
editable: true
|
||||
jsonData:
|
||||
timeInterval: "1s"
|
||||
httpMethod: "POST"
|
||||
14
monitoring/victoria-metrics/scrape.yml
Normal file
14
monitoring/victoria-metrics/scrape.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
# VictoriaMetrics scrape config — uses Prometheus-compatible syntax.
|
||||
# 1-second interval gives Grafana something to redraw every refresh tick.
|
||||
|
||||
global:
|
||||
scrape_interval: 1s
|
||||
scrape_timeout: 800ms
|
||||
|
||||
scrape_configs:
|
||||
- job_name: substrate
|
||||
static_configs:
|
||||
- targets:
|
||||
- "host.docker.internal:9100"
|
||||
labels:
|
||||
instance: "substrate-local"
|
||||
248
scripts/bench-scaling.sh
Executable file
248
scripts/bench-scaling.sh
Executable file
@@ -0,0 +1,248 @@
|
||||
#!/usr/bin/env bash
|
||||
# scripts/bench-scaling.sh — M6-lite: sweep T1 rate at fixed entity count,
|
||||
# record tick_hz / P99 latency / drops / RSS into a CSV the paper can plot.
|
||||
#
|
||||
# Two modes:
|
||||
#
|
||||
# 1. Scaling sweep (default). Just T1 traffic. Tells you the substrate's
|
||||
# throughput ceiling on this host and where the lossy-tier kicks in.
|
||||
# Output: data/local/scaling.csv
|
||||
#
|
||||
# 2. Cross-tier isolation. Set T3_RATE_HZ=<N> to run a constant T3 baseline
|
||||
# in parallel with the T1 sweep. The CSV gains substrate-side T3 latency
|
||||
# columns. If T3 P99 stays flat as T1 climbs orders of magnitude, the
|
||||
# paper's composition thesis is supported.
|
||||
# Output: data/local/cross_tier.csv
|
||||
#
|
||||
# Holds:
|
||||
# - tick_rate_hz $TICK_RATE_HZ (default 1000; set 0 for busy-loop)
|
||||
# - device count $DEVICES (default 100, single-sensor profile)
|
||||
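# - window $WINDOW_S (default 20s steady-state per rate)
# - warmup $WARMUP_S (default 3s; elapses before each measurement window starts)
# - output CSV $OUT_CSV (overrides the per-mode default path listed above)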
|
||||
# - T3 baseline $T3_RATE_HZ (default 0 = disabled)
|
||||
# - T3 timeout $T3_TIMEOUT_MS (default 2000ms)
|
||||
# - build profile $BUILD (release | debug; default release)
|
||||
#
|
||||
# Sweeps:
|
||||
# T1 rate over the positional arguments, or these defaults:
|
||||
# 100 500 1000 5000 10000 25000 50000
|
||||
#
|
||||
# Examples:
|
||||
# # Pure T1 scaling sweep.
|
||||
# ./scripts/bench-scaling.sh
|
||||
#
|
||||
# # Cross-tier isolation: hold T3 at 100 Hz, sweep T1.
|
||||
# T3_RATE_HZ=100 ./scripts/bench-scaling.sh
|
||||
#
|
||||
# # Custom sweep, longer windows.
|
||||
# DEVICES=1000 WINDOW_S=30 ./scripts/bench-scaling.sh 1000 5000 20000
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
cd "$ROOT"
|
||||
|
||||
# --- knobs ---
|
||||
DEVICES="${DEVICES:-100}"
|
||||
TICK_RATE_HZ="${TICK_RATE_HZ:-1000}"
|
||||
WARMUP_S="${WARMUP_S:-3}"
|
||||
WINDOW_S="${WINDOW_S:-20}"
|
||||
T3_RATE_HZ="${T3_RATE_HZ:-0}"
|
||||
T3_TIMEOUT_MS="${T3_TIMEOUT_MS:-2000}"
|
||||
BUILD="${BUILD:-release}"
|
||||
RATES=("${@}")
|
||||
if [[ ${#RATES[@]} -eq 0 ]]; then
|
||||
RATES=(100 500 1000 5000 10000 25000 50000)
|
||||
fi
|
||||
|
||||
# Pick the default output path based on mode so the two CSVs don't clobber each other.
|
||||
CROSS_TIER=$(awk -v r="$T3_RATE_HZ" 'BEGIN { print (r+0 > 0) ? "1" : "0" }')
|
||||
if [[ "$CROSS_TIER" == "1" ]]; then
|
||||
OUT_CSV="${OUT_CSV:-data/local/cross_tier.csv}"
|
||||
else
|
||||
OUT_CSV="${OUT_CSV:-data/local/scaling.csv}"
|
||||
fi
|
||||
|
||||
# --- pretty logging ---
|
||||
if [[ -t 1 ]]; then
|
||||
BOLD=$'\033[1m'; DIM=$'\033[2m'; GREEN=$'\033[32m'; RED=$'\033[31m'; RESET=$'\033[0m'
|
||||
else BOLD=; DIM=; GREEN=; RED=; RESET=; fi
|
||||
step() { printf '%s» %s%s\n' "$BOLD" "$1" "$RESET"; }
|
||||
ok() { printf '%s ✓ %s%s\n' "$GREEN" "$1" "$RESET"; }
|
||||
fail() { printf '%s ✗ %s%s\n' "$RED" "$1" "$RESET"; }
|
||||
|
||||
# --- prereqs ---
|
||||
for cmd in cargo curl lsof awk; do
|
||||
command -v "$cmd" >/dev/null || { fail "missing: $cmd"; exit 1; }
|
||||
done
|
||||
# Refuse to start while either port is already bound.
# UDP sockets never report a LISTEN state, so grepping lsof output for
# "LISTEN" can only ever match TCP. Probe UDP and listening-TCP separately and
# rely on lsof's exit status instead (0 = at least one matching socket found).
for port in 9000 9100; do
  if lsof -nP -iUDP:"$port" >/dev/null 2>&1 \
     || lsof -nP -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1; then
    fail "port $port in use — kill the running substrate first"
    exit 1
  fi
done
|
||||
[[ -f certs/server.crt ]] || make certs >/dev/null
|
||||
|
||||
# --- build ---
|
||||
step "Building ($BUILD)"
|
||||
if [[ "$BUILD" == "release" ]]; then
|
||||
cargo build --release -p substrate -p simulator >/dev/null
|
||||
SUBSTRATE="$ROOT/target/release/substrate"
|
||||
SIMULATOR="$ROOT/target/release/simulator"
|
||||
else
|
||||
cargo build -p substrate -p simulator >/dev/null
|
||||
SUBSTRATE="$ROOT/target/debug/substrate"
|
||||
SIMULATOR="$ROOT/target/debug/simulator"
|
||||
fi
|
||||
|
||||
# --- start substrate with high tick rate ---
LOG_DIR="/tmp/quic_ecs_dt_bench"
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
: > "$SUB_LOG"

# Terminate the simulator and substrate if they are running, then reap them.
# Safe to call before either PID variable is set, and safe to call twice.
cleanup() {
  [[ -n "${SIM_PID:-}" ]] && kill -TERM "$SIM_PID" 2>/dev/null || true
  [[ -n "${SUBSTRATE_PID:-}" ]] && kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
  wait 2>/dev/null || true
}
# Install the traps BEFORE launching anything, so a failed start or a Ctrl-C
# during the readiness wait cannot leave the background substrate orphaned.
# INT/TERM must exit explicitly: a handler that merely returns lets the script
# resume after the interrupted command. The EXIT trap then runs cleanup once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

step "Starting substrate (tick_rate_hz=$TICK_RATE_HZ, log: $SUB_LOG)"
APP_SIMULATION__TICK_RATE_HZ="$TICK_RATE_HZ" RUST_LOG=warn "$SUBSTRATE" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

# Wait for /metrics: up to 40 polls, 0.25 s apart.
substrate_ready=0
for _ in $(seq 1 40); do
  if curl -sf http://localhost:9100/metrics >/dev/null 2>&1; then
    substrate_ready=1
    break
  fi
  # Stop polling early if the process has already exited.
  if ! kill -0 "$SUBSTRATE_PID" 2>/dev/null; then
    break
  fi
  sleep 0.25
done
if [[ "$substrate_ready" -eq 1 ]]; then
  ok "substrate /metrics ready"
else
  fail "substrate didn't start"
  tail -20 "$SUB_LOG"
  exit 1
fi
|
||||
|
||||
# --- helpers to scrape a single value out of /metrics text ---
|
||||
snapshot_to() {
|
||||
curl -s http://localhost:9100/metrics > "$1"
|
||||
}
|
||||
get_value() {
|
||||
# $1: snapshot file, $2: full metric name (regex-anchored at line start)
|
||||
awk -v pat="$2" '$0 ~ "^" pat " " { print $NF; exit }' "$1"
|
||||
}
|
||||
|
||||
# --- sweep ---
|
||||
mkdir -p "$(dirname "$OUT_CSV")"
|
||||
echo "rate_hz,t3_rate_hz,devices,tick_rate_hz,window_s,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t3_received,t3_no_handler,t3_p50_us,t3_p99_us,t3_p999_us,tick_hz,rss_mb,channel_depth_max" > "$OUT_CSV"
|
||||
|
||||
if [[ "$CROSS_TIER" == "1" ]]; then
|
||||
step "Sweeping T1 + holding T3 at ${T3_RATE_HZ} Hz (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
|
||||
else
|
||||
step "Sweeping T1 rate (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
|
||||
fi
|
||||
printf '%s' "$BOLD"
|
||||
if [[ "$CROSS_TIER" == "1" ]]; then
|
||||
printf '%-8s %-9s %-9s %-10s %-10s %-8s %-9s %-10s %-10s %-8s %-7s\n' \
|
||||
"rate" "t1_recv" "t1_drop" "t1_p50" "t1_p99" "t3_recv" "t3_p50" "t3_p99" "t3_p999" "tick_hz" "rss_mb"
|
||||
else
|
||||
printf '%-8s %-9s %-9s %-10s %-10s %-10s %-8s %-7s\n' \
|
||||
"rate" "received" "dropped" "p50_us" "p99_us" "p999_us" "tick_hz" "rss_mb"
|
||||
fi
|
||||
printf '%s' "$RESET"
|
||||
|
||||
# Snapshot file paths
|
||||
BEFORE="$LOG_DIR/before.txt"
|
||||
AFTER="$LOG_DIR/after.txt"
|
||||
|
||||
# Peak-tracker for channel depth: poll /metrics about four times a second for
# the length of the measurement window and report the largest depth observed.
#   $1      tier label as it appears in the metric: "t1", "t2" or "t3"
#   stdout  peak depth as an integer (0 when no sample was above zero)
# Runs WINDOW_S * 4 iterations, each sleeping 0.25 s on top of the curl
# round-trip, so the call takes at least WINDOW_S seconds of wall-clock time.
peak_depth() {
  local label="$1"
  local max=0
  local val int_val
  for _ in $(seq 1 $(( WINDOW_S * 4 ))); do
    # A failed scrape simply yields no sample for this iteration.
    val=$(curl -s http://localhost:9100/metrics 2>/dev/null \
      | awk -v pat="^substrate_channel_depth\\\\{tier=\"$label\"\\\\}" '$0 ~ pat {print $NF; exit}') || true
    # Bash (( )) arithmetic is integer-only: drop any fractional part, and
    # skip anything that is not a plain run of digits (empty, NaN, 1e+03, …).
    int_val="${val%.*}"
    if [[ "$int_val" =~ ^[0-9]+$ ]]; then
      # 10# forces base 10 so a leading zero is not parsed as octal.
      if (( 10#$int_val > max )); then max=$(( 10#$int_val )); fi
    fi
    sleep 0.25
  done
  echo "$max"
}
|
||||
|
||||
for rate in "${RATES[@]}"; do
|
||||
# Launch simulator in background. In cross-tier mode it drives both T1
|
||||
# and T3 on the same connection; otherwise just T1.
|
||||
sim_args=(
|
||||
--profile single
|
||||
--sensor-type generic
|
||||
--rate-hz "$rate"
|
||||
--count 0
|
||||
--devices "$DEVICES"
|
||||
)
|
||||
if [[ "$CROSS_TIER" == "1" ]]; then
|
||||
sim_args+=(--t3-rate-hz "$T3_RATE_HZ" --t3-timeout-ms "$T3_TIMEOUT_MS")
|
||||
fi
|
||||
RUST_LOG=warn "$SIMULATOR" "${sim_args[@]}" >"$LOG_DIR/sim_${rate}.log" 2>&1 &
|
||||
SIM_PID=$!
|
||||
|
||||
# Warmup, then snapshot counters at the start of the *measurement* window.
|
||||
sleep "$WARMUP_S"
|
||||
snapshot_to "$BEFORE"
|
||||
rec_before=$(get_value "$BEFORE" 'substrate_received_total\{tier="t1"\}')
|
||||
drop_before=$(get_value "$BEFORE" 'substrate_dropped_total\{tier="t1"\}')
|
||||
t3_rec_before=$(get_value "$BEFORE" 'substrate_received_total\{tier="t3"\}')
|
||||
t3_nh_before=$(get_value "$BEFORE" 'substrate_t3_no_handler_total')
|
||||
|
||||
depth_max=$(peak_depth t1)
|
||||
|
||||
snapshot_to "$AFTER"
|
||||
kill -TERM "$SIM_PID" 2>/dev/null || true
|
||||
wait "$SIM_PID" 2>/dev/null || true
|
||||
SIM_PID=""
|
||||
|
||||
rec_after=$(get_value "$AFTER" 'substrate_received_total\{tier="t1"\}')
|
||||
drop_after=$(get_value "$AFTER" 'substrate_dropped_total\{tier="t1"\}')
|
||||
p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.5"\}')
|
||||
p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.99"\}')
|
||||
p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.999"\}')
|
||||
|
||||
t3_rec_after=$(get_value "$AFTER" 'substrate_received_total\{tier="t3"\}')
|
||||
t3_nh_after=$(get_value "$AFTER" 'substrate_t3_no_handler_total')
|
||||
t3_p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.5"\}')
|
||||
t3_p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.99"\}')
|
||||
t3_p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.999"\}')
|
||||
|
||||
tick_hz=$(get_value "$AFTER" 'substrate_tick_hz')
|
||||
rss=$(get_value "$AFTER" 'substrate_rss_bytes')
|
||||
|
||||
# Compute deltas + format. Use awk for floating math.
|
||||
received=$(awk -v a="$rec_after" -v b="$rec_before" 'BEGIN { printf "%d", a-b }')
|
||||
dropped=$(awk -v a="$drop_after" -v b="$drop_before" 'BEGIN { printf "%d", a-b }')
|
||||
t3_received=$(awk -v a="$t3_rec_after" -v b="$t3_rec_before" 'BEGIN { printf "%d", a-b }')
|
||||
t3_no_handler=$(awk -v a="$t3_nh_after" -v b="$t3_nh_before" 'BEGIN { printf "%d", a-b }')
|
||||
rss_mb=$(awk -v r="$rss" 'BEGIN { printf "%.1f", r/1048576 }')
|
||||
tick_hz_fmt=$(awk -v t="$tick_hz" 'BEGIN { printf "%.1f", t }')
|
||||
|
||||
if [[ "$CROSS_TIER" == "1" ]]; then
|
||||
printf '%-8s %-9s %-9s %-10.0f %-10.0f %-8s %-9.0f %-10.0f %-10.0f %-8s %-7s\n' \
|
||||
"$rate" "$received" "$dropped" \
|
||||
"${p50:-0}" "${p99:-0}" \
|
||||
"$t3_received" "${t3_p50:-0}" "${t3_p99:-0}" "${t3_p999:-0}" \
|
||||
"$tick_hz_fmt" "$rss_mb"
|
||||
else
|
||||
printf '%-8s %-9s %-9s %-10.0f %-10.0f %-10.0f %-8s %-7s\n' \
|
||||
"$rate" "$received" "$dropped" "${p50:-0}" "${p99:-0}" "${p999:-0}" \
|
||||
"$tick_hz_fmt" "$rss_mb"
|
||||
fi
|
||||
|
||||
echo "$rate,$T3_RATE_HZ,$DEVICES,$TICK_RATE_HZ,$WINDOW_S,$received,$dropped,${p50:-0},${p99:-0},${p999:-0},$t3_received,$t3_no_handler,${t3_p50:-0},${t3_p99:-0},${t3_p999:-0},$tick_hz_fmt,$rss_mb,$depth_max" >> "$OUT_CSV"
|
||||
|
||||
# Tiny breather between rate points so the substrate's summary window
|
||||
# doesn't carry over.
|
||||
sleep 1
|
||||
done
|
||||
|
||||
printf '\n%sCSV written to:%s %s\n' "$DIM" "$RESET" "$OUT_CSV"
|
||||
printf '%sSubstrate log:%s %s\n' "$DIM" "$RESET" "$SUB_LOG"
|
||||
222
scripts/demo.sh
Executable file
222
scripts/demo.sh
Executable file
@@ -0,0 +1,222 @@
|
||||
#!/usr/bin/env bash
# scripts/demo.sh — bring the whole stack up: certs → build → VM+Grafana →
# substrate → simulator. Tails simulator progress in the foreground. Ctrl-C
# cleans everything up.
#
# Overridable via env vars:
#   PROFILE          single | industrial                        (default: industrial)
#   RATE_HZ          T1 datagram rate                           (default: 500)
#   T2_RATE_HZ       T2 uni stream rate                         (default: 5)
#   T3_RATE_HZ       T3 bi stream rate                          (default: 2)
#   DEVICES          number of devices                          (default: 5)
#   BUILD            release | debug                            (default: release)
#   KEEP_MONITORING  if 1, don't `docker compose down` on exit  (default: 0)
#   LOG_DIR          directory for substrate/simulator logs     (default: /tmp/quic_ecs_dt)
#
# Example:
#   ./scripts/demo.sh
#   PROFILE=single RATE_HZ=100 DEVICES=20 ./scripts/demo.sh
#   KEEP_MONITORING=1 ./scripts/demo.sh

set -euo pipefail

# --- locate repo root ---
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$ROOT"

# --- defaults ---
PROFILE="${PROFILE:-industrial}"
RATE_HZ="${RATE_HZ:-500}"
T2_RATE_HZ="${T2_RATE_HZ:-5}"
T3_RATE_HZ="${T3_RATE_HZ:-2}"
DEVICES="${DEVICES:-5}"
BUILD="${BUILD:-release}"
KEEP_MONITORING="${KEEP_MONITORING:-0}"
LOG_DIR="${LOG_DIR:-/tmp/quic_ecs_dt}"

# --- pretty logging ---
# Colours only when stdout is a terminal, so redirected output stays clean.
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'; DIM=$'\033[2m'; GREEN=$'\033[32m'
  YELLOW=$'\033[33m'; RED=$'\033[31m'; CYAN=$'\033[36m'; RESET=$'\033[0m'
else
  BOLD=; DIM=; GREEN=; YELLOW=; RED=; CYAN=; RESET=
fi
step() { printf '%s» %s%s\n' "$BOLD" "$1" "$RESET"; }
ok() { printf '%s ✓ %s%s\n' "$GREEN" "$1" "$RESET"; }
warn() { printf '%s ! %s%s\n' "$YELLOW" "$1" "$RESET"; }
fail() { printf '%s ✗ %s%s\n' "$RED" "$1" "$RESET"; }

# --- helpers ---

# port_listeners PORT
# Prints the lsof rows for anything bound to PORT over UDP, or listening on it
# over TCP. Empty output means the port is free. The decision is made on the
# output rather than on lsof's exit status, which is non-zero whenever any one
# of the requested selections has no match.
port_listeners() {
  lsof -nP -iUDP:"$1" -iTCP:"$1" -sTCP:LISTEN 2>/dev/null || true
}

# wait_for_http LABEL URL ATTEMPTS INTERVAL_S
# Polls URL with curl until it answers successfully, printing one dot per
# failed attempt. Returns 0 once the URL responds, 1 after ATTEMPTS failures.
wait_for_http() {
  local label=$1 url=$2 attempts=$3 interval=$4 i
  printf '%s ⏳ waiting for %s' "$DIM" "$label"
  for ((i = 1; i <= attempts; i++)); do
    if curl -sf "$url" >/dev/null 2>&1; then
      printf ' ready%s\n' "$RESET"
      return 0
    fi
    printf '.'
    sleep "$interval"
  done
  printf ' TIMEOUT%s\n' "$RESET"
  return 1
}

# Stops whatever has been started so far. Safe to call at any point: the PID
# variables are only set once the corresponding process has been launched.
cleanup() {
  printf '\n%s» Cleaning up%s\n' "$BOLD" "$RESET"
  if [[ -n "${SIMULATOR_PID:-}" ]]; then
    kill -TERM "$SIMULATOR_PID" 2>/dev/null || true
    wait "$SIMULATOR_PID" 2>/dev/null || true
    ok "simulator stopped"
  fi
  if [[ -n "${SUBSTRATE_PID:-}" ]]; then
    kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
    wait "$SUBSTRATE_PID" 2>/dev/null || true
    ok "substrate stopped"
  fi
  if [[ "$KEEP_MONITORING" == "1" ]]; then
    warn "leaving monitoring stack up (KEEP_MONITORING=1) — 'make monitoring-down' to stop"
  else
    docker compose -f monitoring/docker-compose.yml down >/dev/null 2>&1 || true
    ok "monitoring stack stopped"
  fi
  printf '%sLogs preserved at:%s %s\n' "$DIM" "$RESET" "$LOG_DIR"
}

# --- prereq check ---
step "Checking prerequisites"
for cmd in cargo docker openssl curl lsof; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    fail "missing required command: $cmd"
    exit 1
  fi
done
if ! docker compose version >/dev/null 2>&1; then
  fail "docker compose plugin not available (try 'docker compose version')"
  exit 1
fi
ok "cargo, docker, openssl, curl, lsof present"

# --- port collision check (substrate runs on 9000 udp + 9100 tcp) ---
# UDP sockets have no LISTEN state, so the lsof rows must not be filtered on
# that word — otherwise a busy QUIC port would go unnoticed.
for port in 9000 9100; do
  listeners="$(port_listeners "$port")"
  if [[ -n "$listeners" ]]; then
    fail "port $port appears to be in use — another substrate or process is running"
    head -5 <<<"$listeners"
    exit 1
  fi
done
ok "ports 9000 (QUIC) and 9100 (/metrics) are free"

# --- certs ---
step "Ensuring dev TLS cert exists"
if [[ ! -f certs/server.crt || ! -f certs/server.key ]]; then
  make certs >/dev/null
  ok "generated certs/server.{crt,key}"
else
  ok "certs/server.{crt,key} already present"
fi

# --- build ---
step "Building substrate + simulator ($BUILD profile)"
if [[ "$BUILD" == "release" ]]; then
  cargo build --release -p substrate -p simulator
  SUBSTRATE_BIN="$ROOT/target/release/substrate"
  SIMULATOR_BIN="$ROOT/target/release/simulator"
else
  cargo build -p substrate -p simulator
  SUBSTRATE_BIN="$ROOT/target/debug/substrate"
  SIMULATOR_BIN="$ROOT/target/debug/simulator"
fi
ok "binaries: $SUBSTRATE_BIN, $SIMULATOR_BIN"

# --- cleanup trap ---
# Installed before anything long-lived is started, so a failure or Ctrl-C
# during startup still tears down the containers and background processes.
# Only EXIT runs cleanup; INT/TERM just exit, which keeps cleanup from running
# twice on Ctrl-C.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

# --- monitoring ---
step "Bringing up VictoriaMetrics + Grafana (docker compose)"
docker compose -f monitoring/docker-compose.yml up -d >/dev/null
ok "containers started"

wait_for_http "VictoriaMetrics on :8428" http://localhost:8428/health 40 0.5 || exit 1
wait_for_http "Grafana on :3000" http://localhost:3000/api/health 40 0.5 || exit 1

# --- substrate ---
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
SIM_LOG="$LOG_DIR/simulator.log"
: >"$SUB_LOG"
: >"$SIM_LOG"

step "Starting substrate (log: $SUB_LOG)"
RUST_LOG=info "$SUBSTRATE_BIN" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

if ! wait_for_http "/metrics on :9100" http://localhost:9100/metrics 40 0.25; then
  warn "substrate failed to start; tail of $SUB_LOG:"
  tail -30 "$SUB_LOG"
  exit 1
fi

# --- simulator ---
# The industrial profile emits five sensors per device.
TOTAL_SLOTS=$DEVICES
if [[ "$PROFILE" == "industrial" ]]; then
  TOTAL_SLOTS=$((DEVICES * 5))
fi

step "Starting simulator (log: $SIM_LOG)"
RUST_LOG=info "$SIMULATOR_BIN" \
  --profile "$PROFILE" \
  --rate-hz "$RATE_HZ" \
  --t2-rate-hz "$T2_RATE_HZ" \
  --t3-rate-hz "$T3_RATE_HZ" \
  --count 0 \
  --devices "$DEVICES" \
  >"$SIM_LOG" 2>&1 &
SIMULATOR_PID=$!
# Give the simulator a moment to fail fast (bad flags, refused connection).
sleep 0.5
if ! kill -0 "$SIMULATOR_PID" 2>/dev/null; then
  fail "simulator exited immediately; tail of $SIM_LOG:"
  tail -20 "$SIM_LOG"
  exit 1
fi
ok "simulator PID $SIMULATOR_PID"

# --- summary ---
cat <<EOF

${BOLD}════════════════════════════════════════════════════════════${RESET}
${BOLD} Demo is live${RESET}
${BOLD}════════════════════════════════════════════════════════════${RESET}

${CYAN}Grafana${RESET} http://localhost:3000 (admin / admin)
sensors dash http://localhost:3000/d/quic-ecs-dt-sensors
runtime dash http://localhost:3000/d/quic-ecs-dt-runtime
${CYAN}VictoriaMetrics${RESET} http://localhost:8428
${CYAN}substrate /metrics${RESET} http://localhost:9100/metrics

${DIM}Logs${RESET}
substrate $SUB_LOG
simulator $SIM_LOG

${DIM}Config${RESET}
profile $PROFILE
rates T1=$RATE_HZ Hz · T2=$T2_RATE_HZ Hz · T3=$T3_RATE_HZ Hz
devices $DEVICES → $TOTAL_SLOTS sensor entities expected
build $BUILD

${DIM}Below: live tail of simulator progress (Ctrl-C to stop everything).${RESET}

EOF

# --- foreground tail of simulator progress ---
# Filter for the per-second `progress` / `simulator done` lines so the user
# sees the rates the simulator is observing without noise.
tail -F "$SIM_LOG" | grep --line-buffered -E 'progress|simulator (done|launching|client connected)'
|
||||
@@ -4,3 +4,21 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
thiserror = "2"
|
||||
anyhow = "1"
|
||||
bevy = "0.18"
|
||||
game_sockets = { git = "https://github.com/VALERE91/game_sockets.git"}
|
||||
substrate = { path = "../substrate" }
|
||||
quinn = "0.11"
|
||||
rustls = "0.23"
|
||||
rustls-pemfile = "2"
|
||||
rustls-pki-types = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
uuid = { version = "1.23", features = ["v4"] }
|
||||
bytes = "1"
|
||||
clap = { version = "4", features = ["derive"] }
|
||||
|
||||
[dev-dependencies]
|
||||
tokio = { version = "1", features = ["full", "test-util"] }
|
||||
189
simulator/src/client.rs
Normal file
189
simulator/src/client.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{anyhow, Context};
|
||||
use quinn::{ClientConfig, Connection, Endpoint};
|
||||
use rustls::client::danger::{HandshakeSignatureValid, ServerCertVerified, ServerCertVerifier};
|
||||
use rustls::pki_types::{CertificateDer, ServerName, UnixTime};
|
||||
use rustls::{DigitallySignedStruct, SignatureScheme};
|
||||
use substrate::transport::QuicMessage;
|
||||
|
||||
/// QUIC client for driving the substrate from tests, smoke runs, and
|
||||
/// (eventually) the full Bevy-driven sensor generator.
|
||||
///
|
||||
/// `connect` trusts the server's PEM cert by **exact byte match** — using a
|
||||
/// custom `ServerCertVerifier` that compares the leaf against the cert at
|
||||
/// `cert_path`. This sidesteps rustls' `CaUsedAsEndEntity` rejection of our
|
||||
/// self-signed cert (which acts as both trust anchor and leaf) without
|
||||
/// disabling signature verification or weakening the handshake.
|
||||
pub struct SimulatorClient {
|
||||
pub endpoint: Endpoint,
|
||||
pub conn: Connection,
|
||||
}
|
||||
|
||||
impl SimulatorClient {
|
||||
pub async fn connect(
|
||||
server_addr: SocketAddr,
|
||||
server_name: &str,
|
||||
cert_path: impl AsRef<Path>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let cert_path = cert_path.as_ref();
|
||||
let cert_pem = std::fs::read(cert_path)
|
||||
.with_context(|| format!("read trust cert at {}", cert_path.display()))?;
|
||||
|
||||
let parsed: Vec<CertificateDer<'static>> = rustls_pemfile::certs(&mut cert_pem.as_slice())
|
||||
.collect::<Result<_, _>>()
|
||||
.with_context(|| format!("parse PEM certs at {}", cert_path.display()))?;
|
||||
let expected = parsed
|
||||
.into_iter()
|
||||
.next()
|
||||
.ok_or_else(|| anyhow!("no certificates found in {}", cert_path.display()))?;
|
||||
|
||||
// Reuse the process-wide rustls provider that `install_crypto_provider`
|
||||
// (or substrate's main) already installed. Failing to find one here
|
||||
// means nobody installed a default — caller error.
|
||||
let provider = rustls::crypto::CryptoProvider::get_default()
|
||||
.ok_or_else(|| anyhow!("no rustls default crypto provider installed"))?
|
||||
.clone();
|
||||
|
||||
let verifier = Arc::new(TrustExactCert {
|
||||
expected,
|
||||
provider: provider.clone(),
|
||||
});
|
||||
|
||||
let rustls_cfg = rustls::ClientConfig::builder_with_provider(provider)
|
||||
.with_safe_default_protocol_versions()
|
||||
.context("rustls client builder")?
|
||||
.dangerous()
|
||||
.with_custom_certificate_verifier(verifier)
|
||||
.with_no_client_auth();
|
||||
|
||||
let quic_cfg = quinn::crypto::rustls::QuicClientConfig::try_from(rustls_cfg)
|
||||
.context("wrap rustls config for QUIC")?;
|
||||
let client_cfg = ClientConfig::new(Arc::new(quic_cfg));
|
||||
|
||||
let bind: SocketAddr = if server_addr.is_ipv6() {
|
||||
"[::]:0".parse().unwrap()
|
||||
} else {
|
||||
"0.0.0.0:0".parse().unwrap()
|
||||
};
|
||||
let mut endpoint = Endpoint::client(bind).context("Endpoint::client bind")?;
|
||||
endpoint.set_default_client_config(client_cfg);
|
||||
|
||||
let connecting = endpoint
|
||||
.connect(server_addr, server_name)
|
||||
.with_context(|| format!("client connect to {server_addr} as {server_name}"))?;
|
||||
let conn = connecting.await.context("client TLS handshake")?;
|
||||
|
||||
tracing::info!(remote = %conn.remote_address(), "simulator client connected");
|
||||
Ok(Self { endpoint, conn })
|
||||
}
|
||||
|
||||
/// T1 — send one `QuicMessage` over a QUIC datagram (38 B fixed).
|
||||
pub fn send_datagram(&self, msg: &QuicMessage) -> anyhow::Result<()> {
|
||||
let bytes = bytes::Bytes::copy_from_slice(&msg.to_bytes());
|
||||
self.conn.send_datagram(bytes).context("send_datagram")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// T2 — open a unidirectional stream, write each message as 38 B back-to-back,
|
||||
/// then `finish()` the stream. The substrate sees one or many events per
|
||||
/// stream, ordered within the stream.
|
||||
pub async fn send_uni_stream(&self, msgs: &[QuicMessage]) -> anyhow::Result<()> {
|
||||
let mut send = self.conn.open_uni().await.context("open_uni")?;
|
||||
for msg in msgs {
|
||||
send.write_all(&msg.to_bytes())
|
||||
.await
|
||||
.context("write QuicMessage to uni stream")?;
|
||||
}
|
||||
send.finish().context("finish uni stream")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// T3 — open a bidirectional stream, write the command (38 B), finish the
|
||||
/// send half, then read the substrate's ack (38 B). Errors if the
|
||||
/// substrate resets the stream (e.g. no handler installed yet) or if the
|
||||
/// connection drops mid-exchange.
|
||||
pub async fn request(&self, command: &QuicMessage) -> anyhow::Result<QuicMessage> {
|
||||
let (mut send, mut recv) = self.conn.open_bi().await.context("open_bi")?;
|
||||
send.write_all(&command.to_bytes())
|
||||
.await
|
||||
.context("write T3 command")?;
|
||||
send.finish().context("finish T3 send half")?;
|
||||
|
||||
let mut buf = [0u8; QuicMessage::WIRE_SIZE];
|
||||
recv.read_exact(&mut buf)
|
||||
.await
|
||||
.context("read T3 ack")?;
|
||||
let ack = QuicMessage::decode(&buf).context("decode T3 ack")?;
|
||||
Ok(ack)
|
||||
}
|
||||
|
||||
/// Close the connection gracefully. Use before dropping in tests so the
|
||||
/// peer's `conn.closed()` resolves cleanly instead of via timeout.
|
||||
pub async fn close(&self) {
|
||||
self.conn.close(0u32.into(), b"client done");
|
||||
self.endpoint.wait_idle().await;
|
||||
}
|
||||
}
|
||||
|
||||
/// `ServerCertVerifier` that accepts exactly one specific cert by byte
|
||||
/// equality. Signature verification still runs through the default provider —
|
||||
/// only the chain-validity check is replaced.
|
||||
#[derive(Debug)]
|
||||
struct TrustExactCert {
|
||||
expected: CertificateDer<'static>,
|
||||
provider: Arc<rustls::crypto::CryptoProvider>,
|
||||
}
|
||||
|
||||
impl ServerCertVerifier for TrustExactCert {
|
||||
fn verify_server_cert(
|
||||
&self,
|
||||
end_entity: &CertificateDer<'_>,
|
||||
_intermediates: &[CertificateDer<'_>],
|
||||
_server_name: &ServerName<'_>,
|
||||
_ocsp_response: &[u8],
|
||||
_now: UnixTime,
|
||||
) -> Result<ServerCertVerified, rustls::Error> {
|
||||
if end_entity.as_ref() == self.expected.as_ref() {
|
||||
Ok(ServerCertVerified::assertion())
|
||||
} else {
|
||||
Err(rustls::Error::General(
|
||||
"server cert does not match trusted dev cert".into(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn verify_tls12_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer<'_>,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> Result<HandshakeSignatureValid, rustls::Error> {
|
||||
rustls::crypto::verify_tls12_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&self.provider.signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn verify_tls13_signature(
|
||||
&self,
|
||||
message: &[u8],
|
||||
cert: &CertificateDer<'_>,
|
||||
dss: &DigitallySignedStruct,
|
||||
) -> Result<HandshakeSignatureValid, rustls::Error> {
|
||||
rustls::crypto::verify_tls13_signature(
|
||||
message,
|
||||
cert,
|
||||
dss,
|
||||
&self.provider.signature_verification_algorithms,
|
||||
)
|
||||
}
|
||||
|
||||
fn supported_verify_schemes(&self) -> Vec<SignatureScheme> {
|
||||
self.provider.signature_verification_algorithms.supported_schemes()
|
||||
}
|
||||
}
|
||||
147
simulator/src/emitters.rs
Normal file
147
simulator/src/emitters.rs
Normal file
@@ -0,0 +1,147 @@
|
||||
//! Async emitter tasks for T2 (uni streams) and T3 (bi streams + ack).
|
||||
//!
|
||||
//! Each emitter ticks at its own rate, opens a fresh stream per event, and
|
||||
//! shares a `Connection` with the rest of the simulator. T1 (datagrams) is
|
||||
//! driven inline by the main loop so the foreground task owns the progress
|
||||
//! reporting; the reliable tiers run as `tokio::spawn`ed background tasks.
|
||||
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
use std::time::{Duration, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use anyhow::Context;
|
||||
use substrate::transport::QuicMessage;
|
||||
use tokio::time::MissedTickBehavior;
|
||||
|
||||
use crate::profile::{SensorSlot, generate_value};
|
||||
|
||||
/// Current wall-clock time as microseconds since the UNIX epoch.
///
/// The simulator stamps this value into every outgoing `QuicMessage`.
/// Substrate-side latency is computed as `substrate_now_us - msg.timestamp_us`,
/// so both ends must share a real wall clock (NTP for two machines; loopback
/// otherwise).
///
/// Returns `0` if the system clock reports a time before the epoch.
pub fn now_us() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_micros() as u64,
        Err(_) => 0,
    }
}
|
||||
|
||||
/// T2 emitter — opens a fresh uni stream per event, writes one
|
||||
/// `QuicMessage`, and `finish`es. Returns the count of events successfully
|
||||
/// delivered when `interrupted` is raised.
|
||||
pub async fn run_t2_emitter(
|
||||
conn: quinn::Connection,
|
||||
mut slot: SensorSlot,
|
||||
rate_hz: f64,
|
||||
interrupted: Arc<AtomicBool>,
|
||||
counter: Arc<AtomicU64>,
|
||||
) -> u64 {
|
||||
let period = Duration::from_nanos((1.0e9 / rate_hz) as u64);
|
||||
let mut ticker = tokio::time::interval(period);
|
||||
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||
let mut sent: u64 = 0;
|
||||
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
if interrupted.load(Ordering::SeqCst) {
|
||||
break;
|
||||
}
|
||||
|
||||
let msg = QuicMessage {
|
||||
device_id: slot.device_id,
|
||||
sensor_id: slot.sensor_id,
|
||||
raw_value: generate_value(slot.sensor_type, slot.seq),
|
||||
timestamp_us: now_us(),
|
||||
sequence_number: slot.seq,
|
||||
sensor_type: slot.sensor_type.as_u8(),
|
||||
};
|
||||
slot.seq = slot.seq.wrapping_add(1);
|
||||
|
||||
match conn.open_uni().await {
|
||||
Ok(mut send) => {
|
||||
if let Err(e) = send.write_all(&msg.to_bytes()).await {
|
||||
tracing::warn!(error = %e, "T2 write_all failed");
|
||||
continue;
|
||||
}
|
||||
if let Err(e) = send.finish() {
|
||||
tracing::warn!(error = %e, "T2 finish failed");
|
||||
continue;
|
||||
}
|
||||
sent += 1;
|
||||
counter.store(sent, Ordering::Relaxed);
|
||||
}
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "T2 open_uni failed; emitter exiting");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
sent
|
||||
}
|
||||
|
||||
/// T3 emitter — opens a fresh bi-stream per command, writes the command,
|
||||
/// awaits the ack with a bounded timeout. Returns `(acks_received, timeouts)`.
|
||||
pub async fn run_t3_emitter(
|
||||
conn: quinn::Connection,
|
||||
mut slot: SensorSlot,
|
||||
rate_hz: f64,
|
||||
timeout: Duration,
|
||||
interrupted: Arc<AtomicBool>,
|
||||
sent_counter: Arc<AtomicU64>,
|
||||
timeout_counter: Arc<AtomicU64>,
|
||||
) -> (u64, u64) {
|
||||
let period = Duration::from_nanos((1.0e9 / rate_hz) as u64);
|
||||
let mut ticker = tokio::time::interval(period);
|
||||
ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);
|
||||
let mut sent: u64 = 0;
|
||||
let mut timeouts: u64 = 0;
|
||||
|
||||
loop {
|
||||
ticker.tick().await;
|
||||
if interrupted.load(Ordering::SeqCst) {
|
||||
break;
|
||||
}
|
||||
|
||||
let cmd = QuicMessage {
|
||||
device_id: slot.device_id,
|
||||
sensor_id: slot.sensor_id,
|
||||
raw_value: generate_value(slot.sensor_type, slot.seq),
|
||||
timestamp_us: now_us(),
|
||||
sequence_number: slot.seq,
|
||||
sensor_type: slot.sensor_type.as_u8(),
|
||||
};
|
||||
slot.seq = slot.seq.wrapping_add(1);
|
||||
|
||||
match tokio::time::timeout(timeout, t3_one_request(&conn, &cmd)).await {
|
||||
Ok(Ok(_ack)) => {
|
||||
sent += 1;
|
||||
sent_counter.store(sent, Ordering::Relaxed);
|
||||
}
|
||||
Ok(Err(e)) => {
|
||||
tracing::warn!(error = %e, "T3 request failed");
|
||||
}
|
||||
Err(_) => {
|
||||
timeouts += 1;
|
||||
timeout_counter.store(timeouts, Ordering::Relaxed);
|
||||
tracing::warn!(?timeout, "T3 ack timed out");
|
||||
}
|
||||
}
|
||||
}
|
||||
(sent, timeouts)
|
||||
}
|
||||
|
||||
/// Single T3 round-trip: open bi-stream, write 38 B command, `finish` the
|
||||
/// send half, read 38 B ack. Used by `run_t3_emitter`.
|
||||
async fn t3_one_request(
|
||||
conn: &quinn::Connection,
|
||||
cmd: &QuicMessage,
|
||||
) -> anyhow::Result<QuicMessage> {
|
||||
let (mut send, mut recv) = conn.open_bi().await.context("T3 open_bi")?;
|
||||
send.write_all(&cmd.to_bytes())
|
||||
.await
|
||||
.context("T3 write command")?;
|
||||
send.finish().context("T3 finish send half")?;
|
||||
let mut buf = [0u8; QuicMessage::WIRE_SIZE];
|
||||
recv.read_exact(&mut buf).await.context("T3 read ack")?;
|
||||
QuicMessage::decode(&buf).context("T3 decode ack")
|
||||
}
|
||||
12
simulator/src/lib.rs
Normal file
12
simulator/src/lib.rs
Normal file
@@ -0,0 +1,12 @@
|
||||
pub mod client;
|
||||
pub mod emitters;
|
||||
pub mod profile;
|
||||
|
||||
/// Install rustls' default crypto provider. Idempotent: safe to call from
|
||||
/// every test, every binary entry, and the substrate process. The `aws_lc_rs`
|
||||
/// provider matches what the substrate installs in `main.rs`.
|
||||
pub fn install_crypto_provider() {
|
||||
// Returns Err if a provider is already installed; that's the expected
|
||||
// case in any process that's already booted substrate or a sibling test.
|
||||
let _ = rustls::crypto::aws_lc_rs::default_provider().install_default();
|
||||
}
|
||||
@@ -1,3 +1,320 @@
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
//! Manual smoke runner / load driver for the substrate.
|
||||
//!
|
||||
//! Parses the CLI, builds the per-device sensor layout, then drives T1
|
||||
//! datagrams in the foreground while T2 and T3 emitters run as background
|
||||
//! tokio tasks. Helpers live in the simulator library:
|
||||
//!
|
||||
//! - `simulator::profile` — `SensorProfile`, `SensorSlot`, waveform generator
|
||||
//! - `simulator::emitters` — `run_t2_emitter`, `run_t3_emitter`, `now_us`
|
||||
//! - `simulator::client` — Quinn client + TLS trust-by-cert verifier
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
use anyhow::{Context, anyhow};
|
||||
use clap::{Parser, ValueEnum};
|
||||
use simulator::client::SimulatorClient;
|
||||
use simulator::emitters::{now_us, run_t2_emitter, run_t3_emitter};
|
||||
use simulator::profile::{SensorProfile, build_slots, generate_value};
|
||||
use substrate::transport::{QuicMessage, SensorType};
|
||||
use tokio::time::MissedTickBehavior;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
// Command-line options of the simulator binary.
//
// NOTE: the `///` comments on the fields below are picked up by clap's derive
// as the `--help` text for each flag, so they are user-facing strings — edit
// them with that in mind. Maintainer-only notes use `//`.
#[derive(Parser, Debug)]
#[command(name = "simulator", about, long_about = None)]
struct Cli {
    /// Substrate address (host:port).
    #[arg(long, default_value = "127.0.0.1:9000")]
    addr: SocketAddr,

    /// SNI name presented during the TLS handshake.
    #[arg(long, default_value = "localhost")]
    server_name: String,

    /// Path to the substrate's PEM cert; used as the exact-match trust anchor.
    #[arg(long, default_value = "certs/server.crt")]
    cert: PathBuf,

    /// Sensor mix per device.
    ///
    /// - `single` (default): one sensor per device of `--sensor-type`, on
    /// `--sensor-id`. Lowest-cardinality, easiest to reason about.
    /// - `industrial`: five sensors per device on ids 0..4 — Temperature,
    /// Humidity, Pressure, Voltage, Current. Lights up every dashboard
    /// panel.
    #[arg(long, value_enum, default_value_t = SensorProfile::Single)]
    profile: SensorProfile,

    /// Sensor type for the `single` profile. Ignored by `industrial`.
    #[arg(long, value_enum, default_value_t = CliSensorType::Generic)]
    sensor_type: CliSensorType,

    // This is the aggregate rate: `main` sends one datagram per tick and
    // round-robins the slots, so each slot is hit at rate_hz / slot-count.
    /// T1 datagram rate across all (device, sensor) slots (Hz). `0` disables T1.
    #[arg(long, default_value_t = 20.0)]
    rate_hz: f64,

    /// T2 uni-stream event rate (Hz). `0` disables T2 (default).
    #[arg(long, default_value_t = 0.0)]
    t2_rate_hz: f64,

    /// T3 bidirectional command rate (Hz). `0` disables T3 (default).
    #[arg(long, default_value_t = 0.0)]
    t3_rate_hz: f64,

    /// Per-command timeout for T3 ack waits (milliseconds).
    #[arg(long, default_value_t = 2000)]
    t3_timeout_ms: u64,

    // Only the T1 loop in `main` consults this; with `--rate-hz 0` the run
    // lasts until Ctrl-C regardless of the value given here.
    /// Number of T1 datagrams to send. `0` runs until Ctrl-C.
    #[arg(long, default_value_t = 10)]
    count: u64,

    /// Number of distinct device UUIDs to round-robin.
    #[arg(long, default_value_t = 1)]
    devices: u32,

    /// Sensor index for the `single` profile. Ignored by `industrial`.
    #[arg(long, default_value_t = 0)]
    sensor_id: u16,
}
|
||||
|
||||
#[derive(ValueEnum, Clone, Copy, Debug, Default)]
|
||||
enum CliSensorType {
|
||||
#[default]
|
||||
Generic,
|
||||
Temperature,
|
||||
Humidity,
|
||||
Pressure,
|
||||
Voltage,
|
||||
Current,
|
||||
}
|
||||
|
||||
impl From<CliSensorType> for SensorType {
|
||||
fn from(c: CliSensorType) -> Self {
|
||||
match c {
|
||||
CliSensorType::Generic => SensorType::Generic,
|
||||
CliSensorType::Temperature => SensorType::Temperature,
|
||||
CliSensorType::Humidity => SensorType::Humidity,
|
||||
CliSensorType::Pressure => SensorType::Pressure,
|
||||
CliSensorType::Voltage => SensorType::Voltage,
|
||||
CliSensorType::Current => SensorType::Current,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn validate(cli: &Cli) -> anyhow::Result<()> {
|
||||
if cli.rate_hz < 0.0 {
|
||||
return Err(anyhow!("--rate-hz must be >= 0"));
|
||||
}
|
||||
if cli.t2_rate_hz < 0.0 {
|
||||
return Err(anyhow!("--t2-rate-hz must be >= 0"));
|
||||
}
|
||||
if cli.t3_rate_hz < 0.0 {
|
||||
return Err(anyhow!("--t3-rate-hz must be >= 0"));
|
||||
}
|
||||
if cli.rate_hz == 0.0 && cli.t2_rate_hz == 0.0 && cli.t3_rate_hz == 0.0 {
|
||||
return Err(anyhow!(
|
||||
"at least one of --rate-hz / --t2-rate-hz / --t3-rate-hz must be > 0"
|
||||
));
|
||||
}
|
||||
if cli.devices == 0 {
|
||||
return Err(anyhow!("--devices must be >= 1"));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Entry point of the simulator binary.
///
/// Sequence: initialise tracing, parse and validate the CLI, install the
/// rustls crypto provider, build the sensor slots, connect to the substrate,
/// spawn the optional T2/T3 emitters, drive T1 datagrams in the foreground
/// (logging a `progress` line about once per second), then stop the emitters,
/// log the `simulator done` summary and close the connection.
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    // Honour RUST_LOG when it is set and valid; otherwise default to `info`.
    tracing_subscriber::fmt()
        .with_env_filter(
            EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
        )
        .init();

    let cli = Cli::parse();
    validate(&cli)?;

    // Must happen before `SimulatorClient::connect`, which looks up the
    // process-wide default provider and fails if none is installed.
    simulator::install_crypto_provider();

    let mut slots = build_slots(
        cli.profile,
        cli.devices,
        cli.sensor_type.into(),
        cli.sensor_id,
    );

    tracing::info!(
        ?cli.addr,
        rate_hz = cli.rate_hz,
        t2_rate_hz = cli.t2_rate_hz,
        t3_rate_hz = cli.t3_rate_hz,
        count = cli.count,
        devices = cli.devices,
        slots = slots.len(),
        profile = ?cli.profile,
        "simulator launching"
    );

    let client = SimulatorClient::connect(cli.addr, &cli.server_name, &cli.cert)
        .await
        .context("connect to substrate")?;

    // Shared stop flag: set by the Ctrl-C task below, and by this function
    // once the T1 loop ends, so the background emitters wind down too.
    let interrupted = Arc::new(AtomicBool::new(false));
    {
        let flag = interrupted.clone();
        tokio::spawn(async move {
            // The result of `ctrl_c()` is ignored: the flag is raised whether
            // the wait completed normally or returned an error.
            let _ = tokio::signal::ctrl_c().await;
            tracing::info!("Ctrl-C received, draining…");
            flag.store(true, Ordering::SeqCst);
        });
    }

    // T2 / T3 emitters target slot[0] for their device/sensor identity.
    // NOTE(review): each clone carries its own `seq`, so T1, T2 and T3 number
    // their messages independently for the same (device, sensor) — confirm
    // the substrate tolerates overlapping sequence numbers across tiers.
    let t2_slot = slots[0].clone();
    let t3_slot = slots[0].clone();

    // Background T2 emitter, only when enabled. `t2_sent` mirrors its running
    // count so the progress line below can report it.
    let t2_sent = Arc::new(AtomicU64::new(0));
    let t2_handle = if cli.t2_rate_hz > 0.0 {
        let conn = client.conn.clone();
        let rate = cli.t2_rate_hz;
        let interrupted = interrupted.clone();
        let counter = t2_sent.clone();
        Some(tokio::spawn(async move {
            run_t2_emitter(conn, t2_slot, rate, interrupted, counter).await
        }))
    } else {
        None
    };

    // Background T3 emitter, only when enabled; same mirroring for acks and
    // timeouts.
    let t3_sent = Arc::new(AtomicU64::new(0));
    let t3_timeouts = Arc::new(AtomicU64::new(0));
    let t3_handle = if cli.t3_rate_hz > 0.0 {
        let conn = client.conn.clone();
        let rate = cli.t3_rate_hz;
        let timeout = Duration::from_millis(cli.t3_timeout_ms);
        let interrupted = interrupted.clone();
        let sent_counter = t3_sent.clone();
        let to_counter = t3_timeouts.clone();
        Some(tokio::spawn(async move {
            run_t3_emitter(
                conn,
                t3_slot,
                rate,
                timeout,
                interrupted,
                sent_counter,
                to_counter,
            )
            .await
        }))
    } else {
        None
    };

    let started = Instant::now();
    // Counts datagrams attempted; failed sends are included here and also
    // tallied separately in `send_errors`.
    let mut t1_sent: u64 = 0;
    let mut send_errors: u64 = 0;

    if cli.rate_hz > 0.0 {
        let period = Duration::from_nanos((1.0e9 / cli.rate_hz) as u64);
        let mut ticker = tokio::time::interval(period);
        ticker.set_missed_tick_behavior(MissedTickBehavior::Delay);

        // `--count 0` means "run until interrupted".
        let unlimited = cli.count == 0;
        let mut last_progress = started;

        loop {
            ticker.tick().await;
            if interrupted.load(Ordering::SeqCst) {
                break;
            }
            if !unlimited && t1_sent >= cli.count {
                break;
            }

            // Round-robin over the slots: one datagram per tick.
            let slot_idx = (t1_sent as usize) % slots.len();
            let slot = &mut slots[slot_idx];
            let msg = QuicMessage {
                device_id: slot.device_id,
                sensor_id: slot.sensor_id,
                raw_value: generate_value(slot.sensor_type, slot.seq),
                timestamp_us: now_us(),
                sequence_number: slot.seq,
                sensor_type: slot.sensor_type.as_u8(),
            };
            slot.seq = slot.seq.wrapping_add(1);
            t1_sent += 1;

            // A failed datagram is logged and counted, never fatal.
            if let Err(e) = client.send_datagram(&msg) {
                send_errors += 1;
                tracing::warn!(error = %e, "send_datagram failed");
            }

            // Emit a progress line at most once per second. The rates are
            // averages since `started`, not per-interval figures; `max(1e-9)`
            // guards the division against a zero elapsed time.
            let now = Instant::now();
            if now.duration_since(last_progress) >= Duration::from_secs(1) {
                let elapsed = now.duration_since(started).as_secs_f64();
                let t1_hz = (t1_sent as f64) / elapsed.max(1e-9);
                let t2_now = t2_sent.load(Ordering::Relaxed);
                let t2_hz = (t2_now as f64) / elapsed.max(1e-9);
                let t3_now = t3_sent.load(Ordering::Relaxed);
                let t3_hz = (t3_now as f64) / elapsed.max(1e-9);
                let t3_to = t3_timeouts.load(Ordering::Relaxed);
                tracing::info!(
                    t1_sent,
                    t2_sent = t2_now,
                    t3_sent = t3_now,
                    t3_timeouts = t3_to,
                    send_errors,
                    t1_hz = format_args!("{:.1}", t1_hz),
                    t2_hz = format_args!("{:.1}", t2_hz),
                    t3_hz = format_args!("{:.1}", t3_hz),
                    "progress"
                );
                last_progress = now;
            }
        }
    } else {
        // T1 disabled: nothing to drive in the foreground, so just poll the
        // stop flag. No progress lines are logged on this path.
        while !interrupted.load(Ordering::SeqCst) {
            tokio::time::sleep(Duration::from_millis(100)).await;
        }
    }

    // The T1 loop may have ended because `--count` was reached rather than by
    // Ctrl-C; raise the flag so the background emitters stop as well.
    interrupted.store(true, Ordering::SeqCst);
    // Collect the emitters' final totals. A task that failed to join (panic or
    // cancellation) is logged and contributes zeros.
    let t2_total: u64 = match t2_handle {
        Some(h) => h.await.unwrap_or_else(|e| {
            tracing::warn!(error = %e, "T2 emitter task ended unexpectedly");
            0
        }),
        None => 0,
    };
    let (t3_total, t3_timeouts_total): (u64, u64) = match t3_handle {
        Some(h) => h.await.unwrap_or_else(|e| {
            tracing::warn!(error = %e, "T3 emitter task ended unexpectedly");
            (0, 0)
        }),
        None => (0, 0),
    };

    // Final summary: totals and average rates over the whole run, including
    // the time spent waiting for the emitters to finish.
    let elapsed = started.elapsed().as_secs_f64();
    let t1_hz = (t1_sent as f64) / elapsed.max(1e-9);
    let t2_hz = (t2_total as f64) / elapsed.max(1e-9);
    let t3_hz = (t3_total as f64) / elapsed.max(1e-9);
    tracing::info!(
        t1_sent,
        t2_sent = t2_total,
        t3_sent = t3_total,
        t3_timeouts = t3_timeouts_total,
        send_errors,
        elapsed_s = format_args!("{:.3}", elapsed),
        t1_observed_hz = format_args!("{:.1}", t1_hz),
        t2_observed_hz = format_args!("{:.1}", t2_hz),
        t3_observed_hz = format_args!("{:.1}", t3_hz),
        "simulator done"
    );

    client.close().await;
    Ok(())
}
|
||||
|
||||
88
simulator/src/profile.rs
Normal file
88
simulator/src/profile.rs
Normal file
@@ -0,0 +1,88 @@
|
||||
//! Per-device sensor layout (the `--profile` CLI flag's runtime form) and the
|
||||
//! type-appropriate waveform generators that feed the substrate's Grafana
|
||||
//! dashboard with believable numbers.
|
||||
|
||||
use clap::ValueEnum;
|
||||
use substrate::transport::SensorType;
|
||||
use uuid::Uuid;
|
||||
|
||||
/// Per-device sensor layout selected by the `--profile` CLI flag.
|
||||
///
|
||||
/// - `Single`: one sensor per device of a chosen `SensorType`. Lowest
|
||||
/// cardinality; the right pick for throughput / latency benchmarks.
|
||||
/// - `Industrial`: five sensors per device on ids 0..4 — Temperature,
|
||||
/// Humidity, Pressure, Voltage, Current. Lights up every sensor-type
|
||||
/// panel in the operator dashboard.
|
||||
#[derive(ValueEnum, Clone, Copy, Debug)]
|
||||
pub enum SensorProfile {
|
||||
Single,
|
||||
Industrial,
|
||||
}
|
||||
|
||||
/// A single emitter slot: the `(device, sensor, type)` triple plus the
|
||||
/// per-slot monotonic sequence counter that the simulator advances on every
|
||||
/// outgoing message.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SensorSlot {
|
||||
pub device_id: Uuid,
|
||||
pub sensor_id: u16,
|
||||
pub sensor_type: SensorType,
|
||||
pub seq: u32,
|
||||
}
|
||||
|
||||
/// Expand a `(profile, num_devices)` choice into the flat list of slots
|
||||
/// the T1 emitter rotates through. Each device gets a fresh UUID.
|
||||
pub fn build_slots(
|
||||
profile: SensorProfile,
|
||||
num_devices: u32,
|
||||
default_type: SensorType,
|
||||
default_sensor_id: u16,
|
||||
) -> Vec<SensorSlot> {
|
||||
let mut slots = Vec::new();
|
||||
for _ in 0..num_devices {
|
||||
let device_id = Uuid::new_v4();
|
||||
match profile {
|
||||
SensorProfile::Single => {
|
||||
slots.push(SensorSlot {
|
||||
device_id,
|
||||
sensor_id: default_sensor_id,
|
||||
sensor_type: default_type,
|
||||
seq: 0,
|
||||
});
|
||||
}
|
||||
SensorProfile::Industrial => {
|
||||
for (sensor_id, sensor_type) in [
|
||||
(0u16, SensorType::Temperature),
|
||||
(1, SensorType::Humidity),
|
||||
(2, SensorType::Pressure),
|
||||
(3, SensorType::Voltage),
|
||||
(4, SensorType::Current),
|
||||
] {
|
||||
slots.push(SensorSlot {
|
||||
device_id,
|
||||
sensor_id,
|
||||
sensor_type,
|
||||
seq: 0,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
slots
|
||||
}
|
||||
|
||||
/// Type-appropriate waveform so the dashboard has something believable to
|
||||
/// render. `seq` is the sample index — multiplying by 0.05 gives a
|
||||
/// "seconds-like" wall-clock pacing inside the trig functions regardless of
|
||||
/// the actual send rate, so panels animate over the same visible period.
|
||||
pub fn generate_value(t: SensorType, seq: u32) -> f64 {
|
||||
let t_phase = (seq as f64) * 0.05;
|
||||
match t {
|
||||
SensorType::Temperature => 20.0 + 5.0 * (t_phase / 10.0).sin(),
|
||||
SensorType::Humidity => 50.0 + 20.0 * (t_phase / 15.0).sin(),
|
||||
SensorType::Pressure => 1013.0 + 5.0 * (t_phase / 20.0).cos(),
|
||||
SensorType::Voltage => 230.0 + 0.5 * (t_phase / 3.0).sin(),
|
||||
SensorType::Current => 10.0 + 2.0 * (t_phase / 5.0).cos(),
|
||||
SensorType::Generic => t_phase.sin(),
|
||||
}
|
||||
}
|
||||
139
simulator/tests/end_to_end_t1.rs
Normal file
139
simulator/tests/end_to_end_t1.rs
Normal file
@@ -0,0 +1,139 @@
|
||||
//! End-to-end T1 datagram test: spin up substrate's listener in-process with
|
||||
//! channels the test owns, drive a `SimulatorClient` against it, and assert
|
||||
//! the datagram lands in the T1 receiver decoded.
|
||||
//!
|
||||
//! Run with `cargo test -p simulator`.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use simulator::client::SimulatorClient;
|
||||
use substrate::config::QuicConfig;
|
||||
use substrate::transport::server::{accept_loop, bind_endpoint};
|
||||
use substrate::transport::{QuicMessage, SensorType, T1Sender, T2Sender, T3Sender};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn cert_path(name: &str) -> PathBuf {
|
||||
[env!("CARGO_MANIFEST_DIR"), "..", "certs", name].iter().collect()
|
||||
}
|
||||
|
||||
fn loopback_config(cert: PathBuf, key: PathBuf) -> QuicConfig {
|
||||
QuicConfig {
|
||||
// Port 0 lets the OS pick a free ephemeral port — tests can run in
|
||||
// parallel without colliding on a fixed bind.
|
||||
server_port: 0,
|
||||
server_interface: "127.0.0.1".to_string(),
|
||||
server_cert: cert.to_string_lossy().into_owned(),
|
||||
server_key: key.to_string_lossy().into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn t1_datagram_decoded_into_ecs_channel() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
// Bind the substrate's listener on an ephemeral port.
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
// Channels the test owns — gives us direct visibility into what the T1
|
||||
// demux pushes into the ECS bridge.
|
||||
let (t1_tx, mut t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, _t2_rx) = mpsc::channel(64);
|
||||
let (t3_tx, _t3_rx) = mpsc::channel(64);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
// Connect a client and send one datagram.
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
let sent = QuicMessage {
|
||||
device_id: Uuid::from_u128(0xdead_beef_cafe_f00d_1234_5678_90ab_cdef),
|
||||
sensor_id: 7,
|
||||
raw_value: 42.0,
|
||||
timestamp_us: 1_700_000_000_000_001,
|
||||
sequence_number: 1,
|
||||
sensor_type: SensorType::Temperature.as_u8(),
|
||||
};
|
||||
client.send_datagram(&sent)?;
|
||||
|
||||
// Wait for the substrate's read_datagrams reader to push it into T1.
|
||||
let received = tokio::time::timeout(Duration::from_secs(2), t1_rx.recv())
|
||||
.await
|
||||
.expect("did not observe T1 datagram within 2s")
|
||||
.expect("T1 channel closed unexpectedly");
|
||||
|
||||
assert_eq!(received, sent);
|
||||
|
||||
client.close().await;
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn t1_burst_preserves_order_and_count() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
// T1 capacity 64 ≥ burst size 32 so nothing is dropped under loopback.
|
||||
let (t1_tx, mut t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, _t2_rx) = mpsc::channel(8);
|
||||
let (t3_tx, _t3_rx) = mpsc::channel(8);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
let device = Uuid::from_u128(0xa1a2_a3a4_b5b6_b7b8_c9ca_cbcc_cdce_cfd0);
|
||||
const BURST: u32 = 32;
|
||||
for seq in 0..BURST {
|
||||
let msg = QuicMessage {
|
||||
device_id: device,
|
||||
sensor_id: 0,
|
||||
raw_value: f64::from(seq),
|
||||
timestamp_us: 1_700_000_000_000_000 + u64::from(seq),
|
||||
sequence_number: seq,
|
||||
sensor_type: SensorType::Generic.as_u8(),
|
||||
};
|
||||
client.send_datagram(&msg)?;
|
||||
}
|
||||
|
||||
// Drain BURST messages with a per-message timeout. Loopback shouldn't
|
||||
// reorder QUIC datagrams within a single connection.
|
||||
for expected_seq in 0..BURST {
|
||||
let msg = tokio::time::timeout(Duration::from_secs(2), t1_rx.recv())
|
||||
.await
|
||||
.unwrap_or_else(|_| panic!("missed datagram seq={expected_seq}"))
|
||||
.expect("T1 channel closed");
|
||||
assert_eq!(msg.sequence_number, expected_seq);
|
||||
assert_eq!(msg.device_id, device);
|
||||
assert_eq!(msg.raw_value, f64::from(expected_seq));
|
||||
}
|
||||
|
||||
client.close().await;
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
163
simulator/tests/end_to_end_t2.rs
Normal file
163
simulator/tests/end_to_end_t2.rs
Normal file
@@ -0,0 +1,163 @@
|
||||
//! End-to-end T2 (unidirectional stream) tests. Mirrors the T1 harness:
|
||||
//! spin up substrate's listener with channels owned by the test, drive a
|
||||
//! `SimulatorClient` against it, assert what arrives on the T2 receiver.
|
||||
//!
|
||||
//! Run with `cargo test -p simulator`.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use simulator::client::SimulatorClient;
|
||||
use substrate::config::QuicConfig;
|
||||
use substrate::transport::server::{accept_loop, bind_endpoint};
|
||||
use substrate::transport::{QuicMessage, SensorType, T1Sender, T2Sender, T3Sender};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn cert_path(name: &str) -> PathBuf {
|
||||
[env!("CARGO_MANIFEST_DIR"), "..", "certs", name].iter().collect()
|
||||
}
|
||||
|
||||
fn loopback_config(cert: PathBuf, key: PathBuf) -> QuicConfig {
|
||||
QuicConfig {
|
||||
server_port: 0,
|
||||
server_interface: "127.0.0.1".to_string(),
|
||||
server_cert: cert.to_string_lossy().into_owned(),
|
||||
server_key: key.to_string_lossy().into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn t2_single_stream_preserves_order() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
let (t1_tx, _t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, mut t2_rx) = mpsc::channel(64);
|
||||
let (t3_tx, _t3_rx) = mpsc::channel(64);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
let device = Uuid::from_u128(0x0011_2233_4455_6677_8899_aabb_ccdd_eeff);
|
||||
const N: u32 = 10;
|
||||
let msgs: Vec<QuicMessage> = (0..N)
|
||||
.map(|i| QuicMessage {
|
||||
device_id: device,
|
||||
sensor_id: 1,
|
||||
raw_value: f64::from(i),
|
||||
timestamp_us: 1_700_000_000_000_000 + u64::from(i),
|
||||
sequence_number: i,
|
||||
sensor_type: SensorType::Pressure.as_u8(),
|
||||
})
|
||||
.collect();
|
||||
|
||||
client.send_uni_stream(&msgs).await?;
|
||||
|
||||
for expected in &msgs {
|
||||
let received = tokio::time::timeout(Duration::from_secs(2), t2_rx.recv())
|
||||
.await
|
||||
.expect("missed T2 message")
|
||||
.expect("T2 channel closed unexpectedly");
|
||||
assert_eq!(received, *expected);
|
||||
}
|
||||
|
||||
client.close().await;
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 4)]
|
||||
async fn t2_concurrent_streams_each_internally_ordered() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
let (t1_tx, _t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, mut t2_rx) = mpsc::channel(256);
|
||||
let (t3_tx, _t3_rx) = mpsc::channel(64);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
// 4 devices × 8 messages each on independent uni streams. Cross-stream
|
||||
// ordering may interleave; per-stream ordering must be strict.
|
||||
const DEVICES: usize = 4;
|
||||
const PER_DEVICE: u32 = 8;
|
||||
let device_ids: Vec<Uuid> = (0..DEVICES).map(|_| Uuid::new_v4()).collect();
|
||||
|
||||
let mut handles = Vec::with_capacity(DEVICES);
|
||||
for &device in &device_ids {
|
||||
let conn = client.conn.clone();
|
||||
handles.push(tokio::spawn(async move {
|
||||
let msgs: Vec<QuicMessage> = (0..PER_DEVICE)
|
||||
.map(|i| QuicMessage {
|
||||
device_id: device,
|
||||
sensor_id: 0,
|
||||
raw_value: f64::from(i),
|
||||
timestamp_us: 1_700_000_000_000_000 + u64::from(i),
|
||||
sequence_number: i,
|
||||
sensor_type: SensorType::Generic.as_u8(),
|
||||
})
|
||||
.collect();
|
||||
// Use the connection directly so each task owns its own stream
|
||||
// — same wire pattern as `SimulatorClient::send_uni_stream`.
|
||||
let mut send = conn.open_uni().await.expect("open_uni");
|
||||
for m in &msgs {
|
||||
send.write_all(&m.to_bytes()).await.expect("write_all");
|
||||
}
|
||||
send.finish().expect("finish");
|
||||
}));
|
||||
}
|
||||
for h in handles {
|
||||
h.await?;
|
||||
}
|
||||
|
||||
// Drain DEVICES × PER_DEVICE messages, group by device, assert per-device
|
||||
// sequence numbers are strictly increasing from 0.
|
||||
let total = DEVICES * PER_DEVICE as usize;
|
||||
let mut by_device: HashMap<Uuid, Vec<u32>> = HashMap::new();
|
||||
for _ in 0..total {
|
||||
let msg = tokio::time::timeout(Duration::from_secs(2), t2_rx.recv())
|
||||
.await
|
||||
.expect("missed T2 message")
|
||||
.expect("T2 channel closed unexpectedly");
|
||||
by_device.entry(msg.device_id).or_default().push(msg.sequence_number);
|
||||
}
|
||||
|
||||
assert_eq!(by_device.len(), DEVICES, "expected one entry per device");
|
||||
for (dev, seqs) in &by_device {
|
||||
let expected: Vec<u32> = (0..PER_DEVICE).collect();
|
||||
assert_eq!(seqs, &expected, "out-of-order or missing sequence for {dev}");
|
||||
}
|
||||
|
||||
client.close().await;
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
152
simulator/tests/end_to_end_t3.rs
Normal file
152
simulator/tests/end_to_end_t3.rs
Normal file
@@ -0,0 +1,152 @@
|
||||
//! End-to-end T3 (bidirectional stream + oneshot ack) tests. Same shape as
|
||||
//! the T1/T2 harnesses: spin up substrate's listener with channels owned by
|
||||
//! the test, run a "fake ECS" task that drains the T3 receiver and either
|
||||
//! replies or drops the oneshot, and assert the client observes the right
|
||||
//! behaviour.
|
||||
//!
|
||||
//! Run with `cargo test -p simulator`.
|
||||
|
||||
use std::net::SocketAddr;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Duration;
|
||||
|
||||
use anyhow::Result;
|
||||
use simulator::client::SimulatorClient;
|
||||
use substrate::config::QuicConfig;
|
||||
use substrate::transport::server::{accept_loop, bind_endpoint};
|
||||
use substrate::transport::{QuicMessage, SensorType, T1Sender, T2Sender, T3Sender};
|
||||
use tokio::sync::mpsc;
|
||||
use uuid::Uuid;
|
||||
|
||||
fn cert_path(name: &str) -> PathBuf {
|
||||
[env!("CARGO_MANIFEST_DIR"), "..", "certs", name].iter().collect()
|
||||
}
|
||||
|
||||
fn loopback_config(cert: PathBuf, key: PathBuf) -> QuicConfig {
|
||||
QuicConfig {
|
||||
server_port: 0,
|
||||
server_interface: "127.0.0.1".to_string(),
|
||||
server_cert: cert.to_string_lossy().into_owned(),
|
||||
server_key: key.to_string_lossy().into_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sentinel `timestamp_us` written into every ack by the fake ECS handler.
/// It differs from the timestamp the test puts on its command, so an ack
/// carrying it cannot be a plain echo of the request.
const ACK_MARKER_TS: u64 = 999_999_999_999;
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn t3_round_trip_with_fake_handler() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
let (t1_tx, _t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, _t2_rx) = mpsc::channel(64);
|
||||
let (t3_tx, mut t3_rx) = mpsc::channel(64);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
// Fake ECS handler: drain T3 inbounds, mark the timestamp, send back.
|
||||
let handler = tokio::spawn(async move {
|
||||
while let Some(inbound) = t3_rx.recv().await {
|
||||
let mut ack = inbound.command;
|
||||
ack.timestamp_us = ACK_MARKER_TS;
|
||||
// Ignore send error (client may have disconnected before listening).
|
||||
let _ = inbound.reply.send(ack);
|
||||
}
|
||||
});
|
||||
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
let cmd = QuicMessage {
|
||||
device_id: Uuid::from_u128(0xa5a5_a5a5_5a5a_5a5a_a5a5_5a5a_a5a5_5a5a),
|
||||
sensor_id: 3,
|
||||
raw_value: 1.5,
|
||||
timestamp_us: 1_700_000_000_000_000,
|
||||
sequence_number: 7,
|
||||
sensor_type: SensorType::Voltage.as_u8(),
|
||||
};
|
||||
|
||||
let ack = tokio::time::timeout(Duration::from_secs(2), client.request(&cmd))
|
||||
.await
|
||||
.expect("T3 ack timed out")?;
|
||||
|
||||
assert_eq!(ack.device_id, cmd.device_id, "ack should preserve device_id");
|
||||
assert_eq!(ack.sensor_id, cmd.sensor_id, "ack should preserve sensor_id");
|
||||
assert_eq!(
|
||||
ack.sequence_number, cmd.sequence_number,
|
||||
"ack should preserve sequence_number for correlation"
|
||||
);
|
||||
assert_eq!(ack.timestamp_us, ACK_MARKER_TS, "fake ECS should stamp the marker");
|
||||
|
||||
client.close().await;
|
||||
handler.abort();
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[tokio::test(flavor = "multi_thread", worker_threads = 2)]
|
||||
async fn t3_no_handler_resets_stream() -> Result<()> {
|
||||
simulator::install_crypto_provider();
|
||||
|
||||
let cert = cert_path("server.crt");
|
||||
let key = cert_path("server.key");
|
||||
let cfg = loopback_config(cert.clone(), key);
|
||||
|
||||
let endpoint = bind_endpoint(&cfg)?;
|
||||
let server_addr: SocketAddr = endpoint.local_addr()?;
|
||||
|
||||
let (t1_tx, _t1_rx) = mpsc::channel(64);
|
||||
let (t2_tx, _t2_rx) = mpsc::channel(64);
|
||||
let (t3_tx, mut t3_rx) = mpsc::channel(64);
|
||||
|
||||
let server_task = tokio::spawn(accept_loop(
|
||||
endpoint,
|
||||
T1Sender::new(t1_tx),
|
||||
T2Sender::new(t2_tx),
|
||||
T3Sender::new(t3_tx),
|
||||
));
|
||||
|
||||
// Fake ECS that *drops* every oneshot — simulates "no handler installed",
|
||||
// which is the placeholder state in `ingest_system` until M4 lands.
|
||||
let handler = tokio::spawn(async move {
|
||||
while let Some(inbound) = t3_rx.recv().await {
|
||||
drop(inbound);
|
||||
}
|
||||
});
|
||||
|
||||
let client = SimulatorClient::connect(server_addr, "localhost", &cert).await?;
|
||||
|
||||
let cmd = QuicMessage {
|
||||
device_id: Uuid::new_v4(),
|
||||
sensor_id: 0,
|
||||
raw_value: 0.0,
|
||||
timestamp_us: 0,
|
||||
sequence_number: 0,
|
||||
sensor_type: SensorType::Generic.as_u8(),
|
||||
};
|
||||
|
||||
let result = tokio::time::timeout(Duration::from_secs(2), client.request(&cmd)).await;
|
||||
let inner = result.expect("client.request should not hang when stream is reset");
|
||||
assert!(
|
||||
inner.is_err(),
|
||||
"expected request to fail when substrate resets the stream, got Ok({:?})",
|
||||
inner.ok()
|
||||
);
|
||||
|
||||
client.close().await;
|
||||
handler.abort();
|
||||
server_task.abort();
|
||||
Ok(())
|
||||
}
|
||||
@@ -4,3 +4,19 @@ version = "0.1.0"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
bevy = "0.18"
|
||||
thiserror = "2"
|
||||
anyhow = "1"
|
||||
tracing = "0.1"
|
||||
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
|
||||
quinn = { version = "0.11" }
|
||||
rustls = { version = "0.23" }
|
||||
rustls-pemfile = "2"
|
||||
rustls-pki-types = "1"
|
||||
tokio = { version = "1", features = ["full"] }
|
||||
uuid = { version = "1.23", features = ["v4"] }
|
||||
figment = { version = "0.10", features = ["toml", "env"] }
|
||||
serde = { version = "1", features = ["derive"] }
|
||||
metrics = "0.24"
|
||||
metrics-exporter-prometheus = "0.17"
|
||||
memory-stats = "1"
|
||||
65
substrate/src/config.rs
Normal file
65
substrate/src/config.rs
Normal file
@@ -0,0 +1,65 @@
|
||||
use bevy::prelude::Resource;
|
||||
use figment::Figment;
|
||||
use figment::providers::{Env, Format, Serialized, Toml};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
#[derive(Debug, Resource, Serialize, Deserialize)]
|
||||
pub struct AppConfig {
|
||||
pub network: QuicConfig,
|
||||
pub simulation: SimulationConfig,
|
||||
pub observability: ObservabilityConfig,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct SimulationConfig {
|
||||
pub tick_rate_hz: u32,
|
||||
pub max_entities: usize,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct QuicConfig {
|
||||
pub server_port: u16,
|
||||
pub server_interface: String,
|
||||
pub server_cert: String,
|
||||
pub server_key: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
pub struct ObservabilityConfig {
|
||||
/// When true, install the Prometheus exporter at startup. Disable for
|
||||
/// environments where the metrics port collides or scraping is undesired.
|
||||
pub metrics_enabled: bool,
|
||||
/// Bind address for the `/metrics` HTTP listener.
|
||||
pub metrics_listen: String,
|
||||
}
|
||||
|
||||
impl Default for AppConfig {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
network : QuicConfig {
|
||||
server_port: 9000,
|
||||
server_interface: "0.0.0.0".to_string(),
|
||||
server_cert: "certs/server.crt".to_string(),
|
||||
server_key: "certs/server.key".to_string(),
|
||||
},
|
||||
simulation: SimulationConfig {
|
||||
tick_rate_hz: 60,
|
||||
max_entities: 10000,
|
||||
},
|
||||
observability: ObservabilityConfig {
|
||||
metrics_enabled: true,
|
||||
metrics_listen: "0.0.0.0:9100".to_string(),
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AppConfig {
|
||||
pub fn load(config_file: &str) -> Result<Self, figment::Error> {
|
||||
Figment::new()
|
||||
.merge(Serialized::defaults(Self::default())) // compiled-in defaults
|
||||
.merge(Toml::file(config_file)) // config file
|
||||
.merge(Env::prefixed("APP_")) // env overrides, e.g. APP_NETWORK__PORT=9000
|
||||
.extract()
|
||||
}
|
||||
}
|
||||
4
substrate/src/lib.rs
Normal file
4
substrate/src/lib.rs
Normal file
@@ -0,0 +1,4 @@
|
||||
pub mod config;
|
||||
pub mod observability;
|
||||
pub mod transport;
|
||||
pub mod world;
|
||||
@@ -1,3 +1,34 @@
|
||||
use bevy::prelude::*;
|
||||
use tracing_subscriber::EnvFilter;
|
||||
|
||||
use substrate::config::AppConfig;
|
||||
use substrate::observability::ObservabilityPlugin;
|
||||
use substrate::transport;
|
||||
use substrate::world::WorldPlugin;
|
||||
|
||||
fn main() {
|
||||
println!("Hello, world!");
|
||||
tracing_subscriber::fmt()
|
||||
.with_env_filter(
|
||||
EnvFilter::try_from_default_env().unwrap_or_else(|_| EnvFilter::new("info")),
|
||||
)
|
||||
.init();
|
||||
|
||||
// rustls 0.23 requires an explicit default crypto provider. Quinn's
|
||||
// ServerConfig::with_single_cert otherwise panics at first use.
|
||||
rustls::crypto::aws_lc_rs::default_provider()
|
||||
.install_default()
|
||||
.expect("install rustls default crypto provider");
|
||||
|
||||
let config = AppConfig::load("config.toml").expect("Failed to load config");
|
||||
tracing::info!(?config, "substrate starting");
|
||||
|
||||
// Plugin order matters: EcsQuicTransportPlugin inserts the TokioHandle
|
||||
// resource ObservabilityPlugin reads in its `build()`.
|
||||
App::new()
|
||||
.insert_resource(config)
|
||||
.add_plugins(MinimalPlugins)
|
||||
.add_plugins(transport::ecs::EcsQuicTransportPlugin)
|
||||
.add_plugins(WorldPlugin)
|
||||
.add_plugins(ObservabilityPlugin)
|
||||
.run();
|
||||
}
|
||||
|
||||
116
substrate/src/observability.rs
Normal file
116
substrate/src/observability.rs
Normal file
@@ -0,0 +1,116 @@
|
||||
//! M5 — Prometheus-format `/metrics` exporter installation and counter
|
||||
//! pre-registration.
|
||||
//!
|
||||
//! Counters and histograms are emitted from the demux path
|
||||
//! ([`crate::transport::server`]) and the world systems
|
||||
//! ([`crate::world::ingest_system`], [`crate::world::simulation_system`],
|
||||
//! [`crate::world::export_system`]). This module's only job is:
|
||||
//!
|
||||
//! 1. Install the global metrics recorder + HTTP listener on the existing
|
||||
//! tokio runtime, once at startup.
|
||||
//! 2. Pre-register every counter at value 0 so panels render "0" rather than
|
||||
//! "No data" before the first event of a given kind fires.
|
||||
//!
|
||||
//! ## Runtime telemetry
|
||||
//!
|
||||
//! - `substrate_received_total{tier=t1|t2|t3}` — counter
|
||||
//! - `substrate_dropped_total{tier=t1}` — counter (T1 lossy)
|
||||
//! - `substrate_decode_errors_total{tier=t1|t2|t3}` — counter
|
||||
//! - `substrate_t3_no_handler_total` — counter
|
||||
//! - `substrate_latency_us{tier=t1|t2|t3}` — histogram
|
||||
//! - `substrate_tick_hz` — gauge
|
||||
//! - `substrate_entities` — gauge
|
||||
//! - `substrate_channel_depth{tier=t1|t2|t3}` — gauge
|
||||
//! - `substrate_channel_capacity{tier=t1|t2|t3}` — gauge
|
||||
//! - `substrate_rss_bytes` — gauge
|
||||
//!
|
||||
//! ## Digital-twin surface (operator dashboard)
|
||||
//!
|
||||
//! - `sensor_aggregate{type=…, stat=count|mean|min|max}` — gauge
|
||||
//! - `substrate_threshold_crossings_total{type, direction}` — counter
|
||||
|
||||
use std::net::SocketAddr;
|
||||
|
||||
use bevy::prelude::*;
|
||||
use metrics::counter;
|
||||
use metrics_exporter_prometheus::PrometheusBuilder;
|
||||
|
||||
use crate::config::AppConfig;
|
||||
use crate::transport::SensorType;
|
||||
use crate::transport::ecs::TokioHandle;
|
||||
|
||||
pub struct ObservabilityPlugin;
|
||||
|
||||
impl Plugin for ObservabilityPlugin {
|
||||
fn build(&self, app: &mut App) {
|
||||
let config = app
|
||||
.world()
|
||||
.get_resource::<AppConfig>()
|
||||
.expect("AppConfig must be inserted before ObservabilityPlugin");
|
||||
|
||||
if !config.observability.metrics_enabled {
|
||||
tracing::info!("metrics exporter disabled by config");
|
||||
return;
|
||||
}
|
||||
|
||||
let listen: SocketAddr = config
|
||||
.observability
|
||||
.metrics_listen
|
||||
.parse()
|
||||
.expect("invalid metrics_listen address in config");
|
||||
|
||||
let runtime_handle = app
|
||||
.world()
|
||||
.get_resource::<TokioHandle>()
|
||||
.expect("TokioHandle must be inserted before ObservabilityPlugin (load order: transport plugin first)")
|
||||
.0
|
||||
.clone();
|
||||
|
||||
// PrometheusBuilder::install spawns the HTTP listener via tokio::spawn,
|
||||
// which requires being inside a runtime context.
|
||||
let _guard = runtime_handle.enter();
|
||||
PrometheusBuilder::new()
|
||||
.with_http_listener(listen)
|
||||
.install()
|
||||
.expect("install prometheus exporter");
|
||||
drop(_guard);
|
||||
|
||||
tracing::info!(?listen, "metrics exporter installed");
|
||||
|
||||
pre_register_counters();
|
||||
}
|
||||
}
|
||||
|
||||
/// Pre-register every counter at value 0 so Grafana sees a series to plot
|
||||
/// even before the first event of that kind. Without this, the Prometheus
|
||||
/// exporter omits any counter that has never been incremented, and panels
|
||||
/// render "No data" — confusing when the metric exists, the counter is just
|
||||
/// genuinely zero (e.g., `substrate_t3_no_handler_total` in normal operation).
|
||||
fn pre_register_counters() {
|
||||
for tier in ["t1", "t2", "t3"] {
|
||||
counter!("substrate_received_total", "tier" => tier).increment(0);
|
||||
counter!("substrate_decode_errors_total", "tier" => tier).increment(0);
|
||||
}
|
||||
counter!("substrate_dropped_total", "tier" => "t1").increment(0);
|
||||
counter!("substrate_t3_no_handler_total").increment(0);
|
||||
|
||||
// Threshold crossings — bounded `|SensorType| × 2` cardinality, all
|
||||
// pre-registered so dashboard panels show "0" instead of "No data".
|
||||
for t in [
|
||||
SensorType::Generic,
|
||||
SensorType::Temperature,
|
||||
SensorType::Humidity,
|
||||
SensorType::Pressure,
|
||||
SensorType::Voltage,
|
||||
SensorType::Current,
|
||||
] {
|
||||
for direction in ["up", "down"] {
|
||||
counter!(
|
||||
"substrate_threshold_crossings_total",
|
||||
"type" => t.label_str(),
|
||||
"direction" => direction
|
||||
)
|
||||
.increment(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
111
substrate/src/transport/ecs.rs
Normal file
111
substrate/src/transport/ecs.rs
Normal file
@@ -0,0 +1,111 @@
|
||||
use std::sync::Mutex;
|
||||
|
||||
use bevy::prelude::*;
|
||||
use bevy::state::app::StatesPlugin;
|
||||
use tokio::runtime::Handle;
|
||||
use tokio::sync::mpsc;
|
||||
|
||||
use crate::config::AppConfig;
|
||||
use crate::transport::{QuicMessage, T1Sender, T2Sender, T3Inbound, T3Sender};
|
||||
use crate::transport::server::{accept_loop, bind_endpoint};
|
||||
use crate::transport::state::ServerState;
|
||||
|
||||
// NOTE(review): presumably the bounded capacities of the three tier channels
// created by this plugin — their use is outside this excerpt; confirm.
const T1_CAPACITY: usize = 1024;
const T2_CAPACITY: usize = 512;
const T3_CAPACITY: usize = 256;
|
||||
|
||||
/// Bevy plugin that bridges the tokio-side QUIC accept loop and the ECS.
pub struct EcsQuicTransportPlugin;
|
||||
|
||||
/// Receive halves of the three tier channels, wrapped so they can sit in a
|
||||
/// Bevy `Resource`. The `world` module's ingest system is the sole reader.
///
/// NOTE(review): the `Mutex` wrappers are presumably there to satisfy the
/// thread-safety bounds on `Resource` — confirm.
|
||||
#[derive(Resource)]
|
||||
pub(crate) struct BridgeReceivers {
|
||||
/// Receive half of the T1 channel (`QuicMessage` payloads).
pub(crate) t1: Mutex<mpsc::Receiver<QuicMessage>>,
|
||||
/// Receive half of the T2 channel (`QuicMessage` payloads).
pub(crate) t2: Mutex<mpsc::Receiver<QuicMessage>>,
|
||||
/// Receive half of the T3 channel (`T3Inbound` command + reply handle).
pub(crate) t3: Mutex<mpsc::Receiver<T3Inbound>>,
|
||||
}
|
||||
|
||||
/// Send halves of the three tier channels. `start_quic_server` clones this
/// resource and hands the three senders to the QUIC accept loop.
#[derive(Resource, Clone)]
|
||||
pub(crate) struct BridgeSenders {
|
||||
/// Sender for the T1 channel.
pub(crate) t1: T1Sender,
|
||||
/// Sender for the T2 channel.
pub(crate) t2: T2Sender,
|
||||
/// Sender for the T3 channel.
pub(crate) t3: T3Sender,
|
||||
}
|
||||
|
||||
/// Handle to the tokio runtime started by `EcsQuicTransportPlugin::build`,
/// stored as a resource so ECS systems can enter the runtime context and
/// spawn tasks on it (see `start_quic_server`).
#[derive(Resource, Clone)]
|
||||
pub(crate) struct TokioHandle(pub(crate) Handle);
|
||||
|
||||
/// Bring up the QUIC listener using the loaded `AppConfig` and transition to
|
||||
/// `ServerState::Started`. Runs once via `OnEnter(ServerState::Starting)`.
|
||||
fn start_quic_server(
|
||||
config: Res<AppConfig>,
|
||||
senders: Res<BridgeSenders>,
|
||||
runtime: Res<TokioHandle>,
|
||||
mut next: ResMut<NextState<ServerState>>,
|
||||
) {
|
||||
tracing::info!("entering ServerState::Starting — bringing up QUIC listener");
|
||||
|
||||
// `Endpoint::server` is sync but needs a tokio runtime context for
|
||||
// `Handle::current()`; entering the runtime is enough — no async block
|
||||
// required.
|
||||
let _guard = runtime.0.enter();
|
||||
let endpoint = bind_endpoint(&config.network).expect("failed to bind QUIC endpoint");
|
||||
drop(_guard);
|
||||
|
||||
tracing::info!(local = ?endpoint.local_addr().ok(), "QUIC listener bound");
|
||||
|
||||
let s = senders.clone();
|
||||
runtime.0.spawn(accept_loop(endpoint, s.t1, s.t2, s.t3));
|
||||
|
||||
next.set(ServerState::Started);
|
||||
tracing::info!("ServerState::Started");
|
||||
}
|
||||
|
||||
impl Plugin for EcsQuicTransportPlugin {
|
||||
fn build(&self, app: &mut App) {
|
||||
// Three-tier bridge between the tokio-side QUIC accept loop and the
|
||||
// ECS PreUpdate ingest system (in the `world` module).
|
||||
let (t1_tx, t1_rx) = mpsc::channel::<QuicMessage>(T1_CAPACITY);
|
||||
let (t2_tx, t2_rx) = mpsc::channel::<QuicMessage>(T2_CAPACITY);
|
||||
let (t3_tx, t3_rx) = mpsc::channel::<T3Inbound>(T3_CAPACITY);
|
||||
|
||||
// Spawn a tokio runtime on a dedicated OS thread, ship its Handle back
|
||||
// to the ECS, and keep the runtime alive for the lifetime of the app
|
||||
// by parking on `pending()`.
|
||||
let (handle_tx, handle_rx) = std::sync::mpsc::sync_channel::<Handle>(1);
|
||||
std::thread::Builder::new()
|
||||
.name("quic-runtime".to_string())
|
||||
.spawn(move || {
|
||||
let rt = tokio::runtime::Builder::new_multi_thread()
|
||||
.worker_threads(2)
|
||||
.enable_all()
|
||||
.thread_name("quic-worker")
|
||||
.build()
|
||||
.expect("build tokio runtime");
|
||||
handle_tx
|
||||
.send(rt.handle().clone())
|
||||
.expect("send tokio Handle to ECS");
|
||||
rt.block_on(std::future::pending::<()>());
|
||||
})
|
||||
.expect("spawn quic-runtime thread");
|
||||
|
||||
let handle = handle_rx.recv().expect("receive tokio Handle");
|
||||
|
||||
// Bevy 0.18 split state machinery into its own plugin; under
|
||||
// MinimalPlugins it isn't installed by default.
|
||||
app.add_plugins(StatesPlugin)
|
||||
.init_state::<ServerState>()
|
||||
.insert_resource(TokioHandle(handle))
|
||||
.insert_resource(BridgeSenders {
|
||||
t1: T1Sender::new(t1_tx),
|
||||
t2: T2Sender::new(t2_tx),
|
||||
t3: T3Sender::new(t3_tx),
|
||||
})
|
||||
.insert_resource(BridgeReceivers {
|
||||
t1: Mutex::new(t1_rx),
|
||||
t2: Mutex::new(t2_rx),
|
||||
t3: Mutex::new(t3_rx),
|
||||
})
|
||||
.add_systems(OnEnter(ServerState::Starting), start_quic_server);
|
||||
}
|
||||
}
|
||||
312
substrate/src/transport/mod.rs
Normal file
312
substrate/src/transport/mod.rs
Normal file
@@ -0,0 +1,312 @@
|
||||
pub mod ecs;
|
||||
pub mod server;
|
||||
pub mod state;
|
||||
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
|
||||
/// Logical type of a sensor reading. Travels in `QuicMessage::sensor_type`
/// so the substrate (and any downstream dashboard) knows which units / range
/// / visualisation applies to the `raw_value`.
///
/// Forward compat: unknown discriminants decode as `Generic`.
///
/// Conversions are available both as inherent `const fn`s (`from_u8`,
/// `as_u8`) and through the standard `From` trait in both directions.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[repr(u8)]
pub enum SensorType {
    #[default]
    Generic = 0,
    Temperature = 1,
    Humidity = 2,
    Pressure = 3,
    Voltage = 4,
    Current = 5,
}

impl SensorType {
    /// Decodes a wire discriminant. Any value without a dedicated variant
    /// (including `0`) maps to `Generic`, so this never fails.
    pub const fn from_u8(b: u8) -> Self {
        match b {
            1 => Self::Temperature,
            2 => Self::Humidity,
            3 => Self::Pressure,
            4 => Self::Voltage,
            5 => Self::Current,
            _ => Self::Generic,
        }
    }

    /// Returns the `#[repr(u8)]` discriminant used on the wire.
    pub const fn as_u8(self) -> u8 {
        self as u8
    }

    /// Lowercase label used as a Prometheus label value.
    pub const fn label_str(self) -> &'static str {
        match self {
            Self::Generic => "generic",
            Self::Temperature => "temperature",
            Self::Humidity => "humidity",
            Self::Pressure => "pressure",
            Self::Voltage => "voltage",
            Self::Current => "current",
        }
    }

    /// SI / engineering unit string for Grafana axis labels. `Generic` has
    /// no unit and yields the empty string.
    pub const fn unit_str(self) -> &'static str {
        match self {
            Self::Generic => "",
            Self::Temperature => "°C",
            Self::Humidity => "%",
            Self::Pressure => "hPa",
            Self::Voltage => "V",
            Self::Current => "A",
        }
    }
}

/// Lossy decode: delegates to [`SensorType::from_u8`], so unknown
/// discriminants become `Generic`.
impl From<u8> for SensorType {
    fn from(b: u8) -> Self {
        Self::from_u8(b)
    }
}

/// Encode to the wire discriminant; delegates to [`SensorType::as_u8`].
impl From<SensorType> for u8 {
    fn from(t: SensorType) -> Self {
        t.as_u8()
    }
}
|
||||
|
||||
/// One sample (T1/T2 sensor reading or T3 actuator command/ack) on the wire.
|
||||
///
|
||||
/// Fixed 39-byte little-endian layout — same on x86_64 and aarch64 (the two
|
||||
/// evaluation hosts), so encode/decode is effectively a memcpy.
|
||||
///
|
||||
/// ```text
|
||||
/// offset size field
|
||||
/// ------ ---- --------------------------
|
||||
/// 0 16 device_id (UUID)
|
||||
/// 16 2 sensor_id (u16)
|
||||
/// 18 8 raw_value (f64)
|
||||
/// 26 8 timestamp_us (u64)
|
||||
/// 34 4 sequence_number (u32)
|
||||
/// 38 1 sensor_type (u8 — `SensorType` discriminant)
|
||||
/// ```
|
||||
///
|
||||
/// Field semantics:
|
||||
/// - `device_id` — UUID of the originating device (or target, for T3 commands).
|
||||
/// - `sensor_id` — logical sensor/actuator on that device (per-device index).
|
||||
/// - `raw_value` — sensor reading (T1/T2) or actuator setpoint/feedback (T3).
|
||||
/// - `timestamp_us` — capture time on the device clock for T1/T2; server-side
|
||||
/// ack time on T3 replies.
|
||||
/// - `sequence_number` — monotonic counter per `(device_id, sensor_id)` for
|
||||
/// T1/T2; correlation id linking T3 command and ack.
|
||||
/// - `sensor_type` — `SensorType` discriminant, decoded via `SensorType::from_u8`.
|
||||
#[derive(Debug, Clone, Default, Copy, PartialEq)]
|
||||
pub struct QuicMessage {
|
||||
/// Wire bytes 0..16 — device UUID.
pub device_id: uuid::Uuid,
|
||||
/// Wire bytes 16..18 — little-endian `u16`.
pub sensor_id: u16,
|
||||
/// Wire bytes 18..26 — little-endian `f64`.
pub raw_value: f64,
|
||||
/// Wire bytes 26..34 — little-endian `u64`, microseconds.
pub timestamp_us: u64,
|
||||
/// Wire bytes 34..38 — little-endian `u32`.
pub sequence_number: u32,
|
||||
/// Wire byte 38 — raw `SensorType` discriminant; kept as `u8` so unknown
/// values survive a decode/encode round trip. Use `typ()` for the enum.
pub sensor_type: u8,
|
||||
}
|
||||
|
||||
/// Errors produced by `QuicMessage::encode_to` and `QuicMessage::decode`.
#[derive(Debug, thiserror::Error)]
|
||||
pub enum WireError {
|
||||
/// The supplied buffer was not exactly `expected` bytes long; `got` is
/// the length that was actually passed in.
#[error("expected exactly {expected} bytes, got {got}")]
|
||||
BadLength { expected: usize, got: usize },
|
||||
}
|
||||
|
||||
impl QuicMessage {
|
||||
/// Bytes on the wire — fixed-size, no length prefix.
|
||||
pub const WIRE_SIZE: usize = 39;
|
||||
|
||||
pub fn encode_to(&self, buf: &mut [u8]) -> Result<(), WireError> {
|
||||
if buf.len() != Self::WIRE_SIZE {
|
||||
return Err(WireError::BadLength {
|
||||
expected: Self::WIRE_SIZE,
|
||||
got: buf.len(),
|
||||
});
|
||||
}
|
||||
buf[0..16].copy_from_slice(self.device_id.as_bytes());
|
||||
buf[16..18].copy_from_slice(&self.sensor_id.to_le_bytes());
|
||||
buf[18..26].copy_from_slice(&self.raw_value.to_le_bytes());
|
||||
buf[26..34].copy_from_slice(&self.timestamp_us.to_le_bytes());
|
||||
buf[34..38].copy_from_slice(&self.sequence_number.to_le_bytes());
|
||||
buf[38] = self.sensor_type;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn to_bytes(&self) -> [u8; Self::WIRE_SIZE] {
|
||||
let mut buf = [0u8; Self::WIRE_SIZE];
|
||||
self.encode_to(&mut buf).expect("WIRE_SIZE buffer is exactly sized");
|
||||
buf
|
||||
}
|
||||
|
||||
pub fn decode(buf: &[u8]) -> Result<Self, WireError> {
|
||||
if buf.len() != Self::WIRE_SIZE {
|
||||
return Err(WireError::BadLength {
|
||||
expected: Self::WIRE_SIZE,
|
||||
got: buf.len(),
|
||||
});
|
||||
}
|
||||
let mut id_bytes = [0u8; 16];
|
||||
id_bytes.copy_from_slice(&buf[0..16]);
|
||||
Ok(Self {
|
||||
device_id: uuid::Uuid::from_bytes(id_bytes),
|
||||
sensor_id: u16::from_le_bytes(buf[16..18].try_into().unwrap()),
|
||||
raw_value: f64::from_le_bytes(buf[18..26].try_into().unwrap()),
|
||||
timestamp_us: u64::from_le_bytes(buf[26..34].try_into().unwrap()),
|
||||
sequence_number: u32::from_le_bytes(buf[34..38].try_into().unwrap()),
|
||||
sensor_type: buf[38],
|
||||
})
|
||||
}
|
||||
|
||||
/// Convenience accessor — decodes `sensor_type` to the typed enum.
|
||||
pub fn typ(&self) -> SensorType {
|
||||
SensorType::from_u8(self.sensor_type)
|
||||
}
|
||||
}
|
||||
|
||||
// --- Per-tier bridge senders -----------------------------------------------
|
||||
//
|
||||
// Three newtypes encode the paper's tier semantics into the type system so
|
||||
// the demux can't mix them up:
|
||||
//
|
||||
// * T1 (datagrams) — lossy; `try_send` drops on full
|
||||
// * T2 (uni streams) — reliable, ordered; `send().await` backpressures
|
||||
// * T3 (bi streams) — reliable command + per-command oneshot reply
|
||||
|
||||
/// Tier 1 — high-frequency telemetry over QUIC datagrams. Full channel drops.
|
||||
#[derive(Clone)]
|
||||
pub struct T1Sender {
|
||||
inner: mpsc::Sender<QuicMessage>,
|
||||
}
|
||||
|
||||
impl T1Sender {
|
||||
pub fn new(inner: mpsc::Sender<QuicMessage>) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
/// Returns `true` if queued, `false` if dropped (channel full or closed).
|
||||
pub fn send_lossy(&self, msg: QuicMessage) -> bool {
|
||||
self.inner.try_send(msg).is_ok()
|
||||
}
|
||||
|
||||
/// Currently queued messages — used for channel-depth gauges.
|
||||
pub fn depth(&self) -> usize {
|
||||
self.inner.max_capacity().saturating_sub(self.inner.capacity())
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.inner.max_capacity()
|
||||
}
|
||||
}
|
||||
|
||||
/// Tier 2 — ordered events over a QUIC unidirectional stream. Awaits on full.
|
||||
#[derive(Clone)]
|
||||
pub struct T2Sender {
|
||||
inner: mpsc::Sender<QuicMessage>,
|
||||
}
|
||||
|
||||
impl T2Sender {
|
||||
pub fn new(inner: mpsc::Sender<QuicMessage>) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub async fn send(
|
||||
&self,
|
||||
msg: QuicMessage,
|
||||
) -> Result<(), mpsc::error::SendError<QuicMessage>> {
|
||||
self.inner.send(msg).await
|
||||
}
|
||||
|
||||
pub fn depth(&self) -> usize {
|
||||
self.inner.max_capacity().saturating_sub(self.inner.capacity())
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.inner.max_capacity()
|
||||
}
|
||||
}
|
||||
|
||||
/// Tier 3 — actuator command on a QUIC bidirectional stream, paired with a
|
||||
/// `oneshot` channel the ECS uses to write the ack back over the same stream.
|
||||
pub struct T3Inbound {
|
||||
/// The decoded command message read from the stream.
pub command: QuicMessage,
|
||||
/// One-shot reply handle; the message sent here is written back to the
/// client. Dropping it without sending makes the stream worker reset
/// the stream.
pub reply: oneshot::Sender<QuicMessage>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct T3Sender {
|
||||
inner: mpsc::Sender<T3Inbound>,
|
||||
}
|
||||
|
||||
impl T3Sender {
|
||||
pub fn new(inner: mpsc::Sender<T3Inbound>) -> Self {
|
||||
Self { inner }
|
||||
}
|
||||
|
||||
pub async fn send(
|
||||
&self,
|
||||
inbound: T3Inbound,
|
||||
) -> Result<(), mpsc::error::SendError<T3Inbound>> {
|
||||
self.inner.send(inbound).await
|
||||
}
|
||||
|
||||
pub fn depth(&self) -> usize {
|
||||
self.inner.max_capacity().saturating_sub(self.inner.capacity())
|
||||
}
|
||||
|
||||
pub fn capacity(&self) -> usize {
|
||||
self.inner.max_capacity()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// `WIRE_SIZE` must equal the sum of the documented field widths.
    #[test]
    fn wire_size_matches_fields() {
        let expected_size = 16 + 2 + 8 + 8 + 4 + 1;
        assert_eq!(QuicMessage::WIRE_SIZE, expected_size);
    }

    /// Encoding then decoding yields the original message, field for field.
    #[test]
    fn roundtrip_preserves_all_fields() {
        let original = QuicMessage {
            device_id: uuid::Uuid::from_u128(0x0123456789abcdef_fedcba9876543210),
            sensor_id: 0xBEEF,
            raw_value: -273.15,
            timestamp_us: 1_700_000_000_000_001,
            sequence_number: 42,
            sensor_type: SensorType::Temperature.as_u8(),
        };

        let wire = original.to_bytes();
        assert_eq!(wire.len(), QuicMessage::WIRE_SIZE);

        let decoded = QuicMessage::decode(&wire).unwrap();
        assert_eq!(original, decoded);
        assert_eq!(decoded.typ(), SensorType::Temperature);
    }

    /// Buffers one byte too short and one byte too long are both rejected.
    #[test]
    fn decode_rejects_wrong_length() {
        let too_short = QuicMessage::decode(&[0u8; 38]);
        assert!(matches!(
            too_short,
            Err(WireError::BadLength { expected: 39, got: 38 })
        ));

        let too_long = QuicMessage::decode(&[0u8; 40]);
        assert!(matches!(
            too_long,
            Err(WireError::BadLength { expected: 39, got: 40 })
        ));
    }

    /// Multi-byte integers are laid out least-significant byte first.
    #[test]
    fn encode_layout_is_little_endian() {
        let humidity = SensorType::Humidity.as_u8();
        let wire = QuicMessage {
            device_id: uuid::Uuid::nil(),
            sensor_id: 0x0102,
            raw_value: 0.0,
            timestamp_us: 0,
            sequence_number: 0x04030201,
            sensor_type: humidity,
        }
        .to_bytes();

        assert_eq!(&wire[16..18], &[0x02, 0x01]);
        assert_eq!(&wire[34..38], &[0x01, 0x02, 0x03, 0x04]);
        assert_eq!(wire[38], humidity);
    }

    /// Discriminants without a dedicated variant fall back to `Generic`.
    #[test]
    fn unknown_sensor_type_decodes_as_generic() {
        for raw in [0u8, 99, 255] {
            assert_eq!(SensorType::from_u8(raw), SensorType::Generic);
        }
    }
}
|
||||
350
substrate/src/transport/server.rs
Normal file
350
substrate/src/transport/server.rs
Normal file
@@ -0,0 +1,350 @@
|
||||
use std::net::SocketAddr;
|
||||
use std::sync::Arc;
|
||||
|
||||
use anyhow::{Context, anyhow};
|
||||
use metrics::counter;
|
||||
use quinn::{
|
||||
Connection, Endpoint, Incoming, RecvStream, SendStream, ServerConfig, StreamId, TransportConfig,
|
||||
};
|
||||
use rustls_pki_types::{CertificateDer, PrivateKeyDer};
|
||||
use tokio::sync::oneshot;
|
||||
|
||||
use crate::config::QuicConfig;
|
||||
use crate::transport::{QuicMessage, T1Sender, T2Sender, T3Inbound, T3Sender};
|
||||
|
||||
/// Datagram receive buffer in bytes. Sized to absorb microbursts at the
|
||||
/// telemetry rates. Applied through
/// `TransportConfig::datagram_receive_buffer_size` in `build_server_config`.
|
||||
const DATAGRAM_RECV_BUFFER_BYTES: usize = 256 * 1024;
|
||||
|
||||
/// Load the cert chain + private key from disk and build a Quinn `ServerConfig`.
|
||||
pub fn build_server_config(cfg: &QuicConfig) -> anyhow::Result<ServerConfig> {
|
||||
let cert_pem = std::fs::read(&cfg.server_cert)
|
||||
.with_context(|| format!("read server_cert at {}", cfg.server_cert))?;
|
||||
let key_pem = std::fs::read(&cfg.server_key)
|
||||
.with_context(|| format!("read server_key at {}", cfg.server_key))?;
|
||||
|
||||
let certs: Vec<CertificateDer<'static>> = rustls_pemfile::certs(&mut cert_pem.as_slice())
|
||||
.collect::<Result<_, _>>()
|
||||
.with_context(|| format!("parse PEM certs at {}", cfg.server_cert))?;
|
||||
if certs.is_empty() {
|
||||
return Err(anyhow!("no certificates found in {}", cfg.server_cert));
|
||||
}
|
||||
|
||||
let key: PrivateKeyDer<'static> = rustls_pemfile::private_key(&mut key_pem.as_slice())
|
||||
.with_context(|| format!("parse PEM key at {}", cfg.server_key))?
|
||||
.ok_or_else(|| anyhow!("no private key found in {}", cfg.server_key))?;
|
||||
|
||||
let mut server_config =
|
||||
ServerConfig::with_single_cert(certs, key).context("build Quinn ServerConfig")?;
|
||||
|
||||
// Explicit transport config so the values driving evaluation are visible
|
||||
// in source and at startup, not buried in Quinn's defaults.
|
||||
let mut transport = TransportConfig::default();
|
||||
transport.datagram_receive_buffer_size(Some(DATAGRAM_RECV_BUFFER_BYTES));
|
||||
server_config.transport = Arc::new(transport);
|
||||
|
||||
tracing::info!(
|
||||
datagram_recv_buffer_bytes = DATAGRAM_RECV_BUFFER_BYTES,
|
||||
"Quinn TransportConfig tuned"
|
||||
);
|
||||
|
||||
Ok(server_config)
|
||||
}
|
||||
|
||||
/// Bind the listener. Must be called from inside a tokio runtime context
|
||||
/// (Quinn relies on `Handle::current()` internally).
|
||||
pub fn bind_endpoint(cfg: &QuicConfig) -> anyhow::Result<Endpoint> {
|
||||
let server_config = build_server_config(cfg)?;
|
||||
let addr: SocketAddr = format!("{}:{}", cfg.server_interface, cfg.server_port)
|
||||
.parse()
|
||||
.with_context(|| {
|
||||
format!(
|
||||
"invalid bind address {}:{}",
|
||||
cfg.server_interface, cfg.server_port
|
||||
)
|
||||
})?;
|
||||
Endpoint::server(server_config, addr).context("Endpoint::server bind")
|
||||
}
|
||||
|
||||
/// Accept loop: per-connection senders are cloned from the tier handles and
|
||||
/// shipped into `handle_incoming` for orchestration.
|
||||
pub async fn accept_loop(endpoint: Endpoint, t1: T1Sender, t2: T2Sender, t3: T3Sender) {
|
||||
tracing::info!(local = ?endpoint.local_addr().ok(), "QUIC accept loop running");
|
||||
while let Some(incoming) = endpoint.accept().await {
|
||||
let t1 = t1.clone();
|
||||
let t2 = t2.clone();
|
||||
let t3 = t3.clone();
|
||||
tokio::spawn(handle_incoming(incoming, t1, t2, t3));
|
||||
}
|
||||
tracing::info!("QUIC accept loop exited");
|
||||
}
|
||||
|
||||
/// Per-connection orchestrator. Performs the handshake and spawns one reader
|
||||
/// per tier, then waits for the connection to close and joins the readers.
|
||||
async fn handle_incoming(incoming: Incoming, t1: T1Sender, t2: T2Sender, t3: T3Sender) {
|
||||
let conn = match incoming.await {
|
||||
Ok(c) => c,
|
||||
Err(e) => {
|
||||
tracing::warn!(error = %e, "handshake failed");
|
||||
return;
|
||||
}
|
||||
};
|
||||
let remote = conn.remote_address();
|
||||
tracing::info!(?remote, "connection established");
|
||||
|
||||
// One task per tier — fully wired across T1/T2/T3.
|
||||
let dgram_task = tokio::spawn(read_datagrams(conn.clone(), t1));
|
||||
let uni_task = tokio::spawn(read_uni_streams(conn.clone(), t2));
|
||||
let bi_task = tokio::spawn(accept_bi_streams(conn.clone(), t3));
|
||||
|
||||
let _ = conn.closed().await;
|
||||
|
||||
if let Err(e) = dgram_task.await {
|
||||
tracing::warn!(?remote, error = %e, "T1 datagram task ended unexpectedly");
|
||||
}
|
||||
if let Err(e) = uni_task.await {
|
||||
tracing::warn!(?remote, error = %e, "T2 uni stream task ended unexpectedly");
|
||||
}
|
||||
if let Err(e) = bi_task.await {
|
||||
tracing::warn!(?remote, error = %e, "T3 bi stream task ended unexpectedly");
|
||||
}
|
||||
tracing::info!(?remote, "connection closed");
|
||||
}
|
||||
|
||||
/// T1 — read QUIC datagrams, decode each as a fixed-size `QuicMessage`, push
|
||||
/// into the lossy T1 channel.
|
||||
async fn read_datagrams(conn: Connection, t1: T1Sender) {
|
||||
let remote = conn.remote_address();
|
||||
let mut received: u64 = 0;
|
||||
let mut dropped: u64 = 0;
|
||||
let mut decode_errors: u64 = 0;
|
||||
|
||||
loop {
|
||||
match conn.read_datagram().await {
|
||||
Ok(bytes) => match QuicMessage::decode(&bytes[..]) {
|
||||
Ok(msg) => {
|
||||
received += 1;
|
||||
counter!("substrate_received_total", "tier" => "t1").increment(1);
|
||||
if !t1.send_lossy(msg) {
|
||||
dropped += 1;
|
||||
counter!("substrate_dropped_total", "tier" => "t1").increment(1);
|
||||
tracing::trace!(?remote, "T1 channel full, datagram dropped");
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
decode_errors += 1;
|
||||
counter!("substrate_decode_errors_total", "tier" => "t1").increment(1);
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
len = bytes.len(),
|
||||
error = %e,
|
||||
"T1 datagram decode failed"
|
||||
);
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
?remote,
|
||||
received,
|
||||
dropped,
|
||||
decode_errors,
|
||||
error = %e,
|
||||
"T1 datagram reader ended"
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// T2 — accept unidirectional streams. Each accepted stream gets its own task
|
||||
/// reading 38-byte chunks until EOF (one stream may carry one event or many).
|
||||
/// Cross-stream interleaving is allowed; ordering is only guaranteed *within*
|
||||
/// a stream, matching QUIC's stream semantics.
|
||||
async fn read_uni_streams(conn: Connection, t2: T2Sender) {
|
||||
let remote = conn.remote_address();
|
||||
let mut streams_accepted: u64 = 0;
|
||||
|
||||
loop {
|
||||
let recv = match conn.accept_uni().await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
?remote,
|
||||
streams_accepted,
|
||||
error = %e,
|
||||
"T2 uni accept loop ended"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
streams_accepted += 1;
|
||||
let t2 = t2.clone();
|
||||
tokio::spawn(read_one_uni_stream(remote, recv, t2));
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-stream worker for T2. Reads fixed-size `QuicMessage`s back-to-back,
|
||||
/// awaits backpressure on the T2 channel, and resets the stream on a decode
|
||||
/// failure (one corrupt stream shouldn't take down the whole connection).
|
||||
async fn read_one_uni_stream(remote: SocketAddr, mut recv: RecvStream, t2: T2Sender) {
|
||||
let stream_id: StreamId = recv.id();
|
||||
let mut buf = [0u8; QuicMessage::WIRE_SIZE];
|
||||
let mut count: u64 = 0;
|
||||
|
||||
loop {
|
||||
match recv.read_exact(&mut buf).await {
|
||||
Ok(()) => match QuicMessage::decode(&buf) {
|
||||
Ok(msg) => {
|
||||
count += 1;
|
||||
counter!("substrate_received_total", "tier" => "t2").increment(1);
|
||||
if t2.send(msg).await.is_err() {
|
||||
// T2 receiver dropped (substrate shutting down).
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
count,
|
||||
"T2 channel closed; abandoning stream"
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
Err(e) => {
|
||||
counter!("substrate_decode_errors_total", "tier" => "t2").increment(1);
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
count,
|
||||
error = %e,
|
||||
"T2 decode failed; resetting stream"
|
||||
);
|
||||
let _ = recv.stop(0u32.into());
|
||||
return;
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
tracing::trace!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
count,
|
||||
error = %e,
|
||||
"T2 uni stream ended"
|
||||
);
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// T3 — accept bidirectional streams. Each stream is one command/ack
|
||||
/// exchange, modeled per the paper's "per-command oneshot channels": the
|
||||
/// reader pushes a `T3Inbound { command, reply }` to the ECS, awaits the
|
||||
/// response on `reply_rx`, and writes it back on the same stream.
|
||||
async fn accept_bi_streams(conn: Connection, t3: T3Sender) {
|
||||
let remote = conn.remote_address();
|
||||
let mut streams_accepted: u64 = 0;
|
||||
|
||||
loop {
|
||||
let (send, recv) = match conn.accept_bi().await {
|
||||
Ok(s) => s,
|
||||
Err(e) => {
|
||||
tracing::debug!(
|
||||
?remote,
|
||||
streams_accepted,
|
||||
error = %e,
|
||||
"T3 bi accept loop ended"
|
||||
);
|
||||
return;
|
||||
}
|
||||
};
|
||||
streams_accepted += 1;
|
||||
let t3 = t3.clone();
|
||||
tokio::spawn(read_one_bi_stream(remote, send, recv, t3));
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-stream worker for T3. Reads exactly one command, ships it with a
|
||||
/// `oneshot::Sender` to the ECS, awaits the reply, writes it back. If the
|
||||
/// ECS drops the oneshot (no handler installed), the stream is reset so the
|
||||
/// client sees an explicit reset instead of a half-open stream.
|
||||
async fn read_one_bi_stream(
|
||||
remote: SocketAddr,
|
||||
mut send: SendStream,
|
||||
mut recv: RecvStream,
|
||||
t3: T3Sender,
|
||||
) {
|
||||
let stream_id: StreamId = recv.id();
|
||||
|
||||
let mut buf = [0u8; QuicMessage::WIRE_SIZE];
|
||||
if let Err(e) = recv.read_exact(&mut buf).await {
|
||||
tracing::trace!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
error = %e,
|
||||
"T3: incomplete command read; closing"
|
||||
);
|
||||
return;
|
||||
}
|
||||
let command = match QuicMessage::decode(&buf) {
|
||||
Ok(m) => m,
|
||||
Err(e) => {
|
||||
counter!("substrate_decode_errors_total", "tier" => "t3").increment(1);
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
error = %e,
|
||||
"T3 command decode failed; resetting stream"
|
||||
);
|
||||
let _ = recv.stop(0u32.into());
|
||||
let _ = send.reset(0u32.into());
|
||||
return;
|
||||
}
|
||||
};
|
||||
counter!("substrate_received_total", "tier" => "t3").increment(1);
|
||||
|
||||
let (reply_tx, reply_rx) = oneshot::channel::<QuicMessage>();
|
||||
let inbound = T3Inbound {
|
||||
command,
|
||||
reply: reply_tx,
|
||||
};
|
||||
if t3.send(inbound).await.is_err() {
|
||||
tracing::warn!(?remote, ?stream_id, "T3 channel closed; abandoning command");
|
||||
let _ = send.reset(0u32.into());
|
||||
return;
|
||||
}
|
||||
|
||||
let response = match reply_rx.await {
|
||||
Ok(msg) => msg,
|
||||
Err(_) => {
|
||||
// ECS dropped the oneshot. With M4's handler installed this
|
||||
// shouldn't happen normally; if it does, the stream is reset so
|
||||
// the client sees a clean signal.
|
||||
counter!("substrate_t3_no_handler_total").increment(1);
|
||||
tracing::debug!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
"T3: no handler for command, resetting stream"
|
||||
);
|
||||
let _ = send.reset(0u32.into());
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
if let Err(e) = send.write_all(&response.to_bytes()).await {
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
error = %e,
|
||||
"T3 ack write failed"
|
||||
);
|
||||
return;
|
||||
}
|
||||
if let Err(e) = send.finish() {
|
||||
tracing::warn!(
|
||||
?remote,
|
||||
?stream_id,
|
||||
error = %e,
|
||||
"T3 ack finish failed"
|
||||
);
|
||||
}
|
||||
}
|
||||
13
substrate/src/transport/state.rs
Normal file
13
substrate/src/transport/state.rs
Normal file
@@ -0,0 +1,13 @@
|
||||
use bevy::prelude::States;
|
||||
|
||||
/// Lifecycle of the QUIC listener inside the ECS schedule.
|
||||
///
|
||||
/// `Starting` is the default; `OnEnter(Starting)` performs the bind and, on
|
||||
/// success, transitions to `Started`. A `Failed` variant will join when we
|
||||
/// add proper error surfacing — for now a bind failure panics the app.
|
||||
#[derive(States, Debug, Clone, Copy, Default, Eq, PartialEq, Hash)]
|
||||
pub enum ServerState {
|
||||
/// Initial state: the listener has not been bound yet.
#[default]
|
||||
Starting,
|
||||
/// The endpoint is bound and the accept loop has been spawned.
Started,
|
||||
}
|
||||
97
substrate/src/world/components.rs
Normal file
97
substrate/src/world/components.rs
Normal file
@@ -0,0 +1,97 @@
|
||||
//! Components attached to per-sensor entities, plus the per-type threshold
|
||||
//! table used by `simulation_system`'s crossing detection.
|
||||
//!
|
||||
//! Each (device, sensor) pair becomes one entity tagged with `Asset` and
|
||||
//! carrying `DeviceId` + `SensorId` + `SensorTypeTag` + `RawSensorData` +
|
||||
//! `SmoothedValue`.
|
||||
|
||||
use bevy::prelude::*;
|
||||
|
||||
use crate::transport::SensorType;
|
||||
|
||||
/// Marker — every (device, sensor) pair becomes one entity tagged `Asset`.
/// Carries no data of its own.
|
||||
#[derive(Component, Debug, Default, Clone, Copy)]
|
||||
pub struct Asset;
|
||||
|
||||
/// UUID of the device a sensor entity belongs to.
// NOTE(review): presumably copied from `QuicMessage::device_id` — confirm in
// `ingest_system`.
#[derive(Component, Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct DeviceId(pub uuid::Uuid);
|
||||
|
||||
/// Per-device sensor index of a sensor entity.
// NOTE(review): presumably copied from `QuicMessage::sensor_id` — confirm in
// `ingest_system`.
#[derive(Component, Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct SensorId(pub u16);
|
||||
|
||||
/// Sensor type — set on entity creation from the first message that names
|
||||
/// the (device, sensor) pair, then immutable. We don't track type changes:
|
||||
/// a given (device_id, sensor_id) is one logical sensor with one type for
|
||||
/// the lifetime of the run.
///
/// Thin component wrapper around the transport-level `SensorType`.
|
||||
#[derive(Component, Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
pub struct SensorTypeTag(pub SensorType);
|
||||
|
||||
/// Latest reading from this (device, sensor). Updated in place by
|
||||
/// `ingest_system`; read by simulation/export/diagnostics.
///
/// NOTE(review): the fields presumably mirror the same-named `QuicMessage`
/// fields — confirm in `ingest_system`.
|
||||
#[derive(Component, Debug, Default, Clone, Copy, PartialEq)]
|
||||
pub struct RawSensorData {
|
||||
/// Most recent raw value.
pub raw_value: f64,
|
||||
/// Timestamp of the most recent reading, in microseconds.
pub timestamp_us: u64,
|
||||
/// Sequence number of the most recent reading.
pub sequence_number: u32,
|
||||
}
|
||||
|
||||
/// Number of samples held by `SmoothedValue`'s ring buffer, i.e. the maximum
/// number of readings that contribute to its rolling mean.
pub const SMOOTHED_WINDOW: usize = 16;
|
||||
|
||||
/// Rolling-window mean of the last `SMOOTHED_WINDOW` raw readings, plus a
|
||||
/// hysteresis flag for threshold-crossing detection. Maintained by
|
||||
/// `simulation_system` — this is the bit of the ECS that does honest
|
||||
/// digital-twin transform work, not just write-through of incoming samples.
|
||||
#[derive(Component, Debug, Clone, Copy)]
|
||||
pub struct SmoothedValue {
|
||||
ring: [f64; SMOOTHED_WINDOW],
|
||||
head: usize,
|
||||
filled: u16,
|
||||
pub mean: f64,
|
||||
pub above_threshold: bool,
|
||||
}
|
||||
|
||||
impl Default for SmoothedValue {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
ring: [0.0; SMOOTHED_WINDOW],
|
||||
head: 0,
|
||||
filled: 0,
|
||||
mean: 0.0,
|
||||
above_threshold: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl SmoothedValue {
|
||||
/// Push a new sample. Non-finite values (NaN / ±∞) are ignored — the
|
||||
/// smoothed state stays whatever it was. This matters because T3 acks
|
||||
/// can carry NaN when the substrate has never seen the target sensor.
|
||||
pub fn push(&mut self, v: f64) {
|
||||
if !v.is_finite() {
|
||||
return;
|
||||
}
|
||||
self.ring[self.head] = v;
|
||||
self.head = (self.head + 1) % SMOOTHED_WINDOW;
|
||||
if (self.filled as usize) < SMOOTHED_WINDOW {
|
||||
self.filled += 1;
|
||||
}
|
||||
let n = self.filled as usize;
|
||||
let sum: f64 = self.ring.iter().take(n).sum();
|
||||
self.mean = sum / n as f64;
|
||||
}
|
||||
}
|
||||
|
||||
/// Per-type threshold for `simulation_system`'s crossing detection. Chosen
|
||||
/// mid-band against the simulator's waveforms so crossings actually fire
|
||||
/// during a demo; in a real deployment these would be alarm thresholds
|
||||
/// supplied by config.
|
||||
pub(super) fn threshold_for(t: SensorType) -> f64 {
|
||||
match t {
|
||||
SensorType::Generic => 0.0,
|
||||
SensorType::Temperature => 22.0, // °C — simulator oscillates 15..25
|
||||
SensorType::Humidity => 55.0, // % — 30..70
|
||||
SensorType::Pressure => 1014.0, // hPa — 1008..1018
|
||||
SensorType::Voltage => 230.2, // V — 229.5..230.5
|
||||
SensorType::Current => 10.5, // A — 8..12
|
||||
}
|
||||
}
|
||||
52
substrate/src/world/mod.rs
Normal file
52
substrate/src/world/mod.rs
Normal file
@@ -0,0 +1,52 @@
|
||||
//! ECS world: the five paper-named systems plus the components and resources
|
||||
//! they operate on.
|
||||
//!
|
||||
//! ```text
|
||||
//! components.rs ── per-sensor components + per-type threshold table
|
||||
//! resources.rs ── SensorRegistry, DiagnosticsState, ExportSampleState
|
||||
//! systems.rs ── ingest / fault_injection / simulation / export / diagnostics
|
||||
//! tests.rs ── unit tests (#[cfg(test)] only)
|
||||
//! ```
|
||||
//!
|
||||
//! Each (device, sensor) pair becomes one entity with `Asset` + `DeviceId` +
|
||||
//! `SensorId` + `SensorTypeTag` + `RawSensorData` + `SmoothedValue`.
|
||||
//! `ingest_system` upserts on every incoming `QuicMessage`; the registry maps
|
||||
//! `(Uuid, u16) → Entity` for O(1) lookup.
|
||||
|
||||
mod components;
|
||||
mod resources;
|
||||
mod systems;
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
|
||||
use bevy::prelude::*;
|
||||
use bevy::state::condition::in_state;
|
||||
|
||||
use crate::transport::state::ServerState;
|
||||
|
||||
pub use components::{
|
||||
Asset, DeviceId, RawSensorData, SMOOTHED_WINDOW, SensorId, SensorTypeTag, SmoothedValue,
|
||||
};
|
||||
pub use resources::SensorRegistry;
|
||||
|
||||
pub struct WorldPlugin;
|
||||
|
||||
impl Plugin for WorldPlugin {
|
||||
fn build(&self, app: &mut App) {
|
||||
app.init_resource::<SensorRegistry>()
|
||||
.init_resource::<resources::DiagnosticsState>()
|
||||
.init_resource::<resources::ExportSampleState>()
|
||||
.add_systems(
|
||||
PreUpdate,
|
||||
(systems::fault_injection_system, systems::ingest_system)
|
||||
.chain()
|
||||
.run_if(in_state(ServerState::Started)),
|
||||
)
|
||||
.add_systems(Update, systems::simulation_system)
|
||||
.add_systems(
|
||||
PostUpdate,
|
||||
(systems::export_system, systems::diagnostics_system).chain(),
|
||||
);
|
||||
}
|
||||
}
|
||||
48
substrate/src/world/resources.rs
Normal file
48
substrate/src/world/resources.rs
Normal file
@@ -0,0 +1,48 @@
|
||||
//! Bevy `Resource`s consumed by the world's systems.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::time::Instant;
|
||||
|
||||
use bevy::prelude::{Entity, Resource};
|
||||
|
||||
/// O(1) lookup `(device_id, sensor_id) → Entity`. Populated lazily by the
|
||||
/// ingest system; queried by export/diagnostics.
|
||||
#[derive(Resource, Default)]
|
||||
pub struct SensorRegistry {
|
||||
pub(crate) map: HashMap<(uuid::Uuid, u16), Entity>,
|
||||
}
|
||||
|
||||
impl SensorRegistry {
|
||||
pub fn entity_count(&self) -> usize {
|
||||
self.map.len()
|
||||
}
|
||||
}
|
||||
|
||||
/// Rolling counter of ticks since the last `diagnostics` log line was emitted.
|
||||
#[derive(Resource)]
|
||||
pub(super) struct DiagnosticsState {
|
||||
pub(super) last_log: Instant,
|
||||
pub(super) ticks_since_log: u64,
|
||||
}
|
||||
|
||||
impl Default for DiagnosticsState {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
last_log: Instant::now(),
|
||||
ticks_since_log: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Rate-limiter for `export_system` — runs at the ECS tick rate but only
|
||||
/// emits gauges once per second.
|
||||
#[derive(Resource)]
|
||||
pub(super) struct ExportSampleState {
|
||||
pub(super) last_sample: Instant,
|
||||
}
|
||||
|
||||
impl Default for ExportSampleState {
|
||||
fn default() -> Self {
|
||||
Self { last_sample: Instant::now() }
|
||||
}
|
||||
}
|
||||
278
substrate/src/world/systems.rs
Normal file
278
substrate/src/world/systems.rs
Normal file
@@ -0,0 +1,278 @@
|
||||
//! The five paper-named ECS systems and their private helpers.
|
||||
//!
|
||||
//! Scheduler placement (configured in [`super::WorldPlugin`]):
|
||||
//!
|
||||
//! | Schedule | Systems |
|
||||
//! |-----------|--------------------------------------|
|
||||
//! | PreUpdate | fault_injection → ingest |
|
||||
//! | Update | simulation |
|
||||
//! | PostUpdate| export → diagnostics |
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
|
||||
|
||||
use bevy::prelude::*;
|
||||
use metrics::{counter, gauge, histogram};
|
||||
|
||||
use crate::transport::ecs::{BridgeReceivers, BridgeSenders};
|
||||
use crate::transport::{QuicMessage, SensorType};
|
||||
|
||||
use super::components::{
|
||||
Asset, DeviceId, RawSensorData, SensorId, SensorTypeTag, SmoothedValue, threshold_for,
|
||||
};
|
||||
use super::resources::{DiagnosticsState, ExportSampleState, SensorRegistry};
|
||||
|
||||
/// T1 batch limit per tick. Anything beyond this stays in the channel and
|
||||
/// either drains next tick or gets dropped on full (T1's contract is lossy).
|
||||
const T1_INGEST_BATCH: usize = 1024;
|
||||
|
||||
/// Drain the three tier channels into ECS state.
|
||||
///
|
||||
/// T1: bounded batch (lossy); T2: full drain (reliable); T3: full drain, with
|
||||
/// each command answered by an ack carrying the device's current sensor value.
|
||||
pub(super) fn ingest_system(
|
||||
bridge: Res<BridgeReceivers>,
|
||||
mut registry: ResMut<SensorRegistry>,
|
||||
mut commands: Commands,
|
||||
mut q: Query<&mut RawSensorData>,
|
||||
) {
|
||||
let now = now_us();
|
||||
|
||||
// T1 — datagrams.
|
||||
{
|
||||
let mut t1 = bridge.t1.lock().unwrap();
|
||||
for _ in 0..T1_INGEST_BATCH {
|
||||
match t1.try_recv() {
|
||||
Ok(msg) => {
|
||||
histogram!("substrate_latency_us", "tier" => "t1")
|
||||
.record(now.saturating_sub(msg.timestamp_us) as f64);
|
||||
upsert_reading(&mut registry, &mut commands, &mut q, msg);
|
||||
}
|
||||
Err(_) => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// T2 — uni streams.
|
||||
{
|
||||
let mut t2 = bridge.t2.lock().unwrap();
|
||||
while let Ok(msg) = t2.try_recv() {
|
||||
histogram!("substrate_latency_us", "tier" => "t2")
|
||||
.record(now.saturating_sub(msg.timestamp_us) as f64);
|
||||
upsert_reading(&mut registry, &mut commands, &mut q, msg);
|
||||
}
|
||||
}
|
||||
|
||||
// T3 — bidirectional commands. Reply with the device's most recent
|
||||
// sensor value (NaN if we've never seen this (device, sensor) before).
|
||||
{
|
||||
let mut t3 = bridge.t3.lock().unwrap();
|
||||
while let Ok(inbound) = t3.try_recv() {
|
||||
histogram!("substrate_latency_us", "tier" => "t3")
|
||||
.record(now.saturating_sub(inbound.command.timestamp_us) as f64);
|
||||
let key = (inbound.command.device_id, inbound.command.sensor_id);
|
||||
let current_value = registry
|
||||
.map
|
||||
.get(&key)
|
||||
.and_then(|&e| q.get(e).ok())
|
||||
.map(|d| d.raw_value)
|
||||
.unwrap_or(f64::NAN);
|
||||
let ack = QuicMessage {
|
||||
device_id: inbound.command.device_id,
|
||||
sensor_id: inbound.command.sensor_id,
|
||||
raw_value: current_value,
|
||||
timestamp_us: now_us(),
|
||||
sequence_number: inbound.command.sequence_number,
|
||||
sensor_type: inbound.command.sensor_type,
|
||||
};
|
||||
// Ignore send errors: the demux task may have given up if the
|
||||
// connection died while we were processing.
|
||||
let _ = inbound.reply.send(ack);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn upsert_reading(
|
||||
registry: &mut SensorRegistry,
|
||||
commands: &mut Commands,
|
||||
q: &mut Query<&mut RawSensorData>,
|
||||
msg: QuicMessage,
|
||||
) {
|
||||
let key = (msg.device_id, msg.sensor_id);
|
||||
let data = RawSensorData {
|
||||
raw_value: msg.raw_value,
|
||||
timestamp_us: msg.timestamp_us,
|
||||
sequence_number: msg.sequence_number,
|
||||
};
|
||||
|
||||
if let Some(&entity) = registry.map.get(&key) {
|
||||
// Common case: existing entity, mutate in place.
|
||||
if let Ok(mut existing) = q.get_mut(entity) {
|
||||
*existing = data;
|
||||
} else {
|
||||
// Edge case: entity was registered earlier in *this* tick via
|
||||
// `commands.spawn`, so the components aren't in the archetype
|
||||
// yet (`Commands` is deferred). Queue another insert; last write
|
||||
// wins when Commands flushes.
|
||||
commands.entity(entity).insert(data);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
let entity = commands
|
||||
.spawn((
|
||||
Asset,
|
||||
DeviceId(msg.device_id),
|
||||
SensorId(msg.sensor_id),
|
||||
SensorTypeTag(SensorType::from_u8(msg.sensor_type)),
|
||||
SmoothedValue::default(),
|
||||
data,
|
||||
))
|
||||
.id();
|
||||
registry.map.insert(key, entity);
|
||||
}
|
||||
|
||||
/// Stub — M6 inserts loss/delay here for benchmark scenarios.
|
||||
pub(super) fn fault_injection_system() {}
|
||||
|
||||
/// Per-sensor digital-twin transform. Pulls each entity's latest
|
||||
/// `RawSensorData` into a sliding-window mean (`SmoothedValue`), and emits
|
||||
/// `substrate_threshold_crossings_total{type, direction}` when that mean
|
||||
/// transitions across the per-type threshold. The `Changed<RawSensorData>`
|
||||
/// filter restricts the scan to entities updated *this tick*, so the cost
|
||||
/// scales with ingress rate, not fleet size.
|
||||
pub(super) fn simulation_system(
|
||||
mut q: Query<(&SensorTypeTag, &RawSensorData, &mut SmoothedValue), Changed<RawSensorData>>,
|
||||
) {
|
||||
for (st, raw, mut smoothed) in q.iter_mut() {
|
||||
smoothed.push(raw.raw_value);
|
||||
let now_above = smoothed.mean > threshold_for(st.0);
|
||||
if now_above != smoothed.above_threshold {
|
||||
smoothed.above_threshold = now_above;
|
||||
let dir = if now_above { "up" } else { "down" };
|
||||
counter!(
|
||||
"substrate_threshold_crossings_total",
|
||||
"type" => st.0.label_str(),
|
||||
"direction" => dir
|
||||
)
|
||||
.increment(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Sample ECS-side gauges into the Prometheus exporter. Runs every tick but
|
||||
/// only emits once per second to keep cost negligible. This is the system
|
||||
/// the paper's §Architecture diagram calls `ExportSystem`.
|
||||
pub(super) fn export_system(
|
||||
senders: Res<BridgeSenders>,
|
||||
registry: Res<SensorRegistry>,
|
||||
sensors_q: Query<(&SensorTypeTag, &RawSensorData)>,
|
||||
mut state: ResMut<ExportSampleState>,
|
||||
) {
|
||||
let now = Instant::now();
|
||||
if now.duration_since(state.last_sample) < Duration::from_secs(1) {
|
||||
return;
|
||||
}
|
||||
state.last_sample = now;
|
||||
|
||||
// ---- runtime telemetry ----
|
||||
gauge!("substrate_entities").set(registry.entity_count() as f64);
|
||||
|
||||
gauge!("substrate_channel_depth", "tier" => "t1").set(senders.t1.depth() as f64);
|
||||
gauge!("substrate_channel_depth", "tier" => "t2").set(senders.t2.depth() as f64);
|
||||
gauge!("substrate_channel_depth", "tier" => "t3").set(senders.t3.depth() as f64);
|
||||
|
||||
gauge!("substrate_channel_capacity", "tier" => "t1").set(senders.t1.capacity() as f64);
|
||||
gauge!("substrate_channel_capacity", "tier" => "t2").set(senders.t2.capacity() as f64);
|
||||
gauge!("substrate_channel_capacity", "tier" => "t3").set(senders.t3.capacity() as f64);
|
||||
|
||||
if let Some(stats) = memory_stats::memory_stats() {
|
||||
gauge!("substrate_rss_bytes").set(stats.physical_mem as f64);
|
||||
}
|
||||
|
||||
// ---- sensor data aggregates (per type) ----
|
||||
let mut by_type: HashMap<&'static str, Aggregate> = HashMap::new();
|
||||
for (st, data) in &sensors_q {
|
||||
by_type
|
||||
.entry(st.0.label_str())
|
||||
.or_insert_with(Aggregate::new)
|
||||
.push(data.raw_value);
|
||||
}
|
||||
for (label, agg) in &by_type {
|
||||
gauge!("sensor_aggregate", "type" => *label, "stat" => "count").set(agg.count as f64);
|
||||
if agg.count > 0 {
|
||||
gauge!("sensor_aggregate", "type" => *label, "stat" => "mean").set(agg.mean());
|
||||
gauge!("sensor_aggregate", "type" => *label, "stat" => "min").set(agg.min);
|
||||
gauge!("sensor_aggregate", "type" => *label, "stat" => "max").set(agg.max);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) fn diagnostics_system(
|
||||
mut state: ResMut<DiagnosticsState>,
|
||||
registry: Res<SensorRegistry>,
|
||||
) {
|
||||
state.ticks_since_log += 1;
|
||||
let now = Instant::now();
|
||||
let elapsed = now.duration_since(state.last_log);
|
||||
if elapsed >= Duration::from_secs(1) {
|
||||
let tick_hz = state.ticks_since_log as f64 / elapsed.as_secs_f64();
|
||||
gauge!("substrate_tick_hz").set(tick_hz);
|
||||
tracing::info!(
|
||||
tick_hz = format_args!("{:.1}", tick_hz),
|
||||
entities = registry.entity_count(),
|
||||
"diagnostics"
|
||||
);
|
||||
state.last_log = now;
|
||||
state.ticks_since_log = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/// Current wall-clock time as microseconds since the Unix epoch.
///
/// Returns `0` if the system clock reads earlier than the epoch. If the
/// microsecond count does not fit in a `u64` the result saturates at
/// `u64::MAX` instead of silently wrapping, which a bare `as u64` cast on the
/// `u128` returned by `as_micros` would do.
fn now_us() -> u64 {
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map_or(0, |since_epoch| {
            u64::try_from(since_epoch.as_micros()).unwrap_or(u64::MAX)
        })
}
|
||||
|
||||
/// Running count/sum/min/max over the finite values fed to it; used by
/// `export_system` for its per-type sensor aggregates. Non-finite inputs
/// (NaN, ±∞) are ignored.
#[derive(Debug, Clone, Copy)]
struct Aggregate {
    /// Number of finite values accepted so far.
    count: u64,
    /// Sum of the accepted values.
    sum: f64,
    /// Smallest accepted value (`+∞` while empty).
    min: f64,
    /// Largest accepted value (`-∞` while empty).
    max: f64,
}

impl Aggregate {
    /// An empty accumulator: zero count and sum, with `min`/`max` set to
    /// the infinities so the first accepted value replaces both.
    fn new() -> Self {
        Self {
            count: 0,
            sum: 0.0,
            min: f64::INFINITY,
            max: f64::NEG_INFINITY,
        }
    }

    /// Fold `v` into the accumulator; does nothing when `v` is not finite.
    fn push(&mut self, v: f64) {
        if v.is_finite() {
            self.count += 1;
            self.sum += v;
            if self.min > v {
                self.min = v;
            }
            if self.max < v {
                self.max = v;
            }
        }
    }

    /// Arithmetic mean of the accepted values, or NaN when there are none.
    fn mean(&self) -> f64 {
        match self.count {
            0 => f64::NAN,
            n => self.sum / n as f64,
        }
    }
}
|
||||
294
substrate/src/world/tests.rs
Normal file
294
substrate/src/world/tests.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
//! Unit tests for the world's components and systems.
|
||||
//!
|
||||
//! Lives as a child module so it can poke at `pub(super)` items (the
|
||||
//! internal resources, `threshold_for`, etc.) without enlarging the
|
||||
//! public API.
|
||||
|
||||
use std::sync::Mutex;
|
||||
|
||||
use bevy::prelude::*;
|
||||
use bevy::state::app::StatesPlugin;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use uuid::Uuid;
|
||||
|
||||
use crate::transport::ecs::{BridgeReceivers, BridgeSenders};
|
||||
use crate::transport::state::ServerState;
|
||||
use crate::transport::{QuicMessage, SensorType, T1Sender, T2Sender, T3Inbound, T3Sender};
|
||||
|
||||
use super::WorldPlugin;
|
||||
use super::components::{RawSensorData, SMOOTHED_WINDOW, SmoothedValue, threshold_for};
|
||||
use super::resources::SensorRegistry;
|
||||
|
||||
/// Build a Bevy app with just enough plugins/resources to run the world
|
||||
/// systems against test-owned channels. No QUIC, no tokio runtime.
|
||||
fn make_test_app() -> (
|
||||
App,
|
||||
mpsc::Sender<QuicMessage>,
|
||||
mpsc::Sender<QuicMessage>,
|
||||
mpsc::Sender<T3Inbound>,
|
||||
) {
|
||||
let (t1_tx, t1_rx) = mpsc::channel::<QuicMessage>(64);
|
||||
let (t2_tx, t2_rx) = mpsc::channel::<QuicMessage>(64);
|
||||
let (t3_tx, t3_rx) = mpsc::channel::<T3Inbound>(64);
|
||||
|
||||
let bridge = BridgeReceivers {
|
||||
t1: Mutex::new(t1_rx),
|
||||
t2: Mutex::new(t2_rx),
|
||||
t3: Mutex::new(t3_rx),
|
||||
};
|
||||
// export_system samples channel depth/capacity from the senders; it
|
||||
// requires the resource even when the test pushes via the raw senders
|
||||
// directly (which is what the rest of the test does).
|
||||
let senders = BridgeSenders {
|
||||
t1: T1Sender::new(t1_tx.clone()),
|
||||
t2: T2Sender::new(t2_tx.clone()),
|
||||
t3: T3Sender::new(t3_tx.clone()),
|
||||
};
|
||||
|
||||
let mut app = App::new();
|
||||
app.add_plugins(MinimalPlugins)
|
||||
.add_plugins(StatesPlugin)
|
||||
.init_state::<ServerState>()
|
||||
.insert_resource(bridge)
|
||||
.insert_resource(senders)
|
||||
.add_plugins(WorldPlugin);
|
||||
|
||||
// Force the state machine into Started so the run_if guard passes.
|
||||
app.world_mut()
|
||||
.resource_mut::<NextState<ServerState>>()
|
||||
.set(ServerState::Started);
|
||||
// Process the state transition before tests push messages.
|
||||
app.update();
|
||||
|
||||
(app, t1_tx, t2_tx, t3_tx)
|
||||
}
|
||||
|
||||
// ---- ingest_system: entity lifecycle and T3 ack semantics ----
|
||||
|
||||
/// A single T1 message must register exactly one entity for its
/// `(device, sensor)` key and store the message's value, sequence number and
/// timestamp in that entity's `RawSensorData`.
#[test]
fn ingest_t1_creates_entity_and_writes_raw_data() {
    let (mut app, t1_tx, _t2_tx, _t3_tx) = make_test_app();

    let device = Uuid::from_u128(0xa1a2_a3a4_a5a6_a7a8_a9aa_abac_adae_afb0);
    // Arbitrary payload. Deliberately not `3.14`: that literal trips
    // clippy's deny-by-default `approx_constant` lint.
    let raw_value = 3.25;
    let msg = QuicMessage {
        device_id: device,
        sensor_id: 5,
        raw_value,
        timestamp_us: 1_700_000_000_000_001,
        sequence_number: 1,
        sensor_type: SensorType::Temperature.as_u8(),
    };
    t1_tx.try_send(msg).expect("channel cap");

    // Tick 1: ingest drains the channel and spawns via Commands.
    app.update();
    // Tick 2: extra margin so the deferred spawn has certainly been applied
    // before the world is inspected.
    app.update();

    let registry = app.world().resource::<SensorRegistry>();
    assert_eq!(registry.map.len(), 1);

    let entity = *registry
        .map
        .get(&(device, 5))
        .expect("entity not registered");
    let data = app
        .world()
        .get::<RawSensorData>(entity)
        .expect("RawSensorData missing");
    assert_eq!(data.raw_value, raw_value);
    assert_eq!(data.sequence_number, 1);
    assert_eq!(data.timestamp_us, 1_700_000_000_000_001);
}
|
||||
|
||||
/// Two T1 readings for the same `(device, sensor)` must land on one entity,
/// with the second reading overwriting the first.
#[test]
fn ingest_t1_repeated_messages_update_in_place() {
    let (mut app, t1_tx, _t2_tx, _t3_tx) = make_test_app();
    let device = Uuid::new_v4();

    // Builds reading number `n`: the timestamp and sequence number both
    // equal `n`.
    let reading = |raw_value: f64, n: u32| QuicMessage {
        device_id: device,
        sensor_id: 0,
        raw_value,
        timestamp_us: u64::from(n),
        sequence_number: n,
        sensor_type: SensorType::Generic.as_u8(),
    };

    // First reading, followed by two ticks.
    t1_tx.try_send(reading(1.0, 1)).unwrap();
    app.update();
    app.update();

    // Second reading on the same (device, sensor).
    t1_tx.try_send(reading(2.0, 2)).unwrap();
    app.update();

    let registry = app.world().resource::<SensorRegistry>();
    assert_eq!(registry.map.len(), 1, "should reuse the same entity");

    let entity = *registry.map.get(&(device, 0)).unwrap();
    let stored = app.world().get::<RawSensorData>(entity).unwrap();
    assert_eq!(stored.raw_value, 2.0);
    assert_eq!(stored.sequence_number, 2);
}
|
||||
|
||||
/// A T3 command for a known `(device, sensor)` must be acked with that
/// sensor's latest raw value, echoing the command's identity, sequence
/// number and sensor type, and carrying a non-zero server timestamp.
#[test]
fn ingest_t3_replies_with_current_sensor_value() {
    let (mut app, t1_tx, _t2_tx, t3_tx) = make_test_app();
    let device = Uuid::new_v4();

    // Seed a T1 reading so the (device, sensor) entity exists.
    let seed = QuicMessage {
        device_id: device,
        sensor_id: 9,
        raw_value: 42.0,
        timestamp_us: 1,
        sequence_number: 1,
        sensor_type: SensorType::Temperature.as_u8(),
    };
    t1_tx.try_send(seed).unwrap();
    app.update();
    app.update();

    // Send a T3 command and capture the ack via the oneshot.
    let (reply_tx, reply_rx) = oneshot::channel();
    let command = QuicMessage {
        device_id: device,
        sensor_id: 9,
        raw_value: 0.0,
        timestamp_us: 0,
        sequence_number: 7,
        sensor_type: SensorType::Temperature.as_u8(),
    };
    t3_tx
        .try_send(T3Inbound {
            command,
            reply: reply_tx,
        })
        .unwrap();
    app.update();

    let ack = reply_rx
        .blocking_recv()
        .expect("ECS handler should have replied");
    assert_eq!(ack.device_id, device);
    assert_eq!(ack.sensor_id, 9);
    assert_eq!(ack.sequence_number, 7, "ack preserves correlation id");
    assert_eq!(ack.raw_value, 42.0, "ack carries the latest sensor reading");
    assert_eq!(
        ack.typ(),
        SensorType::Temperature,
        "ack preserves sensor type"
    );
    assert!(ack.timestamp_us > 0, "ack stamped with server time");
}
|
||||
|
||||
// ---- SmoothedValue unit tests ----
|
||||
|
||||
/// After a single push the mean equals that sample, and the
/// `above_threshold` flag is still false.
#[test]
fn smoothed_value_first_push_sets_mean() {
    let mut window = SmoothedValue::default();
    window.push(10.0);

    assert_eq!(window.mean, 10.0);
    assert!(!window.above_threshold);
}
|
||||
|
||||
/// Four pushes of 1, 2, 3 and 4 must yield a mean of 2.5.
#[test]
fn smoothed_value_averages_filled_window() {
    let mut window = SmoothedValue::default();
    [1.0, 2.0, 3.0, 4.0]
        .into_iter()
        .for_each(|sample| window.push(sample));

    assert!((window.mean - 2.5).abs() < 1e-9);
}
|
||||
|
||||
/// Filling the window with zeros and then pushing a full window of tens
/// must leave a mean of 10, i.e. every old sample has been displaced.
#[test]
fn smoothed_value_rolls_after_window_fills() {
    let mut window = SmoothedValue::default();

    std::iter::repeat(0.0)
        .take(SMOOTHED_WINDOW)
        .for_each(|sample| window.push(sample));
    assert!((window.mean - 0.0).abs() < 1e-9);

    std::iter::repeat(10.0)
        .take(SMOOTHED_WINDOW)
        .for_each(|sample| window.push(sample));
    assert!((window.mean - 10.0).abs() < 1e-9, "ring should fully roll over");
}
|
||||
|
||||
/// Pushing NaN, +∞ and -∞ after a finite sample must leave the mean
/// unchanged.
#[test]
fn smoothed_value_ignores_nonfinite() {
    let mut window = SmoothedValue::default();
    window.push(5.0);
    let before = window.mean;

    for junk in [f64::NAN, f64::INFINITY, f64::NEG_INFINITY] {
        window.push(junk);
    }

    assert_eq!(window.mean, before, "non-finite values should not perturb the mean");
}
|
||||
|
||||
// ---- simulation_system: end-to-end threshold-crossing transition ----
|
||||
|
||||
/// End-to-end check of the smoothing and crossing logic: a full window of
/// below-threshold temperatures leaves the entity below the threshold, and a
/// further full window of above-threshold temperatures flips it to above.
#[test]
fn simulation_smoothes_and_detects_threshold_crossing() {
    let (mut app, t1_tx, _t2_tx, _t3_tx) = make_test_app();
    let device = Uuid::new_v4();
    let threshold = threshold_for(SensorType::Temperature); // 22.0 °C
    let window = SMOOTHED_WINDOW as u32;

    // Temperature reading for sensor 0 whose timestamp and sequence number
    // both equal `seq`.
    let reading = |raw_value: f64, seq: u32| QuicMessage {
        device_id: device,
        sensor_id: 0,
        raw_value,
        timestamp_us: u64::from(seq),
        sequence_number: seq,
        sensor_type: SensorType::Temperature.as_u8(),
    };

    // Below-threshold readings: smoothed mean stays under, no crossing.
    // Two ticks per message in this phase.
    for seq in 0..window {
        t1_tx.try_send(reading(18.0, seq)).unwrap();
        app.update();
        app.update();
    }

    let registry = app.world().resource::<SensorRegistry>();
    let entity = *registry.map.get(&(device, 0)).unwrap();
    let cold = app
        .world()
        .get::<SmoothedValue>(entity)
        .expect("SmoothedValue should be on every sensor entity");
    assert!(cold.mean < threshold);
    assert!(!cold.above_threshold, "should not have crossed up yet");

    // Above-threshold readings: a whole window of 30 °C samples, one tick
    // each, is enough to drag the mean above the threshold.
    for seq in window..(window * 2) {
        t1_tx.try_send(reading(30.0, seq)).unwrap();
        app.update();
    }

    let hot = app.world().get::<SmoothedValue>(entity).unwrap();
    assert!(hot.mean > threshold);
    assert!(
        hot.above_threshold,
        "smoothed mean should have crossed up through {threshold}"
    );
}
|
||||
Reference in New Issue
Block a user