First test kinda working

This commit is contained in:
Valère Plantevin
2026-05-12 11:21:40 -04:00
parent cac6c9ac02
commit d3f09ee062
36 changed files with 3903 additions and 102 deletions

248
scripts/bench-scaling.sh Executable file
View File

@@ -0,0 +1,248 @@
#!/usr/bin/env bash
# scripts/bench-scaling.sh — M6-lite: sweep T1 rate at fixed entity count,
# record tick_hz / P99 latency / drops / RSS into a CSV the paper can plot.
#
# Two modes:
#
# 1. Scaling sweep (default). Just T1 traffic. Tells you the substrate's
# throughput ceiling on this host and where the lossy-tier kicks in.
# Output: data/local/scaling.csv (override with OUT_CSV=<path>)
#
# 2. Cross-tier isolation. Set T3_RATE_HZ=<N> to run a constant T3 baseline
# in parallel with the T1 sweep. The CSV gains substrate-side T3 latency
# columns. If T3 P99 stays flat as T1 climbs orders of magnitude, the
# paper's composition thesis is supported.
# Output: data/local/cross_tier.csv (override with OUT_CSV=<path>)
#
# Holds:
# - tick_rate_hz $TICK_RATE_HZ (default 1000; set 0 for busy-loop)
# - device count $DEVICES (default 100, single-sensor profile)
# - warmup $WARMUP_S (default 3s, not measured; precedes each window)
# - window $WINDOW_S (default 20s steady-state per rate)
# - T3 baseline $T3_RATE_HZ (default 0 = disabled)
# - T3 timeout $T3_TIMEOUT_MS (default 2000ms)
# - build profile $BUILD (release | debug; default release)
#
# Sweeps:
# T1 rate over the positional arguments, or these defaults:
# 100 500 1000 5000 10000 25000 50000
#
# Examples:
# # Pure T1 scaling sweep.
# ./scripts/bench-scaling.sh
#
# # Cross-tier isolation: hold T3 at 100 Hz, sweep T1.
# T3_RATE_HZ=100 ./scripts/bench-scaling.sh
#
# # Custom sweep, longer windows.
# DEVICES=1000 WINDOW_S=30 ./scripts/bench-scaling.sh 1000 5000 20000
set -euo pipefail

# Work from the repository root (the parent of this script's directory) so
# the relative paths used below (certs/, data/local/) resolve.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${ROOT}"

# --- knobs ---
# Each knob keeps the caller's value when set and non-empty, else the default.
: "${DEVICES:=100}"
: "${TICK_RATE_HZ:=1000}"
: "${WARMUP_S:=3}"
: "${WINDOW_S:=20}"
: "${T3_RATE_HZ:=0}"
: "${T3_TIMEOUT_MS:=2000}"
: "${BUILD:=release}"

# T1 rates to sweep: the positional arguments, or the built-in ladder.
if (( $# > 0 )); then
  RATES=("$@")
else
  RATES=(100 500 1000 5000 10000 25000 50000)
fi

# Cross-tier mode is on whenever T3_RATE_HZ is numerically positive. awk does
# the comparison so fractional rates are handled.
CROSS_TIER=$(awk -v r="${T3_RATE_HZ}" 'BEGIN { if (r + 0 > 0) print 1; else print 0 }')

# Pick default output path based on mode so the two CSVs don't clobber.
case "${CROSS_TIER}" in
  1) : "${OUT_CSV:=data/local/cross_tier.csv}" ;;
  *) : "${OUT_CSV:=data/local/scaling.csv}" ;;
esac
# --- pretty logging ---
# Style variables hold ANSI escapes only when stdout is a terminal; otherwise
# they stay empty so redirected output carries no control codes.
BOLD='' DIM='' GREEN='' RED='' RESET=''
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'
  DIM=$'\033[2m'
  GREEN=$'\033[32m'
  RED=$'\033[31m'
  RESET=$'\033[0m'
fi

# step MSG — bold section banner.
step() { printf '%s\n' "${BOLD}» $1${RESET}"; }
# ok MSG — green success line.
ok() { printf '%s\n' "${GREEN} ✓ $1${RESET}"; }
# fail MSG — red failure line (printing only; the caller decides whether to exit).
fail() { printf '%s\n' "${RED} ✗ $1${RESET}"; }
# --- prereqs ---
# Every external tool the script shells out to must be on PATH.
for cmd in cargo curl lsof awk; do
  command -v "$cmd" >/dev/null || { fail "missing: $cmd"; exit 1; }
done

# Refuse to start while something already holds port 9000 or 9100.
# lsof prints no state for UDP sockets, so filtering its output through
# `grep LISTEN` could only ever match TCP listeners and the UDP half of the
# check was dead. lsof's exit status is used instead: it is zero exactly when
# a matching socket (any UDP socket on the port, or a TCP listener) exists.
for port in 9000 9100; do
  if lsof -nP -iUDP:"$port" -iTCP:"$port" -sTCP:LISTEN >/dev/null 2>&1; then
    fail "port $port in use — kill the running substrate first"
    exit 1
  fi
done

# Generate the dev TLS material when either half of the pair is missing.
if [[ ! -f certs/server.crt || ! -f certs/server.key ]]; then
  make certs >/dev/null
fi
# --- build ---
# Build both binaries quietly (cargo's stdout is discarded) and record where
# the chosen profile placed them. Any BUILD value other than "release" means
# a debug build.
step "Building ($BUILD)"
case "$BUILD" in
  release)
    cargo build --release -p substrate -p simulator >/dev/null
    bin_dir="$ROOT/target/release"
    ;;
  *)
    cargo build -p substrate -p simulator >/dev/null
    bin_dir="$ROOT/target/debug"
    ;;
esac
SUBSTRATE="$bin_dir/substrate"
SIMULATOR="$bin_dir/simulator"
# --- start substrate with high tick rate ---
LOG_DIR="/tmp/quic_ecs_dt_bench"
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
: > "$SUB_LOG"
step "Starting substrate (tick_rate_hz=$TICK_RATE_HZ, log: $SUB_LOG)"
APP_SIMULATION__TICK_RATE_HZ="$TICK_RATE_HZ" RUST_LOG=warn "$SUBSTRATE" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

# Wait for /metrics: up to 40 probes, 0.25 s apart.
substrate_ready=0
for (( attempt = 1; attempt <= 40; attempt++ )); do
  if curl -sf http://localhost:9100/metrics >/dev/null 2>&1; then
    substrate_ready=1
    break
  fi
  # No point polling further once the process itself has gone away.
  kill -0 "$SUBSTRATE_PID" 2>/dev/null || break
  sleep 0.25
done

if (( substrate_ready == 0 )); then
  fail "substrate didn't start"
  tail -20 "$SUB_LOG"
  # This runs before any exit trap is installed, so stop the background
  # substrate here; otherwise it outlives the script and keeps its ports.
  kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
  wait "$SUBSTRATE_PID" 2>/dev/null || true
  exit 1
fi
ok "substrate /metrics ready"
# cleanup — stop whichever of the simulator / substrate background processes
# are still recorded in SIM_PID / SUBSTRATE_PID, then reap all children.
# Best-effort: failures to signal an already-gone PID are ignored.
cleanup() {
  local pid
  for pid in "${SIM_PID:-}" "${SUBSTRATE_PID:-}"; do
    [[ -n "$pid" ]] || continue
    kill -TERM "$pid" 2>/dev/null || true
  done
  wait 2>/dev/null || true
}

# Teardown hangs off EXIT only. A signal handler that merely *returns* lets
# bash resume the script after the handler, so INT/TERM must turn into an
# exit; the EXIT trap then runs cleanup exactly once.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# --- helpers to scrape a single value out of /metrics text ---
# snapshot_to FILE — overwrite FILE with the current body of the substrate's
# /metrics endpoint. Returns curl's exit status.
snapshot_to() {
  local dest=$1
  curl -s http://localhost:9100/metrics >"$dest"
}
# get_value FILE PATTERN — print the value of one sample from a metrics
# snapshot.
#   $1: snapshot file
#   $2: full metric name as a regex (anchored at line start; must be followed
#       by a space); callers escape the label braces as \{ … \}
# Prints the last whitespace-separated field of the first matching line, or
# nothing when no line matches.
get_value() {
  local snapshot=$1
  # The pattern travels through the environment rather than `awk -v`:
  # -v assignments undergo escape-sequence processing, which rewrites (and in
  # gawk warns about) the \{ and \} the callers pass. ENVIRON delivers the
  # text verbatim, so the regex engine itself sees the escaped braces.
  METRIC_PAT="$2" awk '
    BEGIN { re = "^" ENVIRON["METRIC_PAT"] " " }
    $0 ~ re { print $NF; exit }
  ' "$snapshot"
}
# --- sweep ---
# Start a fresh CSV (any previous file at OUT_CSV is overwritten) with the
# column header row.
mkdir -p "$(dirname "$OUT_CSV")"
csv_columns=(
  rate_hz t3_rate_hz devices tick_rate_hz window_s
  t1_received t1_dropped t1_p50_us t1_p99_us t1_p999_us
  t3_received t3_no_handler t3_p50_us t3_p99_us t3_p999_us
  tick_hz rss_mb channel_depth_max
)
(IFS=,; printf '%s\n' "${csv_columns[*]}") > "$OUT_CSV"

# Announce the sweep and print the bold console table header; cross-tier mode
# gets the wider table with the T3 columns.
if [[ "$CROSS_TIER" == "1" ]]; then
  step "Sweeping T1 + holding T3 at ${T3_RATE_HZ} Hz (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
  printf '%s%-8s %-9s %-9s %-10s %-10s %-8s %-9s %-10s %-10s %-8s %-7s\n%s' \
    "$BOLD" \
    "rate" "t1_recv" "t1_drop" "t1_p50" "t1_p99" "t3_recv" "t3_p50" "t3_p99" "t3_p999" "tick_hz" "rss_mb" \
    "$RESET"
else
  step "Sweeping T1 rate (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
  printf '%s%-8s %-9s %-9s %-10s %-10s %-10s %-8s %-7s\n%s' \
    "$BOLD" \
    "rate" "received" "dropped" "p50_us" "p99_us" "p999_us" "tick_hz" "rss_mb" \
    "$RESET"
fi

# Snapshot file paths
BEFORE="$LOG_DIR/before.txt"
AFTER="$LOG_DIR/after.txt"
# Peak-tracker for channel depth: poll /metrics at roughly 4 Hz for the length
# of the measurement window and report the largest depth observed.
# Arguments: $1 - tier label as it appears in the metric: "t1", "t2" or "t3"
# Globals:   WINDOW_S (read) - whole seconds; WINDOW_S * 4 polls are made
# Outputs:   peak depth truncated to an integer on stdout (0 if the gauge was
#            never seen or never carried a usable number)
peak_depth() {
  local label="$1"
  local max=0
  local val polls i
  polls=$(( WINDOW_S * 4 ))
  for (( i = 0; i < polls; i++ )); do
    # awk matches the sample by literal prefix (no regex escaping to get
    # wrong) and only emits plain non-negative numbers, already truncated to
    # an integer. Anything else (NaN, Inf, garbage) yields no output, so bash
    # arithmetic below never sees a token it would treat as a variable name.
    # `|| true`: a failed scrape just means "no sample for this poll".
    val=$(curl -s http://localhost:9100/metrics 2>/dev/null \
      | awk -v label="$label" '
          index($0, "substrate_channel_depth{tier=\"" label "\"}") == 1 {
            if ($NF ~ /^[0-9]+(\.[0-9]*)?([eE][+-]?[0-9]+)?$/) printf "%d\n", $NF
            exit
          }') || true
    if [[ "$val" =~ ^[0-9]+$ ]] && (( val > max )); then
      max=$val
    fi
    sleep 0.25
  done
  echo "$max"
}
# One measurement per requested T1 rate. Each iteration starts a fresh
# simulator, lets it warm up, brackets the measurement window with two
# /metrics snapshots, then prints a console row and appends a CSV row.
for rate in "${RATES[@]}"; do
# Launch simulator in background. In cross-tier mode it drives both T1
# and T3 on the same connection; otherwise just T1.
sim_args=(
--profile single
--sensor-type generic
--rate-hz "$rate"
--count 0
--devices "$DEVICES"
)
if [[ "$CROSS_TIER" == "1" ]]; then
sim_args+=(--t3-rate-hz "$T3_RATE_HZ" --t3-timeout-ms "$T3_TIMEOUT_MS")
fi
# Simulator output goes to a per-rate log file under LOG_DIR.
RUST_LOG=warn "$SIMULATOR" "${sim_args[@]}" >"$LOG_DIR/sim_${rate}.log" 2>&1 &
SIM_PID=$!
# Warmup, then snapshot counters at the start of the *measurement* window.
sleep "$WARMUP_S"
snapshot_to "$BEFORE"
rec_before=$(get_value "$BEFORE" 'substrate_received_total\{tier="t1"\}')
drop_before=$(get_value "$BEFORE" 'substrate_dropped_total\{tier="t1"\}')
t3_rec_before=$(get_value "$BEFORE" 'substrate_received_total\{tier="t3"\}')
t3_nh_before=$(get_value "$BEFORE" 'substrate_t3_no_handler_total')
# peak_depth blocks while it polls (WINDOW_S * 4 polls, 0.25 s apart), so this
# call is also what spaces the opening and closing snapshots apart.
depth_max=$(peak_depth t1)
snapshot_to "$AFTER"
# The closing snapshot is taken while the simulator is still sending; only
# then is it stopped and reaped.
kill -TERM "$SIM_PID" 2>/dev/null || true
wait "$SIM_PID" 2>/dev/null || true
# Cleared so the exit-time cleanup does not signal an already-reaped PID.
SIM_PID=""
rec_after=$(get_value "$AFTER" 'substrate_received_total\{tier="t1"\}')
drop_after=$(get_value "$AFTER" 'substrate_dropped_total\{tier="t1"\}')
# Latency quantiles, tick rate and RSS are read from the closing snapshot
# as-is (no before/after subtraction).
p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.5"\}')
p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.99"\}')
p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.999"\}')
t3_rec_after=$(get_value "$AFTER" 'substrate_received_total\{tier="t3"\}')
t3_nh_after=$(get_value "$AFTER" 'substrate_t3_no_handler_total')
t3_p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.5"\}')
t3_p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.99"\}')
t3_p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.999"\}')
tick_hz=$(get_value "$AFTER" 'substrate_tick_hz')
rss=$(get_value "$AFTER" 'substrate_rss_bytes')
# Compute deltas + format. Use awk for floating math.
# A metric missing from a snapshot arrives here as an empty string, which awk
# treats as 0 in the arithmetic below.
received=$(awk -v a="$rec_after" -v b="$rec_before" 'BEGIN { printf "%d", a-b }')
dropped=$(awk -v a="$drop_after" -v b="$drop_before" 'BEGIN { printf "%d", a-b }')
t3_received=$(awk -v a="$t3_rec_after" -v b="$t3_rec_before" 'BEGIN { printf "%d", a-b }')
t3_no_handler=$(awk -v a="$t3_nh_after" -v b="$t3_nh_before" 'BEGIN { printf "%d", a-b }')
# RSS is converted from bytes to MiB (1048576 bytes) with one decimal.
rss_mb=$(awk -v r="$rss" 'BEGIN { printf "%.1f", r/1048576 }')
tick_hz_fmt=$(awk -v t="$tick_hz" 'BEGIN { printf "%.1f", t }')
# Console row; the layout mirrors the table header for the active mode.
# Absent quantiles fall back to 0 via ${var:-0}.
if [[ "$CROSS_TIER" == "1" ]]; then
printf '%-8s %-9s %-9s %-10.0f %-10.0f %-8s %-9.0f %-10.0f %-10.0f %-8s %-7s\n' \
"$rate" "$received" "$dropped" \
"${p50:-0}" "${p99:-0}" \
"$t3_received" "${t3_p50:-0}" "${t3_p99:-0}" "${t3_p999:-0}" \
"$tick_hz_fmt" "$rss_mb"
else
printf '%-8s %-9s %-9s %-10.0f %-10.0f %-10.0f %-8s %-7s\n' \
"$rate" "$received" "$dropped" "${p50:-0}" "${p99:-0}" "${p999:-0}" \
"$tick_hz_fmt" "$rss_mb"
fi
# CSV row: always the full column set, in both modes; field order must match
# the header row written before the loop.
echo "$rate,$T3_RATE_HZ,$DEVICES,$TICK_RATE_HZ,$WINDOW_S,$received,$dropped,${p50:-0},${p99:-0},${p999:-0},$t3_received,$t3_no_handler,${t3_p50:-0},${t3_p99:-0},${t3_p999:-0},$tick_hz_fmt,$rss_mb,$depth_max" >> "$OUT_CSV"
# Tiny breather between rate points so the substrate's summary window
# doesn't carry over.
sleep 1
done
# Tell the user where the results and the substrate log ended up.
printf '\n%sCSV written to:%s %s\n' "$DIM" "$RESET" "$OUT_CSV"
printf '%sSubstrate log:%s %s\n' "$DIM" "$RESET" "$SUB_LOG"

222
scripts/demo.sh Executable file
View File

@@ -0,0 +1,222 @@
#!/usr/bin/env bash
# scripts/demo.sh — bring the whole stack up: certs → build → VM+Grafana →
# substrate → simulator. Tails simulator progress in the foreground. Ctrl-C
# cleans everything up.
#
# Overridable via env vars:
# PROFILE single | industrial (default: industrial)
# RATE_HZ T1 datagram rate (default: 500)
# T2_RATE_HZ T2 uni stream rate (default: 5)
# T3_RATE_HZ T3 bi stream rate (default: 2)
# DEVICES number of devices (default: 5)
# BUILD release | debug (default: release)
# KEEP_MONITORING if 1, don't `docker compose down` on exit (default: 0)
# LOG_DIR directory for substrate/simulator logs (default: /tmp/quic_ecs_dt)
#
# Example:
# ./scripts/demo.sh
# PROFILE=single RATE_HZ=100 DEVICES=20 ./scripts/demo.sh
# KEEP_MONITORING=1 ./scripts/demo.sh
set -euo pipefail

# --- locate repo root ---
# The script lives one level below the repository root; everything else uses
# paths relative to that root.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
cd "${ROOT}"

# --- defaults ---
# Each setting keeps the caller's value when set and non-empty.
: "${PROFILE:=industrial}"
: "${RATE_HZ:=500}"
: "${T2_RATE_HZ:=5}"
: "${T3_RATE_HZ:=2}"
: "${DEVICES:=5}"
: "${BUILD:=release}"
: "${KEEP_MONITORING:=0}"
: "${LOG_DIR:=/tmp/quic_ecs_dt}"
# --- pretty logging ---
# Style variables hold ANSI escapes only when stdout is a terminal; otherwise
# they stay empty so piped or redirected output is plain text.
BOLD='' DIM='' GREEN='' YELLOW='' RED='' CYAN='' RESET=''
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'
  DIM=$'\033[2m'
  GREEN=$'\033[32m'
  YELLOW=$'\033[33m'
  RED=$'\033[31m'
  CYAN=$'\033[36m'
  RESET=$'\033[0m'
fi

# step MSG — bold section banner.
step() { printf '%s\n' "${BOLD}» $1${RESET}"; }
# ok MSG — green success line.
ok() { printf '%s\n' "${GREEN} ✓ $1${RESET}"; }
# warn MSG — yellow warning line.
warn() { printf '%s\n' "${YELLOW} ! $1${RESET}"; }
# fail MSG — red failure line (printing only; the caller decides whether to exit).
fail() { printf '%s\n' "${RED} ✗ $1${RESET}"; }
# --- prereq check ---
# Abort on the first required tool that is not on PATH, then confirm the
# `docker compose` plugin (not just the docker binary) responds.
step "Checking prerequisites"
for tool in cargo docker openssl curl lsof; do
  command -v "$tool" >/dev/null 2>&1 && continue
  fail "missing required command: $tool"
  exit 1
done
docker compose version >/dev/null 2>&1 || {
  fail "docker compose plugin not available (try 'docker compose version')"
  exit 1
}
ok "cargo, docker, openssl, curl, lsof present"
# --- port collision check (substrate runs on 9000 udp + 9100 tcp) ---
# lsof prints no state for UDP sockets, so piping its output through
# `grep LISTEN` could only ever match TCP listeners and a process holding
# UDP 9000 went unnoticed. Here lsof's own result decides: it exits zero and
# prints the holders exactly when a UDP socket on the port or a TCP listener
# exists.
for port in 9000 9100; do
  if port_users=$(lsof -nP -iUDP:"$port" -iTCP:"$port" -sTCP:LISTEN 2>/dev/null) \
    && [[ -n "$port_users" ]]; then
    fail "port $port appears to be in use — another substrate or process is running"
    # Show the first few holders so the user knows what to kill.
    printf '%s\n' "$port_users" | head -5
    exit 1
  fi
done
ok "ports 9000 (QUIC) and 9100 (/metrics) are free"
# --- certs ---
# The dev certificate and its key are generated via `make certs` unless both
# files are already in place.
step "Ensuring dev TLS cert exists"
if [[ -f certs/server.crt && -f certs/server.key ]]; then
  ok "certs/server.{crt,key} already present"
else
  make certs >/dev/null
  ok "generated certs/server.{crt,key}"
fi
# --- build ---
# Build both binaries (cargo output is shown) and record where the chosen
# profile placed them. Any BUILD value other than "release" means debug.
step "Building substrate + simulator ($BUILD profile)"
case "$BUILD" in
  release)
    cargo build --release -p substrate -p simulator
    bin_dir="$ROOT/target/release"
    ;;
  *)
    cargo build -p substrate -p simulator
    bin_dir="$ROOT/target/debug"
    ;;
esac
SUBSTRATE_BIN="$bin_dir/substrate"
SIMULATOR_BIN="$bin_dir/simulator"
ok "binaries: $SUBSTRATE_BIN, $SIMULATOR_BIN"
# --- monitoring ---
step "Bringing up VictoriaMetrics + Grafana (docker compose)"
docker compose -f monitoring/docker-compose.yml up -d >/dev/null
ok "containers started"

# wait_for_http LABEL URL — probe URL up to 40 times, 0.5 s apart, drawing a
# dim "waiting for LABEL....." progress line. Returns once the URL answers;
# exits the script with status 1 after the 40th failed probe.
wait_for_http() {
  local label=$1 url=$2 attempt
  printf '%s ⏳ waiting for %s' "$DIM" "$label"
  for (( attempt = 1; attempt <= 40; attempt++ )); do
    if curl -sf "$url" >/dev/null 2>&1; then
      printf ' ready%s\n' "$RESET"
      return 0
    fi
    printf '.'
    sleep 0.5
    if (( attempt == 40 )); then
      printf ' TIMEOUT%s\n' "$RESET"
      exit 1
    fi
  done
}

wait_for_http 'VictoriaMetrics on :8428' http://localhost:8428/health
wait_for_http 'Grafana on :3000' http://localhost:3000/api/health
# --- substrate ---
# Fresh (truncated) log files for this run, then start the substrate in the
# background and wait for its /metrics endpoint to answer.
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
SIM_LOG="$LOG_DIR/simulator.log"
: >"$SUB_LOG"
: >"$SIM_LOG"
step "Starting substrate (log: $SUB_LOG)"
RUST_LOG=info "$SUBSTRATE_BIN" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

printf '%s ⏳ waiting for /metrics on :9100' "$DIM"
# Up to 40 probes, 0.25 s apart; one dot per failed probe.
attempts_left=40
until curl -sf http://localhost:9100/metrics >/dev/null 2>&1; do
  printf '.'
  sleep 0.25
  attempts_left=$(( attempts_left - 1 ))
  if (( attempts_left == 0 )); then
    printf ' TIMEOUT%s\n' "$RESET"
    warn "substrate failed to start; tail of $SUB_LOG:"
    tail -30 "$SUB_LOG"
    # Stop the half-started substrate before giving up.
    kill "$SUBSTRATE_PID" 2>/dev/null || true
    exit 1
  fi
done
printf ' ready%s\n' "$RESET"
# --- simulator ---
# Entity count shown in the summary: the industrial profile is counted as
# five sensor slots per device, any other profile as one.
case "$PROFILE" in
  industrial) TOTAL_SLOTS=$(( DEVICES * 5 )) ;;
  *) TOTAL_SLOTS=$DEVICES ;;
esac

step "Starting simulator (log: $SIM_LOG)"
sim_cmd=(
  "$SIMULATOR_BIN"
  --profile "$PROFILE"
  --rate-hz "$RATE_HZ"
  --t2-rate-hz "$T2_RATE_HZ"
  --t3-rate-hz "$T3_RATE_HZ"
  --count 0
  --devices "$DEVICES"
)
RUST_LOG=info "${sim_cmd[@]}" >"$SIM_LOG" 2>&1 &
SIMULATOR_PID=$!

# Give the simulator half a second, then check it is still alive; an
# immediate exit is reported with the tail of its log.
sleep 0.5
if kill -0 "$SIMULATOR_PID" 2>/dev/null; then
  ok "simulator PID $SIMULATOR_PID"
else
  fail "simulator exited immediately; tail of $SIM_LOG:"
  tail -20 "$SIM_LOG"
  kill "$SUBSTRATE_PID" 2>/dev/null || true
  exit 1
fi
# --- cleanup trap ---
# cleanup — stop the simulator, then the substrate, then (unless
# KEEP_MONITORING=1) the docker compose monitoring stack, and report where
# the logs are. Every step is best-effort.
cleanup() {
  # Disarm first so teardown runs exactly once: without this, Ctrl-C ran
  # cleanup from the INT trap and then again from the EXIT trap as the
  # script terminated, printing every banner twice.
  trap - EXIT INT TERM
  printf '\n%s» Cleaning up%s\n' "$BOLD" "$RESET"
  if [[ -n "${SIMULATOR_PID:-}" ]]; then
    kill -TERM "$SIMULATOR_PID" 2>/dev/null || true
    wait "$SIMULATOR_PID" 2>/dev/null || true
    ok "simulator stopped"
  fi
  if [[ -n "${SUBSTRATE_PID:-}" ]]; then
    kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
    wait "$SUBSTRATE_PID" 2>/dev/null || true
    ok "substrate stopped"
  fi
  if [[ "$KEEP_MONITORING" == "1" ]]; then
    warn "leaving monitoring stack up (KEEP_MONITORING=1) — 'make monitoring-down' to stop"
  else
    docker compose -f monitoring/docker-compose.yml down >/dev/null 2>&1 || true
    ok "monitoring stack stopped"
  fi
  printf '%sLogs preserved at:%s %s\n' "$DIM" "$RESET" "$LOG_DIR"
}
# Teardown hangs off EXIT; INT/TERM just turn into an exit so the EXIT trap
# does the work once and the script never resumes after a signal.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM
# --- summary ---
# Print the "demo is live" card: service URLs, log file locations and the
# effective configuration. The heredoc delimiter is unquoted, so the style
# variables and settings below are expanded when the card is printed.
cat <<EOF
${BOLD}════════════════════════════════════════════════════════════${RESET}
${BOLD} Demo is live${RESET}
${BOLD}════════════════════════════════════════════════════════════${RESET}
${CYAN}Grafana${RESET} http://localhost:3000 (admin / admin)
sensors dash http://localhost:3000/d/quic-ecs-dt-sensors
runtime dash http://localhost:3000/d/quic-ecs-dt-runtime
${CYAN}VictoriaMetrics${RESET} http://localhost:8428
${CYAN}substrate /metrics${RESET} http://localhost:9100/metrics
${DIM}Logs${RESET}
substrate $SUB_LOG
simulator $SIM_LOG
${DIM}Config${RESET}
profile $PROFILE
rates T1=$RATE_HZ Hz · T2=$T2_RATE_HZ Hz · T3=$T3_RATE_HZ Hz
devices $DEVICES → $TOTAL_SLOTS sensor entities expected
build $BUILD
${DIM}Below: live tail of simulator progress (Ctrl-C to stop everything).${RESET}
EOF
# --- foreground tail of simulator progress ---
# Follow the simulator log in the foreground and show only its `progress`
# lines plus the `simulator done` / `simulator launching` /
# `simulator client connected` lines, so the user sees the rates the
# simulator is observing without noise. `tail -F` keeps following if the log
# file is replaced; grep is line-buffered so matches appear as they are
# written. This pipeline blocks until the user interrupts the script.
tail -F "$SIM_LOG" | grep --line-buffered -E 'progress|simulator (done|launching|client connected)'