248 lines
9.6 KiB
Bash
Executable File
248 lines
9.6 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# scripts/bench-scaling.sh — M6-lite: sweep T1 rate at fixed entity count,
|
|
# record tick_hz / P99 latency / drops / RSS into a CSV the paper can plot.
|
|
#
|
|
# Two modes:
|
|
#
|
|
# 1. Scaling sweep (default). Just T1 traffic. Tells you the substrate's
|
|
# throughput ceiling on this host and where the lossy-tier kicks in.
|
|
# Output: data/local/scaling.csv
|
|
#
|
|
# 2. Cross-tier isolation. Set T3_RATE_HZ=<N> to enable the substrate's
|
|
# synthetic T3 driver (server-initiated Relay commands to every
|
|
# connected device at that rate) in parallel with the T1 sweep. The CSV
|
|
# gains substrate-side T3 latency columns. If T3 P99 stays flat as T1
|
|
# climbs orders of magnitude, the paper's composition thesis is supported.
|
|
# Output: data/local/cross_tier.csv
|
|
#
|
|
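# Holds (each is an environment variable you can override):
#   - tick_rate_hz   $TICK_RATE_HZ  (default 1000; set 0 for busy-loop)
#   - device count   $DEVICES       (default 100, single-sensor profile)
#   - warmup         $WARMUP_S      (default 3s, slept before each measurement window)
#   - window         $WINDOW_S      (default 20s steady-state per rate)
#   - T3 baseline    $T3_RATE_HZ    (default 0 = disabled)
#   - build profile  $BUILD         (release | debug; default release)
#   - output CSV     $OUT_CSV       (overrides the per-mode default path above)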
|
|
#
|
|
# Sweeps:
|
|
# T1 rate over the positional arguments, or these defaults:
|
|
# 100 500 1000 5000 10000 25000 50000
|
|
#
|
|
# Examples:
|
|
# # Pure T1 scaling sweep.
|
|
# ./scripts/bench-scaling.sh
|
|
#
|
|
# # Cross-tier isolation: hold T3 at 100 Hz, sweep T1.
|
|
# T3_RATE_HZ=100 ./scripts/bench-scaling.sh
|
|
#
|
|
# # Custom sweep, longer windows.
|
|
# DEVICES=1000 WINDOW_S=30 ./scripts/bench-scaling.sh 1000 5000 20000
|
|
|
|
# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Locate the directory holding this script, treat its parent as the root, and
# run from there so the relative paths used below resolve against it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(dirname "$SCRIPT_DIR")"
cd "$ROOT"
|
|
|
|
# --- knobs ---
|
|
# Tunables: each keeps a non-empty value already present in the environment
# and otherwise takes the default written here.
: "${DEVICES:=100}"
: "${TICK_RATE_HZ:=1000}"
: "${WARMUP_S:=3}"
: "${WINDOW_S:=20}"
: "${T3_RATE_HZ:=0}"
: "${BUILD:=release}"

# T1 rates to sweep: the positional arguments when any were given, else the
# built-in ladder.
if (( $# > 0 )); then
  RATES=("$@")
else
  RATES=(100 500 1000 5000 10000 25000 50000)
fi

# Cross-tier mode is on whenever T3_RATE_HZ is numerically above zero; awk does
# the comparison so a fractional rate also counts. Each mode gets its own
# default CSV path so the two outputs don't clobber one another.
CROSS_TIER=$(awk -v r="$T3_RATE_HZ" 'BEGIN { if (r + 0 > 0) print "1"; else print "0" }')
case "$CROSS_TIER" in
  1) : "${OUT_CSV:=data/local/cross_tier.csv}" ;;
  *) : "${OUT_CSV:=data/local/scaling.csv}" ;;
esac
|
|
|
|
# --- pretty logging ---
|
|
# Use ANSI styling only when stdout is a terminal; otherwise every style
# variable is empty so redirected output carries no escape codes.
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'
  DIM=$'\033[2m'
  GREEN=$'\033[32m'
  RED=$'\033[31m'
  RESET=$'\033[0m'
else
  BOLD=''
  DIM=''
  GREEN=''
  RED=''
  RESET=''
fi

# _log_line STYLE PREFIX MESSAGE — print "<STYLE><PREFIX> <MESSAGE><RESET>".
_log_line() { printf '%s%s %s%s\n' "$1" "$2" "$3" "$RESET"; }

# step/ok/fail MESSAGE — one status line on stdout: a bold section marker, a
# green check mark, or a red cross.
step() { _log_line "$BOLD" '»' "$1"; }
ok() { _log_line "$GREEN" ' ✓' "$1"; }
fail() { _log_line "$RED" ' ✗' "$1"; }
|
|
|
|
# --- prereqs ---
|
|
# Every external tool used below must be on PATH.
for cmd in cargo curl lsof awk; do
  command -v "$cmd" >/dev/null || { fail "missing: $cmd"; exit 1; }
done

# Refuse to start while something already holds port 9000 or 9100.
# The check looks for ANY lsof output instead of grepping for "LISTEN": UDP
# sockets have no LISTEN state, so a grep would never see a busy UDP port.
# -sTCP:LISTEN still limits the TCP side to listening sockets. lsof exits
# non-zero when nothing matches, hence the `|| true`; capturing the output
# (rather than piping) also keeps `pipefail` from masking a positive result.
for port in 9000 9100; do
  port_users="$(lsof -nP -iUDP:"$port" -iTCP:"$port" -sTCP:LISTEN 2>/dev/null || true)"
  if [[ -n "$port_users" ]]; then
    fail "port $port in use — kill the running substrate first"
    exit 1
  fi
done

# Generate the certificates through the Makefile when the server cert is absent.
[[ -f certs/server.crt ]] || make certs >/dev/null
|
|
|
|
# --- build ---
|
|
# Build both binaries with cargo and remember where they land. Only the exact
# value "release" selects the release profile; anything else builds debug.
step "Building ($BUILD)"
case "$BUILD" in
  release)
    cargo build --release -p substrate -p simulator >/dev/null
    bin_dir="$ROOT/target/release"
    ;;
  *)
    cargo build -p substrate -p simulator >/dev/null
    bin_dir="$ROOT/target/debug"
    ;;
esac
SUBSTRATE="$bin_dir/substrate"
SIMULATOR="$bin_dir/simulator"
|
|
|
|
# --- start substrate with high tick rate ---
|
|
LOG_DIR="/tmp/quic_ecs_dt_bench"
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
: > "$SUB_LOG"

# cleanup — send TERM to the simulator and the substrate if their PIDs are
# known, then reap all children. Every step is best-effort so it can run
# safely on any exit path, including before either process was started.
cleanup() {
  if [[ -n "${SIM_PID:-}" ]]; then
    kill -TERM "$SIM_PID" 2>/dev/null || true
  fi
  if [[ -n "${SUBSTRATE_PID:-}" ]]; then
    kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
  fi
  wait 2>/dev/null || true
}
# Installed BEFORE the substrate is launched so a failed start-up cannot leave
# it running. INT/TERM turn into an exit (which fires the EXIT trap); a handler
# that merely returned would let the script carry on after Ctrl-C.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

step "Starting substrate (tick_rate_hz=$TICK_RATE_HZ, synthetic_t3=$T3_RATE_HZ Hz, log: $SUB_LOG)"
APP_SIMULATION__TICK_RATE_HZ="$TICK_RATE_HZ" \
APP_NETWORK__SYNTHETIC_T3_RATE_HZ="$T3_RATE_HZ" \
RUST_LOG=warn "$SUBSTRATE" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

# Wait for /metrics: up to 40 probes, 0.25s apart. Stop early when the
# substrate process has already exited, since it can no longer become ready.
substrate_ready=0
for (( attempt = 1; attempt <= 40; attempt++ )); do
  if curl -sf http://localhost:9100/metrics >/dev/null 2>&1; then
    substrate_ready=1
    break
  fi
  if ! kill -0 "$SUBSTRATE_PID" 2>/dev/null; then
    break
  fi
  sleep 0.25
done
if [[ "$substrate_ready" == "1" ]]; then
  ok "substrate /metrics ready"
else
  fail "substrate didn't start"
  tail -20 "$SUB_LOG"
  exit 1
fi
|
|
|
|
# --- helpers to scrape a single value out of /metrics text ---
|
|
# snapshot_to FILE — fetch the current /metrics text from localhost:9100 and
# store it in FILE, replacing whatever the file held before.
snapshot_to() {
  local dest=$1
  curl -s http://localhost:9100/metrics >"$dest"
}
|
|
# get_value FILE PATTERN — print the last field of the first line in FILE that
# matches PATTERN.
#   $1: snapshot file
#   $2: full metric name as a regex; it is anchored at line start and must be
#       followed by a space, so escape regex metacharacters (e.g. \{ and \}).
# Prints nothing when no line matches.
#
# The pattern travels to awk through the environment rather than `-v`: awk
# applies escape-sequence processing to -v values, which would strip or mangle
# the backslashes in the pattern (gawk also warns about `\{`).
get_value() {
  local snapshot=$1
  GET_VALUE_PATTERN=$2 awk '
    BEGIN { pat = "^" ENVIRON["GET_VALUE_PATTERN"] " " }
    $0 ~ pat { print $NF; exit }
  ' "$snapshot"
}
|
|
|
|
# --- sweep ---
|
|
# Start a fresh CSV (any previous file at this path is overwritten) holding
# only the column header.
mkdir -p "$(dirname "$OUT_CSV")"
printf '%s\n' "rate_hz,t3_rate_hz,devices,tick_rate_hz,window_s,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t3_received,t3_no_route,t3_p50_us,t3_p99_us,t3_p999_us,tick_hz,rss_mb,channel_depth_max" > "$OUT_CSV"

# Announce the sweep and print the bold header of the console table; the
# cross-tier table has extra T3 columns.
if [[ "$CROSS_TIER" == "1" ]]; then
  step "Sweeping T1 + holding T3 at ${T3_RATE_HZ} Hz (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
  printf '%s%-8s %-9s %-9s %-10s %-10s %-8s %-9s %-10s %-10s %-8s %-7s\n%s' \
    "$BOLD" \
    "rate" "t1_recv" "t1_drop" "t1_p50" "t1_p99" "t3_recv" "t3_p50" "t3_p99" "t3_p999" "tick_hz" "rss_mb" \
    "$RESET"
else
  step "Sweeping T1 rate (warmup ${WARMUP_S}s, window ${WINDOW_S}s, devices=$DEVICES)"
  printf '%s%-8s %-9s %-9s %-10s %-10s %-10s %-8s %-7s\n%s' \
    "$BOLD" \
    "rate" "received" "dropped" "p50_us" "p99_us" "p999_us" "tick_hz" "rss_mb" \
    "$RESET"
fi

# Files that hold the /metrics text captured at the start and at the end of
# each measurement window.
BEFORE="$LOG_DIR/before.txt"
AFTER="$LOG_DIR/after.txt"
|
|
|
|
# peak_depth TIER — peak-tracker for channel depth. Polls /metrics
# WINDOW_S * 4 times, sleeping 0.25s after each poll, and prints the largest
# substrate_channel_depth{tier="TIER"} sample seen (0 when none was seen).
#   $1: tier label, e.g. "t1", "t2" or "t3"
# Samples are truncated to their integer part. A sample that is not a plain
# non-negative integer after truncation (NaN, exponent notation, empty because
# the scrape failed) is skipped instead of breaking the arithmetic comparison.
peak_depth() {
  local label="$1"
  # Exact metric name + label set, compared as a string so no regex escaping
  # is needed.
  local key="substrate_channel_depth{tier=\"${label}\"}"
  local polls=$(( WINDOW_S * 4 ))
  local max=0
  local val int_val i
  # C-style loop: unlike `seq 1 0` on BSD, it runs zero times for WINDOW_S=0.
  for (( i = 0; i < polls; i++ )); do
    # awk exits at the first match, which can SIGPIPE curl; `|| true` keeps a
    # pipefail status from that (or from a failed scrape) non-fatal.
    val=$(curl -s http://localhost:9100/metrics 2>/dev/null \
      | awk -v key="$key" '$1 == key { print $NF; exit }') || true
    int_val="${val%.*}"
    # 10# forces base 10 so a sample with leading zeros is not read as octal.
    if [[ "$int_val" =~ ^[0-9]+$ ]] && (( 10#$int_val > max )); then
      max=$(( 10#$int_val ))
    fi
    sleep 0.25
  done
  echo "$max"
}
|
|
|
|
# counter_delta AFTER BEFORE — print AFTER minus BEFORE as an integer.
counter_delta() {
  awk -v a="$1" -v b="$2" 'BEGIN { printf "%d", a-b }'
}

for rate in "${RATES[@]}"; do
  # The simulator only pushes T1 at this rate and keeps the connections open.
  # In cross-tier mode the T3 traffic comes from the substrate's synthetic_t3
  # driver, which was enabled through the environment at substrate start-up.
  RUST_LOG=warn "$SIMULATOR" \
    --profile single \
    --sensor-type generic \
    --rate-hz "$rate" \
    --count 0 \
    --devices "$DEVICES" \
    >"$LOG_DIR/sim_${rate}.log" 2>&1 &
  SIM_PID=$!

  # Let the run warm up, then capture the counters that open the
  # *measurement* window.
  sleep "$WARMUP_S"
  snapshot_to "$BEFORE"
  t1_rx_start=$(get_value "$BEFORE" 'substrate_received_total\{tier="t1"\}')
  t1_drop_start=$(get_value "$BEFORE" 'substrate_dropped_total\{tier="t1"\}')
  t3_rx_start=$(get_value "$BEFORE" 'substrate_received_total\{tier="t3"\}')
  t3_nr_start=$(get_value "$BEFORE" 'substrate_t3_outbound_no_route_total')

  # peak_depth runs for the whole window while it samples the T1 channel depth.
  depth_max=$(peak_depth t1)

  # Capture the closing snapshot first, then stop and reap the simulator.
  snapshot_to "$AFTER"
  kill -TERM "$SIM_PID" 2>/dev/null || true
  wait "$SIM_PID" 2>/dev/null || true
  SIM_PID=""

  # T1 counters and latency quantiles at the end of the window.
  t1_rx_end=$(get_value "$AFTER" 'substrate_received_total\{tier="t1"\}')
  t1_drop_end=$(get_value "$AFTER" 'substrate_dropped_total\{tier="t1"\}')
  p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.5"\}')
  p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.99"\}')
  p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t1",quantile="0.999"\}')

  # The same for T3.
  t3_rx_end=$(get_value "$AFTER" 'substrate_received_total\{tier="t3"\}')
  t3_nr_end=$(get_value "$AFTER" 'substrate_t3_outbound_no_route_total')
  t3_p50=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.5"\}')
  t3_p99=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.99"\}')
  t3_p999=$(get_value "$AFTER" 'substrate_latency_us\{tier="t3",quantile="0.999"\}')

  tick_hz=$(get_value "$AFTER" 'substrate_tick_hz')
  rss=$(get_value "$AFTER" 'substrate_rss_bytes')

  # A quantile missing from the snapshot is reported as 0.
  p50=${p50:-0}
  p99=${p99:-0}
  p999=${p999:-0}
  t3_p50=${t3_p50:-0}
  t3_p99=${t3_p99:-0}
  t3_p999=${t3_p999:-0}

  # Window deltas and display formatting; awk handles the floating-point math.
  received=$(counter_delta "$t1_rx_end" "$t1_rx_start")
  dropped=$(counter_delta "$t1_drop_end" "$t1_drop_start")
  t3_received=$(counter_delta "$t3_rx_end" "$t3_rx_start")
  t3_no_route=$(counter_delta "$t3_nr_end" "$t3_nr_start")
  rss_mb=$(awk -v r="$rss" 'BEGIN { printf "%.1f", r/1048576 }')
  tick_hz_fmt=$(awk -v t="$tick_hz" 'BEGIN { printf "%.1f", t }')

  # Console row, laid out to match the table header for the active mode.
  if [[ "$CROSS_TIER" == "1" ]]; then
    printf '%-8s %-9s %-9s %-10.0f %-10.0f %-8s %-9.0f %-10.0f %-10.0f %-8s %-7s\n' \
      "$rate" "$received" "$dropped" \
      "$p50" "$p99" \
      "$t3_received" "$t3_p50" "$t3_p99" "$t3_p999" \
      "$tick_hz_fmt" "$rss_mb"
  else
    printf '%-8s %-9s %-9s %-10.0f %-10.0f %-10.0f %-8s %-7s\n' \
      "$rate" "$received" "$dropped" "$p50" "$p99" "$p999" \
      "$tick_hz_fmt" "$rss_mb"
  fi

  # CSV row: the same 18 columns, in the same order, as the header line.
  csv_row=(
    "$rate" "$T3_RATE_HZ" "$DEVICES" "$TICK_RATE_HZ" "$WINDOW_S"
    "$received" "$dropped" "$p50" "$p99" "$p999"
    "$t3_received" "$t3_no_route" "$t3_p50" "$t3_p99" "$t3_p999"
    "$tick_hz_fmt" "$rss_mb" "$depth_max"
  )
  (IFS=,; printf '%s\n' "${csv_row[*]}") >> "$OUT_CSV"

  # Tiny breather between rate points so the substrate's summary window
  # doesn't carry over.
  sleep 1
done

printf '\n%sCSV written to:%s %s\n' "$DIM" "$RESET" "$OUT_CSV"
printf '%sSubstrate log:%s %s\n' "$DIM" "$RESET" "$SUB_LOG"
|