#!/usr/bin/env bash
# scripts/bench-loss.sh — M6 benchmark harness
# Sweeps entity count {10k, 50k, 100k, 200k} x loss_rate {0, 1, 5}%
# Output: data/loopback/final_table.csv
#
# Every setting below can be overridden from the environment, e.g.
#   IFACE=enp3s0 BIDI=0 WINDOW_S=30 scripts/bench-loss.sh
set -euo pipefail

# Run from the repository root (the parent of this script's directory) so the
# relative paths used below (certs/, data/, target/) resolve.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
cd "$ROOT"

TICK_RATE_HZ="${TICK_RATE_HZ:-100}"
WARMUP_S="${WARMUP_S:-20}"
WINDOW_S="${WINDOW_S:-50}"
RATE_HZ="${RATE_HZ:-100}"
BUILD="${BUILD:-release}"
IFACE="${IFACE:-eth0}"
RUN_SIMULATOR="${RUN_SIMULATOR:-1}"
# Bidirectional loss via ifb (ingress redirect). Set BIDI=0 to disable and fall
# back to egress-only shaping on $IFACE.
BIDI="${BIDI:-1}"
IFB_DEV="${IFB_DEV:-ifb0}"
OUT_CSV="${OUT_CSV:-data/loopback/final_table.csv}"

# tc is optional: when it is missing the sweep still runs, but the netem
# helpers become no-ops and no loss is emulated.
HAS_TC=1
if ! command -v tc >/dev/null; then
  echo "Warning: 'tc' command not found. Loss emulation will be skipped." >&2
  HAS_TC=0
fi

# --- pretty logging ---
# Colour escapes are only emitted when stdout is a terminal.
if [[ -t 1 ]]; then
  BOLD=$'\033[1m'; DIM=$'\033[2m'; GREEN=$'\033[32m'; RED=$'\033[31m'; RESET=$'\033[0m'
else
  BOLD=; DIM=; GREEN=; RED=; RESET=;
fi

# step/ok/fail MESSAGE — print one status line with a marker prefix.
step() { printf '%s» %s%s\n' "$BOLD" "$1" "$RESET"; }
ok() { printf '%s ✓ %s%s\n' "$GREEN" "$1" "$RESET"; }
fail() { printf '%s ✗ %s%s\n' "$RED" "$1" "$RESET"; }

# Hard requirements: abort before doing any work if one is missing.
for cmd in cargo curl lsof awk; do
  command -v "$cmd" >/dev/null || { fail "missing: $cmd"; exit 1; }
done

# Refuse to start while something else holds the ports used by the substrate.
for port in 9000 9100; do
  # lsof ORs the two -i selectors, and -sTCP:LISTEN narrows only the TCP side,
  # so any output means a TCP listener or a UDP socket on $port. Test for
  # output rather than grepping for "LISTEN": UDP sockets have no LISTEN
  # state, so a grep would never report a busy UDP port.
  if [[ -n "$(lsof -nP -iUDP:"$port" -iTCP:"$port" -sTCP:LISTEN 2>/dev/null || true)" ]]; then
    fail "port $port in use — kill the running substrate first"
    exit 1
  fi
done

# Generate the certificates on first use.
[[ -f certs/server.crt ]] || make certs >/dev/null

# --- netem helpers -----------------------------------------------------------
# tc qdiscs are egress-only. To shape ingress (traffic arriving on $IFACE) we
# install an ingress qdisc that redirects every packet to an ifb device, then
# put the netem qdisc on the ifb's egress. Net effect: both directions of
# $IFACE see the configured loss percentage.
#
# Egress-only fallback (BIDI=0) keeps the historical behaviour: shape outgoing
# CM5 → peer traffic only.

# netem_init — prepare the ifb device used for ingress shaping.
# Globals: HAS_TC, BIDI, IFB_DEV (read)
# Returns: 0 when nothing needs doing (no tc, or BIDI=0) or the device is up;
#          non-zero when the ifb device is missing or cannot be brought up.
netem_init() {
  [[ "$HAS_TC" -eq 1 && "$BIDI" -eq 1 ]] || return 0
  # Load the ifb module (idempotent on modern kernels; ignored if built in).
  sudo modprobe ifb numifbs=1 2>/dev/null || true
  if ! ip link show "$IFB_DEV" >/dev/null 2>&1; then
    fail "ifb device $IFB_DEV not present after modprobe; BIDI mode unavailable"
    echo " - check 'modprobe ifb' on this kernel, or run with BIDI=0"
    return 1
  fi
  sudo ip link set "$IFB_DEV" up
}

# netem_clear — remove the qdiscs this script installs. Best effort: deleting a
# qdisc that is not there is not an error here.
# Globals: HAS_TC, BIDI, IFACE, IFB_DEV (read)
netem_clear() {
  [[ "$HAS_TC" -eq 1 ]] || return 0
  sudo tc qdisc del dev "$IFACE" root 2>/dev/null || true
  sudo tc qdisc del dev "$IFACE" ingress 2>/dev/null || true
  if [[ "$BIDI" -eq 1 ]]; then
    sudo tc qdisc del dev "$IFB_DEV" root 2>/dev/null || true
  fi
}

# netem_apply PCT — apply PCT% loss in both directions (or egress-only when
# BIDI=0). Existing shaping is cleared first; PCT=0 leaves the link unshaped.
# Globals: HAS_TC, BIDI, IFACE, IFB_DEV (read)
# Returns: 0 on success or when tc is unavailable; 1 if any tc step fails, in
#          which case whatever was already installed is rolled back so the
#          link is never left shaped in one direction only.
netem_apply() {
  local pct="$1"
  [[ "$HAS_TC" -eq 1 ]] || return 0
  netem_clear
  [[ "$pct" -gt 0 ]] || return 0

  # Egress: outgoing from $IFACE.
  if ! sudo tc qdisc add dev "$IFACE" root netem loss "${pct}%" 2>/dev/null; then
    echo "Warning: failed to apply egress netem loss on $IFACE." >&2
    netem_clear
    return 1
  fi

  if [[ "$BIDI" -eq 1 ]]; then
    # Ingress: incoming on $IFACE, redirected to $IFB_DEV egress and shaped there.
    if ! sudo tc qdisc add dev "$IFACE" handle ffff: ingress 2>/dev/null; then
      echo "Warning: failed to add ingress qdisc on $IFACE." >&2
      netem_clear
      return 1
    fi
    if ! sudo tc filter add dev "$IFACE" parent ffff: protocol all u32 \
      match u32 0 0 action mirred egress redirect dev "$IFB_DEV" 2>/dev/null; then
      echo "Warning: failed to install ingress redirect filter." >&2
      netem_clear
      return 1
    fi
    if ! sudo tc qdisc add dev "$IFB_DEV" root netem loss "${pct}%" 2>/dev/null; then
      echo "Warning: failed to apply netem on $IFB_DEV." >&2
      netem_clear
      return 1
    fi
  fi
}

# If the ifb device cannot be prepared, drop to egress-only shaping now.
# Leaving BIDI=1 would let the sweep start and then abort at the first
# non-zero loss step, when the redirect to the missing device fails.
if ! netem_init; then
  echo "Warning: ifb setup failed; falling back to egress-only loss (BIDI=0)." >&2
  BIDI=0
fi

step "Building ($BUILD)"
if [[ "$BUILD" == "release" ]]; then
  if [[ "$RUN_SIMULATOR" -eq 1 ]]; then
    cargo build --release -p substrate -p simulator >/dev/null
  else
    cargo build --release -p substrate >/dev/null
  fi
  SUBSTRATE="$ROOT/target/release/substrate"
  SIMULATOR="$ROOT/target/release/simulator"
else
  if [[ "$RUN_SIMULATOR" -eq 1 ]]; then
    cargo build -p substrate -p simulator >/dev/null
  else
    cargo build -p substrate >/dev/null
  fi
  SUBSTRATE="$ROOT/target/debug/substrate"
  SIMULATOR="$ROOT/target/debug/simulator"
fi

# Log directory; overridable, defaulting to the historical fixed location.
LOG_DIR="${LOG_DIR:-/tmp/quic_ecs_dt_bench}"
mkdir -p "$LOG_DIR"
SUB_LOG="$LOG_DIR/substrate.log"
: > "$SUB_LOG"

# cleanup — stop the background processes and remove any shaping.
# Globals: SIM_PID, SUBSTRATE_PID (read; either may be unset or empty)
cleanup() {
  if [[ -n "${SIM_PID:-}" ]]; then
    kill -TERM "$SIM_PID" 2>/dev/null || true
  fi
  if [[ -n "${SUBSTRATE_PID:-}" ]]; then
    kill -TERM "$SUBSTRATE_PID" 2>/dev/null || true
  fi
  netem_clear
  wait 2>/dev/null || true
}
# Install the handlers BEFORE launching anything, so a failed start-up is
# cleaned up too. INT/TERM only call exit: that fires the EXIT trap once, and
# the script actually stops instead of resuming after the handler returns.
trap cleanup EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

step "Starting substrate (tick_rate_hz=$TICK_RATE_HZ, log: $SUB_LOG)"
APP_SIMULATION__TICK_RATE_HZ="$TICK_RATE_HZ" RUST_LOG=warn "$SUBSTRATE" >"$SUB_LOG" 2>&1 &
SUBSTRATE_PID=$!

# Poll the metrics endpoint: up to 40 attempts, 0.25 s apart.
substrate_ready=0
for ((attempt = 1; attempt <= 40; attempt++)); do
  if curl -sf http://localhost:9100/metrics >/dev/null 2>&1; then
    substrate_ready=1
    break
  fi
  # No point waiting out the timeout if the process has already exited.
  kill -0 "$SUBSTRATE_PID" 2>/dev/null || break
  sleep 0.25
done
if [[ "$substrate_ready" -eq 1 ]]; then
  ok "substrate /metrics ready"
else
  fail "substrate didn't start"
  tail -20 "$SUB_LOG"
  exit 1
fi

# snapshot_to FILE — save the current /metrics response to FILE.
# -f makes an HTTP error fail the command instead of storing the error body.
snapshot_to() {
  curl -sf http://localhost:9100/metrics > "$1"
}

# get_value FILE NAME — print the last field of the first line in FILE whose
# first field equals NAME exactly; prints nothing when no line matches.
get_value() {
  awk -v pat="$2" '$1 == pat { print $NF; exit }' "$1"
}

mkdir -p "$(dirname "$OUT_CSV")"
echo "entities,loss_pct,devices,rate_hz,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t2_p99_us,t3_rtt_us,hz,rss_mb" > "$OUT_CSV"

step "Sweeping entity_count x loss_pct (warmup ${WARMUP_S}s, window ${WINDOW_S}s)"
printf '%s%-10s %-8s %-8s %-9s %-9s %-10s %-10s %-10s %-10s %-10s %-8s %-7s%s\n' \
  "$BOLD" "entities" "loss_pct" "devices" "received" "dropped" "t1_p50" "t1_p99" "t1_p999" "t2_p99" "t3_rtt" "hz" "rss_mb" "$RESET"

# Files holding the metrics snapshots.
BEFORE="$LOG_DIR/before.txt"
AFTER="$LOG_DIR/after.txt"

ENTITIES_LIST=(10000 50000 100000 200000)
# Loss percentages swept for every entity count.
LOSS_LIST=(0 1 5)

# Main sweep: one measurement per (entity count, loss percentage) pair. Each
# pair prints one table row and appends one CSV row to $OUT_CSV.
for entities in "${ENTITIES_LIST[@]}"; do
  # Integer division. Presumably each simulated device accounts for 7
  # entities — TODO confirm against the simulator's "industrial" profile.
  devices=$(( entities / 7 ))
  for loss in "${LOSS_LIST[@]}"; do
    # Apply tc-netem loss in both directions (or egress-only when BIDI=0).
    netem_apply "$loss"

    if [[ "$RUN_SIMULATOR" -eq 1 ]]; then
      # Local mode: start the simulator in the background, with one log file
      # per (entities, loss) pair.
      sim_args=(
        --profile industrial
        --rate-hz "$RATE_HZ"
        --count 0
        --devices "$devices"
      )
      RUST_LOG=warn "$SIMULATOR" "${sim_args[@]}" >"$LOG_DIR/sim_${entities}_${loss}.log" 2>&1 &
      SIM_PID=$!
    else
      # Manual mode: the operator starts an external simulator and confirms.
      echo -e "\n${BOLD}Ready for: $entities entities, $loss% loss${RESET}"
      # NOTE(review): the text between the prompt string and `/dev/null || true`
      # looks truncated in this copy of the file. Nothing visible here calls
      # snapshot_to, sleeps for WARMUP_S / WINDOW_S, or assigns rec_before and
      # drop_before, yet both are read further down. Restore this section from
      # the original script before relying on this loop.
      read -p "Press Enter to begin recording (ensure Mac simulator is started)..." /dev/null || true
      wait "$SIM_PID" 2>/dev/null || true
      SIM_PID=""
    fi

    # Pull the end-of-window values out of the $AFTER snapshot. get_value
    # prints nothing for a metric that is absent, hence the ${var:-0} defaults
    # where these values are printed below.
    rec_after=$(get_value "$AFTER" 'substrate_received_total{tier="t1"}')
    drop_after=$(get_value "$AFTER" 'substrate_dropped_total{tier="t1"}')
    p50=$(get_value "$AFTER" 'substrate_latency_us{tier="t1",quantile="0.5"}')
    p99=$(get_value "$AFTER" 'substrate_latency_us{tier="t1",quantile="0.99"}')
    p999=$(get_value "$AFTER" 'substrate_latency_us{tier="t1",quantile="0.999"}')
    t2_p99=$(get_value "$AFTER" 'substrate_latency_us{tier="t2",quantile="0.99"}')
    # This t3 0.99-quantile is what ends up in the column that the CSV header
    # calls t3_rtt_us.
    t3_p99=$(get_value "$AFTER" 'substrate_latency_us{tier="t3",quantile="0.99"}')
    tick_hz=$(get_value "$AFTER" 'substrate_tick_hz')
    rss=$(get_value "$AFTER" 'substrate_rss_bytes')

    # The counters are cumulative, so the per-window figure is after - before.
    # awk does the arithmetic; an empty operand is treated as 0.
    received=$(awk -v a="$rec_after" -v b="$rec_before" 'BEGIN { printf "%d", a-b }')
    dropped=$(awk -v a="$drop_after" -v b="$drop_before" 'BEGIN { printf "%d", a-b }')
    # Bytes -> MiB (1048576 = 1024 * 1024), one decimal place.
    rss_mb=$(awk -v r="$rss" 'BEGIN { printf "%.1f", r/1048576 }')
    tick_hz_fmt=$(awk -v t="$tick_hz" 'BEGIN { printf "%.1f", t }')

    # Console row: the latency columns are rounded to whole numbers (%.0f).
    printf '%-10s %-8s %-8s %-9s %-9s %-10.0f %-10.0f %-10.0f %-10.0f %-10.0f %-8s %-7s\n' \
      "$entities" "$loss" "$devices" "$received" "$dropped" "${p50:-0}" "${p99:-0}" "${p999:-0}" "${t2_p99:-0}" "${t3_p99:-0}" \
      "$tick_hz_fmt" "$rss_mb"
    # CSV row: same fields plus $RATE_HZ; latencies are written unrounded.
    echo "$entities,$loss,$devices,$RATE_HZ,$received,$dropped,${p50:-0},${p99:-0},${p999:-0},${t2_p99:-0},${t3_p99:-0},$tick_hz_fmt,$rss_mb" >> "$OUT_CSV"
  done
done

# Remove any shaping still installed after the last pair.
netem_clear
printf '\n%sCSV written to:%s %s\n' "$DIM" "$RESET" "$OUT_CSV"