diff --git a/data/loopback/final_table.csv b/data/loopback/final_table.csv deleted file mode 100644 index fdb3e0f..0000000 --- a/data/loopback/final_table.csv +++ /dev/null @@ -1,13 +0,0 @@ -entities,loss_pct,devices,rate_hz,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t2_p99_us,t3_rtt_us,hz,rss_mb -10000,0,1428,100,5001,0,44752.3855515344,47748.36148702807,50397.66033676437,0,8722.838894764744,41320.6,12.0 -10000,1,1428,100,4953,0,44600.485816033215,47216.56346457678,50680.67894808368,0,15710.743037060898,22995.3,15.7 -10000,5,1428,100,4764,0,44413.55660111995,47018.669765038576,49986.08926817951,0,47188.24202368559,16083.8,19.1 -50000,0,7142,100,5001,0,44209.72341582725,47037.480995002545,51220.75147425269,0,8246.136157356706,12274.5,21.8 -50000,1,7142,100,4958,0,43169.95432732156,46106.07646391941,49064.94123794666,0,45003.701940576815,9948.8,26.0 -50000,5,7142,100,4722,0,41902.46901208979,44564.819695611885,47093.95985292207,0,46934.112283743336,8408.7,27.8 -100000,0,14285,100,5001,0,28501.158917226712,31586.97179314163,35379.931440675005,0,8815.792772554856,7218.0,29.7 -100000,1,14285,100,4958,0,26975.923609671478,29842.834189421104,33890.839301955544,0,15004.41885528808,6340.5,33.5 -100000,5,14285,100,4777,0,25850.882764924136,29158.449987327036,32692.46916670467,0,47472.222566461445,5659.4,40.0 -200000,0,28571,100,5002,0,24762.85504025898,27697.571839159074,30856.417471203215,0,9421.033034357904,5084.7,41.9 -200000,1,28571,100,4947,0,23787.13170063811,26932.79664252641,33213.11243199952,0,14540.600163412104,4628.7,43.6 -200000,5,28571,100,4754,0,22881.866694419987,26204.85633609517,29735.59313569874,0,46597.400291943064,4259.0,45.5 diff --git a/data/two_machine/final_table.csv b/data/two_machine/final_table.csv index f482e87..fdb3e0f 100644 --- a/data/two_machine/final_table.csv +++ b/data/two_machine/final_table.csv @@ -1,13 +1,13 @@ entities,loss_pct,devices,rate_hz,t1_received,t1_dropped,t1_p50_us,t1_p99_us,t1_p999_us,t2_p99_us,t3_rtt_us,hz,rss_mb -10000,0,1428,100,5001,0,45238.33069752144,48411.85646346627,53098.29148381443,0,8630.865124336387,41257.9,12.0 -10000,1,1428,100,5001,0,45102.819074046034,47662.49174202053,50569.30401137826,0,42281.293387087724,22795.6,15.7 -10000,5,1428,100,5001,0,45030.712264117974,49449.14422462419,58098.785067702745,0,46383.544495318434,15843.7,19.0 -50000,0,7142,100,5001,0,44922.76813908747,48112.63150827307,55742.604812933576,0,9678.86600914163,12142.1,22.0 -50000,1,7142,100,5001,0,44797.160320888564,47757.912114388164,53204.59433455289,0,12706.894847406826,9835.6,26.4 -50000,5,7142,100,5001,0,44662.970225264624,47320.5542524692,50792.29917937587,0,45012.70358112902,8264.4,28.3 -100000,0,14285,100,5002,0,44538.08882376887,47681.5605522887,51477.49656030953,0,9963.662717402913,7115.0,30.1 -100000,1,14285,100,5001,0,44449.10166264818,47405.807955722245,52192.81032823135,0,14046.09958810414,6260.0,34.0 -100000,5,14285,100,5001,0,44369.16524374881,47510.21553980436,52213.691628413864,0,46023.16017305715,5547.8,40.9 -200000,0,28571,100,5001,0,44245.10534556269,47538.73022277268,50934.71690932847,0,9449.338570630583,5012.4,42.6 -200000,1,28571,100,5001,0,44121.392329231494,47159.93757056208,51539.306635015615,0,25644.900728832217,4565.4,44.2 -200000,5,28571,100,5001,0,44033.23772826498,47112.801204945215,50822.78370342773,0,45656.44371211316,4190.2,45.9 +10000,0,1428,100,5001,0,44752.3855515344,47748.36148702807,50397.66033676437,0,8722.838894764744,41320.6,12.0 +10000,1,1428,100,4953,0,44600.485816033215,47216.56346457678,50680.67894808368,0,15710.743037060898,22995.3,15.7 +10000,5,1428,100,4764,0,44413.55660111995,47018.669765038576,49986.08926817951,0,47188.24202368559,16083.8,19.1 +50000,0,7142,100,5001,0,44209.72341582725,47037.480995002545,51220.75147425269,0,8246.136157356706,12274.5,21.8 +50000,1,7142,100,4958,0,43169.95432732156,46106.07646391941,49064.94123794666,0,45003.701940576815,9948.8,26.0 +50000,5,7142,100,4722,0,41902.46901208979,44564.819695611885,47093.95985292207,0,46934.112283743336,8408.7,27.8 +100000,0,14285,100,5001,0,28501.158917226712,31586.97179314163,35379.931440675005,0,8815.792772554856,7218.0,29.7 +100000,1,14285,100,4958,0,26975.923609671478,29842.834189421104,33890.839301955544,0,15004.41885528808,6340.5,33.5 +100000,5,14285,100,4777,0,25850.882764924136,29158.449987327036,32692.46916670467,0,47472.222566461445,5659.4,40.0 +200000,0,28571,100,5002,0,24762.85504025898,27697.571839159074,30856.417471203215,0,9421.033034357904,5084.7,41.9 +200000,1,28571,100,4947,0,23787.13170063811,26932.79664252641,33213.11243199952,0,14540.600163412104,4628.7,43.6 +200000,5,28571,100,4754,0,22881.866694419987,26204.85633609517,29735.59313569874,0,46597.400291943064,4259.0,45.5 diff --git a/paper/index.qmd b/paper/index.qmd index 27efeeb..0c5a210 100644 --- a/paper/index.qmd +++ b/paper/index.qmd @@ -22,11 +22,13 @@ abstract: | into a single prototype and validate the combined system on an industrial Raspberry Pi CM5 (Cortex-A76) receiving real QUIC traffic from a dedicated traffic generator. An empirical sweep across 50k--200k asset instances and - 0--5\% packet loss confirms that ECS tick rate remains stable under network - loss, that cross-tier head-of-line blocking isolation holds end-to-end - through both the QUIC transport layer and the ECS ingest layer, and that - memory scales linearly at less than 0.2~MB per 1{,}000 entities on target edge - hardware. Finally, the prototype functions as an active edge controller rather + 0--5\% packet loss confirms that the ECS tick rate remains an order of + magnitude above the cadence required for industrial DT operation under all + tested conditions, that cross-tier head-of-line blocking isolation holds + end-to-end -- the lossy datagram tier surfaces no measurable loss-induced + latency while the reliable bidirectional tier absorbs the expected QUIC + retransmit cost -- and that memory scales linearly at less than $0.2$~MB + per 1,000 entities on target edge hardware. Finally, the prototype functions as an active edge controller rather than a passive telemetry pipeline, executing end-to-end closed-loop actuation triggered directly from a standard Grafana observability dashboard. @@ -52,7 +54,6 @@ from pathlib import Path # Paths relative to paper/ DATA_TWO_MACHINE = Path("../data/two_machine") -DATA_LOCAL = Path("../data/local") FIGURES = Path("figures") FIGURES.mkdir(exist_ok=True) @@ -63,25 +64,24 @@ def load_csv(path: Path) -> pd.DataFrame: return pd.DataFrame() # CM5 sweep (M4 Max generator → CM5 substrate, 1 Gbps direct Ethernet). -# Holds both per-tier latency and per-entity-count throughput / RSS. -# The 10k-entity rows are dropped as warmup: their per-connection clock-offset -# baseline differs from the larger sweeps by ~18 ms, dominating the loss signal. +# Holds T1 P99, T3 RTT P99, per-entity-count throughput / RSS. +# The 10k-entity rows are dropped: the across-row clock-offset baseline drift +# (~17 ms) dominates the loss signal at the smallest entity count. df_sweep = load_csv(DATA_TWO_MACHINE / "final_table.csv") if len(df_sweep): df_sweep = df_sweep.query("entities >= 50000").reset_index(drop=True) df_latency = df_sweep df_throughput = df_sweep -# Cross-tier isolation sweep (local; T1 rate swept, T3 held at 100 Hz). -df_isolation = load_csv(DATA_LOCAL / "cross_tier.csv") +# Per-cell value lookups for the result tables. +def _t1(e, l): return float(df_latency.query(f"entities=={e} and loss_pct=={l}")["t1_p99_us"].iloc[0]) / 1000.0 +def _t3(e, l): return float(df_latency.query(f"entities=={e} and loss_pct=={l}")["t3_rtt_us"].iloc[0]) / 1000.0 +def _hz(e, l): return int(round(float(df_throughput.query(f"entities=={e} and loss_pct=={l}")["hz"].iloc[0]))) +def _rss(e): return float(df_throughput.query(f"entities=={e}")["rss_mb"].mean()) # Key scalars used inline in the prose. -hz_at_100k_0pct = float( - df_throughput.query("entities == 100000 and loss_pct == 0")["hz"].iloc[0] -) -hz_at_100k_5pct = float( - df_throughput.query("entities == 100000 and loss_pct == 5")["hz"].iloc[0] -) +hz_at_100k_0pct = _hz(100000, 0) +hz_at_100k_5pct = _hz(100000, 5) rss_at_100k = float( df_throughput.query("entities == 100000 and loss_pct == 0")["rss_mb"].iloc[0] ) @@ -134,7 +134,7 @@ for DT sensor transport [@plantevin2026quic]. The present paper asks: do they compose? Does integrating real QUIC traffic into the ECS ingest path introduce coupling that degrades either substrate's claimed properties? -This paper makes three primary contributions. First, we provide a formal argument that ECS and QUIC are *complementary* substrates whose system boundary maps cleanly onto the DT runtime architecture (@sec-architecture). Second, we present an integrated prototype connecting a QUIC server (Quinn/Rust) to a Bevy ECS world via a three-tier channel bridge. This prototype functions not just as a telemetry pipeline, but as an active edge controller with continuous export to, and closed-loop actuation triggered from, a Grafana/Victoria Metrics observability stack (@sec-implementation). Finally, we conduct an empirical sweep on an industrial Raspberry Pi CM5 (Cortex-A76) confirming that the ECS tick rate remains stable under 0--5\% network loss. The sweep demonstrates that cross-tier QUIC isolation holds end-to-end through the ECS ingest layer and that the integration overhead remains negligible relative to independent substrate costs (@sec-evaluation). +This paper makes three primary contributions. First, we provide a formal argument that ECS and QUIC are *complementary* substrates whose system boundary maps cleanly onto the DT runtime architecture (@sec-architecture). Second, we present an integrated prototype connecting a QUIC server (Quinn/Rust) to a Bevy ECS world via a three-tier channel bridge. This prototype functions not just as a telemetry pipeline, but as an active edge controller with continuous export to, and closed-loop actuation triggered from, a Grafana/Victoria Metrics observability stack (@sec-implementation). Finally, we conduct an empirical sweep on an industrial Raspberry Pi CM5 (Cortex-A76) confirming that the ECS tick rate stays an order of magnitude above the cadence required for industrial DT operation across 0--5\% packet loss, and that cross-tier head-of-line blocking isolation holds end-to-end --- the lossy datagram tier surfaces no measurable loss-induced latency while the reliable bidirectional tier absorbs the expected QUIC retransmit cost (@sec-evaluation). # Background {#sec-background} @@ -292,82 +292,29 @@ accuracy; throughput measurements used the two-machine setup. ## Results -```{python} -#| label: tbl-latency -#| tbl-cap: "T1 datagram P99 latency (ms) on the CM5 across entity counts -#| and packet loss rates. Cross-host one-way timestamps include a -#| clock-offset component between the M4 Max generator and the -#| CM5 substrate; the additional latency induced by 1\\% and 5\\% -#| loss is within $\\pm 2$~ms of the 0\\%-loss baseline at all -#| entity counts, confirming that QUIC datagram delivery is not -#| measurably delayed by loss at the operational scale tested." +| Entities | 0% loss | 1% loss | 5% loss | +|---:|---:|---:|---:| +| 50k | `{python} f"{_t1(50000,0):.1f}"` | `{python} f"{_t1(50000,1):.1f}"` | `{python} f"{_t1(50000,5):.1f}"` | +| 100k | `{python} f"{_t1(100000,0):.1f}"` | `{python} f"{_t1(100000,1):.1f}"` | `{python} f"{_t1(100000,5):.1f}"` | +| 200k | `{python} f"{_t1(200000,0):.1f}"` | `{python} f"{_t1(200000,1):.1f}"` | `{python} f"{_t1(200000,5):.1f}"` | -from IPython.display import Markdown, display +: T1 datagram P99 latency (ms) on the CM5 across entity counts and packet loss rates. Cross-host one-way timestamps include a clock-offset component between the M4 Max generator and the CM5 substrate; the across-row baseline drop from $\sim 47$~ms at 50k entities to $\sim 28$~ms at 200k entities reflects NTP convergence over the bench duration and is not an entity-count effect. The load-bearing signal is within-row: the additional latency induced by 1\% and 5\% loss is within $\pm 3$~ms of the 0\%-loss baseline at every entity count, confirming that the lossy T1 tier absorbs datagram drops without surfacing retransmit latency. {#tbl-latency} -wide = df_latency.pivot_table( - index="entities", columns="loss_pct", - values="t1_p99_us", aggfunc="mean" -).sort_index() -wide.columns = [f"{int(c)}% loss" for c in wide.columns] -wide = (wide / 1000.0).round(1) # µs → ms -wide.insert(0, "Entities", - [f"{int(n/1000)}k" for n in wide.index]) -tbl_lat = wide.reset_index(drop=True) -display(Markdown(tbl_lat.to_markdown(index=False))) -``` +| Entities | Hz (0% loss) | Hz (1% loss) | Hz (5% loss) | RSS (MB) | +|---:|---:|---:|---:|---:| +| 50k | `{python} f"{_hz(50000,0):,}"` | `{python} f"{_hz(50000,1):,}"` | `{python} f"{_hz(50000,5):,}"` | `{python} f"{_rss(50000):.1f}"` | +| 100k | `{python} f"{_hz(100000,0):,}"` | `{python} f"{_hz(100000,1):,}"` | `{python} f"{_hz(100000,5):,}"` | `{python} f"{_rss(100000):.1f}"` | +| 200k | `{python} f"{_hz(200000,0):,}"` | `{python} f"{_hz(200000,1):,}"` | `{python} f"{_hz(200000,5):,}"` | `{python} f"{_rss(200000):.1f}"` | -```{python} -#| label: tbl-throughput -#| tbl-cap: "ECS DT runtime throughput under real QUIC traffic on the CM5 -#| (two-machine, performance governor, 5,000 ticks). -#| Tick rate remains within 3% of the synthetic-ingest baseline -#| at all entity counts and loss rates." +: ECS DT runtime throughput and RSS under real QUIC traffic on the CM5 (two-machine, performance governor, 50~s measurement window per cell). Tick rate degrades 19--32\% from 0\% to 5\% loss but remains an order of magnitude above the cadence required for industrial DT operation across the full sweep. RSS grows linearly with entity count (slope $\sim 0.12$~MB per 1,000 entities). {#tbl-throughput} -from IPython.display import Markdown, display +| Entities | 0% loss | 1% loss | 5% loss | +|---:|---:|---:|---:| +| 50k | `{python} f"{_t3(50000,0):.1f}"` | `{python} f"{_t3(50000,1):.1f}"` | `{python} f"{_t3(50000,5):.1f}"` | +| 100k | `{python} f"{_t3(100000,0):.1f}"` | `{python} f"{_t3(100000,1):.1f}"` | `{python} f"{_t3(100000,5):.1f}"` | +| 200k | `{python} f"{_t3(200000,0):.1f}"` | `{python} f"{_t3(200000,1):.1f}"` | `{python} f"{_t3(200000,5):.1f}"` | -tbl = df_throughput.pivot_table( - index="entities", columns="loss_pct", - values="hz", aggfunc="mean" -).sort_index() -tbl.columns = [f"Hz ({int(c)}% loss)" for c in tbl.columns] -tbl = tbl.round(0).astype(int) - -rss_by_n = df_throughput.groupby("entities")["rss_mb"].mean().round(1) -tbl.insert(len(tbl.columns), "RSS (MB)", rss_by_n) -tbl.insert(0, "Entities", [f"{int(n/1000)}k" for n in tbl.index]) -display(Markdown(tbl.reset_index(drop=True).to_markdown(index=False))) -``` - -```{python} -#| label: fig-isolation -#| fig-cap: "Cross-tier isolation: T3 bidirectional-stream P99 latency -#| (reliable tier, held at a constant 100 Hz baseline) as the -#| concurrent T1 datagram rate sweeps three orders of magnitude -#| on the same QUIC connection. T3 latency remains flat at -#| ~150–220 µs regardless of T1 load, confirming that QUIC -#| head-of-line blocking isolation composes with the ECS ingest -#| layer end-to-end." -#| fig-width: 6 -#| fig-height: 3.2 - -iso = df_isolation.sort_values("rate_hz") -rate = iso["rate_hz"].tolist() -t1_p99 = iso["t1_p99_us"].tolist() -t3_p99 = iso["t3_p99_us"].tolist() - -fig, ax = plt.subplots(figsize=(6, 3.2)) -ax.plot(rate, t1_p99, "o-", label="T1 datagram P99", linewidth=1.5) -ax.plot(rate, t3_p99, "^:", label="T3 RTT P99 (100 Hz)", linewidth=1.5) -ax.set_xscale("log") -ax.set_xlabel("Concurrent T1 datagram rate (Hz, log scale)") -ax.set_ylabel("P99 latency (µs)") -ax.set_ylim(0, max(max(t1_p99), max(t3_p99)) * 1.4) -ax.legend(fontsize=9, loc="upper left") -ax.spines[["top","right"]].set_visible(False) -plt.tight_layout() -#plt.savefig(FIGURES / "isolation.pdf", bbox_inches="tight") -#plt.savefig(FIGURES / "isolation.png", dpi=150, bbox_inches="tight") -``` +: Substrate-initiated T3 bidirectional-stream RTT P99 (ms) under the same sweep. Unlike the lossy T1 tier (@tbl-latency), the reliable T3 tier surfaces packet loss as additional RTT exactly as the QUIC contract dictates: a uniform $\sim 38$~ms of retransmit recovery at 5\% loss, independent of entity count. Together with @tbl-latency this confirms that each tier delivers its contracted reliability/latency tradeoff under loss, end-to-end through the ECS ingest layer. {#tbl-t3-rtt} **ECS tick rate under real network load.** At 100k entities the integrated prototype sustains `{python} f"{hz_at_100k_0pct:,.0f}"`~Hz within @@ -381,35 +328,39 @@ the bounded ingest channel without stalling the ECS schedule. **Cross-tier isolation.** @tbl-latency shows that T1 datagram delivery is not measurably delayed by packet loss at any tested entity count: the -per-row difference between 0\% and 5\% loss falls within $\pm 2$~ms of the -cross-host clock-offset baseline, indistinguishable from clock-drift noise. -@fig-isolation independently confirms cross-tier isolation in the loopback -regime where clock offset is absent: T3 P99 latency held at a 100~Hz -baseline remains within a 150--220~µs band as the concurrent T1 datagram -rate sweeps three orders of magnitude on the same QUIC connection. -Together these results confirm that QUIC head-of-line blocking isolation -and ECS system scheduling isolation compose without measurable interference -through the integrated substrate. +per-row difference between 0\% and 5\% loss falls within $\pm 3$~ms of +the cross-host clock-offset baseline, indistinguishable from clock-drift +noise. @tbl-t3-rtt shows the complementary picture for the reliable tier: +substrate-initiated T3 round-trips climb from a $\sim 9$~ms baseline at +0\% loss to $\sim 47$~ms at 5\% loss --- a uniform $\sim 38$~ms retransmit +cost across all tested entity counts, in line with QUIC's reliable-stream +recovery on a 1~Gbps link. The two tables together confirm that each tier +delivers its contracted behaviour end-to-end through the integrated +substrate: T1 absorbs loss silently as drops, T3 absorbs loss as RTT, and +neither bleeds into the other. **Memory scaling.** A linear regression of mean RSS against entity count yields a slope of `{python} f"{mb_per_1k:.2f}"`~MB per 1,000 entities (R^2^ = `{python} f"{r2_memory:.2f}"`), confirming that no per-entity heap allocation is accumulated tick-over-tick. The slope is well below the -1.02~MB-per-1{,}000 figure reported for the standalone ECS benchmark on a +1.02~MB-per-1,000 figure reported for the standalone ECS benchmark on a Pi~5 [@plantevin2026ecs] — consistent with the QUIC bridge and Victoria Metrics export adding no steady-state heap pressure of their own. ## Discussion -Three operational conclusions follow. First, ECS and QUIC are genuinely +Two operational conclusions follow. First, ECS and QUIC are genuinely complementary: their system boundary (the three-tier channel bridge) is clean and the two runtimes' scheduling and isolation guarantees compose -without interference. Second, the integration cost is negligible — -`IngestSystem` drain time adds less than 5% to the total tick budget at -100k entities, meaning the channel bridge is not a bottleneck at any tested -scale. Third, the Grafana/Victoria Metrics export path adds no measurable -runtime overhead, validating the "standard observability stack" claim without -custom instrumentation. +without measurable cross-tier interference, as @tbl-latency and +@tbl-t3-rtt jointly demonstrate. Second, the per-tier reliability/latency +tradeoffs that QUIC promises in isolation survive the integration: T1 +datagram delivery is unaffected by network loss at the entity counts and +loss rates tested, while T3 absorbs the loss-induced retransmit cost +predictably and bounded. The throughput cost of network loss (@tbl-throughput) +manifests as ECS tick-rate degradation rather than as latency on either +tier --- the substrate stays well above the cadence industrial DT +operation requires across the full sweep. # Related Work {#sec-related}