Fix metrics flickering and improve internals page
- Fix dashboard metrics alternating to 0 by merging partial batches in the gateway before broadcasting to WebSocket clients. The aggregator sends metrics in batches of 20, causing partial updates that overwrote each other. The gateway now maintains a machine_metrics_cache that accumulates metrics across batches.
- Remove the misleading gRPC calls counter from the internals page (it was only incremented on health checks, not on actual metric flow). Replace it with a cached_machines counter showing the tracked machines.
- Update the internals.html stats panel to show Events, Broadcasts, Clients, and Machines instead of gRPC calls.
This commit is contained in:
@@ -38,10 +38,21 @@ class RedisStorage:
|
||||
metrics: dict[str, float],
|
||||
timestamp_ms: int,
|
||||
) -> None:
|
||||
"""Update the current state for a machine."""
|
||||
"""Update the current state for a machine (merges metrics, doesn't replace)."""
|
||||
if not self._client:
|
||||
raise RuntimeError("Not connected to Redis")
|
||||
|
||||
key = f"machine:{machine_id}"
|
||||
|
||||
# Get existing state to merge metrics
|
||||
existing_data = await self._client.hget(key, "state")
|
||||
if existing_data:
|
||||
existing_state = json.loads(existing_data)
|
||||
existing_metrics = existing_state.get("metrics", {})
|
||||
# Merge new metrics into existing (new values override old)
|
||||
existing_metrics.update(metrics)
|
||||
metrics = existing_metrics
|
||||
|
||||
state = {
|
||||
"machine_id": machine_id,
|
||||
"hostname": hostname,
|
||||
@@ -51,7 +62,6 @@ class RedisStorage:
|
||||
}
|
||||
|
||||
# Store as hash for efficient partial reads
|
||||
key = f"machine:{machine_id}"
|
||||
await self._client.hset(
|
||||
key,
|
||||
mapping={
|
||||
|
||||
Reference in New Issue
Block a user