Fix metrics flickering and improve internals page

- Fix dashboard metrics alternating to 0 by merging partial batches
  in the gateway before broadcasting to WebSocket clients. The
  aggregator sends metrics in batches of 20, so partial updates
  overwrote each other. The gateway now maintains a
  machine_metrics_cache that accumulates metrics across batches.

- Remove misleading gRPC calls counter from internals page (only
  incremented on health checks, not actual metric flow). Replace
  with cached_machines counter showing tracked machines.

- Update internals.html stats panel to show Events, Broadcasts,
  Clients, and Machines instead of gRPC calls.
This commit is contained in:
buenosairesam
2025-12-31 02:15:57 -03:00
parent ee9cbf73ec
commit 00b1e663d9
3 changed files with 577 additions and 37 deletions

View File

@@ -0,0 +1,414 @@
<!doctype html>
<html lang="en">
<head>
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>System Monitor - Internals</title>
<style>
/* Design tokens: the colour palette referenced through var() by the rules below. */
:root {
--bg-primary: #0d1117;
--bg-secondary: #161b22;
--bg-tertiary: #21262d;
--border: #30363d;
--text-primary: #c9d1d9;
--text-secondary: #8b949e;
--accent-green: #3fb950;
--accent-red: #f85149;
--accent-yellow: #d29922;
--accent-blue: #58a6ff;
--accent-purple: #a371f7;
}
/* Reset: border-box sizing and no default margin/padding on any element. */
* {
box-sizing: border-box;
margin: 0;
padding: 0;
}
/* Page defaults: monospace font stack, 13px text on the primary background. */
body {
font-family: "SF Mono", "Fira Code", monospace;
background: var(--bg-primary);
color: var(--text-primary);
font-size: 13px;
line-height: 1.5;
}
/* Centred content wrapper, capped at 1400px wide. */
.container {
max-width: 1400px;
margin: 0 auto;
padding: 1rem;
}
/* Top bar: children are pushed to opposite ends and vertically centred. */
header {
background: var(--bg-secondary);
border-bottom: 1px solid var(--border);
padding: 0.75rem 1rem;
display: flex;
justify-content: space-between;
align-items: center;
}
header h1 {
font-size: 1rem;
font-weight: 500;
}
/* Any span inside the title uses the purple accent. */
header h1 span {
color: var(--accent-purple);
}
/* Navigation links: muted by default, brighter on hover. */
.nav-links a {
color: var(--text-secondary);
text-decoration: none;
margin-left: 1.5rem;
}
.nav-links a:hover {
color: var(--text-primary);
}
/* Two-column layout: fixed 300px sidebar plus a flexible second column. */
.grid {
  display: grid;
  grid-template-columns: 300px 1fr;
  gap: 1rem;
  margin-top: 1rem;
}
/* On narrow viewports the fixed sidebar would leave almost no width for the
   second column, so stack both columns instead. minmax(0, 1fr) lets the
   column shrink below its content's minimum width rather than overflow. */
@media (max-width: 800px) {
  .grid {
    grid-template-columns: minmax(0, 1fr);
  }
}
/* Card container; overflow: hidden clips children to the rounded corners. */
.panel {
background: var(--bg-secondary);
border: 1px solid var(--border);
border-radius: 6px;
overflow: hidden;
}
/* Small uppercase caption bar at the top of a panel. */
.panel-header {
background: var(--bg-tertiary);
padding: 0.5rem 0.75rem;
border-bottom: 1px solid var(--border);
font-weight: 500;
font-size: 0.75rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: var(--text-secondary);
}
.panel-body {
padding: 0.75rem;
}
/* Services list: a single-column grid of rows. */
.status-grid {
display: grid;
gap: 0.5rem;
}
/* One service row: first child on the left, last child on the right. */
.status-item {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.5rem;
background: var(--bg-tertiary);
border-radius: 4px;
}
.status-label {
color: var(--text-secondary);
}
.status-value {
font-weight: 500;
}
/* 8px round indicator; its colour comes from one of the modifier classes below. */
.status-dot {
width: 8px;
height: 8px;
border-radius: 50%;
display: inline-block;
margin-right: 0.5rem;
}
/* healthy = green with a glow, error = red, unknown = yellow. */
.status-dot.healthy {
background: var(--accent-green);
box-shadow: 0 0 6px var(--accent-green);
}
.status-dot.error {
background: var(--accent-red);
}
.status-dot.unknown {
background: var(--accent-yellow);
}
/* Statistics panel: two equal-width columns of stat boxes. */
.stats-grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 0.5rem;
}
.stat-box {
background: var(--bg-tertiary);
padding: 0.75rem;
border-radius: 4px;
text-align: center;
}
/* Large blue number with a small uppercase caption underneath. */
.stat-value {
font-size: 1.5rem;
font-weight: 600;
color: var(--accent-blue);
}
.stat-label {
font-size: 0.7rem;
color: var(--text-secondary);
text-transform: uppercase;
}
/* Scrollable event list; the 200px offset presumably accounts for the header
   and panel chrome above it (not derived anywhere in this file). */
.event-stream {
height: calc(100vh - 200px);
overflow-y: auto;
}
/* One event row laid out as five columns; the third column takes the
   remaining width. */
.event-item {
display: grid;
grid-template-columns: 70px 90px 1fr 80px 60px;
gap: 0.5rem;
padding: 0.4rem 0.75rem;
border-bottom: 1px solid var(--border);
font-size: 0.8rem;
align-items: center;
}
.event-item:hover {
background: var(--bg-tertiary);
}
.event-time {
color: var(--text-secondary);
}
/* Topic badge; the modifier class picks the tint (metrics = blue, alerts = red). */
.event-topic {
padding: 0.15rem 0.4rem;
border-radius: 3px;
font-size: 0.7rem;
font-weight: 500;
}
.event-topic.metrics {
background: rgba(56, 139, 253, 0.15);
color: var(--accent-blue);
}
.event-topic.alerts {
background: rgba(248, 81, 73, 0.15);
color: var(--accent-red);
}
.event-source {
color: var(--text-secondary);
}
.event-machine {
color: var(--accent-purple);
}
.event-count {
color: var(--text-secondary);
text-align: right;
}
/* Slow, endless opacity fade applied to elements with the "pulse" class. */
.pulse {
  animation: pulse 2s ease-in-out infinite;
}
@keyframes pulse {
  0%,
  100% {
    opacity: 1;
  }
  50% {
    opacity: 0.5;
  }
}
/* Honour the OS-level "reduce motion" preference: an infinite animation
   should not run for users who asked for less movement. */
@media (prefers-reduced-motion: reduce) {
  .pulse {
    animation: none;
  }
}
/* Connection indicator: dot and label on one row, spaced by the flex gap. */
.connection-status {
  display: flex;
  align-items: center;
  gap: 0.5rem;
  font-size: 0.75rem;
}
/* The flex gap already separates dot and label, so drop the dot's own margin. */
.connection-status .status-dot {
  margin-right: 0;
}
</style>
</head>
<body>
<!-- Page header: title, live WebSocket connection indicator, and site navigation. -->
<header>
  <!-- The prompt glyph is decorative, so it is hidden from assistive technology. -->
  <h1><span aria-hidden="true">&gt;_</span> System Monitor Internals</h1>
  <div style="display: flex; align-items: center; gap: 2rem">
    <!-- role="status" makes this a polite live region, so text changes written
         by the script ("Connected", "Disconnected - Reconnecting...") are
         announced. The dot starts in the "unknown" colour so it is visible
         while the first connection attempt is still pending; the script
         replaces its class once the socket opens or closes. -->
    <div class="connection-status" role="status">
      <span class="status-dot unknown" id="conn-status" aria-hidden="true"></span>
      <span id="conn-text">Connecting...</span>
    </div>
    <nav class="nav-links" aria-label="Primary">
      <a href="/">Dashboard</a>
      <a href="/docs">API Docs</a>
    </nav>
  </div>
</header>
<!-- Primary content: sidebar (Services + Statistics panels) next to the live
     event stream. Element ids are read by the inline script and must not change.
     Panels are sections with real headings; the existing .panel-header rule
     already sets font size and weight, so the headings render like the former divs. -->
<main class="container">
  <div class="grid">
    <div class="sidebar">
      <section class="panel" style="margin-bottom: 1rem">
        <h2 class="panel-header">Services</h2>
        <div class="panel-body">
          <!-- Replaced by the script once /api/internals responds. -->
          <div class="status-grid" id="services">
            <div class="status-item">
              <span class="status-label">Loading...</span>
            </div>
          </div>
        </div>
      </section>
      <section class="panel">
        <h2 class="panel-header">Statistics</h2>
        <div class="panel-body">
          <div class="stats-grid">
            <div class="stat-box">
              <div class="stat-value" id="stat-events">0</div>
              <div class="stat-label">Events</div>
            </div>
            <div class="stat-box">
              <div class="stat-value" id="stat-ws">0</div>
              <div class="stat-label">Broadcasts</div>
            </div>
            <div class="stat-box">
              <div class="stat-value" id="stat-clients">0</div>
              <div class="stat-label">Clients</div>
            </div>
            <div class="stat-box">
              <div class="stat-value" id="stat-machines">0</div>
              <div class="stat-label">Machines</div>
            </div>
          </div>
        </div>
      </section>
    </div>
    <section class="panel">
      <h2 class="panel-header">
        Event Stream
        <!-- Decorative "live" dot: the span was empty, so the pulse animation
             had nothing to render. Hidden from assistive technology. -->
        <span class="pulse" style="color: var(--accent-green)" aria-hidden="true">●</span>
      </h2>
      <!-- Cleared by the script when the WebSocket "init" message arrives;
           new events are inserted at the top. -->
      <div class="event-stream" id="events">
        <div class="event-item" style="color: var(--text-secondary)">
          Waiting for events...
        </div>
      </div>
    </section>
  </div>
</main>
<script>
// Cached references to the DOM nodes that the functions below update.
// Container of the event stream rows (#events).
const eventsEl = document.getElementById("events");
// Container of the service status rows (#services).
const servicesEl = document.getElementById("services");
// Coloured dot and text label of the WebSocket connection indicator in the header.
const connStatus = document.getElementById("conn-status");
const connText = document.getElementById("conn-text");
/**
 * Format a timestamp as a 24-hour local wall-clock time (HH:MM:SS).
 *
 * @param {string} isoString - Any value the Date constructor can parse.
 * @returns {string} The formatted time, e.g. "14:03:27". An unparseable
 *   input yields the string "Invalid Date".
 */
function formatTime(isoString) {
  const d = new Date(isoString);
  // hourCycle "h23" pins hours to 00-23. With `hour12: false` the en-US
  // locale can resolve to the 1-24 cycle, which prints midnight as "24:xx:xx".
  return d.toLocaleTimeString("en-US", { hourCycle: "h23" });
}
/**
 * Re-render the Services panel.
 *
 * Rows are built with DOM APIs and text nodes instead of an HTML string, so
 * service names and status strings coming from the server are never
 * interpreted as markup.
 *
 * @param {Object<string, {status?: string}>} services - Map of service name
 *   to an info object. A missing map renders an empty panel; a missing
 *   status is displayed as "unknown" instead of throwing.
 */
function updateServices(services) {
  const rows = Object.entries(services || {}).map(([name, info]) => {
    const statusText = String(info?.status ?? "unknown");

    // "healthy" -> green dot, anything mentioning "error" -> red, else yellow.
    let dotClass = "unknown";
    if (statusText === "healthy") {
      dotClass = "healthy";
    } else if (statusText.includes("error")) {
      dotClass = "error";
    }

    const dot = document.createElement("span");
    dot.className = `status-dot ${dotClass}`;

    const label = document.createElement("span");
    label.append(dot, name); // string arguments are inserted as text nodes

    const value = document.createElement("span");
    value.className = "status-value";
    value.textContent = statusText;

    const row = document.createElement("div");
    row.className = "status-item";
    row.append(label, value);
    return row;
  });

  // Drop the previous rows (or the "Loading..." placeholder) before inserting.
  servicesEl.textContent = "";
  servicesEl.append(...rows);
}
/**
 * Write the gateway counters into the Statistics panel.
 *
 * @param {Object} [stats] - Counters object. Fields read: events_received,
 *   websocket_broadcasts, dashboard_connections, internals_connections,
 *   cached_machines. Any missing field, or a missing object, is shown as "0".
 */
function updateStats(stats) {
  // Tolerate a payload without a stats object instead of throwing.
  const s = stats || {};
  const setText = (id, text) => {
    document.getElementById(id).textContent = text;
  };

  setText("stat-events", s.events_received?.toLocaleString() || "0");
  setText("stat-ws", s.websocket_broadcasts?.toLocaleString() || "0");
  // "Clients" is the sum of dashboard and internals connections.
  setText(
    "stat-clients",
    ((s.dashboard_connections || 0) + (s.internals_connections || 0)).toString()
  );
  setText("stat-machines", s.cached_machines?.toString() || "0");
}
/**
 * Insert one event row at the top of the event stream and cap the list at
 * 100 rows.
 *
 * The row is built with DOM APIs and textContent, so event fields coming
 * over the WebSocket are never interpreted as markup.
 *
 * @param {Object} event - Event payload. Fields read: timestamp, topic,
 *   source, machine_id, metrics_count. A missing topic is rendered as an
 *   empty "metrics" badge instead of throwing.
 */
function addEvent(event) {
  const topic = String(event.topic ?? "");
  // Topics containing "alert" get the red badge; everything else the blue one.
  const topicClass = topic.includes("alert") ? "alerts" : "metrics";

  const cell = (className, text) => {
    const span = document.createElement("span");
    span.className = className;
    span.textContent = text;
    return span;
  };

  const row = document.createElement("div");
  row.className = "event-item";
  row.append(
    cell("event-time", formatTime(event.timestamp)),
    cell(`event-topic ${topicClass}`, topic),
    cell("event-source", event.source || "unknown"),
    cell("event-machine", event.machine_id || "-"),
    cell("event-count", `${event.metrics_count || 0} metrics`)
  );
  eventsEl.prepend(row);

  // Keep max 100 events in DOM. `children` counts elements only, so remove
  // the last *element* rather than lastChild, which may be a text node.
  while (eventsEl.children.length > 100) {
    eventsEl.lastElementChild.remove();
  }
}
/**
 * Open the internals WebSocket and keep the page in sync with it.
 *
 * - Chooses wss:// when the page is served over HTTPS; browsers block
 *   insecure ws:// connections from secure pages.
 * - "init" messages reset the statistics and the event list; "event"
 *   messages add a row and bump the Events counter.
 * - On close, the keep-alive timer is stopped and a reconnect is scheduled
 *   after 3 seconds.
 */
function connect() {
  const scheme = location.protocol === "https:" ? "wss" : "ws";
  const ws = new WebSocket(`${scheme}://${location.host}/ws/internals`);

  // Numeric copy of the Events counter. Tracking the number here avoids
  // re-parsing the locale-formatted label, whose grouping separator is not
  // "," in every locale.
  let eventCount = null;

  // Ping to keep alive. The timer belongs to this socket and is cleared in
  // onclose, so reconnects do not pile up intervals.
  const pingTimer = setInterval(() => {
    if (ws.readyState === WebSocket.OPEN) ws.send("ping");
  }, 30000);

  ws.onopen = () => {
    connStatus.className = "status-dot healthy";
    connText.textContent = "Connected";
  };

  ws.onclose = () => {
    clearInterval(pingTimer);
    connStatus.className = "status-dot error";
    connText.textContent = "Disconnected - Reconnecting...";
    setTimeout(connect, 3000);
  };

  ws.onmessage = (e) => {
    let msg;
    try {
      msg = JSON.parse(e.data);
    } catch {
      // Ignore frames that are not JSON instead of throwing in the handler.
      return;
    }
    if (!msg || !msg.data) return;

    if (msg.type === "init") {
      updateStats(msg.data.stats);
      eventCount = Number(msg.data.stats?.events_received) || 0;
      eventsEl.innerHTML = "";
      // reverse() so that repeated prepends leave the list in the order the
      // server sent it.
      (msg.data.recent_events || []).reverse().forEach(addEvent);
    } else if (msg.type === "event") {
      addEvent(msg.data);
      // Increment local counter
      const el = document.getElementById("stat-events");
      if (eventCount === null) {
        // No "init" seen yet: seed from the digits currently displayed.
        eventCount = parseInt(el.textContent.replace(/\D/g, ""), 10) || 0;
      }
      eventCount += 1;
      el.textContent = eventCount.toLocaleString();
    }
  };
}
/**
 * Fetch /api/internals and refresh the Services panel, optionally the
 * Statistics panel too.
 *
 * Failures (network error, non-2xx response, invalid JSON) are logged and
 * swallowed: the panels keep their last known content and the next poll
 * retries, instead of producing an unhandled promise rejection.
 *
 * @param {boolean} includeStats - Also call updateStats with the response.
 * @returns {Promise<void>} Settles once the refresh attempt has finished.
 */
function refreshInternals(includeStats) {
  return fetch("/api/internals")
    .then((r) => {
      if (!r.ok) {
        throw new Error(`/api/internals responded with HTTP ${r.status}`);
      }
      return r.json();
    })
    .then((data) => {
      updateServices(data.services);
      if (includeStats) updateStats(data.stats);
    })
    .catch((err) => {
      console.warn("Failed to refresh internals:", err);
    });
}
// Fetch initial service status
refreshInternals(true);
// Refresh service status periodically
setInterval(() => refreshInternals(false), 10000);
connect();
</script>
</body>
</html>