Files
sysmonstm/ctrl/edge/edge.py
buenosairesam 3106bc835e
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
Update edge: match full stack dashboard style, fix metric names
2026-01-26 20:58:46 -03:00

483 lines
16 KiB
Python

"""Minimal sysmonstm gateway - standalone mode without dependencies."""
import asyncio
import json
import logging
import os
from datetime import datetime
from fastapi import FastAPI, Query, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
# Configuration
# API_KEY is the shared secret compared against the `key` query parameter of
# /ws when a client submits metrics. It defaults to the empty string, in which
# case the check is skipped and any client may submit metrics.
API_KEY = os.environ.get("API_KEY", "")
# LOG_LEVEL is a logging level name (e.g. "DEBUG"); it is upper-cased below.
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")
# Logging setup
# getattr() falls back to INFO when LOG_LEVEL does not name an attribute of
# the logging module.
logging.basicConfig(
level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
format="%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)
log = logging.getLogger("gateway")
app = FastAPI(title="sysmonstm")
# Store connected websockets
# Every socket accepted on /ws is appended here and removed when its handler
# exits; metric broadcasts are sent to every entry.
connections: list[WebSocket] = []
# Store latest metrics from collectors
# Keyed by machine_id; each value is the most recent metrics dict received for
# that machine (older values are overwritten, no history is kept).
machines: dict = {}
# Single-page dashboard served at "/". The embedded script opens a WebSocket
# to /ws, keeps the latest message per machine_id in a Map, and re-renders the
# card grid on every metrics message and every 5 seconds.
#
# Notes on the embedded JavaScript:
# - Collector-supplied text (machine_id, hostname) is HTML-escaped and numeric
#   fields are coerced with toNumber() before being written via innerHTML, so
#   a collector cannot inject markup or break rendering with a string value.
# - The keepalive timer is created per socket and cleared in onclose, so
#   reconnects do not accumulate timers.
# - The keepalive payload is JSON ({"type": "ping"}) because the server parses
#   every incoming text frame with json.loads.
HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>System Monitor Dashboard</title>
<style>
:root {
--bg-primary: #1a1a2e;
--bg-secondary: #16213e;
--bg-card: #0f3460;
--text-primary: #eee;
--text-secondary: #a0a0a0;
--accent: #e94560;
--success: #4ade80;
--warning: #fbbf24;
--danger: #ef4444;
--border: #2a2a4a;
}
* { box-sizing: border-box; margin: 0; padding: 0; }
body {
font-family: system-ui, -apple-system, sans-serif;
background: var(--bg-primary);
color: var(--text-primary);
min-height: 100vh;
}
header {
background: var(--bg-secondary);
padding: 1rem 2rem;
border-bottom: 2px solid var(--accent);
display: flex;
justify-content: space-between;
align-items: center;
}
header h1 { font-size: 1.5rem; }
.status {
display: flex;
align-items: center;
gap: 0.5rem;
font-size: 0.875rem;
}
.status-dot {
width: 10px;
height: 10px;
border-radius: 50%;
background: var(--danger);
}
.status-dot.connected { background: var(--success); }
main {
padding: 1.5rem;
max-width: 1600px;
margin: 0 auto;
}
.machines-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(400px, 1fr));
gap: 1.5rem;
}
.machine-card {
background: var(--bg-secondary);
border-radius: 8px;
padding: 1.25rem;
border: 1px solid var(--border);
}
.machine-header {
display: flex;
justify-content: space-between;
align-items: center;
margin-bottom: 1rem;
padding-bottom: 0.75rem;
border-bottom: 1px solid var(--border);
}
.machine-name {
font-weight: 600;
color: var(--accent);
}
.machine-id {
font-size: 0.75rem;
color: var(--text-secondary);
}
.machine-status {
font-size: 0.75rem;
padding: 0.25rem 0.5rem;
border-radius: 4px;
background: var(--success);
color: #000;
}
.machine-status.warning { background: var(--warning); }
.machine-status.critical { background: var(--danger); color: #fff; }
.metrics-grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 0.75rem;
}
.metric {
background: var(--bg-card);
padding: 0.75rem;
border-radius: 6px;
}
.metric-label {
font-size: 0.75rem;
color: var(--text-secondary);
margin-bottom: 0.25rem;
}
.metric-value {
font-size: 1.5rem;
font-weight: 600;
}
.metric-bar {
height: 4px;
background: var(--border);
border-radius: 2px;
margin-top: 0.5rem;
overflow: hidden;
}
.metric-bar-fill {
height: 100%;
background: var(--success);
transition: width 0.3s ease;
}
.metric-bar-fill.warning { background: var(--warning); }
.metric-bar-fill.critical { background: var(--danger); }
.last-seen {
font-size: 0.75rem;
color: var(--text-secondary);
margin-top: 1rem;
text-align: right;
}
.no-machines {
text-align: center;
padding: 3rem;
color: var(--text-secondary);
}
.no-machines h2 {
color: var(--text-primary);
margin-bottom: 0.5rem;
}
@media (max-width: 600px) {
.machines-grid { grid-template-columns: 1fr; }
.metrics-grid { grid-template-columns: 1fr; }
}
</style>
</head>
<body>
<header>
<h1>System Monitor</h1>
<div class="status">
<span class="status-dot" id="status-dot"></span>
<span id="status-text">Connecting...</span>
</div>
</header>
<main>
<div class="machines-grid" id="machines-grid">
<div class="no-machines">
<h2>No machines connected</h2>
<p>Waiting for collectors to send metrics...</p>
</div>
</div>
</main>
<script>
const machinesGrid = document.getElementById('machines-grid');
const statusDot = document.getElementById('status-dot');
const statusText = document.getElementById('status-text');
const machines = new Map();
// Escape text that originates from collectors before it goes into innerHTML.
function escapeHtml(value) {
const entities = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};
return String(value).replace(/[&<>"']/g, (ch) => entities[ch]);
}
// Coerce a metric field to a finite number; anything else counts as 0.
function toNumber(value) {
const n = Number(value);
return Number.isFinite(n) ? n : 0;
}
function formatBytes(bytes) {
const value = Number(bytes);
if (!Number.isFinite(value) || value <= 0) return '0 B';
const k = 1024;
const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
// Clamp the unit index: values below 1 would give -1, values above TB would overflow.
const i = Math.min(sizes.length - 1, Math.max(0, Math.floor(Math.log(value) / Math.log(k))));
return parseFloat((value / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
}
function formatRate(bytesPerSec) {
return formatBytes(bytesPerSec) + '/s';
}
function getBarClass(value, warning = 80, critical = 95) {
if (value >= critical) return 'critical';
if (value >= warning) return 'warning';
return '';
}
function getStatusClass(m) {
const cpu = toNumber(m.cpu_percent);
const mem = toNumber(m.memory_percent);
const disk = toNumber(m.disk_percent);
if (cpu > 95 || mem > 95 || disk > 90) return 'critical';
if (cpu > 80 || mem > 85 || disk > 80) return 'warning';
return '';
}
function timeSince(timestamp) {
if (!timestamp) return '-';
const seconds = Math.floor((Date.now() - new Date(timestamp).getTime()) / 1000);
// An unparseable timestamp yields NaN; show a placeholder instead of "NaNh ago".
if (Number.isNaN(seconds)) return '-';
if (seconds < 5) return 'just now';
if (seconds < 60) return seconds + 's ago';
const minutes = Math.floor(seconds / 60);
if (minutes < 60) return minutes + 'm ago';
return Math.floor(minutes / 60) + 'h ago';
}
function renderMachine(data) {
const m = data;
const statusClass = getStatusClass(m);
const cpu = toNumber(m.cpu_percent);
const mem = toNumber(m.memory_percent);
const disk = toNumber(m.disk_percent);
const load = toNumber(m.load_avg_1m);
return `
<div class="machine-card" data-machine="${escapeHtml(data.machine_id)}">
<div class="machine-header">
<div>
<div class="machine-name">${escapeHtml(data.hostname || data.machine_id)}</div>
<div class="machine-id">${escapeHtml(data.machine_id)}</div>
</div>
<span class="machine-status ${statusClass}">${statusClass || 'healthy'}</span>
</div>
<div class="metrics-grid">
<div class="metric">
<div class="metric-label">CPU</div>
<div class="metric-value">${cpu.toFixed(1)}%</div>
<div class="metric-bar">
<div class="metric-bar-fill ${getBarClass(cpu)}"
style="width: ${cpu}%"></div>
</div>
</div>
<div class="metric">
<div class="metric-label">Memory</div>
<div class="metric-value">${mem.toFixed(1)}%</div>
<div class="metric-bar">
<div class="metric-bar-fill ${getBarClass(mem, 85, 95)}"
style="width: ${mem}%"></div>
</div>
</div>
<div class="metric">
<div class="metric-label">Disk</div>
<div class="metric-value">${disk.toFixed(1)}%</div>
<div class="metric-bar">
<div class="metric-bar-fill ${getBarClass(disk, 80, 90)}"
style="width: ${disk}%"></div>
</div>
</div>
<div class="metric">
<div class="metric-label">Load (1m)</div>
<div class="metric-value">${load.toFixed(2)}</div>
</div>
<div class="metric">
<div class="metric-label">Network In</div>
<div class="metric-value">${formatRate(m.network_recv_bytes_sec || 0)}</div>
</div>
<div class="metric">
<div class="metric-label">Network Out</div>
<div class="metric-value">${formatRate(m.network_sent_bytes_sec || 0)}</div>
</div>
</div>
<div class="last-seen">Last seen: ${timeSince(m.timestamp)}</div>
</div>
`;
}
function updateUI() {
if (machines.size === 0) {
machinesGrid.innerHTML = `
<div class="no-machines">
<h2>No machines connected</h2>
<p>Waiting for collectors to send metrics...</p>
</div>
`;
return;
}
machinesGrid.innerHTML = Array.from(machines.values())
.map(renderMachine)
.join('');
}
function connect() {
const protocol = location.protocol === 'https:' ? 'wss:' : 'ws:';
const ws = new WebSocket(`${protocol}//${location.host}/ws`);
// One keepalive timer per socket; it is cleared in onclose so reconnects do not pile up timers.
const keepAlive = setInterval(() => {
if (ws.readyState === WebSocket.OPEN) {
// The server parses every text frame as JSON, so the ping must be JSON too.
ws.send(JSON.stringify({type: 'ping'}));
}
}, 30000);
ws.onopen = () => {
statusDot.classList.add('connected');
statusText.textContent = 'Connected';
};
ws.onclose = () => {
clearInterval(keepAlive);
statusDot.classList.remove('connected');
statusText.textContent = 'Disconnected - Reconnecting...';
setTimeout(connect, 3000);
};
ws.onerror = () => {
statusDot.classList.remove('connected');
statusText.textContent = 'Connection error';
};
ws.onmessage = (event) => {
try {
const msg = JSON.parse(event.data);
if (msg.type === 'metrics' || msg.type === 'initial') {
machines.set(msg.machine_id, msg);
updateUI();
}
} catch (e) {
console.error('Failed to parse message:', e);
}
};
}
setInterval(updateUI, 5000);
connect();
</script>
</body>
</html>
"""
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the embedded dashboard page stored in the module-level HTML string."""
    page = HTML
    return page
@app.get("/health")
async def health():
    """Report liveness together with the number of machines seen so far."""
    machine_count = len(machines)
    report = {"status": "ok"}
    report["machines"] = machine_count
    return report
@app.get("/api/machines")
async def get_machines():
    """Return the latest stored metrics for every machine, keyed by machine id."""
    latest_by_machine = machines
    return latest_by_machine
def _normalize_metrics(message: dict) -> tuple:
    """Return ``(machine_id, record)`` for a metrics message in either wire format.

    Two formats are accepted:

    1. Direct:  ``{"type": "metrics", "machine_id": "...", ...}`` - stored as is.
    2. Nested:  ``{"type": "metrics", "data": {...}, "timestamp": "..."}`` -
       ``data["metrics"]`` is flattened into the record with lower-cased keys.
    """
    payload = message.get("data")
    if isinstance(payload, dict):
        machine_id = payload.get("machine_id", "unknown")
        record = {
            "type": "metrics",
            "machine_id": machine_id,
            "hostname": payload.get("hostname", ""),
            "timestamp": message.get("timestamp"),
        }
        metrics = payload.get("metrics")
        # A missing or malformed "metrics" value is treated as "no metrics"
        # instead of raising and tearing down the sender's connection.
        if isinstance(metrics, dict):
            for key_name, value in metrics.items():
                record[key_name.lower()] = value
        log.debug(f"Metrics (forwarded) from {machine_id}")
        return machine_id, record

    machine_id = message.get("machine_id", "unknown")
    # The dashboard reads "cpu_percent"; fall back to the older "cpu" key.
    cpu = message.get("cpu_percent", message.get("cpu"))
    log.debug(f"Metrics from {machine_id}: cpu={cpu}%")
    return machine_id, message


async def _broadcast(message: dict) -> None:
    """Send ``message`` to every connected socket, dropping sockets that fail."""
    # Iterate over a snapshot: other handlers append to / remove from
    # `connections` while this coroutine is suspended in send_json().
    for conn in list(connections):
        try:
            await conn.send_json(message)
        except Exception as exc:
            # Best effort: one broken peer must not stop the broadcast. Its
            # own handler also removes it, hence the membership check.
            log.debug(f"Dropping unreachable WebSocket: {exc}")
            if conn in connections:
                connections.remove(conn)


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket, key: str = Query(default="")):
    """Serve one WebSocket client, either a dashboard browser or a collector.

    Every client is accepted and immediately receives the latest stored metrics
    of each machine. Incoming "metrics" messages are stored in ``machines`` and
    broadcast to all connected sockets (the sender included). When no message
    arrives within 30 seconds a ``{"type": "ping"}`` message is sent.

    Args:
        websocket: The client connection.
        key: API key from the ``key`` query parameter. It is only checked when
            the client submits metrics, so browsers can connect without one.
            On mismatch (and a non-empty ``API_KEY``) the socket is closed
            with code 4001.
    """
    await websocket.accept()
    connections.append(websocket)
    client = websocket.client.host if websocket.client else "unknown"
    log.info(f"WebSocket connected: {client}")
    try:
        # Send current state to the new connection. Snapshot the items first:
        # another handler may insert a machine while we await, and mutating a
        # dict during iteration raises RuntimeError.
        for machine_id, data in list(machines.items()):
            await websocket.send_json(
                {"type": "metrics", "machine_id": machine_id, **data}
            )

        # Main loop
        while True:
            try:
                msg = await asyncio.wait_for(websocket.receive_text(), timeout=30)
            except asyncio.TimeoutError:
                # Send ping to keep connection alive
                await websocket.send_json({"type": "ping"})
                continue

            try:
                data = json.loads(msg)
            except json.JSONDecodeError:
                # E.g. a plain-text "ping" keepalive; not a reason to drop
                # the connection.
                log.debug(f"Ignoring non-JSON message from {client}")
                continue

            if not isinstance(data, dict) or data.get("type") != "metrics":
                continue

            # Validate API key for metric submissions
            if API_KEY and key != API_KEY:
                log.warning(f"Invalid API key from {client}")
                await websocket.close(code=4001, reason="Invalid API key")
                return

            machine_id, record = _normalize_metrics(data)
            machines[machine_id] = record
            # Broadcast to all connected clients
            await _broadcast(record)
    except WebSocketDisconnect:
        log.info(f"WebSocket disconnected: {client}")
    except Exception as e:
        log.error(f"WebSocket error: {e}")
    finally:
        if websocket in connections:
            connections.remove(websocket)
# Script entry point: run the gateway with uvicorn on all interfaces, port 8080.
if __name__ == "__main__":
    import uvicorn

    key_state = "configured" if API_KEY else "not set (open)"
    log.info("Starting sysmonstm gateway")
    log.info(f" API key: {key_state}")
    uvicorn.run(app, host="0.0.0.0", port=8080)