483 lines
16 KiB
Python
483 lines
16 KiB
Python
"""Minimal sysmonstm gateway - standalone mode without dependencies."""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
from datetime import datetime
|
|
|
|
from fastapi import FastAPI, Query, WebSocket, WebSocketDisconnect
|
|
from fastapi.responses import HTMLResponse
|
|
|
|
# --- Runtime configuration (read once from the environment at import) ---
# An empty API_KEY disables the key check on metric submissions.
API_KEY = os.getenv("API_KEY", "")
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# --- Logging ---
# Level names that are not attributes of the logging module fall back to INFO.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
)
log = logging.getLogger("gateway")

# --- Application and shared in-memory state ---
app = FastAPI(title="sysmonstm")

# Every WebSocket accepted by the /ws endpoint and not yet closed.
connections: list[WebSocket] = []
# Most recent metrics record per machine, keyed by machine ID.
machines: dict = {}
|
|
|
|
# Single-page dashboard served at "/". The embedded script opens a WebSocket to
# /ws, keeps the latest record per machine, and re-renders the grid on every
# update. All collector-supplied text is HTML-escaped and all metric values are
# coerced to finite numbers before being written into the page.
HTML = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>System Monitor Dashboard</title>
    <style>
        :root {
            --bg-primary: #1a1a2e;
            --bg-secondary: #16213e;
            --bg-card: #0f3460;
            --text-primary: #eee;
            --text-secondary: #a0a0a0;
            --accent: #e94560;
            --success: #4ade80;
            --warning: #fbbf24;
            --danger: #ef4444;
            --border: #2a2a4a;
        }

        * { box-sizing: border-box; margin: 0; padding: 0; }

        body {
            font-family: system-ui, -apple-system, sans-serif;
            background: var(--bg-primary);
            color: var(--text-primary);
            min-height: 100vh;
        }

        header {
            background: var(--bg-secondary);
            padding: 1rem 2rem;
            border-bottom: 2px solid var(--accent);
            display: flex;
            justify-content: space-between;
            align-items: center;
        }

        header h1 { font-size: 1.5rem; }

        .status {
            display: flex;
            align-items: center;
            gap: 0.5rem;
            font-size: 0.875rem;
        }

        .status-dot {
            width: 10px;
            height: 10px;
            border-radius: 50%;
            background: var(--danger);
        }

        .status-dot.connected { background: var(--success); }

        main {
            padding: 1.5rem;
            max-width: 1600px;
            margin: 0 auto;
        }

        .machines-grid {
            display: grid;
            grid-template-columns: repeat(auto-fill, minmax(400px, 1fr));
            gap: 1.5rem;
        }

        .machine-card {
            background: var(--bg-secondary);
            border-radius: 8px;
            padding: 1.25rem;
            border: 1px solid var(--border);
        }

        .machine-header {
            display: flex;
            justify-content: space-between;
            align-items: center;
            margin-bottom: 1rem;
            padding-bottom: 0.75rem;
            border-bottom: 1px solid var(--border);
        }

        .machine-name {
            font-weight: 600;
            color: var(--accent);
        }

        .machine-id {
            font-size: 0.75rem;
            color: var(--text-secondary);
        }

        .machine-status {
            font-size: 0.75rem;
            padding: 0.25rem 0.5rem;
            border-radius: 4px;
            background: var(--success);
            color: #000;
        }

        .machine-status.warning { background: var(--warning); }
        .machine-status.critical { background: var(--danger); color: #fff; }

        .metrics-grid {
            display: grid;
            grid-template-columns: repeat(2, 1fr);
            gap: 0.75rem;
        }

        .metric {
            background: var(--bg-card);
            padding: 0.75rem;
            border-radius: 6px;
        }

        .metric-label {
            font-size: 0.75rem;
            color: var(--text-secondary);
            margin-bottom: 0.25rem;
        }

        .metric-value {
            font-size: 1.5rem;
            font-weight: 600;
        }

        .metric-bar {
            height: 4px;
            background: var(--border);
            border-radius: 2px;
            margin-top: 0.5rem;
            overflow: hidden;
        }

        .metric-bar-fill {
            height: 100%;
            background: var(--success);
            transition: width 0.3s ease;
        }

        .metric-bar-fill.warning { background: var(--warning); }
        .metric-bar-fill.critical { background: var(--danger); }

        .last-seen {
            font-size: 0.75rem;
            color: var(--text-secondary);
            margin-top: 1rem;
            text-align: right;
        }

        .no-machines {
            text-align: center;
            padding: 3rem;
            color: var(--text-secondary);
        }

        .no-machines h2 {
            color: var(--text-primary);
            margin-bottom: 0.5rem;
        }

        @media (max-width: 600px) {
            .machines-grid { grid-template-columns: 1fr; }
            .metrics-grid { grid-template-columns: 1fr; }
        }
    </style>
</head>
<body>
    <header>
        <h1>System Monitor</h1>
        <div class="status">
            <span class="status-dot" id="status-dot"></span>
            <span id="status-text">Connecting...</span>
        </div>
    </header>

    <main>
        <div class="machines-grid" id="machines-grid">
            <div class="no-machines">
                <h2>No machines connected</h2>
                <p>Waiting for collectors to send metrics...</p>
            </div>
        </div>
    </main>

    <script>
        const machinesGrid = document.getElementById('machines-grid');
        const statusDot = document.getElementById('status-dot');
        const statusText = document.getElementById('status-text');

        const machines = new Map();

        // Escape text before it is interpolated into markup or attribute
        // values. Machine IDs and hostnames come from collectors and must be
        // treated as untrusted input.
        const HTML_ESCAPES = {'&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'};

        function escapeHtml(value) {
            return String(value == null ? '' : value).replace(/[&<>"']/g, (ch) => HTML_ESCAPES[ch]);
        }

        // Coerce a metric to a finite number (0 when missing or malformed) so
        // that one bad record cannot make rendering throw.
        function toNumber(value) {
            const n = Number(value);
            return Number.isFinite(n) ? n : 0;
        }

        // Clamp a percentage to the 0-100 range used for bar widths.
        function barWidth(percent) {
            return Math.min(100, Math.max(0, percent));
        }

        function formatBytes(bytes) {
            const value = toNumber(bytes);
            if (value <= 0) return '0 B';
            const k = 1024;
            const sizes = ['B', 'KB', 'MB', 'GB', 'TB'];
            // Clamp the unit index: values below 1 would give a negative index
            // and values beyond the TB range would run past the end of the list.
            const raw = Math.floor(Math.log(value) / Math.log(k));
            const i = Math.min(sizes.length - 1, Math.max(0, raw));
            return parseFloat((value / Math.pow(k, i)).toFixed(1)) + ' ' + sizes[i];
        }

        function formatRate(bytesPerSec) {
            return formatBytes(bytesPerSec) + '/s';
        }

        function getBarClass(value, warning = 80, critical = 95) {
            if (value >= critical) return 'critical';
            if (value >= warning) return 'warning';
            return '';
        }

        function getStatusClass(m) {
            const cpu = toNumber(m.cpu_percent);
            const mem = toNumber(m.memory_percent);
            const disk = toNumber(m.disk_percent);

            if (cpu > 95 || mem > 95 || disk > 90) return 'critical';
            if (cpu > 80 || mem > 85 || disk > 80) return 'warning';
            return '';
        }

        function timeSince(timestamp) {
            if (!timestamp) return '-';
            const millis = new Date(timestamp).getTime();
            // An unparseable timestamp would otherwise render as "NaNh ago".
            if (Number.isNaN(millis)) return '-';
            const seconds = Math.floor((Date.now() - millis) / 1000);
            if (seconds < 5) return 'just now';
            if (seconds < 60) return seconds + 's ago';
            const minutes = Math.floor(seconds / 60);
            if (minutes < 60) return minutes + 'm ago';
            return Math.floor(minutes / 60) + 'h ago';
        }

        function renderMachine(data) {
            const statusClass = getStatusClass(data);
            const machineId = escapeHtml(data.machine_id);
            const displayName = escapeHtml(data.hostname || data.machine_id);
            const cpu = toNumber(data.cpu_percent);
            const mem = toNumber(data.memory_percent);
            const disk = toNumber(data.disk_percent);

            return `
                <div class="machine-card" data-machine="${machineId}">
                    <div class="machine-header">
                        <div>
                            <div class="machine-name">${displayName}</div>
                            <div class="machine-id">${machineId}</div>
                        </div>
                        <span class="machine-status ${statusClass}">${statusClass || 'healthy'}</span>
                    </div>
                    <div class="metrics-grid">
                        <div class="metric">
                            <div class="metric-label">CPU</div>
                            <div class="metric-value">${cpu.toFixed(1)}%</div>
                            <div class="metric-bar">
                                <div class="metric-bar-fill ${getBarClass(cpu)}"
                                     style="width: ${barWidth(cpu)}%"></div>
                            </div>
                        </div>
                        <div class="metric">
                            <div class="metric-label">Memory</div>
                            <div class="metric-value">${mem.toFixed(1)}%</div>
                            <div class="metric-bar">
                                <div class="metric-bar-fill ${getBarClass(mem, 85, 95)}"
                                     style="width: ${barWidth(mem)}%"></div>
                            </div>
                        </div>
                        <div class="metric">
                            <div class="metric-label">Disk</div>
                            <div class="metric-value">${disk.toFixed(1)}%</div>
                            <div class="metric-bar">
                                <div class="metric-bar-fill ${getBarClass(disk, 80, 90)}"
                                     style="width: ${barWidth(disk)}%"></div>
                            </div>
                        </div>
                        <div class="metric">
                            <div class="metric-label">Load (1m)</div>
                            <div class="metric-value">${toNumber(data.load_avg_1m).toFixed(2)}</div>
                        </div>
                        <div class="metric">
                            <div class="metric-label">Network In</div>
                            <div class="metric-value">${formatRate(data.network_recv_bytes_sec)}</div>
                        </div>
                        <div class="metric">
                            <div class="metric-label">Network Out</div>
                            <div class="metric-value">${formatRate(data.network_sent_bytes_sec)}</div>
                        </div>
                    </div>
                    <div class="last-seen">Last seen: ${escapeHtml(timeSince(data.timestamp))}</div>
                </div>
            `;
        }

        function updateUI() {
            if (machines.size === 0) {
                machinesGrid.innerHTML = `
                    <div class="no-machines">
                        <h2>No machines connected</h2>
                        <p>Waiting for collectors to send metrics...</p>
                    </div>
                `;
                return;
            }

            machinesGrid.innerHTML = Array.from(machines.values())
                .map(renderMachine)
                .join('');
        }

        function connect() {
            const protocol = location.protocol === 'https:' ? 'wss:' : 'ws:';
            const ws = new WebSocket(`${protocol}//${location.host}/ws`);

            // Keep-alive for this socket. It is sent as JSON because the
            // gateway parses every incoming frame as JSON. The timer is
            // cleared in onclose so reconnects do not pile up timers.
            const pingTimer = setInterval(() => {
                if (ws.readyState === WebSocket.OPEN) {
                    ws.send(JSON.stringify({type: 'ping'}));
                }
            }, 30000);

            ws.onopen = () => {
                statusDot.classList.add('connected');
                statusText.textContent = 'Connected';
            };

            ws.onclose = () => {
                clearInterval(pingTimer);
                statusDot.classList.remove('connected');
                statusText.textContent = 'Disconnected - Reconnecting...';
                setTimeout(connect, 3000);
            };

            ws.onerror = () => {
                statusDot.classList.remove('connected');
                statusText.textContent = 'Connection error';
            };

            ws.onmessage = (event) => {
                try {
                    const msg = JSON.parse(event.data);
                    if (msg.type === 'metrics' || msg.type === 'initial') {
                        machines.set(msg.machine_id, msg);
                        updateUI();
                    }
                } catch (e) {
                    console.error('Failed to parse message:', e);
                }
            };
        }

        // Periodic re-render keeps the relative "Last seen" labels current.
        setInterval(updateUI, 5000);
        connect();
    </script>
</body>
</html>
"""
|
|
|
|
|
|
@app.get("/", response_class=HTMLResponse)
async def index():
    """Serve the dashboard page (the module-level ``HTML`` document)."""
    dashboard_page = HTML
    return dashboard_page
|
|
|
|
|
|
@app.get("/health")
async def health():
    """Health probe: a fixed ``ok`` status plus the number of stored machines."""
    machine_count = len(machines)
    return dict(status="ok", machines=machine_count)
|
|
|
|
|
|
@app.get("/api/machines")
async def get_machines():
    """Return the latest stored metrics record of every machine, keyed by machine ID."""
    latest_by_machine = machines
    return latest_by_machine
|
|
|
|
|
|
def _store_metrics(data: dict) -> dict:
    """Normalize a ``metrics`` message, store it in ``machines``, and return the record.

    Two wire formats are handled:

    1. Direct: ``{"type": "metrics", "machine_id": "...", "cpu": ...}``.
       The message itself becomes the stored record.
    2. Nested (from gateway forwarding):
       ``{"type": "metrics", "data": {...}, "timestamp": "..."}``.
       The entries of ``data["metrics"]`` are flattened into the record under
       lower-cased keys for dashboard display.
    """
    payload = data.get("data")
    if isinstance(payload, dict):
        machine_id = payload.get("machine_id", "unknown")
        record = {
            "type": "metrics",
            "machine_id": machine_id,
            "hostname": payload.get("hostname", ""),
            "timestamp": data.get("timestamp"),
        }
        metrics = payload.get("metrics", {})
        # A malformed "metrics" field is ignored instead of raising and
        # tearing down the sender's connection.
        if isinstance(metrics, dict):
            for metric_name, value in metrics.items():
                record[metric_name.lower()] = value
        machines[machine_id] = record
        log.debug(f"Metrics (forwarded) from {machine_id}")
        return record

    # Direct format. setdefault makes sure the broadcast record carries the
    # same machine_id it is stored under.
    machine_id = data.setdefault("machine_id", "unknown")
    machines[machine_id] = data
    log.debug(f"Metrics from {machine_id}: cpu={data.get('cpu')}%")
    return data


async def _broadcast(message: dict) -> None:
    """Send *message* to every registered connection on a best-effort basis."""
    # Iterate over a snapshot: each await yields to the event loop, and other
    # handlers may add to or remove from ``connections`` in the meantime.
    for conn in list(connections):
        try:
            await conn.send_json(message)
        except Exception as exc:
            # One failing peer must not stop delivery to the others; its own
            # handler removes it from ``connections`` when it exits.
            log.debug(f"Broadcast to a connection failed: {exc}")


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket, key: str = Query(default="")):
    """Shared WebSocket endpoint for dashboards (receive) and collectors (send).

    Every connection is accepted and registered, then sent the currently
    stored record of each machine. After that the handler loops:

    * A ``metrics`` message is checked against ``API_KEY`` (when one is
      configured), stored, and broadcast to all connections. A wrong key
      closes the socket with code 4001.
    * Any other frame is ignored. This includes frames that are not valid
      JSON (for example a bare ``ping`` keep-alive) and JSON values that are
      not objects.
    * If nothing arrives for 30 seconds, a ``{"type": "ping"}`` message is
      sent to keep the connection alive.

    Args:
        websocket: The incoming connection.
        key: API key taken from the ``key`` query parameter. It is only
            checked when metrics are submitted, so browsers can connect
            without one.
    """
    await websocket.accept()
    connections.append(websocket)
    client = websocket.client.host if websocket.client else "unknown"
    log.info(f"WebSocket connected: {client}")

    try:
        # Send current state to new connection
        for machine_id, data in machines.items():
            await websocket.send_json(
                {"type": "metrics", "machine_id": machine_id, **data}
            )

        # Main loop
        while True:
            try:
                raw = await asyncio.wait_for(websocket.receive_text(), timeout=30)
            except asyncio.TimeoutError:
                # Send ping to keep connection alive
                await websocket.send_json({"type": "ping"})
                continue

            try:
                data = json.loads(raw)
            except json.JSONDecodeError:
                # Non-JSON frames are dropped; letting the error propagate
                # would close the connection.
                log.debug(f"Ignoring non-JSON message from {client}")
                continue

            if not isinstance(data, dict) or data.get("type") != "metrics":
                continue

            # Validate API key for metric submissions
            if API_KEY and key != API_KEY:
                log.warning(f"Invalid API key from {client}")
                await websocket.close(code=4001, reason="Invalid API key")
                return

            await _broadcast(_store_metrics(data))

    except WebSocketDisconnect:
        log.info(f"WebSocket disconnected: {client}")
    except Exception as e:
        log.error(f"WebSocket error: {e}")
    finally:
        if websocket in connections:
            connections.remove(websocket)
|
|
|
|
|
|
if __name__ == "__main__":
    # Imported here so the module can be loaded without uvicorn installed
    # when it is served some other way.
    import uvicorn

    key_state = "configured" if API_KEY else "not set (open)"
    log.info("Starting sysmonstm gateway")
    log.info(f" API key: {key_state}")
    # Listen on all interfaces, port 8080.
    uvicorn.run(app, host="0.0.0.0", port=8080)
|