Files
sysmonstm/ctrl/collector/collector.py
2026-01-22 12:55:50 -03:00

137 lines
3.7 KiB
Python

#!/usr/bin/env python3
"""Lightweight WebSocket metrics collector for sysmonstm standalone deployment."""
import asyncio
import json
import logging
import os
import socket
import time
import psutil
# Runtime settings; each one can be overridden through the environment
# variable of the same name, otherwise the default shown here is used.
HUB_URL = os.getenv("HUB_URL", "ws://localhost:8080/ws")
MACHINE_ID = os.getenv("MACHINE_ID", socket.gethostname())
API_KEY = os.getenv("API_KEY", "")
INTERVAL = int(os.getenv("INTERVAL", "5"))
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# Root logging configuration. LOG_LEVEL is matched case-insensitively against
# the attributes of the logging module; an unknown name falls back to INFO.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
)
log = logging.getLogger("collector")
def collect_metrics() -> dict:
    """Collect a best-effort snapshot of system metrics using psutil.

    Each section (CPU, memory, disk, load average, connections, processes)
    is gathered independently. When a section fails, its keys are left out
    of the result and the reason is logged at DEBUG level, so one
    unavailable metric never prevents the others from being reported.

    Returns:
        A dict that always contains ``type`` (``"metrics"``), ``machine_id``,
        ``hostname`` and ``timestamp`` (``time.time()``), plus whichever of
        these could be read:

        * ``cpu`` -- ``psutil.cpu_percent(interval=None)``
        * ``memory``, ``memory_used_gb``, ``memory_total_gb``
        * ``disk``, ``disk_used_gb``, ``disk_total_gb`` (for ``"/"``)
        * ``load_1m``, ``load_5m``, ``load_15m``
        * ``connections`` -- number of ``inet`` connections
        * ``processes`` -- number of PIDs

        The ``*_gb`` values are the raw psutil figures divided by
        ``1024**3`` and rounded to two decimals.
    """
    gib = 1024**3
    metrics = {
        "type": "metrics",
        "machine_id": MACHINE_ID,
        "hostname": socket.gethostname(),
        "timestamp": time.time(),
    }

    # CPU (interval=None does not block; it needs an earlier call to
    # compare against, which the caller is expected to have made).
    try:
        metrics["cpu"] = psutil.cpu_percent(interval=None)
    except Exception as exc:
        log.debug("Skipping %s metrics: %s", "cpu", exc)

    # Memory
    try:
        mem = psutil.virtual_memory()
        metrics["memory"] = mem.percent
        metrics["memory_used_gb"] = round(mem.used / gib, 2)
        metrics["memory_total_gb"] = round(mem.total / gib, 2)
    except Exception as exc:
        log.debug("Skipping %s metrics: %s", "memory", exc)

    # Disk (root filesystem only)
    try:
        disk = psutil.disk_usage("/")
        metrics["disk"] = disk.percent
        metrics["disk_used_gb"] = round(disk.used / gib, 2)
        metrics["disk_total_gb"] = round(disk.total / gib, 2)
    except Exception as exc:
        log.debug("Skipping %s metrics: %s", "disk", exc)

    # Load average (Unix only)
    try:
        load1, load5, load15 = psutil.getloadavg()
        metrics["load_1m"] = round(load1, 2)
        metrics["load_5m"] = round(load5, 2)
        metrics["load_15m"] = round(load15, 2)
    except (AttributeError, OSError) as exc:
        log.debug("Skipping %s metrics: %s", "load average", exc)

    # Network connections count. Lack of privileges is the expected failure,
    # but any other error is treated the same way as in the sibling sections:
    # letting it escape would discard the whole sample, not just this key.
    try:
        metrics["connections"] = len(psutil.net_connections(kind="inet"))
    except Exception as exc:
        log.debug("Skipping %s metrics: %s", "connections", exc)

    # Process count
    try:
        metrics["processes"] = len(psutil.pids())
    except Exception as exc:
        log.debug("Skipping %s metrics: %s", "processes", exc)

    return metrics
async def run_collector():
    """Stream metrics to the hub over a WebSocket, reconnecting on failure.

    Connects to ``HUB_URL`` (with ``API_KEY`` appended as a percent-encoded
    ``key`` query parameter when one is configured) and sends one
    JSON-encoded ``collect_metrics()`` sample every ``INTERVAL`` seconds.
    Any error while connecting or sending is logged as a warning and
    followed by a 5-second pause before the next attempt.

    The coroutine only finishes when it is cancelled; cancellation is
    absorbed (logged as "Collector stopped") and the coroutine then returns
    normally, whether it was connected or waiting to reconnect at the time.
    """
    # Imported at call time, so importing this module does not require
    # the websockets package.
    from urllib.parse import quote

    import websockets

    reconnect_delay = 5  # seconds to wait after a failed connection

    # Build URL with API key if provided. The key is percent-encoded so that
    # reserved characters in it ("&", "#", "+", "=", spaces, ...) cannot
    # truncate the value or smuggle extra query parameters into the URL.
    url = HUB_URL
    if API_KEY:
        separator = "&" if "?" in url else "?"
        encoded_key = quote(API_KEY, safe="")
        url = f"{url}{separator}key={encoded_key}"

    # Prime CPU percent (first call always returns 0)
    psutil.cpu_percent(interval=None)

    try:
        while True:
            try:
                # HUB_URL is logged instead of url, which keeps the appended
                # key out of this message.
                log.info("Connecting to %s...", HUB_URL)
                async with websockets.connect(url) as ws:
                    log.info(
                        "Connected. Sending metrics every %ss as '%s'",
                        INTERVAL,
                        MACHINE_ID,
                    )
                    while True:
                        metrics = collect_metrics()
                        await ws.send(json.dumps(metrics))
                        log.debug(
                            "Sent: cpu=%s%% mem=%s%% disk=%s%%",
                            metrics.get("cpu", "?"),
                            metrics.get("memory", "?"),
                            metrics.get("disk", "?"),
                        )
                        await asyncio.sleep(INTERVAL)
            except asyncio.CancelledError:
                # Never treat cancellation as a connection error (on Python
                # versions where CancelledError derives from Exception the
                # clause below would otherwise catch it and reconnect).
                raise
            except Exception as e:
                log.warning(
                    "Connection error: %s. Reconnecting in %ss...",
                    e,
                    reconnect_delay,
                )
                await asyncio.sleep(reconnect_delay)
    except asyncio.CancelledError:
        # One handler around the whole loop, so cancellation during the
        # reconnect pause is handled exactly like cancellation while connected.
        log.info("Collector stopped")
def main():
    """Log the start-up banner, then run the collector until interrupted.

    A ``KeyboardInterrupt`` raised out of ``asyncio.run`` is caught and
    reported with a final "Stopped" log line instead of a traceback.
    """
    banner = (
        "sysmonstm collector starting",
        f" Hub: {HUB_URL}",
        f" Machine: {MACHINE_ID}",
        f" Interval: {INTERVAL}s",
    )
    for line in banner:
        log.info(line)

    try:
        asyncio.run(run_collector())
    except KeyboardInterrupt:
        log.info("Stopped")
# Start the collector only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()