137 lines
3.7 KiB
Python
137 lines
3.7 KiB
Python
#!/usr/bin/env python3
|
|
"""Lightweight WebSocket metrics collector for sysmonstm standalone deployment."""
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import os
|
|
import socket
|
|
import time
|
|
|
|
import psutil
|
|
|
|
# Configuration from environment
_DEFAULT_INTERVAL = 5  # seconds between samples when INTERVAL is unset or unusable

HUB_URL = os.environ.get("HUB_URL", "ws://localhost:8080/ws")
MACHINE_ID = os.environ.get("MACHINE_ID", socket.gethostname())
API_KEY = os.environ.get("API_KEY", "")
LOG_LEVEL = os.environ.get("LOG_LEVEL", "INFO")

# Logging setup
# Only a real numeric level is passed on: an unknown LOG_LEVEL (or one that
# happens to name a non-level attribute of the logging module) becomes INFO.
_log_level = getattr(logging, LOG_LEVEL.upper(), None)
if not isinstance(_log_level, int):
    _log_level = logging.INFO

logging.basicConfig(
    level=_log_level,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
log = logging.getLogger("collector")


def _parse_interval(raw, default=_DEFAULT_INTERVAL):
    """Return *raw* as a positive whole number of seconds, else *default*.

    A non-numeric value would otherwise abort the import with a bare
    ValueError, and a zero or negative value would make the send loop spin
    without pausing, so both cases fall back to *default* with a warning.
    """
    try:
        seconds = int(raw)
    except (TypeError, ValueError):
        log.warning("Invalid INTERVAL %r; falling back to %ss", raw, default)
        return default
    if seconds <= 0:
        log.warning("INTERVAL must be positive, got %r; falling back to %ss", raw, default)
        return default
    return seconds


# Parsed after logging is configured so a bad value can be reported.
INTERVAL = _parse_interval(os.environ.get("INTERVAL", str(_DEFAULT_INTERVAL)))
|
|
|
|
|
|
def collect_metrics() -> dict:
    """Collect a snapshot of system metrics using psutil.

    Every probe is best-effort: when one fails, its keys are simply left out
    of the result and the reason is logged at DEBUG level, so one unavailable
    metric never prevents the rest of the snapshot from being reported.

    Returns:
        A dict that always holds ``type``, ``machine_id``, ``hostname`` and
        ``timestamp`` (the value of ``time.time()``), plus whichever of the
        following the probes produced: ``cpu``, ``memory``,
        ``memory_used_gb``, ``memory_total_gb``, ``disk``, ``disk_used_gb``,
        ``disk_total_gb``, ``load_1m``, ``load_5m``, ``load_15m``,
        ``connections`` and ``processes``.
    """
    # The *_gb values are byte counts divided by 1024**3, rounded to 2 places.
    gib = 1024**3

    metrics = {
        "type": "metrics",
        "machine_id": MACHINE_ID,
        "hostname": socket.gethostname(),
        "timestamp": time.time(),
    }

    # CPU
    try:
        metrics["cpu"] = psutil.cpu_percent(interval=None)
    except Exception as exc:
        log.debug("CPU metric unavailable: %s", exc)

    # Memory
    try:
        mem = psutil.virtual_memory()
        metrics["memory"] = mem.percent
        metrics["memory_used_gb"] = round(mem.used / gib, 2)
        metrics["memory_total_gb"] = round(mem.total / gib, 2)
    except Exception as exc:
        log.debug("Memory metrics unavailable: %s", exc)

    # Disk (usage of the filesystem mounted at "/")
    try:
        disk = psutil.disk_usage("/")
        metrics["disk"] = disk.percent
        metrics["disk_used_gb"] = round(disk.used / gib, 2)
        metrics["disk_total_gb"] = round(disk.total / gib, 2)
    except Exception as exc:
        log.debug("Disk metrics unavailable: %s", exc)

    # Load average (Unix only)
    try:
        load1, load5, load15 = psutil.getloadavg()
        metrics["load_1m"] = round(load1, 2)
        metrics["load_5m"] = round(load5, 2)
        metrics["load_15m"] = round(load15, 2)
    except (AttributeError, OSError) as exc:
        log.debug("Load average unavailable: %s", exc)

    # Network connections count.
    # psutil.Error and OSError are the base classes of the previously caught
    # AccessDenied and PermissionError; catching them keeps any psutil/OS
    # failure here from escaping and aborting the whole snapshot.
    try:
        metrics["connections"] = len(psutil.net_connections(kind="inet"))
    except (psutil.Error, OSError) as exc:
        log.debug("Connection count unavailable: %s", exc)

    # Process count
    try:
        metrics["processes"] = len(psutil.pids())
    except Exception as exc:
        log.debug("Process count unavailable: %s", exc)

    return metrics
|
|
|
|
|
|
async def run_collector():
    """Main collector loop with auto-reconnect.

    Opens a WebSocket to the hub, then sends one JSON-encoded
    ``collect_metrics()`` snapshot every ``INTERVAL`` seconds. Any error
    while connecting or sending is logged and followed by a fixed pause
    before a fresh connection attempt. The coroutine returns only when it
    is cancelled.
    """
    import websockets
    from urllib.parse import quote

    reconnect_delay = 5  # seconds to wait before retrying a failed connection

    # Build URL with API key if provided. The key is percent-encoded so that
    # characters such as "&", "#", "+", "%" or spaces cannot corrupt the
    # query string.
    url = HUB_URL
    if API_KEY:
        separator = "&" if "?" in url else "?"
        url = f"{url}{separator}key={quote(API_KEY, safe='')}"

    # Prime CPU percent (first call always returns 0)
    psutil.cpu_percent(interval=None)

    while True:
        try:
            # Log HUB_URL rather than url so the API key never reaches the logs.
            log.info(f"Connecting to {HUB_URL}...")
            async with websockets.connect(url) as ws:
                log.info(
                    f"Connected. Sending metrics every {INTERVAL}s as '{MACHINE_ID}'"
                )

                while True:
                    metrics = collect_metrics()
                    await ws.send(json.dumps(metrics))
                    # Lazy %-args: the message is only built when DEBUG is on.
                    log.debug(
                        "Sent: cpu=%s%% mem=%s%% disk=%s%%",
                        metrics.get("cpu", "?"),
                        metrics.get("memory", "?"),
                        metrics.get("disk", "?"),
                    )
                    await asyncio.sleep(INTERVAL)

        except asyncio.CancelledError:
            # Cancellation is absorbed here: the loop ends and the coroutine
            # returns normally instead of re-raising.
            log.info("Collector stopped")
            break
        except Exception as e:
            log.warning(
                f"Connection error: {e}. Reconnecting in {reconnect_delay}s..."
            )
            await asyncio.sleep(reconnect_delay)
|
|
|
|
|
|
def main():
    """Log the effective configuration, then run the collector until interrupted."""
    startup_banner = (
        "sysmonstm collector starting",
        f" Hub: {HUB_URL}",
        f" Machine: {MACHINE_ID}",
        f" Interval: {INTERVAL}s",
    )
    for banner_line in startup_banner:
        log.info(banner_line)

    try:
        asyncio.run(run_collector())
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the script; exit without a traceback.
        log.info("Stopped")
|
|
|
|
|
|
# Script entry point: start the collector only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|