new three layer deployment
This commit is contained in:
136
ctrl/collector/collector.py
Normal file
136
ctrl/collector/collector.py
Normal file
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Lightweight WebSocket metrics collector for sysmonstm standalone deployment."""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
import time
|
||||
|
||||
import psutil
|
||||
|
||||
# Runtime settings; each one can be overridden through an environment variable.
HUB_URL = os.getenv("HUB_URL", "ws://localhost:8080/ws")
MACHINE_ID = os.getenv("MACHINE_ID", socket.gethostname())
API_KEY = os.getenv("API_KEY", "")
INTERVAL = int(os.getenv("INTERVAL", "5"))  # seconds between metric reports
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")

# Logging configuration; an unrecognised LOG_LEVEL name falls back to INFO.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    level=getattr(logging, LOG_LEVEL.upper(), logging.INFO),
)
log = logging.getLogger("collector")
|
||||
|
||||
|
||||
def collect_metrics() -> dict:
    """Collect a snapshot of system metrics using psutil.

    Every probe is best-effort: when a probe fails, the failure is logged
    at DEBUG level and that probe's keys are left out of the result, so one
    unavailable metric never prevents the others from being reported.

    Returns:
        A dict that always contains ``type``, ``machine_id``, ``hostname``
        and ``timestamp``. The ``cpu``, ``memory*``, ``disk*``, ``load_*``,
        ``connections`` and ``processes`` keys are present only when the
        corresponding probe succeeded.
    """
    gib = 1024**3  # bytes per GiB, used to convert psutil byte counts

    metrics = {
        "type": "metrics",
        "machine_id": MACHINE_ID,
        "hostname": socket.gethostname(),
        "timestamp": time.time(),
    }

    # CPU
    try:
        metrics["cpu"] = psutil.cpu_percent(interval=None)
    except Exception as exc:
        log.debug("CPU metric unavailable: %s", exc)

    # Memory
    try:
        mem = psutil.virtual_memory()
        metrics["memory"] = mem.percent
        metrics["memory_used_gb"] = round(mem.used / gib, 2)
        metrics["memory_total_gb"] = round(mem.total / gib, 2)
    except Exception as exc:
        log.debug("Memory metrics unavailable: %s", exc)

    # Disk (root filesystem only)
    try:
        disk = psutil.disk_usage("/")
        metrics["disk"] = disk.percent
        metrics["disk_used_gb"] = round(disk.used / gib, 2)
        metrics["disk_total_gb"] = round(disk.total / gib, 2)
    except Exception as exc:
        log.debug("Disk metrics unavailable: %s", exc)

    # Load average (Unix only)
    try:
        load1, load5, load15 = psutil.getloadavg()
        metrics["load_1m"] = round(load1, 2)
        metrics["load_5m"] = round(load5, 2)
        metrics["load_15m"] = round(load15, 2)
    except (AttributeError, OSError) as exc:
        log.debug("Load average unavailable: %s", exc)

    # Network connections count
    try:
        metrics["connections"] = len(psutil.net_connections(kind="inet"))
    except (psutil.AccessDenied, PermissionError) as exc:
        log.debug("Connection count unavailable: %s", exc)

    # Process count
    try:
        metrics["processes"] = len(psutil.pids())
    except Exception as exc:
        log.debug("Process count unavailable: %s", exc)

    return metrics
|
||||
|
||||
|
||||
async def run_collector():
    """Stream metrics to the hub over a WebSocket, reconnecting on failure.

    Connects to ``HUB_URL`` (with ``API_KEY`` appended as a percent-encoded
    ``key`` query parameter when it is set) and then sends one JSON-encoded
    ``collect_metrics()`` snapshot every ``INTERVAL`` seconds. Any error
    while connecting or sending is logged, and a new connection is
    attempted after a fixed 5 second delay. The loop ends only when the
    task is cancelled, in which case the function logs and returns.
    """
    from urllib.parse import quote

    import websockets

    reconnect_delay = 5  # seconds to wait before reconnecting after an error

    # Build URL with API key if provided
    url = HUB_URL
    if API_KEY:
        separator = "&" if "?" in url else "?"
        # Percent-encode the key so characters such as "&", "#", "+" or
        # spaces cannot truncate it or inject extra query parameters.
        url = f"{url}{separator}key={quote(API_KEY, safe='')}"

    # Prime CPU percent (first call always returns 0)
    psutil.cpu_percent(interval=None)

    while True:
        try:
            # Log HUB_URL rather than url so the API key never reaches the logs.
            log.info("Connecting to %s...", HUB_URL)
            async with websockets.connect(url) as ws:
                log.info(
                    "Connected. Sending metrics every %ss as '%s'",
                    INTERVAL,
                    MACHINE_ID,
                )

                while True:
                    metrics = collect_metrics()
                    await ws.send(json.dumps(metrics))
                    # Lazy %-args: nothing is formatted unless DEBUG is enabled.
                    log.debug(
                        "Sent: cpu=%s%% mem=%s%% disk=%s%%",
                        metrics.get("cpu", "?"),
                        metrics.get("memory", "?"),
                        metrics.get("disk", "?"),
                    )
                    await asyncio.sleep(INTERVAL)

        except asyncio.CancelledError:
            log.info("Collector stopped")
            break
        except Exception as e:
            log.warning(
                "Connection error: %s. Reconnecting in %ss...", e, reconnect_delay
            )
            await asyncio.sleep(reconnect_delay)
|
||||
|
||||
|
||||
def main():
    """Log the startup banner and run the collector until interrupted."""
    banner = (
        "sysmonstm collector starting",
        f" Hub: {HUB_URL}",
        f" Machine: {MACHINE_ID}",
        f" Interval: {INTERVAL}s",
    )
    for line in banner:
        log.info(line)

    try:
        asyncio.run(run_collector())
    except KeyboardInterrupt:
        # Ctrl-C is the expected way to stop the collector; exit quietly.
        log.info("Stopped")
|
||||
|
||||
|
||||
# Start the collector only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user