"""System metrics collection using psutil."""

import socket
import time
from dataclasses import dataclass, field

import psutil


@dataclass
class MetricPoint:
    """A single metric data point."""

    # Metric name, e.g. "CPU_PERCENT".
    metric_type: str
    # Sampled value for the metric.
    value: float
    # Optional dimensions that qualify the value, e.g. {"core": "0"}.
    labels: dict[str, str] = field(default_factory=dict)


@dataclass
class MetricsBatch:
    """A batch of metrics from a single collection cycle."""

    machine_id: str
    hostname: str
    # Wall-clock time at which the batch was assembled, in epoch milliseconds.
    timestamp_ms: int
    metrics: list[MetricPoint]


class MetricsCollector:
    """Collects system metrics using psutil.

    Each metric family (CPU, memory, disk, network, load) can be switched on
    or off individually through the constructor flags.  Call :meth:`collect`
    once per sampling cycle; network throughput is derived from the
    difference between two consecutive cycles, so it is absent from the
    first batch.
    """

    def __init__(
        self,
        machine_id: str,
        collect_cpu: bool = True,
        collect_memory: bool = True,
        collect_disk: bool = True,
        collect_network: bool = True,
        collect_load: bool = True,
    ):
        """Create a collector.

        Args:
            machine_id: Identifier copied into every batch produced.
            collect_cpu: Include overall and per-core CPU utilisation.
            collect_memory: Include virtual memory usage.
            collect_disk: Include root partition usage and disk I/O counters.
            collect_network: Include network throughput and connection count.
            collect_load: Include load averages and the process count.
        """
        self.machine_id = machine_id
        self.hostname = socket.gethostname()
        self.collect_cpu = collect_cpu
        self.collect_memory = collect_memory
        self.collect_disk = collect_disk
        self.collect_network = collect_network
        self.collect_load = collect_load

        # Previous network counters and the monotonic time at which they were
        # read; both stay None until the first successful network sample.
        self._prev_net_io: psutil._common.snetio | None = None
        self._prev_net_time: float | None = None

    def collect(self) -> MetricsBatch:
        """Collect all enabled metrics and return them as a batch.

        Returns:
            A MetricsBatch stamped with the current wall-clock time in
            milliseconds, containing the points of every enabled family in
            the order CPU, memory, disk, network, load.
        """
        collectors = (
            (self.collect_cpu, self._collect_cpu),
            (self.collect_memory, self._collect_memory),
            (self.collect_disk, self._collect_disk),
            (self.collect_network, self._collect_network),
            (self.collect_load, self._collect_load),
        )

        metrics: list[MetricPoint] = []
        for enabled, gather in collectors:
            if enabled:
                metrics.extend(gather())

        return MetricsBatch(
            machine_id=self.machine_id,
            hostname=self.hostname,
            timestamp_ms=int(time.time() * 1000),
            metrics=metrics,
        )

    def _collect_cpu(self) -> list[MetricPoint]:
        """Collect overall and per-core CPU utilisation.

        Both readings use ``interval=None`` so the call does not block.
        NOTE(review): per the psutil documentation, non-blocking readings are
        relative to the previous call, so the very first sample may not be
        meaningful.
        """
        metrics = [
            MetricPoint(
                metric_type="CPU_PERCENT",
                value=psutil.cpu_percent(interval=None),
            )
        ]

        per_core = psutil.cpu_percent(interval=None, percpu=True)
        metrics.extend(
            MetricPoint(
                metric_type="CPU_PERCENT_PER_CORE",
                value=pct,
                labels={"core": str(index)},
            )
            for index, pct in enumerate(per_core)
        )
        return metrics

    def _collect_memory(self) -> list[MetricPoint]:
        """Collect virtual memory usage (percent, used bytes, available bytes)."""
        mem = psutil.virtual_memory()
        return [
            MetricPoint(metric_type="MEMORY_PERCENT", value=mem.percent),
            MetricPoint(metric_type="MEMORY_USED_BYTES", value=float(mem.used)),
            MetricPoint(
                metric_type="MEMORY_AVAILABLE_BYTES", value=float(mem.available)
            ),
        ]

    def _collect_disk(self) -> list[MetricPoint]:
        """Collect root partition usage and system-wide disk I/O counters.

        The ``DISK_*_BYTES_SEC`` points carry the raw cumulative byte
        counters, not a rate; the conversion to a per-second rate is left to
        the downstream aggregator.
        """
        metrics: list[MetricPoint] = []

        # Usage of the root partition; best effort, skipped when "/" cannot
        # be inspected.
        try:
            usage = psutil.disk_usage("/")
        except (PermissionError, FileNotFoundError):
            pass
        else:
            metrics += [
                MetricPoint(
                    metric_type="DISK_PERCENT",
                    value=usage.percent,
                    labels={"mount": "/"},
                ),
                MetricPoint(
                    metric_type="DISK_USED_BYTES",
                    value=float(usage.used),
                    labels={"mount": "/"},
                ),
            ]

        # Cumulative I/O counters; psutil may return None when it has no
        # disks to report on.
        try:
            io = psutil.disk_io_counters()
            if io is not None:
                metrics += [
                    MetricPoint(
                        metric_type="DISK_READ_BYTES_SEC",
                        value=float(io.read_bytes),
                    ),
                    MetricPoint(
                        metric_type="DISK_WRITE_BYTES_SEC",
                        value=float(io.write_bytes),
                    ),
                ]
        except (PermissionError, AttributeError):
            pass

        return metrics

    def _collect_network(self) -> list[MetricPoint]:
        """Collect network throughput and the number of inet connections.

        Throughput is the counter difference between this call and the
        previous one divided by the elapsed time, so no rate points are
        produced on the first call.  The counters and the connection count
        are probed independently: a permission error on one does not
        suppress the other.
        """
        metrics: list[MetricPoint] = []

        try:
            net_io = psutil.net_io_counters()
        except (PermissionError, psutil.AccessDenied):
            # Keep the existing baseline so a later successful sample can
            # still produce a rate.
            pass
        else:
            # A monotonic clock keeps the interval immune to wall-clock
            # adjustments between two samples.
            now = time.monotonic()
            metrics.extend(self._network_rates(net_io, now))
            self._prev_net_io = net_io
            self._prev_net_time = now

        try:
            connections = len(psutil.net_connections(kind="inet"))
        except (PermissionError, psutil.AccessDenied):
            pass
        else:
            metrics.append(
                MetricPoint(
                    metric_type="NETWORK_CONNECTIONS",
                    value=float(connections),
                )
            )

        return metrics

    def _network_rates(self, net_io, now: float) -> list[MetricPoint]:
        """Return sent/received byte rates relative to the stored baseline.

        Args:
            net_io: Current counters as returned by
                ``psutil.net_io_counters()``; may be None when psutil has no
                interface to report on.
            now: Monotonic time at which ``net_io`` was read.

        Returns:
            Two rate points, or an empty list when either sample is missing
            or no time has elapsed since the baseline.
        """
        prev_io = self._prev_net_io
        prev_time = self._prev_net_time
        if net_io is None or prev_io is None or prev_time is None:
            return []

        elapsed = now - prev_time
        if elapsed <= 0:
            return []

        return [
            MetricPoint(
                metric_type="NETWORK_SENT_BYTES_SEC",
                value=(net_io.bytes_sent - prev_io.bytes_sent) / elapsed,
            ),
            MetricPoint(
                metric_type="NETWORK_RECV_BYTES_SEC",
                value=(net_io.bytes_recv - prev_io.bytes_recv) / elapsed,
            ),
        ]

    def _collect_load(self) -> list[MetricPoint]:
        """Collect load averages (where available) and the process count."""
        metrics: list[MetricPoint] = []

        try:
            load1, load5, load15 = psutil.getloadavg()
        except (AttributeError, OSError):
            # Load average is not available on every platform; skip it.
            pass
        else:
            metrics += [
                MetricPoint(metric_type="LOAD_AVG_1M", value=load1),
                MetricPoint(metric_type="LOAD_AVG_5M", value=load5),
                MetricPoint(metric_type="LOAD_AVG_15M", value=load15),
            ]

        # The process count is reported regardless of load-average support.
        metrics.append(
            MetricPoint(
                metric_type="PROCESS_COUNT",
                value=float(len(psutil.pids())),
            )
        )
        return metrics