364 lines
14 KiB
HTML
364 lines
14 KiB
HTML
<!doctype html>
|
|
<html lang="en">
|
|
<head>
|
|
<!-- Document metadata: encoding first, then viewport, title and the shared stylesheet. -->
<meta charset="UTF-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>System Monitor - Documentation</title>
<link rel="stylesheet" href="architecture/styles.css" />
|
|
<style>
  /*
   * Page-local rules for the docs index, loaded after architecture/styles.css.
   * The var(--...) custom properties used here are not declared in this block;
   * presumably they come from the linked stylesheet - confirm.
   *
   * NOTE(review): no element in this document's markup carries .nav-section,
   * .doc-links, .doc-link or .tag; only .section-divider is referenced.
   * Confirm nothing else depends on these rules before removing them.
   */

  /* Bordered, rounded panel with inner padding and a gap below it. */
  .nav-section {
    margin-bottom: 2rem;
    padding: 1.5rem;
    border: 1px solid var(--border);
    border-radius: 8px;
    background: var(--bg-secondary);
  }

  /* Panel heading in the accent colour. */
  .nav-section h2 {
    margin-bottom: 1rem;
    font-size: 1.25rem;
    color: var(--accent);
  }

  /* Responsive grid: as many columns of at least 300px as fit the row. */
  .doc-links {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
    gap: 1rem;
  }

  /* Card-style link; border colour and position animate over 0.2s. */
  .doc-link {
    display: block;
    padding: 1.25rem;
    border: 1px solid var(--border);
    border-radius: 8px;
    background: var(--bg-card);
    text-decoration: none;
    transition: border-color 0.2s, transform 0.2s;
  }

  /* Hover state: accent border and a 2px upward shift. */
  .doc-link:hover {
    border-color: var(--accent);
    transform: translateY(-2px);
  }

  /* Card title. */
  .doc-link h3 {
    margin-bottom: 0.5rem;
    font-size: 1rem;
    color: var(--text-primary);
  }

  /* Card description text. */
  .doc-link p {
    font-size: 0.875rem;
    line-height: 1.5;
    color: var(--text-secondary);
  }

  /* Small pill label inside a card. */
  .doc-link .tag {
    display: inline-block;
    margin-top: 0.75rem;
    padding: 0.125rem 0.5rem;
    border-radius: 3px;
    background: var(--accent-secondary);
    color: var(--text-primary);
    font-size: 0.75rem;
  }

  /* Horizontal rule reduced to a single top border; the reset must come first. */
  .section-divider {
    border: none;
    border-top: 1px solid var(--border);
    margin: 2rem 0;
  }
</style>
|
|
</head>
|
|
<body>
|
|
<header>
  <!-- Page banner: the document's single h1 plus a subtitle line. -->
  <h1>System Monitoring Platform</h1>
  <p class="subtitle">Documentation</p>
</header>
|
|
|
|
<main>
|
|
<!-- Architecture Diagrams -->
|
|
<section class="graph-section" id="overview">
  <!--
    System Overview: heading row with a "View Full" link, a clickable SVG
    preview, and a summary of the key components. Both links point at
    architecture/graph.html?g=01-system-overview.
  -->
  <div class="graph-header-row">
    <h2>System Overview</h2>
    <!--
      "View Full" is repeated for every diagram on this page, so the link is
      given a unique accessible name. The label starts with the visible text
      so speech-input users can still activate it by what they see.
    -->
    <a
      class="view-btn"
      href="architecture/graph.html?g=01-system-overview"
      aria-label="View Full System Overview diagram"
      >View Full</a
    >
  </div>
  <!-- The image is the only content of this link, so its alt is the link's name. -->
  <a
    class="graph-preview"
    href="architecture/graph.html?g=01-system-overview"
  >
    <img
      src="architecture/01-system-overview.svg"
      alt="System Overview diagram"
    />
  </a>
  <div class="graph-details">
    <p>
      High-level architecture showing all services, data
      stores, and communication patterns.
    </p>
    <!--
      NOTE(review): the heading level jumps from h2 to h4 here. Left as-is
      because the h4 may be a styling hook in architecture/styles.css;
      confirm before changing it to h3.
    -->
    <h4>Key Components</h4>
    <ul>
      <li>
        <strong>Collector</strong>: Runs on each monitored
        machine, streams metrics via gRPC
      </li>
      <li>
        <strong>Aggregator</strong>: Central gRPC server,
        receives streams, normalizes data
      </li>
      <li>
        <strong>Gateway</strong>: FastAPI service, WebSocket
        for browser, REST for queries
      </li>
      <li>
        <strong>Alerts</strong>: Subscribes to events,
        evaluates thresholds, triggers actions
      </li>
      <li>
        <strong>Edge</strong>: Lightweight WebSocket
        relay on AWS, serves public dashboard at
        sysmonstm.mcrn.ar
      </li>
    </ul>
  </div>
</section>
|
|
|
|
<section class="graph-section" id="data-flow">
  <!--
    Data Flow Pipeline: heading row with a "View Full" link, a clickable SVG
    preview, and a table of storage tiers. Both links point at
    architecture/graph.html?g=02-data-flow.
  -->
  <div class="graph-header-row">
    <h2>Data Flow Pipeline</h2>
    <!--
      "View Full" is repeated for every diagram on this page, so the link is
      given a unique accessible name. The label starts with the visible text
      so speech-input users can still activate it by what they see.
    -->
    <a
      class="view-btn"
      href="architecture/graph.html?g=02-data-flow"
      aria-label="View Full Data Flow Pipeline diagram"
      >View Full</a
    >
  </div>
  <!-- The image is the only content of this link, so its alt is the link's name. -->
  <a
    class="graph-preview"
    href="architecture/graph.html?g=02-data-flow"
  >
    <img src="architecture/02-data-flow.svg" alt="Data Flow diagram" />
  </a>
  <div class="graph-details">
    <p>
      How metrics flow from collection through storage with
      different retention tiers.
    </p>
    <!--
      NOTE(review): the heading level jumps from h2 to h4 here. Left as-is
      because the h4 may be a styling hook in architecture/styles.css;
      confirm before changing it to h3.
    -->
    <h4>Storage Tiers</h4>
    <table class="details-table">
      <thead>
        <!-- scope="col" ties each header to its column for assistive technology. -->
        <tr>
          <th scope="col">Tier</th>
          <th scope="col">Resolution</th>
          <th scope="col">Retention</th>
          <th scope="col">Use Case</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td>Hot (Redis)</td>
          <td>5s</td>
          <td>5 min</td>
          <td>Current state, live dashboard</td>
        </tr>
        <tr>
          <td>Raw (TimescaleDB)</td>
          <td>5s</td>
          <td>24h</td>
          <td>Recent detailed analysis</td>
        </tr>
        <tr>
          <td>1-min Aggregates</td>
          <td>1m</td>
          <td>7d</td>
          <td>Week view, trends</td>
        </tr>
        <tr>
          <td>1-hour Aggregates</td>
          <td>1h</td>
          <td>90d</td>
          <td>Long-term analysis</td>
        </tr>
      </tbody>
    </table>
  </div>
</section>
|
|
|
|
<section class="graph-section" id="deployment">
  <!--
    Deployment Architecture: heading row with a "View Full" link, a clickable
    SVG preview, and a list of environments. Both links point at
    architecture/graph.html?g=03-deployment.
  -->
  <div class="graph-header-row">
    <h2>Deployment Architecture</h2>
    <!--
      "View Full" is repeated for every diagram on this page, so the link is
      given a unique accessible name. The label starts with the visible text
      so speech-input users can still activate it by what they see.
    -->
    <a
      class="view-btn"
      href="architecture/graph.html?g=03-deployment"
      aria-label="View Full Deployment Architecture diagram"
      >View Full</a
    >
  </div>
  <!-- The image is the only content of this link, so its alt is the link's name. -->
  <a
    class="graph-preview"
    href="architecture/graph.html?g=03-deployment"
  >
    <img
      src="architecture/03-deployment.svg"
      alt="Deployment Architecture diagram"
    />
  </a>
  <div class="graph-details">
    <p>
      Deployment options from local development to AWS
      production.
    </p>
    <!--
      NOTE(review): the heading level jumps from h2 to h4 here. Left as-is
      because the h4 may be a styling hook in architecture/styles.css;
      confirm before changing it to h3.
    -->
    <h4>Environments</h4>
    <ul>
      <li>
        <strong>Local</strong>: Docker Compose with
        aggregator, gateway, Redis, TimescaleDB, alerts
      </li>
      <li>
        <strong>Edge (AWS)</strong>: Lightweight
        WebSocket relay at sysmonstm.mcrn.ar, receives
        forwarded metrics from local gateway
      </li>
      <li>
        <strong>Collectors</strong>: Run on remote
        machines, stream to local aggregator via gRPC
      </li>
    </ul>
  </div>
</section>
|
|
|
|
<section class="graph-section" id="grpc">
  <!--
    gRPC Service Definitions: heading row with a "View Full" link, a
    clickable SVG preview, and a list of services. Both links point at
    architecture/graph.html?g=04-grpc-services.
  -->
  <div class="graph-header-row">
    <h2>gRPC Service Definitions</h2>
    <!--
      "View Full" is repeated for every diagram on this page, so the link is
      given a unique accessible name. The label starts with the visible text
      so speech-input users can still activate it by what they see.
    -->
    <a
      class="view-btn"
      href="architecture/graph.html?g=04-grpc-services"
      aria-label="View Full gRPC Service Definitions diagram"
      >View Full</a
    >
  </div>
  <!-- The image is the only content of this link, so its alt is the link's name. -->
  <a
    class="graph-preview"
    href="architecture/graph.html?g=04-grpc-services"
  >
    <img
      src="architecture/04-grpc-services.svg"
      alt="gRPC Services diagram"
    />
  </a>
  <div class="graph-details">
    <p>Protocol Buffer service and message definitions.</p>
    <!--
      NOTE(review): the heading level jumps from h2 to h4 here. Left as-is
      because the h4 may be a styling hook in architecture/styles.css;
      confirm before changing it to h3.
    -->
    <h4>Services</h4>
    <ul>
      <li>
        <strong>MetricsService</strong>: Client-side
        streaming for metrics ingestion
      </li>
      <li>
        <strong>ControlService</strong>: Bidirectional
        streaming for collector control
      </li>
      <li>
        <strong>ConfigService</strong>: Server-side
        streaming for config updates
      </li>
    </ul>
  </div>
</section>
|
|
|
|
<hr class="section-divider" />
|
|
|
|
<section class="findings-section">
  <!-- Four cards, each an article with an h3 title and a four-item list. -->
  <h2>Key Design Decisions</h2>
  <div class="findings-grid">
    <article class="finding-card">
      <h3>Domain Mapping</h3>
      <ul>
        <li>Machine = Payment Processor</li>
        <li>Metrics Stream = Transaction Stream</li>
        <li>Thresholds = Fraud Detection</li>
        <li>Aggregator = Payment Hub</li>
      </ul>
    </article>

    <article class="finding-card">
      <h3>gRPC Patterns</h3>
      <ul>
        <li>Client streaming (metrics)</li>
        <li>Server streaming (config)</li>
        <li>Bidirectional (control)</li>
        <li>Health checking</li>
      </ul>
    </article>

    <article class="finding-card">
      <h3>Event-Driven</h3>
      <ul>
        <li>Redis Pub/Sub (current)</li>
        <li>Abstraction for Kafka switch</li>
        <li>Decoupled alert processing</li>
        <li>Real-time WebSocket push</li>
      </ul>
    </article>

    <article class="finding-card">
      <h3>Resilience</h3>
      <ul>
        <li>Collectors are independent</li>
        <li>Graceful degradation</li>
        <li>Retry with backoff</li>
        <li>Health checks everywhere</li>
      </ul>
    </article>
  </div>
</section>
|
|
|
|
<section class="tech-section">
  <!-- Technology list split into four columns: Core, Data, Infrastructure, CI/CD. -->
  <h2>Technology Stack</h2>
  <div class="tech-grid">
    <div class="tech-column">
      <h3>Core</h3>
      <ul>
        <li>Python 3.11+</li>
        <li>FastAPI</li>
        <li>gRPC / protobuf</li>
        <li>asyncio</li>
      </ul>
    </div>

    <div class="tech-column">
      <h3>Data</h3>
      <ul>
        <li>TimescaleDB</li>
        <li>Redis</li>
        <li>Redis Pub/Sub</li>
      </ul>
    </div>

    <div class="tech-column">
      <h3>Infrastructure</h3>
      <ul>
        <li>Docker</li>
        <li>Docker Compose</li>
      </ul>
    </div>

    <div class="tech-column">
      <h3>CI/CD</h3>
      <ul>
        <li>Woodpecker CI</li>
        <li>Container Registry</li>
      </ul>
    </div>
  </div>
</section>
|
|
</main>
|
|
|
|
<footer>
  <!-- Page footer: document name plus a machine-readable generation date. -->
  <p>System Monitoring Platform - Documentation</p>
  <p class="date">
    Generated: <time datetime="2026-03-16">March 2026</time>
  </p>
</footer>
|
|
</body>
|
|
</html>
|