first claude draft
This commit is contained in:
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
def
|
||||||
184
.woodpecker.yml
Normal file
184
.woodpecker.yml
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
# Woodpecker CI Pipeline
|
||||||
|
# https://woodpecker-ci.org/docs/usage/pipeline-syntax
|
||||||
|
|
||||||
|
variables:
|
||||||
|
- &python_image python:3.11-slim
|
||||||
|
- &docker_image docker:24-dind
|
||||||
|
|
||||||
|
# Clone settings
|
||||||
|
clone:
|
||||||
|
git:
|
||||||
|
image: woodpeckerci/plugin-git
|
||||||
|
settings:
|
||||||
|
depth: 50
|
||||||
|
|
||||||
|
# Pipeline steps
|
||||||
|
steps:
|
||||||
|
# ==========================================================================
|
||||||
|
# Lint and Test
|
||||||
|
# ==========================================================================
|
||||||
|
|
||||||
|
lint:
|
||||||
|
image: *python_image
|
||||||
|
commands:
|
||||||
|
- pip install ruff mypy
|
||||||
|
- ruff check services/ shared/
|
||||||
|
- ruff format --check services/ shared/
|
||||||
|
when:
|
||||||
|
event: [push, pull_request]
|
||||||
|
|
||||||
|
test-shared:
|
||||||
|
image: *python_image
|
||||||
|
commands:
|
||||||
|
- pip install pytest pytest-asyncio redis asyncpg
|
||||||
|
- pip install -r shared/events/requirements.txt || true
|
||||||
|
- pytest shared/ -v --tb=short
|
||||||
|
when:
|
||||||
|
event: [push, pull_request]
|
||||||
|
|
||||||
|
test-services:
|
||||||
|
image: *python_image
|
||||||
|
commands:
|
||||||
|
- pip install pytest pytest-asyncio grpcio grpcio-tools
|
||||||
|
- |
|
||||||
|
for svc in collector aggregator gateway alerts; do
|
||||||
|
if [ -f "services/$svc/requirements.txt" ]; then
|
||||||
|
pip install -r "services/$svc/requirements.txt"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
- pytest services/ -v --tb=short || true
|
||||||
|
when:
|
||||||
|
event: [push, pull_request]
|
||||||
|
|
||||||
|
# ==========================================================================
|
||||||
|
# Build Docker Images
|
||||||
|
# ==========================================================================
|
||||||
|
|
||||||
|
build-aggregator:
|
||||||
|
image: *docker_image
|
||||||
|
commands:
|
||||||
|
- docker build -t sysmonstm/aggregator:${CI_COMMIT_SHA:0:7} -f services/aggregator/Dockerfile --target production .
|
||||||
|
- docker tag sysmonstm/aggregator:${CI_COMMIT_SHA:0:7} sysmonstm/aggregator:latest
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
build-gateway:
|
||||||
|
image: *docker_image
|
||||||
|
commands:
|
||||||
|
- docker build -t sysmonstm/gateway:${CI_COMMIT_SHA:0:7} -f services/gateway/Dockerfile --target production .
|
||||||
|
- docker tag sysmonstm/gateway:${CI_COMMIT_SHA:0:7} sysmonstm/gateway:latest
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
build-collector:
|
||||||
|
image: *docker_image
|
||||||
|
commands:
|
||||||
|
- docker build -t sysmonstm/collector:${CI_COMMIT_SHA:0:7} -f services/collector/Dockerfile --target production .
|
||||||
|
- docker tag sysmonstm/collector:${CI_COMMIT_SHA:0:7} sysmonstm/collector:latest
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
build-alerts:
|
||||||
|
image: *docker_image
|
||||||
|
commands:
|
||||||
|
- docker build -t sysmonstm/alerts:${CI_COMMIT_SHA:0:7} -f services/alerts/Dockerfile --target production .
|
||||||
|
- docker tag sysmonstm/alerts:${CI_COMMIT_SHA:0:7} sysmonstm/alerts:latest
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
# ==========================================================================
|
||||||
|
# Push to Registry
|
||||||
|
# ==========================================================================
|
||||||
|
|
||||||
|
push-images:
|
||||||
|
image: *docker_image
|
||||||
|
commands:
|
||||||
|
- echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY_URL"
|
||||||
|
- |
|
||||||
|
for img in aggregator gateway collector alerts; do
|
||||||
|
docker tag sysmonstm/$img:latest $REGISTRY_URL/sysmonstm/$img:${CI_COMMIT_SHA:0:7}
|
||||||
|
docker tag sysmonstm/$img:latest $REGISTRY_URL/sysmonstm/$img:latest
|
||||||
|
docker push $REGISTRY_URL/sysmonstm/$img:${CI_COMMIT_SHA:0:7}
|
||||||
|
docker push $REGISTRY_URL/sysmonstm/$img:latest
|
||||||
|
done
|
||||||
|
secrets: [registry_user, registry_password, registry_url]
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
# ==========================================================================
|
||||||
|
# Deploy to EC2
|
||||||
|
# ==========================================================================
|
||||||
|
|
||||||
|
deploy-staging:
|
||||||
|
image: appleboy/drone-ssh
|
||||||
|
settings:
|
||||||
|
host:
|
||||||
|
from_secret: deploy_host
|
||||||
|
username:
|
||||||
|
from_secret: deploy_user
|
||||||
|
key:
|
||||||
|
from_secret: deploy_key
|
||||||
|
script:
|
||||||
|
- cd /home/ec2-user/sysmonstm
|
||||||
|
- git pull origin main
|
||||||
|
- docker-compose pull
|
||||||
|
- docker-compose up -d --remove-orphans
|
||||||
|
- docker system prune -f
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
# ==========================================================================
|
||||||
|
# Notifications
|
||||||
|
# ==========================================================================
|
||||||
|
|
||||||
|
notify-success:
|
||||||
|
image: plugins/webhook
|
||||||
|
settings:
|
||||||
|
urls:
|
||||||
|
from_secret: webhook_url
|
||||||
|
content_type: application/json
|
||||||
|
template: |
|
||||||
|
{
|
||||||
|
"text": "✅ Build succeeded: ${CI_REPO_NAME}#${CI_BUILD_NUMBER}",
|
||||||
|
"commit": "${CI_COMMIT_SHA:0:7}",
|
||||||
|
"branch": "${CI_COMMIT_BRANCH}",
|
||||||
|
"author": "${CI_COMMIT_AUTHOR}"
|
||||||
|
}
|
||||||
|
when:
|
||||||
|
status: success
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
|
|
||||||
|
notify-failure:
|
||||||
|
image: plugins/webhook
|
||||||
|
settings:
|
||||||
|
urls:
|
||||||
|
from_secret: webhook_url
|
||||||
|
content_type: application/json
|
||||||
|
template: |
|
||||||
|
{
|
||||||
|
"text": "❌ Build failed: ${CI_REPO_NAME}#${CI_BUILD_NUMBER}",
|
||||||
|
"commit": "${CI_COMMIT_SHA:0:7}",
|
||||||
|
"branch": "${CI_COMMIT_BRANCH}",
|
||||||
|
"author": "${CI_COMMIT_AUTHOR}"
|
||||||
|
}
|
||||||
|
when:
|
||||||
|
status: failure
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
43
.woodpecker/build.yml
Normal file
43
.woodpecker/build.yml
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# Woodpecker CI - Build Pipeline (runs on main branch pushes)
|
||||||
|
|
||||||
|
steps:
|
||||||
|
build-images:
|
||||||
|
image: docker:24-dind
|
||||||
|
commands:
|
||||||
|
- echo "=== Building Docker images ==="
|
||||||
|
- docker build -t sysmonstm/aggregator:${CI_COMMIT_SHA:0:7} -f services/aggregator/Dockerfile --target production .
|
||||||
|
- docker build -t sysmonstm/gateway:${CI_COMMIT_SHA:0:7} -f services/gateway/Dockerfile --target production .
|
||||||
|
- docker build -t sysmonstm/collector:${CI_COMMIT_SHA:0:7} -f services/collector/Dockerfile --target production .
|
||||||
|
- docker build -t sysmonstm/alerts:${CI_COMMIT_SHA:0:7} -f services/alerts/Dockerfile --target production .
|
||||||
|
- echo "=== Tagging as latest ==="
|
||||||
|
- docker tag sysmonstm/aggregator:${CI_COMMIT_SHA:0:7} sysmonstm/aggregator:latest
|
||||||
|
- docker tag sysmonstm/gateway:${CI_COMMIT_SHA:0:7} sysmonstm/gateway:latest
|
||||||
|
- docker tag sysmonstm/collector:${CI_COMMIT_SHA:0:7} sysmonstm/collector:latest
|
||||||
|
- docker tag sysmonstm/alerts:${CI_COMMIT_SHA:0:7} sysmonstm/alerts:latest
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
|
||||||
|
push-to-registry:
|
||||||
|
image: docker:24-dind
|
||||||
|
commands:
|
||||||
|
- echo "=== Logging into registry ==="
|
||||||
|
- echo "$REGISTRY_PASSWORD" | docker login -u "$REGISTRY_USER" --password-stdin "$REGISTRY_URL"
|
||||||
|
- echo "=== Pushing images ==="
|
||||||
|
- |
|
||||||
|
for svc in aggregator gateway collector alerts; do
|
||||||
|
docker tag sysmonstm/$svc:${CI_COMMIT_SHA:0:7} $REGISTRY_URL/sysmonstm/$svc:${CI_COMMIT_SHA:0:7}
|
||||||
|
docker tag sysmonstm/$svc:latest $REGISTRY_URL/sysmonstm/$svc:latest
|
||||||
|
docker push $REGISTRY_URL/sysmonstm/$svc:${CI_COMMIT_SHA:0:7}
|
||||||
|
docker push $REGISTRY_URL/sysmonstm/$svc:latest
|
||||||
|
echo "Pushed $svc"
|
||||||
|
done
|
||||||
|
secrets: [registry_user, registry_password, registry_url]
|
||||||
|
volumes:
|
||||||
|
- /var/run/docker.sock:/var/run/docker.sock
|
||||||
|
|
||||||
|
depends_on:
|
||||||
|
- test
|
||||||
|
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
61
.woodpecker/deploy.yml
Normal file
61
.woodpecker/deploy.yml
Normal file
@@ -0,0 +1,61 @@
|
|||||||
|
# Woodpecker CI - Deploy Pipeline
|
||||||
|
|
||||||
|
steps:
|
||||||
|
deploy-to-staging:
|
||||||
|
image: appleboy/drone-ssh
|
||||||
|
settings:
|
||||||
|
host:
|
||||||
|
from_secret: deploy_host
|
||||||
|
username:
|
||||||
|
from_secret: deploy_user
|
||||||
|
key:
|
||||||
|
from_secret: deploy_key
|
||||||
|
port: 22
|
||||||
|
script:
|
||||||
|
- echo "=== Deploying to staging ==="
|
||||||
|
- cd /home/ec2-user/sysmonstm
|
||||||
|
- git fetch origin main
|
||||||
|
- git reset --hard origin/main
|
||||||
|
- echo "=== Pulling new images ==="
|
||||||
|
- docker-compose pull
|
||||||
|
- echo "=== Restarting services ==="
|
||||||
|
- docker-compose up -d --remove-orphans
|
||||||
|
- echo "=== Cleaning up ==="
|
||||||
|
- docker system prune -f
|
||||||
|
- echo "=== Deployment complete ==="
|
||||||
|
- docker-compose ps
|
||||||
|
|
||||||
|
health-check:
|
||||||
|
image: curlimages/curl
|
||||||
|
commands:
|
||||||
|
- echo "=== Waiting for services to start ==="
|
||||||
|
- sleep 10
|
||||||
|
- echo "=== Checking gateway health ==="
|
||||||
|
- curl -f http://$DEPLOY_HOST:8000/health || exit 1
|
||||||
|
- echo "=== Health check passed ==="
|
||||||
|
secrets: [deploy_host]
|
||||||
|
|
||||||
|
notify:
|
||||||
|
image: plugins/webhook
|
||||||
|
settings:
|
||||||
|
urls:
|
||||||
|
from_secret: webhook_url
|
||||||
|
content_type: application/json
|
||||||
|
template: |
|
||||||
|
{
|
||||||
|
"text": "🚀 Deployed to staging",
|
||||||
|
"repo": "${CI_REPO_NAME}",
|
||||||
|
"commit": "${CI_COMMIT_SHA:0:7}",
|
||||||
|
"message": "${CI_COMMIT_MESSAGE}",
|
||||||
|
"author": "${CI_COMMIT_AUTHOR}",
|
||||||
|
"url": "https://sysmonstm.mcrn.ar"
|
||||||
|
}
|
||||||
|
when:
|
||||||
|
status: success
|
||||||
|
|
||||||
|
depends_on:
|
||||||
|
- build
|
||||||
|
|
||||||
|
when:
|
||||||
|
event: push
|
||||||
|
branch: main
|
||||||
40
.woodpecker/test.yml
Normal file
40
.woodpecker/test.yml
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
# Woodpecker CI - Test Pipeline (runs on PRs and pushes)
|
||||||
|
# Separate file for cleaner organization
|
||||||
|
|
||||||
|
steps:
|
||||||
|
lint:
|
||||||
|
image: python:3.11-slim
|
||||||
|
commands:
|
||||||
|
- pip install --quiet ruff mypy
|
||||||
|
- echo "=== Linting with ruff ==="
|
||||||
|
- ruff check services/ shared/ --output-format=github
|
||||||
|
- echo "=== Checking formatting ==="
|
||||||
|
- ruff format --check services/ shared/
|
||||||
|
|
||||||
|
typecheck:
|
||||||
|
image: python:3.11-slim
|
||||||
|
commands:
|
||||||
|
- pip install --quiet mypy types-redis
|
||||||
|
- echo "=== Type checking shared/ ==="
|
||||||
|
- mypy shared/ --ignore-missing-imports || true
|
||||||
|
|
||||||
|
unit-tests:
|
||||||
|
image: python:3.11-slim
|
||||||
|
commands:
|
||||||
|
- pip install --quiet pytest pytest-asyncio pytest-cov
|
||||||
|
- pip install --quiet redis asyncpg grpcio grpcio-tools psutil pydantic pydantic-settings structlog
|
||||||
|
- echo "=== Running unit tests ==="
|
||||||
|
- pytest shared/ services/ -v --tb=short --cov=shared --cov=services --cov-report=term-missing || true
|
||||||
|
|
||||||
|
proto-check:
|
||||||
|
image: python:3.11-slim
|
||||||
|
commands:
|
||||||
|
- pip install --quiet grpcio-tools
|
||||||
|
- echo "=== Validating proto definitions ==="
|
||||||
|
- python -m grpc_tools.protoc -I./proto --python_out=/tmp --grpc_python_out=/tmp ./proto/metrics.proto
|
||||||
|
- echo "Proto compilation successful"
|
||||||
|
|
||||||
|
depends_on: []
|
||||||
|
|
||||||
|
when:
|
||||||
|
event: [push, pull_request]
|
||||||
492
CLAUDE.md
Normal file
492
CLAUDE.md
Normal file
@@ -0,0 +1,492 @@
|
|||||||
|
# Distributed System Monitoring Platform
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
A real-time system monitoring platform that streams metrics from multiple machines to a central hub with live web dashboard. Built to demonstrate production microservices patterns (gRPC, FastAPI, streaming, event-driven architecture) while solving a real problem: monitoring development infrastructure across multiple machines.
|
||||||
|
|
||||||
|
**Primary Goal:** Interview demonstration project for Python Microservices Engineer position
|
||||||
|
**Secondary Goal:** Actually useful tool for managing multi-machine development environment
|
||||||
|
**Time Investment:** Phased approach - MVP in a weekend, polish over 2-3 weeks
|
||||||
|
|
||||||
|
## Why This Project
|
||||||
|
|
||||||
|
**Interview Alignment:**
|
||||||
|
- Demonstrates gRPC-based microservices architecture (core requirement)
|
||||||
|
- Shows streaming patterns (server-side and bidirectional)
|
||||||
|
- Real-time data aggregation and processing
|
||||||
|
- Alert/threshold monitoring (maps to fraud detection)
|
||||||
|
- Event-driven patterns
|
||||||
|
- Multiple data sources requiring normalization (maps to multiple payment processors)
|
||||||
|
|
||||||
|
**Personal Utility:**
|
||||||
|
- Monitors existing multi-machine dev setup
|
||||||
|
- Dashboard stays open, provides real value
|
||||||
|
- Solves actual pain point
|
||||||
|
- Will continue running post-interview
|
||||||
|
|
||||||
|
**Domain Mapping for Interview:**
|
||||||
|
- Machine = Payment Processor
|
||||||
|
- Metrics Stream = Transaction Stream
|
||||||
|
- Resource Thresholds = Fraud/Limit Detection
|
||||||
|
- Alert System = Risk Management
|
||||||
|
- Aggregation Service = Payment Processing Hub
|
||||||
|
|
||||||
|
## Technical Stack
|
||||||
|
|
||||||
|
### Core Technologies (Must Use - From JD)
|
||||||
|
- **Python 3.11+** - Primary language
|
||||||
|
- **FastAPI** - Web gateway, REST endpoints, WebSocket streaming
|
||||||
|
- **gRPC** - Inter-service communication, metric streaming
|
||||||
|
- **PostgreSQL/TimescaleDB** - Time-series historical data
|
||||||
|
- **Redis** - Current state, caching, alert rules
|
||||||
|
- **Docker Compose** - Orchestration
|
||||||
|
|
||||||
|
### Supporting Technologies
|
||||||
|
- **Protocol Buffers** - gRPC message definitions
|
||||||
|
- **WebSockets** - Browser streaming
|
||||||
|
- **htmx + Alpine.js** - Lightweight reactive frontend (avoid heavy SPA)
|
||||||
|
- **Chart.js or Apache ECharts** - Real-time graphs
|
||||||
|
- **asyncio** - Async patterns throughout
|
||||||
|
|
||||||
|
### Development Tools
|
||||||
|
- **grpcio & grpcio-tools** - Python gRPC
|
||||||
|
- **psutil** - System metrics collection
|
||||||
|
- **uvicorn** - FastAPI server
|
||||||
|
- **pytest** - Testing
|
||||||
|
- **docker-compose** - Local orchestration
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Browser │
|
||||||
|
│ ┌──────────────────────────────────────────────────────┐ │
|
||||||
|
│ │ Dashboard (htmx + Alpine.js + WebSockets) │ │
|
||||||
|
│ └──────────────────────────────────────────────────────┘ │
|
||||||
|
└────────────────────────┬────────────────────────────────────┘
|
||||||
|
│ WebSocket
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Web Gateway Service │
|
||||||
|
│ (FastAPI + WebSockets) │
|
||||||
|
│ - Serves dashboard │
|
||||||
|
│ - Streams updates to browser │
|
||||||
|
│ - REST API for historical queries │
|
||||||
|
└────────────────────────┬────────────────────────────────────┘
|
||||||
|
│ gRPC
|
||||||
|
▼
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Aggregator Service (gRPC) │
|
||||||
|
│ - Receives metric streams from all collectors │
|
||||||
|
│ - Normalizes data from different sources │
|
||||||
|
│ - Enriches with machine context │
|
||||||
|
│ - Publishes to event stream │
|
||||||
|
│ - Checks alert thresholds │
|
||||||
|
└─────┬───────────────────────────────────┬───────────────────┘
|
||||||
|
│ │
|
||||||
|
│ Stores │ Publishes events
|
||||||
|
▼ ▼
|
||||||
|
┌──────────────┐ ┌────────────────┐
|
||||||
|
│ TimescaleDB │ │ Event Stream │
|
||||||
|
│ (historical)│ │ (Redis Pub/Sub│
|
||||||
|
└──────────────┘ │ or RabbitMQ) │
|
||||||
|
└────────┬───────┘
|
||||||
|
┌──────────────┐ │
|
||||||
|
│ Redis │ │ Subscribes
|
||||||
|
│ (current │◄───────────────────────────┘
|
||||||
|
│ state) │ │
|
||||||
|
└──────────────┘ ▼
|
||||||
|
┌────────────────┐
|
||||||
|
▲ │ Alert Service │
|
||||||
|
│ │ - Processes │
|
||||||
|
│ │ events │
|
||||||
|
│ gRPC Streaming │ - Triggers │
|
||||||
|
│ │ actions │
|
||||||
|
┌─────┴────────────────────────────┴────────────────┘
|
||||||
|
│
|
||||||
|
│ Multiple Collector Services (one per machine)
|
||||||
|
│ ┌───────────────────────────────────────┐
|
||||||
|
│ │ Metrics Collector (gRPC Client) │
|
||||||
|
│ │ - Gathers system metrics (psutil) │
|
||||||
|
│ │ - Streams to Aggregator via gRPC │
|
||||||
|
│ │ - CPU, Memory, Disk, Network │
|
||||||
|
│ │ - Process list │
|
||||||
|
│ │ - Docker container stats (optional) │
|
||||||
|
│ └───────────────────────────────────────┘
|
||||||
|
│
|
||||||
|
└──► Machine 1, Machine 2, Machine 3, ...
|
||||||
|
```
|
||||||
|
|
||||||
|
## Implementation Phases
|
||||||
|
|
||||||
|
### Phase 1: MVP - Core Streaming (Weekend - 8-12 hours)
|
||||||
|
|
||||||
|
**Goal:** Prove the gRPC streaming works end-to-end
|
||||||
|
|
||||||
|
**Deliverables:**
|
||||||
|
1. Metrics Collector Service (gRPC client)
|
||||||
|
- Collects CPU, memory, disk on localhost
|
||||||
|
- Streams to aggregator every 5 seconds
|
||||||
|
|
||||||
|
2. Aggregator Service (gRPC server)
|
||||||
|
- Receives metric stream
|
||||||
|
- Stores current state in Redis
|
||||||
|
- Logs to console
|
||||||
|
|
||||||
|
3. Proto definitions for metric messages
|
||||||
|
|
||||||
|
4. Docker Compose setup
|
||||||
|
|
||||||
|
**Success Criteria:**
|
||||||
|
- Run collector, see metrics flowing to aggregator
|
||||||
|
- Redis contains current state
|
||||||
|
- Can query Redis manually for latest metrics
|
||||||
|
|
||||||
|
### Phase 2: Web Dashboard (1 week)
|
||||||
|
|
||||||
|
**Goal:** Make it visible and useful
|
||||||
|
|
||||||
|
**Deliverables:**
|
||||||
|
1. Web Gateway Service (FastAPI)
|
||||||
|
- WebSocket endpoint for streaming
|
||||||
|
- REST endpoints for current/historical data
|
||||||
|
|
||||||
|
2. Dashboard UI
|
||||||
|
- Real-time CPU/Memory graphs per machine
|
||||||
|
- Current state table
|
||||||
|
- Simple, clean design
|
||||||
|
|
||||||
|
3. WebSocket bridge (Gateway ↔ Aggregator)
|
||||||
|
|
||||||
|
4. TimescaleDB integration
|
||||||
|
- Store historical metrics
|
||||||
|
- Query endpoints for time ranges
|
||||||
|
|
||||||
|
**Success Criteria:**
|
||||||
|
- Open dashboard, see live graphs updating
|
||||||
|
- Graphs show last hour of data
|
||||||
|
- Multiple machines displayed separately
|
||||||
|
|
||||||
|
### Phase 3: Alerts & Intelligence (1 week)
|
||||||
|
|
||||||
|
**Goal:** Add decision-making layer (interview focus)
|
||||||
|
|
||||||
|
**Deliverables:**
|
||||||
|
1. Alert Service
|
||||||
|
- Subscribes to event stream
|
||||||
|
- Evaluates threshold rules
|
||||||
|
- Triggers notifications
|
||||||
|
|
||||||
|
2. Configuration Service (gRPC)
|
||||||
|
- Dynamic threshold management
|
||||||
|
- Alert rule CRUD
|
||||||
|
- Stored in PostgreSQL
|
||||||
|
|
||||||
|
3. Event Stream implementation (Redis Pub/Sub or RabbitMQ)
|
||||||
|
|
||||||
|
4. Enhanced dashboard
|
||||||
|
- Alert indicators
|
||||||
|
- Alert history
|
||||||
|
- Threshold configuration UI
|
||||||
|
|
||||||
|
**Success Criteria:**
|
||||||
|
- Set CPU threshold at 80%
|
||||||
|
- Generate load (stress-ng)
|
||||||
|
- See alert trigger in dashboard
|
||||||
|
- Alert logged to database
|
||||||
|
|
||||||
|
### Phase 4: Interview Polish (Final week)
|
||||||
|
|
||||||
|
**Goal:** Demo-ready, production patterns visible
|
||||||
|
|
||||||
|
**Deliverables:**
|
||||||
|
1. Observability
|
||||||
|
- OpenTelemetry tracing (optional)
|
||||||
|
- Structured logging
|
||||||
|
- Health check endpoints
|
||||||
|
|
||||||
|
2. "Synthetic Transactions"
|
||||||
|
- Simulate business operations through system
|
||||||
|
- Track end-to-end latency
|
||||||
|
- Maps directly to payment processing demo
|
||||||
|
|
||||||
|
3. Documentation
|
||||||
|
- Architecture diagram
|
||||||
|
- Service interaction flows
|
||||||
|
- Deployment guide
|
||||||
|
|
||||||
|
4. Demo script
|
||||||
|
- Story to walk through
|
||||||
|
- Key talking points
|
||||||
|
- Domain mapping explanations
|
||||||
|
|
||||||
|
**Success Criteria:**
|
||||||
|
- Can deploy entire stack with one command
|
||||||
|
- Can explain every service's role
|
||||||
|
- Can map architecture to payment processing
|
||||||
|
- Demo runs smoothly without hiccups
|
||||||
|
|
||||||
|
## Key Technical Patterns to Demonstrate
|
||||||
|
|
||||||
|
### 1. gRPC Streaming Patterns
|
||||||
|
|
||||||
|
**Server-Side Streaming:**
|
||||||
|
```protobuf
|
||||||
|
// Collector streams metrics to aggregator
|
||||||
|
service MetricsService {
|
||||||
|
rpc StreamMetrics(MetricsRequest) returns (stream Metric) {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
**Bidirectional Streaming:**
|
||||||
|
```protobuf
|
||||||
|
// Two-way communication between services
|
||||||
|
service ControlService {
|
||||||
|
rpc ManageStream(stream Command) returns (stream Response) {}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Service Communication Patterns
|
||||||
|
|
||||||
|
- **Synchronous (gRPC):** Query current state, configuration
|
||||||
|
- **Asynchronous (Events):** Metric updates, alerts, audit logs
|
||||||
|
- **Streaming (gRPC + WebSocket):** Real-time data flow
|
||||||
|
|
||||||
|
### 3. Data Storage Patterns
|
||||||
|
|
||||||
|
- **Hot data (Redis):** Current state, recent metrics (last 5 minutes)
|
||||||
|
- **Warm data (TimescaleDB):** Historical metrics (last 30 days)
|
||||||
|
- **Cold data (Optional):** Archive to S3-compatible storage
|
||||||
|
|
||||||
|
### 4. Error Handling & Resilience
|
||||||
|
|
||||||
|
- gRPC retry logic with exponential backoff
|
||||||
|
- Circuit breaker pattern for service calls
|
||||||
|
- Graceful degradation (continue if one collector fails)
|
||||||
|
- Dead letter queue for failed events
|
||||||
|
|
||||||
|
## Proto Definitions (Starting Point)
|
||||||
|
|
||||||
|
```protobuf
|
||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package monitoring;
|
||||||
|
|
||||||
|
service MetricsService {
|
||||||
|
rpc StreamMetrics(MetricsRequest) returns (stream Metric) {}
|
||||||
|
rpc GetCurrentState(StateRequest) returns (MachineState) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
message MetricsRequest {
|
||||||
|
string machine_id = 1;
|
||||||
|
int32 interval_seconds = 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
message Metric {
|
||||||
|
string machine_id = 1;
|
||||||
|
int64 timestamp = 2;
|
||||||
|
MetricType type = 3;
|
||||||
|
double value = 4;
|
||||||
|
map<string, string> labels = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MetricType {
|
||||||
|
CPU_PERCENT = 0;
|
||||||
|
MEMORY_PERCENT = 1;
|
||||||
|
MEMORY_USED_GB = 2;
|
||||||
|
DISK_PERCENT = 3;
|
||||||
|
NETWORK_SENT_MBPS = 4;
|
||||||
|
NETWORK_RECV_MBPS = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
message MachineState {
|
||||||
|
string machine_id = 1;
|
||||||
|
int64 last_seen = 2;
|
||||||
|
repeated Metric current_metrics = 3;
|
||||||
|
HealthStatus health = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum HealthStatus {
|
||||||
|
HEALTHY = 0;
|
||||||
|
WARNING = 1;
|
||||||
|
CRITICAL = 2;
|
||||||
|
UNKNOWN = 3;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Project Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
system-monitor/
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── proto/
|
||||||
|
│ └── metrics.proto
|
||||||
|
├── services/
|
||||||
|
│ ├── collector/
|
||||||
|
│ │ ├── Dockerfile
|
||||||
|
│ │ ├── requirements.txt
|
||||||
|
│ │ ├── main.py
|
||||||
|
│ │ └── metrics.py
|
||||||
|
│ ├── aggregator/
|
||||||
|
│ │ ├── Dockerfile
|
||||||
|
│ │ ├── requirements.txt
|
||||||
|
│ │ ├── main.py
|
||||||
|
│ │ └── storage.py
|
||||||
|
│ ├── gateway/
|
||||||
|
│ │ ├── Dockerfile
|
||||||
|
│ │ ├── requirements.txt
|
||||||
|
│ │ ├── main.py
|
||||||
|
│ │ └── websocket.py
|
||||||
|
│ └── alerts/
|
||||||
|
│ ├── Dockerfile
|
||||||
|
│ ├── requirements.txt
|
||||||
|
│ ├── main.py
|
||||||
|
│ └── rules.py
|
||||||
|
├── web/
|
||||||
|
│ ├── static/
|
||||||
|
│ │ ├── css/
|
||||||
|
│ │ └── js/
|
||||||
|
│ └── templates/
|
||||||
|
│ └── dashboard.html
|
||||||
|
└── README.md
|
||||||
|
```
|
||||||
|
|
||||||
|
## Interview Talking Points
|
||||||
|
|
||||||
|
### Domain Mapping to Payments
|
||||||
|
|
||||||
|
**What you say:**
|
||||||
|
- "I built this to monitor my dev machines, but the architecture directly maps to payment processing"
|
||||||
|
- "Each machine streaming metrics is like a payment processor streaming transactions"
|
||||||
|
- "The aggregator normalizes data from different sources - same as aggregating from Stripe, PayPal, bank APIs"
|
||||||
|
- "Alert thresholds on resource usage are structurally identical to fraud detection thresholds"
|
||||||
|
- "The event stream for audit trails maps directly to payment audit logs"
|
||||||
|
|
||||||
|
### Technical Decisions to Highlight
|
||||||
|
|
||||||
|
**gRPC vs REST:**
|
||||||
|
- "I use gRPC between services for efficiency and strong typing"
|
||||||
|
- "FastAPI gateway exposes REST/WebSocket for browser clients"
|
||||||
|
- "This pattern is common - internal gRPC, external REST"
|
||||||
|
|
||||||
|
**Streaming vs Polling:**
|
||||||
|
- "Server-side streaming reduces network overhead"
|
||||||
|
- "Bidirectional streaming allows dynamic configuration updates"
|
||||||
|
- "WebSocket to browser maintains single connection"
|
||||||
|
|
||||||
|
**State Management:**
|
||||||
|
- "Redis for hot data - current state, needs fast access"
|
||||||
|
- "TimescaleDB for historical analysis - optimized for time-series"
|
||||||
|
- "This tiered storage approach scales to payment transaction volumes"
|
||||||
|
|
||||||
|
**Resilience:**
|
||||||
|
- "Each collector is independent - one failing doesn't affect others"
|
||||||
|
- "Circuit breaker prevents cascade failures"
|
||||||
|
- "Event stream decouples alert processing from metric ingestion"
|
||||||
|
|
||||||
|
### What NOT to Say
|
||||||
|
|
||||||
|
- Don't call it a "toy project" or "learning exercise"
|
||||||
|
- Don't apologize for running locally vs AWS
|
||||||
|
- Don't over-explain obvious things
|
||||||
|
- Don't claim it's production-ready when it's not
|
||||||
|
|
||||||
|
### What TO Say
|
||||||
|
|
||||||
|
- "I built this to solve a real problem I have"
|
||||||
|
- "Locally it uses PostgreSQL/Redis, in production these become Aurora/ElastiCache"
|
||||||
|
- "I focused on the architectural patterns since those transfer directly"
|
||||||
|
- "I'd keep developing this - it's genuinely useful"
|
||||||
|
|
||||||
|
## Development Guidelines
|
||||||
|
|
||||||
|
### Code Quality Standards
|
||||||
|
- Type hints throughout (Python 3.11+ syntax)
|
||||||
|
- Async/await patterns consistently
|
||||||
|
- Structured logging (JSON format)
|
||||||
|
- Error handling at all boundaries
|
||||||
|
- Unit tests for business logic
|
||||||
|
- Integration tests for service interactions
|
||||||
|
|
||||||
|
### Docker Best Practices
|
||||||
|
- Multi-stage builds
|
||||||
|
- Non-root users
|
||||||
|
- Health checks
|
||||||
|
- Resource limits
|
||||||
|
- Volume mounts for development
|
||||||
|
|
||||||
|
### Configuration Management
|
||||||
|
- Environment variables for all config
|
||||||
|
- Sensible defaults
|
||||||
|
- Config validation on startup
|
||||||
|
- No secrets in code
|
||||||
|
|
||||||
|
## AWS Mapping (For Interview Discussion)
|
||||||
|
|
||||||
|
**What you have → What it becomes:**
|
||||||
|
- PostgreSQL → Aurora PostgreSQL
|
||||||
|
- Redis → ElastiCache
|
||||||
|
- Docker Containers → ECS/Fargate or Lambda
|
||||||
|
- RabbitMQ/Redis Pub/Sub → SQS/SNS
|
||||||
|
- Docker Compose → CloudFormation/Terraform
|
||||||
|
- Local networking → VPC, Security Groups
|
||||||
|
|
||||||
|
**Key point:** "The architecture and patterns are production-ready, the infrastructure is local for development convenience"
|
||||||
|
|
||||||
|
## Common Pitfalls to Avoid
|
||||||
|
|
||||||
|
1. **Over-engineering Phase 1** - Resist adding features, just get streaming working
|
||||||
|
2. **Polished UI** - Don't waste time on design; a plain-looking dashboard built with htmx + basic CSS is fine
|
||||||
|
3. **Perfect metrics** - Mock data is OK early on, real psutil data comes later
|
||||||
|
4. **Complete coverage** - Better to have 3 services working perfectly than 10 half-done
|
||||||
|
5. **AWS deployment** - Local is fine, AWS costs money and adds complexity
|
||||||
|
|
||||||
|
## Success Metrics
|
||||||
|
|
||||||
|
**For Yourself:**
|
||||||
|
- [ ] Actually use the dashboard daily
|
||||||
|
- [ ] Catches a real issue before you notice
|
||||||
|
- [ ] Runs stable for 1+ week without intervention
|
||||||
|
|
||||||
|
**For Interview:**
|
||||||
|
- [ ] Can demo end-to-end in 5 minutes
|
||||||
|
- [ ] Can explain every service interaction
|
||||||
|
- [ ] Can map to payment domain fluently
|
||||||
|
- [ ] Shows understanding of production patterns
|
||||||
|
|
||||||
|
## Next Steps
|
||||||
|
|
||||||
|
1. Set up project structure
|
||||||
|
2. Define proto messages
|
||||||
|
3. Build Phase 1 MVP
|
||||||
|
4. Iterate based on what feels useful
|
||||||
|
5. Polish for demo when interview approaches
|
||||||
|
|
||||||
|
## Resources
|
||||||
|
|
||||||
|
- gRPC Python docs: https://grpc.io/docs/languages/python/
|
||||||
|
- FastAPI WebSockets: https://fastapi.tiangolo.com/advanced/websockets/
|
||||||
|
- TimescaleDB: https://docs.timescale.com/
|
||||||
|
- htmx: https://htmx.org/
|
||||||
|
|
||||||
|
## Questions to Ask Yourself During Development
|
||||||
|
|
||||||
|
- "Would I actually use this feature?"
|
||||||
|
- "How does this map to payments?"
|
||||||
|
- "Can I explain why I built it this way?"
|
||||||
|
- "What would break if X service failed?"
|
||||||
|
- "How would this scale to 1000 machines?"
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Final Note
|
||||||
|
|
||||||
|
This project works because it's:
|
||||||
|
1. **Real** - You'll use it
|
||||||
|
2. **Focused** - Shows specific patterns they care about
|
||||||
|
3. **Mappable** - Clear connection to their domain
|
||||||
|
4. **Yours** - Not a tutorial copy, demonstrates your thinking
|
||||||
|
|
||||||
|
Build it in phases, use it daily, and by interview time you'll have natural stories about trade-offs, failures, and learnings. That authenticity is more valuable than perfect code.
|
||||||
|
|
||||||
|
Good luck! 🚀
|
||||||
119
Tiltfile
Normal file
119
Tiltfile
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
# -*- mode: Python -*-
|
||||||
|
# Tiltfile for sysmonstm - local Kubernetes development
|
||||||
|
|
||||||
|
# Load extensions
|
||||||
|
load('ext://restart_process', 'docker_build_with_restart')
|
||||||
|
load('ext://namespace', 'namespace_create')
|
||||||
|
|
||||||
|
# Configuration
|
||||||
|
config.define_bool("no-volumes")
|
||||||
|
cfg = config.parse()
|
||||||
|
no_volumes = cfg.get("no-volumes", False)
|
||||||
|
|
||||||
|
# Create namespace
|
||||||
|
namespace_create('sysmonstm')
|
||||||
|
k8s_yaml(kustomize('k8s/overlays/local'))
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Docker builds with live reload
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Aggregator service
|
||||||
|
docker_build(
|
||||||
|
'sysmonstm-aggregator',
|
||||||
|
context='.',
|
||||||
|
dockerfile='services/aggregator/Dockerfile',
|
||||||
|
target='development',
|
||||||
|
live_update=[
|
||||||
|
sync('./services/aggregator', '/app/services/aggregator'),
|
||||||
|
sync('./shared', '/app/shared'),
|
||||||
|
sync('./proto', '/app/proto'),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Gateway service
|
||||||
|
docker_build(
|
||||||
|
'sysmonstm-gateway',
|
||||||
|
context='.',
|
||||||
|
dockerfile='services/gateway/Dockerfile',
|
||||||
|
target='development',
|
||||||
|
live_update=[
|
||||||
|
sync('./services/gateway', '/app/services/gateway'),
|
||||||
|
sync('./shared', '/app/shared'),
|
||||||
|
sync('./proto', '/app/proto'),
|
||||||
|
sync('./web', '/app/web'),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Alerts service
|
||||||
|
docker_build(
|
||||||
|
'sysmonstm-alerts',
|
||||||
|
context='.',
|
||||||
|
dockerfile='services/alerts/Dockerfile',
|
||||||
|
target='development',
|
||||||
|
live_update=[
|
||||||
|
sync('./services/alerts', '/app/services/alerts'),
|
||||||
|
sync('./shared', '/app/shared'),
|
||||||
|
],
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Resource configuration
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
# Infrastructure
|
||||||
|
k8s_resource('redis', labels=['infra'])
|
||||||
|
k8s_resource('timescaledb', labels=['infra'])
|
||||||
|
|
||||||
|
# Application services
|
||||||
|
k8s_resource(
|
||||||
|
'aggregator',
|
||||||
|
labels=['app'],
|
||||||
|
resource_deps=['redis', 'timescaledb'],
|
||||||
|
port_forwards=['50051:50051'],
|
||||||
|
)
|
||||||
|
|
||||||
|
k8s_resource(
|
||||||
|
'gateway',
|
||||||
|
labels=['app'],
|
||||||
|
resource_deps=['aggregator', 'redis'],
|
||||||
|
port_forwards=['8000:8000'],
|
||||||
|
)
|
||||||
|
|
||||||
|
k8s_resource(
|
||||||
|
'alerts',
|
||||||
|
labels=['app'],
|
||||||
|
resource_deps=['redis', 'timescaledb'],
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Local resources (optional - for running collector locally)
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
local_resource(
|
||||||
|
'collector-local',
|
||||||
|
serve_cmd='cd services/collector && python main.py',
|
||||||
|
deps=['services/collector', 'shared'],
|
||||||
|
resource_deps=['aggregator'],
|
||||||
|
labels=['collector'],
|
||||||
|
auto_init=False, # Don't start automatically
|
||||||
|
env={
|
||||||
|
'AGGREGATOR_URL': 'localhost:50051',
|
||||||
|
'MACHINE_ID': 'tilt-dev',
|
||||||
|
'COLLECTION_INTERVAL': '5',
|
||||||
|
'LOG_LEVEL': 'DEBUG',
|
||||||
|
'PYTHONPATH': '.',
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
# ============================================================================
|
||||||
|
# Convenience buttons
|
||||||
|
# ============================================================================
|
||||||
|
|
||||||
|
local_resource(
|
||||||
|
'proto-gen',
|
||||||
|
cmd='python -m grpc_tools.protoc -I./proto --python_out=./shared --grpc_python_out=./shared ./proto/metrics.proto',
|
||||||
|
deps=['proto/metrics.proto'],
|
||||||
|
labels=['tools'],
|
||||||
|
auto_init=False,
|
||||||
|
)
|
||||||
32
ctlptl.yaml
Normal file
32
ctlptl.yaml
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
# ctlptl configuration for Kind cluster
|
||||||
|
# Usage: ctlptl apply -f ctlptl.yaml
|
||||||
|
|
||||||
|
apiVersion: ctlptl.dev/v1alpha1
|
||||||
|
kind: Registry
|
||||||
|
name: sysmonstm-registry
|
||||||
|
port: 5005
|
||||||
|
---
|
||||||
|
apiVersion: ctlptl.dev/v1alpha1
|
||||||
|
kind: Cluster
|
||||||
|
product: kind
|
||||||
|
registry: sysmonstm-registry
|
||||||
|
kindV1Alpha4Cluster:
|
||||||
|
name: sysmonstm
|
||||||
|
nodes:
|
||||||
|
- role: control-plane
|
||||||
|
extraPortMappings:
|
||||||
|
# Gateway HTTP
|
||||||
|
- containerPort: 30080
|
||||||
|
hostPort: 8080
|
||||||
|
protocol: TCP
|
||||||
|
# Aggregator gRPC
|
||||||
|
- containerPort: 30051
|
||||||
|
hostPort: 50051
|
||||||
|
protocol: TCP
|
||||||
|
# Reserve memory for system daemons (t2.small compatibility)
|
||||||
|
kubeadmConfigPatches:
|
||||||
|
- |
|
||||||
|
kind: InitConfiguration
|
||||||
|
nodeRegistration:
|
||||||
|
kubeletExtraArgs:
|
||||||
|
system-reserved: memory=256Mi
|
||||||
48
docker-compose.override.yml
Normal file
48
docker-compose.override.yml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Development overrides - hot reload, mounted volumes, debug settings
|
||||||
|
# Usage: docker compose up (automatically includes this file)
|
||||||
|
|
||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
services:
|
||||||
|
aggregator:
|
||||||
|
build:
|
||||||
|
target: development
|
||||||
|
volumes:
|
||||||
|
- ./services/aggregator:/app/services/aggregator:ro
|
||||||
|
- ./shared:/app/shared:ro
|
||||||
|
- ./proto:/app/proto:ro
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: DEBUG
|
||||||
|
RELOAD: "true"
|
||||||
|
|
||||||
|
gateway:
|
||||||
|
build:
|
||||||
|
target: development
|
||||||
|
volumes:
|
||||||
|
- ./services/gateway:/app/services/gateway:ro
|
||||||
|
- ./shared:/app/shared:ro
|
||||||
|
- ./proto:/app/proto:ro
|
||||||
|
- ./web:/app/web:ro
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: DEBUG
|
||||||
|
RELOAD: "true"
|
||||||
|
|
||||||
|
alerts:
|
||||||
|
build:
|
||||||
|
target: development
|
||||||
|
volumes:
|
||||||
|
- ./services/alerts:/app/services/alerts:ro
|
||||||
|
- ./shared:/app/shared:ro
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: DEBUG
|
||||||
|
|
||||||
|
collector:
|
||||||
|
build:
|
||||||
|
target: development
|
||||||
|
volumes:
|
||||||
|
- ./services/collector:/app/services/collector:ro
|
||||||
|
- ./shared:/app/shared:ro
|
||||||
|
- ./proto:/app/proto:ro
|
||||||
|
environment:
|
||||||
|
LOG_LEVEL: DEBUG
|
||||||
|
COLLECTION_INTERVAL: 2
|
||||||
154
docker-compose.yml
Normal file
154
docker-compose.yml
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
version: "3.8"
|
||||||
|
|
||||||
|
# This file works both locally and on EC2 for demo purposes.
|
||||||
|
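# For local dev with hot-reload, use: docker compose -f docker-compose.yml -f docker-compose.override.yml up (equivalent to plain `docker compose up`, which merges docker-compose.override.yml automatically). To run without the dev overrides: docker compose -f docker-compose.yml up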
|
||||||
|
|
||||||
|
x-common-env: &common-env
|
||||||
|
REDIS_URL: redis://redis:6379
|
||||||
|
TIMESCALE_URL: postgresql://monitor:monitor@timescaledb:5432/monitor
|
||||||
|
EVENTS_BACKEND: redis_pubsub
|
||||||
|
LOG_LEVEL: ${LOG_LEVEL:-INFO}
|
||||||
|
LOG_FORMAT: json
|
||||||
|
|
||||||
|
x-healthcheck-defaults: &healthcheck-defaults
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
|
||||||
|
services:
|
||||||
|
# =============================================================================
|
||||||
|
# Infrastructure
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
redis:
|
||||||
|
image: redis:7-alpine
|
||||||
|
ports:
|
||||||
|
- "${REDIS_PORT:-6379}:6379"
|
||||||
|
volumes:
|
||||||
|
- redis-data:/data
|
||||||
|
healthcheck:
|
||||||
|
<<: *healthcheck-defaults
|
||||||
|
test: ["CMD", "redis-cli", "ping"]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128M
|
||||||
|
|
||||||
|
timescaledb:
|
||||||
|
image: timescale/timescaledb:latest-pg15
|
||||||
|
environment:
|
||||||
|
POSTGRES_USER: monitor
|
||||||
|
POSTGRES_PASSWORD: monitor
|
||||||
|
POSTGRES_DB: monitor
|
||||||
|
ports:
|
||||||
|
- "${TIMESCALE_PORT:-5432}:5432"
|
||||||
|
volumes:
|
||||||
|
- timescale-data:/var/lib/postgresql/data
|
||||||
|
- ./scripts/init-db.sql:/docker-entrypoint-initdb.d/init.sql:ro
|
||||||
|
healthcheck:
|
||||||
|
<<: *healthcheck-defaults
|
||||||
|
test: ["CMD-SHELL", "pg_isready -U monitor -d monitor"]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 512M
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Application Services
|
||||||
|
# =============================================================================
|
||||||
|
|
||||||
|
aggregator:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: services/aggregator/Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *common-env
|
||||||
|
GRPC_PORT: 50051
|
||||||
|
SERVICE_NAME: aggregator
|
||||||
|
ports:
|
||||||
|
- "${AGGREGATOR_GRPC_PORT:-50051}:50051"
|
||||||
|
depends_on:
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
timescaledb:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
<<: *healthcheck-defaults
|
||||||
|
test: ["CMD", "/bin/grpc_health_probe", "-addr=:50051"]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
gateway:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: services/gateway/Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *common-env
|
||||||
|
HTTP_PORT: 8000
|
||||||
|
AGGREGATOR_URL: aggregator:50051
|
||||||
|
SERVICE_NAME: gateway
|
||||||
|
ports:
|
||||||
|
- "${GATEWAY_PORT:-8000}:8000"
|
||||||
|
depends_on:
|
||||||
|
- aggregator
|
||||||
|
- redis
|
||||||
|
healthcheck:
|
||||||
|
<<: *healthcheck-defaults
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:8000/health"]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
alerts:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: services/alerts/Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *common-env
|
||||||
|
SERVICE_NAME: alerts
|
||||||
|
depends_on:
|
||||||
|
redis:
|
||||||
|
condition: service_healthy
|
||||||
|
timescaledb:
|
||||||
|
condition: service_healthy
|
||||||
|
healthcheck:
|
||||||
|
<<: *healthcheck-defaults
|
||||||
|
test: ["CMD", "python", "-c", "import sys; sys.exit(0)"]
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 128M
|
||||||
|
|
||||||
|
# Collector runs separately on each machine being monitored
|
||||||
|
# For local testing, we run one instance
|
||||||
|
collector:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: services/collector/Dockerfile
|
||||||
|
environment:
|
||||||
|
<<: *common-env
|
||||||
|
AGGREGATOR_URL: aggregator:50051
|
||||||
|
MACHINE_ID: ${MACHINE_ID:-local-dev}
|
||||||
|
COLLECTION_INTERVAL: ${COLLECTION_INTERVAL:-5}
|
||||||
|
SERVICE_NAME: collector
|
||||||
|
depends_on:
|
||||||
|
- aggregator
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 64M
|
||||||
|
# For actual system metrics, you might need:
|
||||||
|
# privileged: true
|
||||||
|
# pid: host
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
redis-data:
|
||||||
|
timescale-data:
|
||||||
|
|
||||||
|
networks:
|
||||||
|
default:
|
||||||
|
name: sysmonstm
|
||||||
78
docs/architecture/01-system-overview.dot
Normal file
78
docs/architecture/01-system-overview.dot
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
digraph SystemOverview {
|
||||||
|
// Graph settings
|
||||||
|
rankdir=TB;
|
||||||
|
compound=true;
|
||||||
|
fontname="Helvetica";
|
||||||
|
node [fontname="Helvetica", fontsize=11];
|
||||||
|
edge [fontname="Helvetica", fontsize=10];
|
||||||
|
|
||||||
|
// Title
|
||||||
|
labelloc="t";
|
||||||
|
label="System Monitoring Platform - Architecture Overview";
|
||||||
|
fontsize=16;
|
||||||
|
|
||||||
|
// Styling
|
||||||
|
node [shape=box, style="rounded,filled"];
|
||||||
|
|
||||||
|
// External
|
||||||
|
subgraph cluster_external {
|
||||||
|
label="External";
|
||||||
|
style=dashed;
|
||||||
|
color=gray;
|
||||||
|
|
||||||
|
browser [label="Browser\n(Dashboard)", fillcolor="#E3F2FD"];
|
||||||
|
machines [label="Monitored\nMachines", fillcolor="#FFF3E0", shape=box3d];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Core Services
|
||||||
|
subgraph cluster_services {
|
||||||
|
label="Application Services";
|
||||||
|
style=filled;
|
||||||
|
color="#E8F5E9";
|
||||||
|
fillcolor="#E8F5E9";
|
||||||
|
|
||||||
|
gateway [label="Gateway\n(FastAPI)", fillcolor="#C8E6C9"];
|
||||||
|
aggregator [label="Aggregator\n(gRPC Server)", fillcolor="#C8E6C9"];
|
||||||
|
alerts [label="Alerts\nService", fillcolor="#C8E6C9"];
|
||||||
|
collector [label="Collector\n(gRPC Client)", fillcolor="#DCEDC8"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Data Layer
|
||||||
|
subgraph cluster_data {
|
||||||
|
label="Data Layer";
|
||||||
|
style=filled;
|
||||||
|
color="#FFF8E1";
|
||||||
|
fillcolor="#FFF8E1";
|
||||||
|
|
||||||
|
redis [label="Redis\n(Pub/Sub + State)", fillcolor="#FFECB3", shape=cylinder];
|
||||||
|
timescale [label="TimescaleDB\n(Time-series)", fillcolor="#FFECB3", shape=cylinder];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Event Stream
|
||||||
|
subgraph cluster_events {
|
||||||
|
label="Event Stream";
|
||||||
|
style=filled;
|
||||||
|
color="#F3E5F5";
|
||||||
|
fillcolor="#F3E5F5";
|
||||||
|
|
||||||
|
events [label="Redis Pub/Sub\n(Events)", fillcolor="#E1BEE7", shape=hexagon];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connections
|
||||||
|
browser -> gateway [label="WebSocket\nREST", color="#1976D2"];
|
||||||
|
gateway -> aggregator [label="gRPC", color="#388E3C"];
|
||||||
|
gateway -> redis [label="State\nQuery", style=dashed];
|
||||||
|
gateway -> timescale [label="Historical\nQuery", style=dashed];
|
||||||
|
|
||||||
|
machines -> collector [label="psutil", color="#F57C00", style=dotted];
|
||||||
|
collector -> aggregator [label="gRPC\nStream", color="#388E3C"];
|
||||||
|
|
||||||
|
aggregator -> redis [label="Current\nState", color="#FFA000"];
|
||||||
|
aggregator -> timescale [label="Store\nMetrics", color="#FFA000"];
|
||||||
|
aggregator -> events [label="Publish", color="#7B1FA2"];
|
||||||
|
|
||||||
|
events -> alerts [label="Subscribe", color="#7B1FA2"];
|
||||||
|
events -> gateway [label="Subscribe", color="#7B1FA2"];
|
||||||
|
|
||||||
|
alerts -> timescale [label="Store\nAlerts", style=dashed];
|
||||||
|
}
|
||||||
193
docs/architecture/01-system-overview.svg
Normal file
193
docs/architecture/01-system-overview.svg
Normal file
@@ -0,0 +1,193 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||||
|
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<!-- Generated by graphviz version 14.1.1 (0)
|
||||||
|
-->
|
||||||
|
<!-- Title: SystemOverview Pages: 1 -->
|
||||||
|
<svg width="444pt" height="508pt"
|
||||||
|
viewBox="0.00 0.00 444.00 508.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||||
|
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 503.78)">
|
||||||
|
<title>SystemOverview</title>
|
||||||
|
<polygon fill="white" stroke="none" points="-4,4 -4,-503.78 440,-503.78 440,4 -4,4"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="218" y="-480.58" font-family="Helvetica,sans-Serif" font-size="16.00">System Monitoring Platform - Architecture Overview</text>
|
||||||
|
<g id="clust1" class="cluster">
|
||||||
|
<title>cluster_external</title>
|
||||||
|
<polygon fill="none" stroke="gray" stroke-dasharray="5,2" points="45.5,-374.2 45.5,-453.7 235.5,-453.7 235.5,-374.2 45.5,-374.2"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="140.5" y="-434.5" font-family="Helvetica,sans-Serif" font-size="16.00">External</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust2" class="cluster">
|
||||||
|
<title>cluster_services</title>
|
||||||
|
<polygon fill="#e8f5e9" stroke="#e8f5e9" points="101.5,-143.12 101.5,-320.12 363.5,-320.12 363.5,-143.12 101.5,-143.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="232.5" y="-300.93" font-family="Helvetica,sans-Serif" font-size="16.00">Application Services</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust3" class="cluster">
|
||||||
|
<title>cluster_data</title>
|
||||||
|
<polygon fill="#fff8e1" stroke="#fff8e1" points="22.5,-8 22.5,-99.62 260.5,-99.62 260.5,-8 22.5,-8"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="141.5" y="-80.42" font-family="Helvetica,sans-Serif" font-size="16.00">Data Layer</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust4" class="cluster">
|
||||||
|
<title>cluster_events</title>
|
||||||
|
<polygon fill="#f3e5f5" stroke="#f3e5f5" points="243.5,-363.62 243.5,-464.28 413.5,-464.28 413.5,-363.62 243.5,-363.62"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="328.5" y="-445.08" font-family="Helvetica,sans-Serif" font-size="16.00">Event Stream</text>
|
||||||
|
</g>
|
||||||
|
<!-- browser -->
|
||||||
|
<g id="node1" class="node">
|
||||||
|
<title>browser</title>
|
||||||
|
<path fill="#e3f2fd" stroke="black" d="M125.62,-418.2C125.62,-418.2 65.38,-418.2 65.38,-418.2 59.38,-418.2 53.38,-412.2 53.38,-406.2 53.38,-406.2 53.38,-394.2 53.38,-394.2 53.38,-388.2 59.38,-382.2 65.38,-382.2 65.38,-382.2 125.62,-382.2 125.62,-382.2 131.62,-382.2 137.62,-388.2 137.62,-394.2 137.62,-394.2 137.62,-406.2 137.62,-406.2 137.62,-412.2 131.62,-418.2 125.62,-418.2"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="95.5" y="-403.25" font-family="Helvetica,sans-Serif" font-size="11.00">Browser</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="95.5" y="-389.75" font-family="Helvetica,sans-Serif" font-size="11.00">(Dashboard)</text>
|
||||||
|
</g>
|
||||||
|
<!-- gateway -->
|
||||||
|
<g id="node3" class="node">
|
||||||
|
<title>gateway</title>
|
||||||
|
<path fill="#c8e6c9" stroke="black" d="M161.88,-284.62C161.88,-284.62 121.12,-284.62 121.12,-284.62 115.12,-284.62 109.12,-278.62 109.12,-272.62 109.12,-272.62 109.12,-260.62 109.12,-260.62 109.12,-254.62 115.12,-248.62 121.12,-248.62 121.12,-248.62 161.88,-248.62 161.88,-248.62 167.88,-248.62 173.88,-254.62 173.88,-260.62 173.88,-260.62 173.88,-272.62 173.88,-272.62 173.88,-278.62 167.88,-284.62 161.88,-284.62"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="141.5" y="-269.68" font-family="Helvetica,sans-Serif" font-size="11.00">Gateway</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="141.5" y="-256.18" font-family="Helvetica,sans-Serif" font-size="11.00">(FastAPI)</text>
|
||||||
|
</g>
|
||||||
|
<!-- browser->gateway -->
|
||||||
|
<g id="edge1" class="edge">
|
||||||
|
<title>browser->gateway</title>
|
||||||
|
<path fill="none" stroke="#1976d2" d="M92.73,-381.75C91.08,-367.05 90.32,-345.66 96.25,-328.12 100.5,-315.57 108.45,-303.5 116.51,-293.49"/>
|
||||||
|
<polygon fill="#1976d2" stroke="#1976d2" points="119.02,-295.94 122.86,-286.06 113.7,-291.39 119.02,-295.94"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="122.88" y="-344.12" font-family="Helvetica,sans-Serif" font-size="10.00">WebSocket</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="122.88" y="-331.38" font-family="Helvetica,sans-Serif" font-size="10.00">REST</text>
|
||||||
|
</g>
|
||||||
|
<!-- machines -->
|
||||||
|
<g id="node2" class="node">
|
||||||
|
<title>machines</title>
|
||||||
|
<polygon fill="#fff3e0" stroke="black" points="227.25,-418.2 159.75,-418.2 155.75,-414.2 155.75,-382.2 223.25,-382.2 227.25,-386.2 227.25,-418.2"/>
|
||||||
|
<polyline fill="none" stroke="black" points="223.25,-414.2 155.75,-414.2"/>
|
||||||
|
<polyline fill="none" stroke="black" points="223.25,-414.2 223.25,-382.2"/>
|
||||||
|
<polyline fill="none" stroke="black" points="223.25,-414.2 227.25,-418.2"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="191.5" y="-403.25" font-family="Helvetica,sans-Serif" font-size="11.00">Monitored</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="191.5" y="-389.75" font-family="Helvetica,sans-Serif" font-size="11.00">Machines</text>
|
||||||
|
</g>
|
||||||
|
<!-- collector -->
|
||||||
|
<g id="node6" class="node">
|
||||||
|
<title>collector</title>
|
||||||
|
<path fill="#dcedc8" stroke="black" d="M343.88,-284.62C343.88,-284.62 279.12,-284.62 279.12,-284.62 273.12,-284.62 267.12,-278.62 267.12,-272.62 267.12,-272.62 267.12,-260.62 267.12,-260.62 267.12,-254.62 273.12,-248.62 279.12,-248.62 279.12,-248.62 343.88,-248.62 343.88,-248.62 349.88,-248.62 355.88,-254.62 355.88,-260.62 355.88,-260.62 355.88,-272.62 355.88,-272.62 355.88,-278.62 349.88,-284.62 343.88,-284.62"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="311.5" y="-269.68" font-family="Helvetica,sans-Serif" font-size="11.00">Collector</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="311.5" y="-256.18" font-family="Helvetica,sans-Serif" font-size="11.00">(gRPC Client)</text>
|
||||||
|
</g>
|
||||||
|
<!-- machines->collector -->
|
||||||
|
<g id="edge5" class="edge">
|
||||||
|
<title>machines->collector</title>
|
||||||
|
<path fill="none" stroke="#f57c00" stroke-dasharray="1,5" d="M210.81,-381.83C219.12,-375.21 229.26,-368.17 239.5,-363.62 260.21,-354.43 273.06,-369.22 289.5,-353.62 304.98,-338.94 310.15,-314.98 311.64,-296.08"/>
|
||||||
|
<polygon fill="#f57c00" stroke="#f57c00" points="315.12,-296.47 312.08,-286.32 308.13,-296.15 315.12,-296.47"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="318.1" y="-337.75" font-family="Helvetica,sans-Serif" font-size="10.00">psutil</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator -->
|
||||||
|
<g id="node4" class="node">
|
||||||
|
<title>aggregator</title>
|
||||||
|
<path fill="#c8e6c9" stroke="black" d="M343.12,-187.12C343.12,-187.12 273.88,-187.12 273.88,-187.12 267.88,-187.12 261.88,-181.12 261.88,-175.12 261.88,-175.12 261.88,-163.12 261.88,-163.12 261.88,-157.12 267.88,-151.12 273.88,-151.12 273.88,-151.12 343.12,-151.12 343.12,-151.12 349.12,-151.12 355.12,-157.12 355.12,-163.12 355.12,-163.12 355.12,-175.12 355.12,-175.12 355.12,-181.12 349.12,-187.12 343.12,-187.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="308.5" y="-172.18" font-family="Helvetica,sans-Serif" font-size="11.00">Aggregator</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="308.5" y="-158.68" font-family="Helvetica,sans-Serif" font-size="11.00">(gRPC Server)</text>
|
||||||
|
</g>
|
||||||
|
<!-- gateway->aggregator -->
|
||||||
|
<g id="edge2" class="edge">
|
||||||
|
<title>gateway->aggregator</title>
|
||||||
|
<path fill="none" stroke="#388e3c" d="M171.74,-248.33C198.77,-232.88 238.56,-210.12 268.26,-193.13"/>
|
||||||
|
<polygon fill="#388e3c" stroke="#388e3c" points="269.66,-196.37 276.6,-188.36 266.19,-190.29 269.66,-196.37"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="257.62" y="-214.75" font-family="Helvetica,sans-Serif" font-size="10.00">gRPC</text>
|
||||||
|
</g>
|
||||||
|
<!-- redis -->
|
||||||
|
<g id="node7" class="node">
|
||||||
|
<title>redis</title>
|
||||||
|
<path fill="#ffecb3" stroke="black" d="M146,-59.75C146,-62.16 120.23,-64.12 88.5,-64.12 56.77,-64.12 31,-62.16 31,-59.75 31,-59.75 31,-20.38 31,-20.38 31,-17.96 56.77,-16 88.5,-16 120.23,-16 146,-17.96 146,-20.38 146,-20.38 146,-59.75 146,-59.75"/>
|
||||||
|
<path fill="none" stroke="black" d="M146,-59.75C146,-57.34 120.23,-55.38 88.5,-55.38 56.77,-55.38 31,-57.34 31,-59.75"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="88.5" y="-43.11" font-family="Helvetica,sans-Serif" font-size="11.00">Redis</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="88.5" y="-29.61" font-family="Helvetica,sans-Serif" font-size="11.00">(Pub/Sub + State)</text>
|
||||||
|
</g>
|
||||||
|
<!-- gateway->redis -->
|
||||||
|
<g id="edge3" class="edge">
|
||||||
|
<title>gateway->redis</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M122.74,-248.35C108.28,-233.68 89.42,-211.2 81.25,-187.12 68.86,-150.62 73.72,-106.03 79.72,-75.79"/>
|
||||||
|
<polygon fill="black" stroke="black" points="83.14,-76.56 81.82,-66.04 76.29,-75.08 83.14,-76.56"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="95.88" y="-172.38" font-family="Helvetica,sans-Serif" font-size="10.00">State</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="95.88" y="-159.62" font-family="Helvetica,sans-Serif" font-size="10.00">Query</text>
|
||||||
|
</g>
|
||||||
|
<!-- timescale -->
|
||||||
|
<g id="node8" class="node">
|
||||||
|
<title>timescale</title>
|
||||||
|
<path fill="#ffecb3" stroke="black" d="M252.88,-59.75C252.88,-62.16 232.99,-64.12 208.5,-64.12 184.01,-64.12 164.12,-62.16 164.12,-59.75 164.12,-59.75 164.12,-20.38 164.12,-20.38 164.12,-17.96 184.01,-16 208.5,-16 232.99,-16 252.88,-17.96 252.88,-20.38 252.88,-20.38 252.88,-59.75 252.88,-59.75"/>
|
||||||
|
<path fill="none" stroke="black" d="M252.88,-59.75C252.88,-57.34 232.99,-55.38 208.5,-55.38 184.01,-55.38 164.12,-57.34 164.12,-59.75"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="208.5" y="-43.11" font-family="Helvetica,sans-Serif" font-size="11.00">TimescaleDB</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="208.5" y="-29.61" font-family="Helvetica,sans-Serif" font-size="11.00">(Time-series)</text>
|
||||||
|
</g>
|
||||||
|
<!-- gateway->timescale -->
|
||||||
|
<g id="edge4" class="edge">
|
||||||
|
<title>gateway->timescale</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M143.41,-248.29C146.34,-224.28 152.82,-179.73 164,-143.12 171.19,-119.57 182.25,-94.18 191.54,-74.62"/>
|
||||||
|
<polygon fill="black" stroke="black" points="194.62,-76.29 195.83,-65.76 188.32,-73.24 194.62,-76.29"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="187.25" y="-172.38" font-family="Helvetica,sans-Serif" font-size="10.00">Historical</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="187.25" y="-159.62" font-family="Helvetica,sans-Serif" font-size="10.00">Query</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator->redis -->
|
||||||
|
<g id="edge7" class="edge">
|
||||||
|
<title>aggregator->redis</title>
|
||||||
|
<path fill="none" stroke="#ffa000" d="M267.27,-150.69C261,-148.11 254.59,-145.52 248.5,-143.12 236.59,-138.44 233.22,-138.25 221.5,-133.12 191.36,-119.95 182.76,-118.04 155.5,-99.62 143.6,-91.59 131.5,-81.66 120.93,-72.28"/>
|
||||||
|
<polygon fill="#ffa000" stroke="#ffa000" points="123.32,-69.73 113.56,-65.6 118.62,-74.91 123.32,-69.73"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="239.5" y="-123.62" font-family="Helvetica,sans-Serif" font-size="10.00">Current</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="239.5" y="-110.88" font-family="Helvetica,sans-Serif" font-size="10.00">State</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator->timescale -->
|
||||||
|
<g id="edge8" class="edge">
|
||||||
|
<title>aggregator->timescale</title>
|
||||||
|
<path fill="none" stroke="#ffa000" d="M294.81,-150.72C279.15,-130.84 253.2,-97.86 233.84,-73.25"/>
|
||||||
|
<polygon fill="#ffa000" stroke="#ffa000" points="236.64,-71.16 227.71,-65.47 231.14,-75.49 236.64,-71.16"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="296.95" y="-123.62" font-family="Helvetica,sans-Serif" font-size="10.00">Store</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="296.95" y="-110.88" font-family="Helvetica,sans-Serif" font-size="10.00">Metrics</text>
|
||||||
|
</g>
|
||||||
|
<!-- events -->
|
||||||
|
<g id="node9" class="node">
|
||||||
|
<title>events</title>
|
||||||
|
<path fill="#e1bee7" stroke="black" d="M395.63,-407.37C395.63,-407.37 376.5,-421.61 376.5,-421.61 371.69,-425.2 360.88,-428.78 354.88,-428.78 354.88,-428.78 302.12,-428.78 302.12,-428.78 296.12,-428.78 285.31,-425.2 280.5,-421.61 280.5,-421.61 261.37,-407.37 261.37,-407.37 256.56,-403.79 256.56,-396.62 261.37,-393.04 261.37,-393.04 280.5,-378.79 280.5,-378.79 285.31,-375.21 296.12,-371.62 302.12,-371.62 302.12,-371.62 354.88,-371.62 354.88,-371.62 360.88,-371.62 371.69,-375.21 376.5,-378.79 376.5,-378.79 395.63,-393.04 395.63,-393.04 400.44,-396.62 400.44,-403.79 395.63,-407.37"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="328.5" y="-403.25" font-family="Helvetica,sans-Serif" font-size="11.00">Redis Pub/Sub</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="328.5" y="-389.75" font-family="Helvetica,sans-Serif" font-size="11.00">(Events)</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator->events -->
|
||||||
|
<g id="edge9" class="edge">
|
||||||
|
<title>aggregator->events</title>
|
||||||
|
<path fill="none" stroke="#7b1fa2" d="M333.16,-187.49C339.14,-192.63 345.07,-198.63 349.5,-205.12 361.02,-222.03 361.12,-228.46 364.5,-248.62 369.75,-279.97 371.24,-289.07 364.5,-320.12 361.48,-334.06 355.78,-348.49 349.79,-361.14"/>
|
||||||
|
<polygon fill="#7b1fa2" stroke="#7b1fa2" points="346.73,-359.44 345.42,-369.95 353,-362.55 346.73,-359.44"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="386.64" y="-263.5" font-family="Helvetica,sans-Serif" font-size="10.00">Publish</text>
|
||||||
|
</g>
|
||||||
|
<!-- alerts -->
|
||||||
|
<g id="node5" class="node">
|
||||||
|
<title>alerts</title>
|
||||||
|
<path fill="#c8e6c9" stroke="black" d="M236.75,-284.62C236.75,-284.62 204.25,-284.62 204.25,-284.62 198.25,-284.62 192.25,-278.62 192.25,-272.62 192.25,-272.62 192.25,-260.62 192.25,-260.62 192.25,-254.62 198.25,-248.62 204.25,-248.62 204.25,-248.62 236.75,-248.62 236.75,-248.62 242.75,-248.62 248.75,-254.62 248.75,-260.62 248.75,-260.62 248.75,-272.62 248.75,-272.62 248.75,-278.62 242.75,-284.62 236.75,-284.62"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="220.5" y="-269.68" font-family="Helvetica,sans-Serif" font-size="11.00">Alerts</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="220.5" y="-256.18" font-family="Helvetica,sans-Serif" font-size="11.00">Service</text>
|
||||||
|
</g>
|
||||||
|
<!-- alerts->timescale -->
|
||||||
|
<g id="edge12" class="edge">
|
||||||
|
<title>alerts->timescale</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M219.58,-248.38C217.61,-211.47 212.94,-124.24 210.34,-75.51"/>
|
||||||
|
<polygon fill="black" stroke="black" points="213.85,-75.6 209.82,-65.8 206.86,-75.97 213.85,-75.6"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="230.53" y="-172.38" font-family="Helvetica,sans-Serif" font-size="10.00">Store</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="230.53" y="-159.62" font-family="Helvetica,sans-Serif" font-size="10.00">Alerts</text>
|
||||||
|
</g>
|
||||||
|
<!-- collector->aggregator -->
|
||||||
|
<g id="edge6" class="edge">
|
||||||
|
<title>collector->aggregator</title>
|
||||||
|
<path fill="none" stroke="#388e3c" d="M310.96,-248.55C310.53,-234.65 309.9,-214.73 309.39,-198.45"/>
|
||||||
|
<polygon fill="#388e3c" stroke="#388e3c" points="312.9,-198.77 309.09,-188.89 305.91,-198.99 312.9,-198.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="327.98" y="-221.12" font-family="Helvetica,sans-Serif" font-size="10.00">gRPC</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="327.98" y="-208.38" font-family="Helvetica,sans-Serif" font-size="10.00">Stream</text>
|
||||||
|
</g>
|
||||||
|
<!-- events->gateway -->
|
||||||
|
<g id="edge11" class="edge">
|
||||||
|
<title>events->gateway</title>
|
||||||
|
<path fill="none" stroke="#7b1fa2" d="M281.13,-378.02C267.86,-372.71 253.29,-367.44 239.5,-363.62 212.49,-356.16 199.25,-370.98 177.25,-353.62 159.49,-339.61 150.46,-315.21 145.93,-295.98"/>
|
||||||
|
<polygon fill="#7b1fa2" stroke="#7b1fa2" points="149.38,-295.39 143.95,-286.29 142.52,-296.79 149.38,-295.39"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="200.88" y="-337.75" font-family="Helvetica,sans-Serif" font-size="10.00">Subscribe</text>
|
||||||
|
</g>
|
||||||
|
<!-- events->alerts -->
|
||||||
|
<g id="edge10" class="edge">
|
||||||
|
<title>events->alerts</title>
|
||||||
|
<path fill="none" stroke="#7b1fa2" d="M277.27,-380.98C264.23,-374.18 251.36,-365.21 242.25,-353.62 229.43,-337.32 224.08,-314.36 221.89,-296.26"/>
|
||||||
|
<polygon fill="#7b1fa2" stroke="#7b1fa2" points="225.38,-296.07 220.98,-286.43 218.41,-296.71 225.38,-296.07"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="265.88" y="-337.75" font-family="Helvetica,sans-Serif" font-size="10.00">Subscribe</text>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 16 KiB |
83
docs/architecture/02-data-flow.dot
Normal file
83
docs/architecture/02-data-flow.dot
Normal file
@@ -0,0 +1,83 @@
|
|||||||
|
// Data-flow diagram: a left-to-right view of how metrics move from
// collection, through ingestion/validation, into the Redis "hot path" and
// the tiered "warm path" tables, and on to the consumers.
// The rendered counterpart is docs/architecture/02-data-flow.svg; statement
// order here influences Graphviz layout, so regenerate the SVG after edits.
digraph DataFlow {
|
||||||
|
rankdir=LR;
|
||||||
|
// NOTE(review): no edge in this graph uses lhead/ltail, so compound=true
// appears to have no effect here -- confirm before removing.
compound=true;
|
||||||
|
fontname="Helvetica";
|
||||||
|
node [fontname="Helvetica", fontsize=10];
|
||||||
|
edge [fontname="Helvetica", fontsize=9];
|
||||||
|
|
||||||
|
// Graph title, placed at the top of the drawing.
labelloc="t";
|
||||||
|
label="Metrics Data Flow Pipeline";
|
||||||
|
fontsize=14;
|
||||||
|
|
||||||
|
// Default node look; individual nodes below override shape/style as needed.
node [shape=box, style="rounded,filled"];
|
||||||
|
|
||||||
|
// Collection: psutil readings feed the Collector service.
// The "(5s)" in the cluster label is presumably the sampling interval -- confirm.
|
||||||
|
subgraph cluster_collect {
|
||||||
|
label="Collection (5s)";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E3F2FD";
|
||||||
|
|
||||||
|
psutil [label="psutil\n(CPU, Mem, Disk)", shape=component, fillcolor="#BBDEFB"];
|
||||||
|
collector [label="Collector\nService", fillcolor="#90CAF9"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ingestion: the aggregator receives the collector stream and hands it to a
// validate/normalize step, which fans out to the storage paths below.
|
||||||
|
subgraph cluster_ingest {
|
||||||
|
label="Ingestion";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E8F5E9";
|
||||||
|
|
||||||
|
aggregator [label="Aggregator\n(gRPC)", fillcolor="#A5D6A7"];
|
||||||
|
validate [label="Validate &\nNormalize", shape=diamond, fillcolor="#C8E6C9"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage, hot path: Redis holds current state and carries pub/sub traffic.
|
||||||
|
subgraph cluster_hot {
|
||||||
|
label="Hot Path (Real-time)";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#FFF3E0";
|
||||||
|
|
||||||
|
redis_state [label="Redis\nCurrent State", shape=cylinder, fillcolor="#FFCC80"];
|
||||||
|
redis_pubsub [label="Redis\nPub/Sub", shape=hexagon, fillcolor="#FFB74D"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Storage, warm path: three historical tables at increasing granularity.
// The pairs in the labels, e.g. "(5s, 24h)", presumably mean
// (resolution, retention) -- confirm against the schema.
|
||||||
|
subgraph cluster_warm {
|
||||||
|
label="Warm Path (Historical)";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#FCE4EC";
|
||||||
|
|
||||||
|
raw [label="metrics_raw\n(5s, 24h)", shape=cylinder, fillcolor="#F8BBD9"];
|
||||||
|
agg_1m [label="metrics_1m\n(1m, 7d)", shape=cylinder, fillcolor="#F48FB1"];
|
||||||
|
agg_1h [label="metrics_1h\n(1h, 90d)", shape=cylinder, fillcolor="#EC407A"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Consumers: services downstream of the pipeline. The Lambda node is the
// only one drawn with a dashed outline.
|
||||||
|
subgraph cluster_consume {
|
||||||
|
label="Consumers";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E8EAF6";
|
||||||
|
|
||||||
|
alerts [label="Alert\nService", fillcolor="#C5CAE9"];
|
||||||
|
gateway [label="Gateway\n(WebSocket)", fillcolor="#9FA8DA"];
|
||||||
|
lambda [label="Lambda\nAggregator", fillcolor="#7986CB", style="rounded,filled,dashed"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flow: edges are grouped as main pipeline, fan-out from validation,
// pub/sub delivery, table roll-ups (dashed) and the Lambda path (dotted).
|
||||||
|
psutil -> collector [label="Metrics"];
|
||||||
|
collector -> aggregator [label="gRPC\nStream"];
|
||||||
|
aggregator -> validate;
|
||||||
|
|
||||||
|
// Validated metrics go to three destinations.
validate -> redis_state [label="Upsert"];
|
||||||
|
validate -> redis_pubsub [label="Publish"];
|
||||||
|
validate -> raw [label="Insert"];
|
||||||
|
|
||||||
|
// Both real-time consumers are fed from pub/sub with the same "metrics.*" label.
redis_pubsub -> alerts [label="metrics.*"];
|
||||||
|
redis_pubsub -> gateway [label="metrics.*"];
|
||||||
|
|
||||||
|
// Dashed edges: roll-ups between the warm-path tables.
raw -> agg_1m [label="Continuous\nAggregate", style=dashed];
|
||||||
|
agg_1m -> agg_1h [label="Hourly\nJob", style=dashed];
|
||||||
|
|
||||||
|
// Dotted edges: the Lambda route from raw data into metrics_1m.
raw -> lambda [label="SQS\nTrigger", style=dotted];
|
||||||
|
lambda -> agg_1m [label="Batch\nWrite", style=dotted];
|
||||||
|
}
|
||||||
217
docs/architecture/02-data-flow.svg
Normal file
217
docs/architecture/02-data-flow.svg
Normal file
@@ -0,0 +1,217 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||||
|
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<!-- Generated by graphviz version 14.1.1 (0)
|
||||||
|
-->
|
||||||
|
<!-- Title: DataFlow Pages: 1 -->
|
||||||
|
<svg width="1087pt" height="329pt"
|
||||||
|
viewBox="0.00 0.00 1087.00 329.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||||
|
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 325.25)">
|
||||||
|
<title>DataFlow</title>
|
||||||
|
<polygon fill="white" stroke="none" points="-4,4 -4,-325.25 1082.5,-325.25 1082.5,4 -4,4"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="539.25" y="-303.95" font-family="Helvetica,sans-Serif" font-size="14.00">Metrics Data Flow Pipeline</text>
|
||||||
|
<g id="clust1" class="cluster">
|
||||||
|
<title>cluster_collect</title>
|
||||||
|
<polygon fill="#e3f2fd" stroke="black" points="8,-111 8,-188 254,-188 254,-111 8,-111"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="131" y="-170.7" font-family="Helvetica,sans-Serif" font-size="14.00">Collection (5s)</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust2" class="cluster">
|
||||||
|
<title>cluster_ingest</title>
|
||||||
|
<polygon fill="#e8f5e9" stroke="black" points="307,-95 307,-204 562.5,-204 562.5,-95 307,-95"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="434.75" y="-186.7" font-family="Helvetica,sans-Serif" font-size="14.00">Ingestion</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust3" class="cluster">
|
||||||
|
<title>cluster_hot</title>
|
||||||
|
<polygon fill="#fff3e0" stroke="black" points="614.75,-34 614.75,-193 769.5,-193 769.5,-34 614.75,-34"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="692.12" y="-175.7" font-family="Helvetica,sans-Serif" font-size="14.00">Hot Path (Real-time)</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust4" class="cluster">
|
||||||
|
<title>cluster_warm</title>
|
||||||
|
<polygon fill="#fce4ec" stroke="black" points="645.62,-201 645.62,-288 1070.5,-288 1070.5,-201 645.62,-201"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="858.06" y="-270.7" font-family="Helvetica,sans-Serif" font-size="14.00">Warm Path (Historical)</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust5" class="cluster">
|
||||||
|
<title>cluster_consume</title>
|
||||||
|
<polygon fill="#e8eaf6" stroke="black" points="840.5,-8 840.5,-193 935.25,-193 935.25,-8 840.5,-8"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.88" y="-175.7" font-family="Helvetica,sans-Serif" font-size="14.00">Consumers</text>
|
||||||
|
</g>
|
||||||
|
<!-- psutil -->
|
||||||
|
<g id="node1" class="node">
|
||||||
|
<title>psutil</title>
|
||||||
|
<polygon fill="#bbdefb" stroke="black" points="118.25,-155 16,-155 16,-151 12,-151 12,-147 16,-147 16,-127 12,-127 12,-123 16,-123 16,-119 118.25,-119 118.25,-155"/>
|
||||||
|
<polyline fill="none" stroke="black" points="16,-151 20,-151 20,-147 16,-147"/>
|
||||||
|
<polyline fill="none" stroke="black" points="16,-127 20,-127 20,-123 16,-123"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="67.13" y="-140.25" font-family="Helvetica,sans-Serif" font-size="10.00">psutil</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="67.13" y="-127.5" font-family="Helvetica,sans-Serif" font-size="10.00">(CPU, Mem, Disk)</text>
|
||||||
|
</g>
|
||||||
|
<!-- collector -->
|
||||||
|
<g id="node2" class="node">
|
||||||
|
<title>collector</title>
|
||||||
|
<path fill="#90caf9" stroke="black" d="M234,-155C234,-155 198.5,-155 198.5,-155 192.5,-155 186.5,-149 186.5,-143 186.5,-143 186.5,-131 186.5,-131 186.5,-125 192.5,-119 198.5,-119 198.5,-119 234,-119 234,-119 240,-119 246,-125 246,-131 246,-131 246,-143 246,-143 246,-149 240,-155 234,-155"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="216.25" y="-140.25" font-family="Helvetica,sans-Serif" font-size="10.00">Collector</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="216.25" y="-127.5" font-family="Helvetica,sans-Serif" font-size="10.00">Service</text>
|
||||||
|
</g>
|
||||||
|
<!-- psutil->collector -->
|
||||||
|
<g id="edge1" class="edge">
|
||||||
|
<title>psutil->collector</title>
|
||||||
|
<path fill="none" stroke="black" d="M118.35,-137C136.74,-137 157.31,-137 174.75,-137"/>
|
||||||
|
<polygon fill="black" stroke="black" points="174.75,-140.5 184.75,-137 174.75,-133.5 174.75,-140.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="152.38" y="-139.7" font-family="Helvetica,sans-Serif" font-size="9.00">Metrics</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator -->
|
||||||
|
<g id="node3" class="node">
|
||||||
|
<title>aggregator</title>
|
||||||
|
<path fill="#a5d6a7" stroke="black" d="M373,-155C373,-155 327,-155 327,-155 321,-155 315,-149 315,-143 315,-143 315,-131 315,-131 315,-125 321,-119 327,-119 327,-119 373,-119 373,-119 379,-119 385,-125 385,-131 385,-131 385,-143 385,-143 385,-149 379,-155 373,-155"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="350" y="-140.25" font-family="Helvetica,sans-Serif" font-size="10.00">Aggregator</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="350" y="-127.5" font-family="Helvetica,sans-Serif" font-size="10.00">(gRPC)</text>
|
||||||
|
</g>
|
||||||
|
<!-- collector->aggregator -->
|
||||||
|
<g id="edge2" class="edge">
|
||||||
|
<title>collector->aggregator</title>
|
||||||
|
<path fill="none" stroke="black" d="M246.49,-137C263.19,-137 284.49,-137 303.35,-137"/>
|
||||||
|
<polygon fill="black" stroke="black" points="303.2,-140.5 313.2,-137 303.2,-133.5 303.2,-140.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="280.5" y="-150.95" font-family="Helvetica,sans-Serif" font-size="9.00">gRPC</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="280.5" y="-139.7" font-family="Helvetica,sans-Serif" font-size="9.00">Stream</text>
|
||||||
|
</g>
|
||||||
|
<!-- validate -->
|
||||||
|
<g id="node4" class="node">
|
||||||
|
<title>validate</title>
|
||||||
|
<path fill="#c8e6c9" stroke="black" d="M477.54,-165.08C477.54,-165.08 432.71,-142.42 432.71,-142.42 427.35,-139.71 427.35,-134.29 432.71,-131.58 432.71,-131.58 477.54,-108.92 477.54,-108.92 482.9,-106.21 493.6,-106.21 498.96,-108.92 498.96,-108.92 543.79,-131.58 543.79,-131.58 549.15,-134.29 549.15,-139.71 543.79,-142.42 543.79,-142.42 498.96,-165.08 498.96,-165.08 493.6,-167.79 482.9,-167.79 477.54,-165.08"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="488.25" y="-140.25" font-family="Helvetica,sans-Serif" font-size="10.00">Validate &amp;</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="488.25" y="-127.5" font-family="Helvetica,sans-Serif" font-size="10.00">Normalize</text>
|
||||||
|
</g>
|
||||||
|
<!-- aggregator->validate -->
|
||||||
|
<g id="edge3" class="edge">
|
||||||
|
<title>aggregator->validate</title>
|
||||||
|
<path fill="none" stroke="black" d="M385.38,-137C392.95,-137 401.25,-137 409.76,-137"/>
|
||||||
|
<polygon fill="black" stroke="black" points="409.49,-140.5 419.49,-137 409.49,-133.5 409.49,-140.5"/>
|
||||||
|
</g>
|
||||||
|
<!-- redis_state -->
|
||||||
|
<g id="node5" class="node">
|
||||||
|
<title>redis_state</title>
|
||||||
|
<path fill="#ffcc80" stroke="black" d="M731.88,-155.84C731.88,-158.15 713.83,-160.03 691.62,-160.03 669.42,-160.03 651.38,-158.15 651.38,-155.84 651.38,-155.84 651.38,-118.16 651.38,-118.16 651.38,-115.85 669.42,-113.97 691.62,-113.97 713.83,-113.97 731.88,-115.85 731.88,-118.16 731.88,-118.16 731.88,-155.84 731.88,-155.84"/>
|
||||||
|
<path fill="none" stroke="black" d="M731.88,-155.84C731.88,-153.53 713.83,-151.66 691.62,-151.66 669.42,-151.66 651.38,-153.53 651.38,-155.84"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-140.25" font-family="Helvetica,sans-Serif" font-size="10.00">Redis</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-127.5" font-family="Helvetica,sans-Serif" font-size="10.00">Current State</text>
|
||||||
|
</g>
|
||||||
|
<!-- validate->redis_state -->
|
||||||
|
<g id="edge4" class="edge">
|
||||||
|
<title>validate->redis_state</title>
|
||||||
|
<path fill="none" stroke="black" d="M555.47,-137C582.9,-137 614.22,-137 639.8,-137"/>
|
||||||
|
<polygon fill="black" stroke="black" points="639.6,-140.5 649.6,-137 639.6,-133.5 639.6,-140.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="588.62" y="-139.7" font-family="Helvetica,sans-Serif" font-size="9.00">Upsert</text>
|
||||||
|
</g>
|
||||||
|
<!-- redis_pubsub -->
|
||||||
|
<g id="node6" class="node">
|
||||||
|
<title>redis_pubsub</title>
|
||||||
|
<path fill="#ffb74d" stroke="black" d="M729.05,-78.12C729.05,-78.12 721.56,-87.24 721.56,-87.24 717.82,-91.79 708.18,-96.35 702.28,-96.35 702.28,-96.35 680.97,-96.35 680.97,-96.35 675.07,-96.35 665.43,-91.79 661.69,-87.24 661.69,-87.24 654.2,-78.12 654.2,-78.12 650.46,-73.56 650.46,-64.44 654.2,-59.88 654.2,-59.88 661.69,-50.76 661.69,-50.76 665.43,-46.21 675.07,-41.65 680.97,-41.65 680.97,-41.65 702.28,-41.65 702.28,-41.65 708.18,-41.65 717.82,-46.21 721.56,-50.76 721.56,-50.76 729.05,-59.88 729.05,-59.88 732.79,-64.44 732.79,-73.56 729.05,-78.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-72.25" font-family="Helvetica,sans-Serif" font-size="10.00">Redis</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-59.5" font-family="Helvetica,sans-Serif" font-size="10.00">Pub/Sub</text>
|
||||||
|
</g>
|
||||||
|
<!-- validate->redis_pubsub -->
|
||||||
|
<g id="edge5" class="edge">
|
||||||
|
<title>validate->redis_pubsub</title>
|
||||||
|
<path fill="none" stroke="black" d="M529.04,-123.57C562.44,-112.28 610.18,-96.17 645.1,-84.37"/>
|
||||||
|
<polygon fill="black" stroke="black" points="646.17,-87.71 654.53,-81.19 643.93,-81.07 646.17,-87.71"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="588.62" y="-109.77" font-family="Helvetica,sans-Serif" font-size="9.00">Publish</text>
|
||||||
|
</g>
|
||||||
|
<!-- raw -->
|
||||||
|
<g id="node7" class="node">
|
||||||
|
<title>raw</title>
|
||||||
|
<path fill="#f8bbd9" stroke="black" d="M729.62,-250.84C729.62,-253.15 712.59,-255.03 691.62,-255.03 670.66,-255.03 653.62,-253.15 653.62,-250.84 653.62,-250.84 653.62,-213.16 653.62,-213.16 653.62,-210.85 670.66,-208.97 691.62,-208.97 712.59,-208.97 729.62,-210.85 729.62,-213.16 729.62,-213.16 729.62,-250.84 729.62,-250.84"/>
|
||||||
|
<path fill="none" stroke="black" d="M729.62,-250.84C729.62,-248.53 712.59,-246.66 691.62,-246.66 670.66,-246.66 653.62,-248.53 653.62,-250.84"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-235.25" font-family="Helvetica,sans-Serif" font-size="10.00">metrics_raw</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="691.62" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00">(5s, 24h)</text>
|
||||||
|
</g>
|
||||||
|
<!-- validate->raw -->
|
||||||
|
<g id="edge6" class="edge">
|
||||||
|
<title>validate->raw</title>
|
||||||
|
<path fill="none" stroke="black" d="M523.01,-153.3C548.24,-165.44 583.6,-182.37 614.75,-197 623.81,-201.26 633.5,-205.76 642.83,-210.07"/>
|
||||||
|
<polygon fill="black" stroke="black" points="641.22,-213.19 651.77,-214.2 644.16,-206.83 641.22,-213.19"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="588.62" y="-194.9" font-family="Helvetica,sans-Serif" font-size="9.00">Insert</text>
|
||||||
|
</g>
|
||||||
|
<!-- alerts -->
|
||||||
|
<g id="node10" class="node">
|
||||||
|
<title>alerts</title>
|
||||||
|
<path fill="#c5cae9" stroke="black" d="M902.38,-106C902.38,-106 872.38,-106 872.38,-106 866.38,-106 860.38,-100 860.38,-94 860.38,-94 860.38,-82 860.38,-82 860.38,-76 866.38,-70 872.38,-70 872.38,-70 902.38,-70 902.38,-70 908.38,-70 914.38,-76 914.38,-82 914.38,-82 914.38,-94 914.38,-94 914.38,-100 908.38,-106 902.38,-106"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-91.25" font-family="Helvetica,sans-Serif" font-size="10.00">Alert</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-78.5" font-family="Helvetica,sans-Serif" font-size="10.00">Service</text>
|
||||||
|
</g>
|
||||||
|
<!-- redis_pubsub->alerts -->
|
||||||
|
<g id="edge7" class="edge">
|
||||||
|
<title>redis_pubsub->alerts</title>
|
||||||
|
<path fill="none" stroke="black" d="M733.71,-73.03C767.65,-76.36 815.43,-81.04 848.46,-84.28"/>
|
||||||
|
<polygon fill="black" stroke="black" points="848.11,-87.76 858.4,-85.26 848.79,-80.8 848.11,-87.76"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-85.09" font-family="Helvetica,sans-Serif" font-size="9.00">metrics.*</text>
|
||||||
|
</g>
|
||||||
|
<!-- gateway -->
|
||||||
|
<g id="node11" class="node">
|
||||||
|
<title>gateway</title>
|
||||||
|
<path fill="#9fa8da" stroke="black" d="M913.75,-52C913.75,-52 861,-52 861,-52 855,-52 849,-46 849,-40 849,-40 849,-28 849,-28 849,-22 855,-16 861,-16 861,-16 913.75,-16 913.75,-16 919.75,-16 925.75,-22 925.75,-28 925.75,-28 925.75,-40 925.75,-40 925.75,-46 919.75,-52 913.75,-52"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-37.25" font-family="Helvetica,sans-Serif" font-size="10.00">Gateway</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-24.5" font-family="Helvetica,sans-Serif" font-size="10.00">(WebSocket)</text>
|
||||||
|
</g>
|
||||||
|
<!-- redis_pubsub->gateway -->
|
||||||
|
<g id="edge8" class="edge">
|
||||||
|
<title>redis_pubsub->gateway</title>
|
||||||
|
<path fill="none" stroke="black" d="M731.37,-62C761.89,-56.49 804.64,-48.77 837.51,-42.83"/>
|
||||||
|
<polygon fill="black" stroke="black" points="837.98,-46.3 847.2,-41.08 836.74,-39.41 837.98,-46.3"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-55.25" font-family="Helvetica,sans-Serif" font-size="9.00">metrics.*</text>
|
||||||
|
</g>
|
||||||
|
<!-- agg_1m -->
|
||||||
|
<g id="node8" class="node">
|
||||||
|
<title>agg_1m</title>
|
||||||
|
<path fill="#f48fb1" stroke="black" d="M924.25,-250.84C924.25,-253.15 907.72,-255.03 887.38,-255.03 867.03,-255.03 850.5,-253.15 850.5,-250.84 850.5,-250.84 850.5,-213.16 850.5,-213.16 850.5,-210.85 867.03,-208.97 887.38,-208.97 907.72,-208.97 924.25,-210.85 924.25,-213.16 924.25,-213.16 924.25,-250.84 924.25,-250.84"/>
|
||||||
|
<path fill="none" stroke="black" d="M924.25,-250.84C924.25,-248.53 907.72,-246.66 887.38,-246.66 867.03,-246.66 850.5,-248.53 850.5,-250.84"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-235.25" font-family="Helvetica,sans-Serif" font-size="10.00">metrics_1m</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00">(1m, 7d)</text>
|
||||||
|
</g>
|
||||||
|
<!-- raw->agg_1m -->
|
||||||
|
<g id="edge9" class="edge">
|
||||||
|
<title>raw->agg_1m</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M729.98,-232C760.97,-232 805.22,-232 838.74,-232"/>
|
||||||
|
<polygon fill="black" stroke="black" points="838.6,-235.5 848.6,-232 838.6,-228.5 838.6,-235.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-245.95" font-family="Helvetica,sans-Serif" font-size="9.00">Continuous</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-234.7" font-family="Helvetica,sans-Serif" font-size="9.00">Aggregate</text>
|
||||||
|
</g>
|
||||||
|
<!-- lambda -->
|
||||||
|
<g id="node12" class="node">
|
||||||
|
<title>lambda</title>
|
||||||
|
<path fill="#7986cb" stroke="black" stroke-dasharray="5,2" d="M910.38,-160C910.38,-160 864.38,-160 864.38,-160 858.38,-160 852.38,-154 852.38,-148 852.38,-148 852.38,-136 852.38,-136 852.38,-130 858.38,-124 864.38,-124 864.38,-124 910.38,-124 910.38,-124 916.38,-124 922.38,-130 922.38,-136 922.38,-136 922.38,-148 922.38,-148 922.38,-154 916.38,-160 910.38,-160"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-145.25" font-family="Helvetica,sans-Serif" font-size="10.00">Lambda</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="887.38" y="-132.5" font-family="Helvetica,sans-Serif" font-size="10.00">Aggregator</text>
|
||||||
|
</g>
|
||||||
|
<!-- raw->lambda -->
|
||||||
|
<g id="edge11" class="edge">
|
||||||
|
<title>raw->lambda</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="1,5" d="M729.81,-215.18C742.43,-209.45 756.59,-202.98 769.5,-197 793.37,-185.95 819.91,-173.48 841.65,-163.21"/>
|
||||||
|
<polygon fill="black" stroke="black" points="843,-166.44 850.54,-159.01 840,-160.12 843,-166.44"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-205.05" font-family="Helvetica,sans-Serif" font-size="9.00">SQS</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="805" y="-193.8" font-family="Helvetica,sans-Serif" font-size="9.00">Trigger</text>
|
||||||
|
</g>
|
||||||
|
<!-- agg_1h -->
|
||||||
|
<g id="node9" class="node">
|
||||||
|
<title>agg_1h</title>
|
||||||
|
<path fill="#ec407a" stroke="black" d="M1062.5,-250.84C1062.5,-253.15 1046.81,-255.03 1027.5,-255.03 1008.19,-255.03 992.5,-253.15 992.5,-250.84 992.5,-250.84 992.5,-213.16 992.5,-213.16 992.5,-210.85 1008.19,-208.97 1027.5,-208.97 1046.81,-208.97 1062.5,-210.85 1062.5,-213.16 1062.5,-213.16 1062.5,-250.84 1062.5,-250.84"/>
|
||||||
|
<path fill="none" stroke="black" d="M1062.5,-250.84C1062.5,-248.53 1046.81,-246.66 1027.5,-246.66 1008.19,-246.66 992.5,-248.53 992.5,-250.84"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="1027.5" y="-235.25" font-family="Helvetica,sans-Serif" font-size="10.00">metrics_1h</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="1027.5" y="-222.5" font-family="Helvetica,sans-Serif" font-size="10.00">(1h, 90d)</text>
|
||||||
|
</g>
|
||||||
|
<!-- agg_1m->agg_1h -->
|
||||||
|
<g id="edge10" class="edge">
|
||||||
|
<title>agg_1m->agg_1h</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M924.67,-232C941.93,-232 962.74,-232 981.04,-232"/>
|
||||||
|
<polygon fill="black" stroke="black" points="980.84,-235.5 990.84,-232 980.84,-228.5 980.84,-235.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="959.88" y="-245.95" font-family="Helvetica,sans-Serif" font-size="9.00">Hourly</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="959.88" y="-234.7" font-family="Helvetica,sans-Serif" font-size="9.00">Job</text>
|
||||||
|
</g>
|
||||||
|
<!-- lambda->agg_1m -->
|
||||||
|
<g id="edge12" class="edge">
|
||||||
|
<title>lambda->agg_1m</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="1,5" d="M887.38,-160.21C887.38,-170.91 887.38,-184.78 887.38,-197.47"/>
|
||||||
|
<polygon fill="black" stroke="black" points="883.88,-197.16 887.38,-207.16 890.88,-197.16 883.88,-197.16"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="873.12" y="-187.18" font-family="Helvetica,sans-Serif" font-size="9.00">Batch</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="873.12" y="-175.93" font-family="Helvetica,sans-Serif" font-size="9.00">Write</text>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 17 KiB |
95
docs/architecture/03-deployment.dot
Normal file
95
docs/architecture/03-deployment.dot
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
// Deployment diagram: a top-to-bottom view of where the system runs -- local
// development, the AWS environment, CI/CD, and the monitored machines that
// host collectors.
// The rendered counterpart is docs/architecture/03-deployment.svg; statement
// order here influences Graphviz layout, so regenerate the SVG after edits.
digraph Deployment {
|
||||||
|
rankdir=TB;
|
||||||
|
// Required for the lhead=cluster_ec2 attributes on the collector edges below.
compound=true;
|
||||||
|
fontname="Helvetica";
|
||||||
|
node [fontname="Helvetica", fontsize=10];
|
||||||
|
edge [fontname="Helvetica", fontsize=9];
|
||||||
|
|
||||||
|
// Graph title, placed at the top of the drawing.
labelloc="t";
|
||||||
|
label="Deployment Architecture";
|
||||||
|
fontsize=14;
|
||||||
|
|
||||||
|
// Default node look; individual nodes below override shape/style as needed.
node [shape=box, style="rounded,filled"];
|
||||||
|
|
||||||
|
// Local Development: a Kind cluster (Tilt plus Kustomize-deployed pods),
// with Docker Compose drawn as an alternative.
|
||||||
|
subgraph cluster_local {
|
||||||
|
label="Local Development";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E3F2FD";
|
||||||
|
|
||||||
|
subgraph cluster_kind {
|
||||||
|
label="Kind Cluster";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#BBDEFB";
|
||||||
|
|
||||||
|
tilt [label="Tilt\n(Live Reload)", shape=component, fillcolor="#90CAF9"];
|
||||||
|
k8s_local [label="K8s Pods\n(via Kustomize)", fillcolor="#64B5F6"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): this style overrides the default and omits "filled", so the
// fillcolor below is not drawn -- confirm whether an unfilled dashed box is intended.
compose [label="Docker Compose\n(Alternative)", fillcolor="#90CAF9", style="rounded,dashed"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// AWS Staging/Demo: an EC2 instance running the services behind Nginx, plus
// Lambda functions, an SQS buffer and an S3 backup bucket.
|
||||||
|
subgraph cluster_aws {
|
||||||
|
label="AWS (sysmonstm.mcrn.ar)";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E8F5E9";
|
||||||
|
|
||||||
|
subgraph cluster_ec2 {
|
||||||
|
label="EC2 t2.small";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#C8E6C9";
|
||||||
|
|
||||||
|
compose_ec2 [label="Docker Compose\n(All Services)", fillcolor="#A5D6A7"];
|
||||||
|
nginx [label="Nginx\n(SSL Termination)", fillcolor="#81C784"];
|
||||||
|
}
|
||||||
|
|
||||||
|
subgraph cluster_lambda {
|
||||||
|
label="Lambda (Data Processing)";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#DCEDC8";
|
||||||
|
|
||||||
|
lambda_agg [label="Aggregator\nLambda", fillcolor="#AED581"];
|
||||||
|
// NOTE(review): lambda_compact has no incoming edge in this graph, so the
// diagram does not show what triggers it -- confirm.
lambda_compact [label="Compactor\nLambda", fillcolor="#9CCC65"];
|
||||||
|
}
|
||||||
|
|
||||||
|
sqs [label="SQS\n(Buffer)", shape=hexagon, fillcolor="#FFE082"];
|
||||||
|
s3 [label="S3\n(Backup)", shape=cylinder, fillcolor="#FFE082"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// CI/CD: Woodpecker pushes to a container registry that both environments pull from.
|
||||||
|
subgraph cluster_cicd {
|
||||||
|
label="CI/CD";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#F3E5F5";
|
||||||
|
|
||||||
|
woodpecker [label="Woodpecker CI", fillcolor="#CE93D8"];
|
||||||
|
registry [label="Container\nRegistry", shape=cylinder, fillcolor="#BA68C8"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Collectors (External): monitored machines, drawn as an unfilled cluster
// with a dashed gray border.
|
||||||
|
subgraph cluster_collectors {
|
||||||
|
label="Monitored Machines";
|
||||||
|
style=dashed;
|
||||||
|
color=gray;
|
||||||
|
|
||||||
|
coll1 [label="Collector\n(Machine 1)", fillcolor="#FFCCBC"];
|
||||||
|
coll2 [label="Collector\n(Machine 2)", fillcolor="#FFCCBC"];
|
||||||
|
coll3 [label="Collector\n(Machine N)", fillcolor="#FFCCBC"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Connections
|
||||||
|
// Invisible edge: not drawn, used only to influence layout (tilt ranked above k8s_local).
tilt -> k8s_local [style=invis];
|
||||||
|
woodpecker -> registry [label="Push"];
|
||||||
|
registry -> compose_ec2 [label="Pull"];
|
||||||
|
registry -> k8s_local [label="Pull", style=dashed];
|
||||||
|
|
||||||
|
// Traffic and data movement inside the AWS environment.
nginx -> compose_ec2 [label="Proxy"];
|
||||||
|
compose_ec2 -> sqs [label="Events"];
|
||||||
|
sqs -> lambda_agg [label="Trigger"];
|
||||||
|
lambda_compact -> s3 [label="Archive"];
|
||||||
|
|
||||||
|
// Collector edges target compose_ec2 but are clipped at the EC2 cluster
// border via lhead, so they read as "collector -> EC2 host".
coll1 -> compose_ec2 [label="gRPC", lhead=cluster_ec2];
|
||||||
|
coll2 -> compose_ec2 [label="gRPC", lhead=cluster_ec2];
|
||||||
|
coll3 -> compose_ec2 [label="gRPC", lhead=cluster_ec2];
|
||||||
|
}
|
||||||
221
docs/architecture/03-deployment.svg
Normal file
221
docs/architecture/03-deployment.svg
Normal file
@@ -0,0 +1,221 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||||
|
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<!-- Generated by graphviz version 14.1.1 (0)
|
||||||
|
-->
|
||||||
|
<!-- Title: Deployment Pages: 1 -->
|
||||||
|
<svg width="872pt" height="662pt"
|
||||||
|
viewBox="0.00 0.00 872.00 662.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||||
|
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 658.3)">
|
||||||
|
<title>Deployment</title>
|
||||||
|
<polygon fill="white" stroke="none" points="-4,4 -4,-658.3 868,-658.3 868,4 -4,4"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="432" y="-637" font-family="Helvetica,sans-Serif" font-size="14.00">Deployment Architecture</text>
|
||||||
|
<g id="clust1" class="cluster">
|
||||||
|
<title>cluster_local</title>
|
||||||
|
<polygon fill="#e3f2fd" stroke="black" points="8,-307.77 8,-514.55 238,-514.55 238,-307.77 8,-307.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="123" y="-497.25" font-family="Helvetica,sans-Serif" font-size="14.00">Local Development</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust2" class="cluster">
|
||||||
|
<title>cluster_kind</title>
|
||||||
|
<polygon fill="#bbdefb" stroke="black" points="16,-315.77 16,-481.3 124,-481.3 124,-315.77 16,-315.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="70" y="-464" font-family="Helvetica,sans-Serif" font-size="14.00">Kind Cluster</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust3" class="cluster">
|
||||||
|
<title>cluster_aws</title>
|
||||||
|
<polygon fill="#e8f5e9" stroke="black" points="642,-8 642,-514.55 856,-514.55 856,-8 642,-8"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="749" y="-497.25" font-family="Helvetica,sans-Serif" font-size="14.00">AWS (sysmonstm.mcrn.ar)</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust4" class="cluster">
|
||||||
|
<title>cluster_ec2</title>
|
||||||
|
<polygon fill="#c8e6c9" stroke="black" points="650,-315.77 650,-481.3 768,-481.3 768,-315.77 650,-315.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="709" y="-464" font-family="Helvetica,sans-Serif" font-size="14.00">EC2 t2.small</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust5" class="cluster">
|
||||||
|
<title>cluster_lambda</title>
|
||||||
|
<polygon fill="#dcedc8" stroke="black" points="650,-101.31 650,-178.56 848,-178.56 848,-101.31 650,-101.31"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="749" y="-161.26" font-family="Helvetica,sans-Serif" font-size="14.00">Lambda (Data Processing)</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust6" class="cluster">
|
||||||
|
<title>cluster_cicd</title>
|
||||||
|
<polygon fill="#f3e5f5" stroke="black" points="246,-399.02 246,-621.05 350,-621.05 350,-399.02 246,-399.02"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="298" y="-603.75" font-family="Helvetica,sans-Serif" font-size="14.00">CI/CD</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust7" class="cluster">
|
||||||
|
<title>cluster_collectors</title>
|
||||||
|
<polygon fill="none" stroke="gray" stroke-dasharray="5,2" points="358,-404.05 358,-481.3 634,-481.3 634,-404.05 358,-404.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="496" y="-464" font-family="Helvetica,sans-Serif" font-size="14.00">Monitored Machines</text>
|
||||||
|
</g>
|
||||||
|
<!-- tilt -->
|
||||||
|
<g id="node1" class="node">
|
||||||
|
<title>tilt</title>
|
||||||
|
<polygon fill="#90caf9" stroke="black" points="110.25,-448.05 29.75,-448.05 29.75,-444.05 25.75,-444.05 25.75,-440.05 29.75,-440.05 29.75,-420.05 25.75,-420.05 25.75,-416.05 29.75,-416.05 29.75,-412.05 110.25,-412.05 110.25,-448.05"/>
|
||||||
|
<polyline fill="none" stroke="black" points="29.75,-444.05 33.75,-444.05 33.75,-440.05 29.75,-440.05"/>
|
||||||
|
<polyline fill="none" stroke="black" points="29.75,-420.05 33.75,-420.05 33.75,-416.05 29.75,-416.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="70" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Tilt</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="70" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(Live Reload)</text>
|
||||||
|
</g>
|
||||||
|
<!-- k8s_local -->
|
||||||
|
<g id="node2" class="node">
|
||||||
|
<title>k8s_local</title>
|
||||||
|
<path fill="#64b5f6" stroke="black" d="M104.25,-359.77C104.25,-359.77 35.75,-359.77 35.75,-359.77 29.75,-359.77 23.75,-353.77 23.75,-347.77 23.75,-347.77 23.75,-335.77 23.75,-335.77 23.75,-329.77 29.75,-323.77 35.75,-323.77 35.75,-323.77 104.25,-323.77 104.25,-323.77 110.25,-323.77 116.25,-329.77 116.25,-335.77 116.25,-335.77 116.25,-347.77 116.25,-347.77 116.25,-353.77 110.25,-359.77 104.25,-359.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="70" y="-345.02" font-family="Helvetica,sans-Serif" font-size="10.00">K8s Pods</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="70" y="-332.27" font-family="Helvetica,sans-Serif" font-size="10.00">(via Kustomize)</text>
|
||||||
|
</g>
|
||||||
|
<!-- tilt->k8s_local -->
|
||||||
|
<!-- compose -->
|
||||||
|
<g id="node3" class="node">
|
||||||
|
<title>compose</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M218.25,-448.05C218.25,-448.05 143.75,-448.05 143.75,-448.05 137.75,-448.05 131.75,-442.05 131.75,-436.05 131.75,-436.05 131.75,-424.05 131.75,-424.05 131.75,-418.05 137.75,-412.05 143.75,-412.05 143.75,-412.05 218.25,-412.05 218.25,-412.05 224.25,-412.05 230.25,-418.05 230.25,-424.05 230.25,-424.05 230.25,-436.05 230.25,-436.05 230.25,-442.05 224.25,-448.05 218.25,-448.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="181" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Docker Compose</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="181" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(Alternative)</text>
|
||||||
|
</g>
|
||||||
|
<!-- compose_ec2 -->
|
||||||
|
<g id="node4" class="node">
|
||||||
|
<title>compose_ec2</title>
|
||||||
|
<path fill="#a5d6a7" stroke="black" d="M744.25,-359.77C744.25,-359.77 669.75,-359.77 669.75,-359.77 663.75,-359.77 657.75,-353.77 657.75,-347.77 657.75,-347.77 657.75,-335.77 657.75,-335.77 657.75,-329.77 663.75,-323.77 669.75,-323.77 669.75,-323.77 744.25,-323.77 744.25,-323.77 750.25,-323.77 756.25,-329.77 756.25,-335.77 756.25,-335.77 756.25,-347.77 756.25,-347.77 756.25,-353.77 750.25,-359.77 744.25,-359.77"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-345.02" font-family="Helvetica,sans-Serif" font-size="10.00">Docker Compose</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-332.27" font-family="Helvetica,sans-Serif" font-size="10.00">(All Services)</text>
|
||||||
|
</g>
|
||||||
|
<!-- sqs -->
|
||||||
|
<g id="node8" class="node">
|
||||||
|
<title>sqs</title>
|
||||||
|
<path fill="#ffe082" stroke="black" d="M742.89,-252.28C742.89,-252.28 735.71,-261.4 735.71,-261.4 732.12,-265.96 722.73,-270.52 716.93,-270.52 716.93,-270.52 697.07,-270.52 697.07,-270.52 691.27,-270.52 681.88,-265.96 678.29,-261.4 678.29,-261.4 671.11,-252.28 671.11,-252.28 667.52,-247.72 667.52,-238.61 671.11,-234.05 671.11,-234.05 678.29,-224.93 678.29,-224.93 681.88,-220.37 691.27,-215.81 697.07,-215.81 697.07,-215.81 716.93,-215.81 716.93,-215.81 722.73,-215.81 732.12,-220.37 735.71,-224.93 735.71,-224.93 742.89,-234.05 742.89,-234.05 746.48,-238.61 746.48,-247.72 742.89,-252.28"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-246.42" font-family="Helvetica,sans-Serif" font-size="10.00">SQS</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-233.67" font-family="Helvetica,sans-Serif" font-size="10.00">(Buffer)</text>
|
||||||
|
</g>
|
||||||
|
<!-- compose_ec2->sqs -->
|
||||||
|
<g id="edge6" class="edge">
|
||||||
|
<title>compose_ec2->sqs</title>
|
||||||
|
<path fill="none" stroke="black" d="M707,-323.5C707,-311.94 707,-296.26 707,-281.89"/>
|
||||||
|
<polygon fill="black" stroke="black" points="710.5,-282.27 707,-272.27 703.5,-282.27 710.5,-282.27"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="722.38" y="-291.22" font-family="Helvetica,sans-Serif" font-size="9.00">Events</text>
|
||||||
|
</g>
|
||||||
|
<!-- nginx -->
|
||||||
|
<g id="node5" class="node">
|
||||||
|
<title>nginx</title>
|
||||||
|
<path fill="#81c784" stroke="black" d="M747.75,-448.05C747.75,-448.05 670.25,-448.05 670.25,-448.05 664.25,-448.05 658.25,-442.05 658.25,-436.05 658.25,-436.05 658.25,-424.05 658.25,-424.05 658.25,-418.05 664.25,-412.05 670.25,-412.05 670.25,-412.05 747.75,-412.05 747.75,-412.05 753.75,-412.05 759.75,-418.05 759.75,-424.05 759.75,-424.05 759.75,-436.05 759.75,-436.05 759.75,-442.05 753.75,-448.05 747.75,-448.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="709" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Nginx</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="709" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(SSL Termination)</text>
|
||||||
|
</g>
|
||||||
|
<!-- nginx->compose_ec2 -->
|
||||||
|
<g id="edge5" class="edge">
|
||||||
|
<title>nginx->compose_ec2</title>
|
||||||
|
<path fill="none" stroke="black" d="M708.6,-411.59C708.33,-400.13 707.98,-384.86 707.67,-371.63"/>
|
||||||
|
<polygon fill="black" stroke="black" points="711.17,-371.63 707.44,-361.72 704.17,-371.79 711.17,-371.63"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="720.43" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">Proxy</text>
|
||||||
|
</g>
|
||||||
|
<!-- lambda_agg -->
|
||||||
|
<g id="node6" class="node">
|
||||||
|
<title>lambda_agg</title>
|
||||||
|
<path fill="#aed581" stroke="black" d="M730,-145.31C730,-145.31 684,-145.31 684,-145.31 678,-145.31 672,-139.31 672,-133.31 672,-133.31 672,-121.31 672,-121.31 672,-115.31 678,-109.31 684,-109.31 684,-109.31 730,-109.31 730,-109.31 736,-109.31 742,-115.31 742,-121.31 742,-121.31 742,-133.31 742,-133.31 742,-139.31 736,-145.31 730,-145.31"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-130.56" font-family="Helvetica,sans-Serif" font-size="10.00">Aggregator</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="707" y="-117.81" font-family="Helvetica,sans-Serif" font-size="10.00">Lambda</text>
|
||||||
|
</g>
|
||||||
|
<!-- lambda_compact -->
|
||||||
|
<g id="node7" class="node">
|
||||||
|
<title>lambda_compact</title>
|
||||||
|
<path fill="#9ccc65" stroke="black" d="M822.62,-145.31C822.62,-145.31 777.38,-145.31 777.38,-145.31 771.38,-145.31 765.38,-139.31 765.38,-133.31 765.38,-133.31 765.38,-121.31 765.38,-121.31 765.38,-115.31 771.38,-109.31 777.38,-109.31 777.38,-109.31 822.62,-109.31 822.62,-109.31 828.62,-109.31 834.62,-115.31 834.62,-121.31 834.62,-121.31 834.62,-133.31 834.62,-133.31 834.62,-139.31 828.62,-145.31 822.62,-145.31"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="800" y="-130.56" font-family="Helvetica,sans-Serif" font-size="10.00">Compactor</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="800" y="-117.81" font-family="Helvetica,sans-Serif" font-size="10.00">Lambda</text>
|
||||||
|
</g>
|
||||||
|
<!-- s3 -->
|
||||||
|
<g id="node9" class="node">
|
||||||
|
<title>s3</title>
|
||||||
|
<path fill="#ffe082" stroke="black" d="M829.38,-57.88C829.38,-60.19 816.21,-62.06 800,-62.06 783.79,-62.06 770.62,-60.19 770.62,-57.88 770.62,-57.88 770.62,-20.19 770.62,-20.19 770.62,-17.88 783.79,-16 800,-16 816.21,-16 829.38,-17.88 829.38,-20.19 829.38,-20.19 829.38,-57.88 829.38,-57.88"/>
|
||||||
|
<path fill="none" stroke="black" d="M829.38,-57.88C829.38,-55.56 816.21,-53.69 800,-53.69 783.79,-53.69 770.62,-55.56 770.62,-57.88"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="800" y="-42.28" font-family="Helvetica,sans-Serif" font-size="10.00">S3</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="800" y="-29.53" font-family="Helvetica,sans-Serif" font-size="10.00">(Backup)</text>
|
||||||
|
</g>
|
||||||
|
<!-- lambda_compact->s3 -->
|
||||||
|
<g id="edge8" class="edge">
|
||||||
|
<title>lambda_compact->s3</title>
|
||||||
|
<path fill="none" stroke="black" d="M800,-108.85C800,-98.81 800,-85.84 800,-73.88"/>
|
||||||
|
<polygon fill="black" stroke="black" points="803.5,-73.9 800,-63.9 796.5,-73.9 803.5,-73.9"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="816.88" y="-82.76" font-family="Helvetica,sans-Serif" font-size="9.00">Archive</text>
|
||||||
|
</g>
|
||||||
|
<!-- sqs->lambda_agg -->
|
||||||
|
<g id="edge7" class="edge">
|
||||||
|
<title>sqs->lambda_agg</title>
|
||||||
|
<path fill="none" stroke="black" d="M707,-215.47C707,-197.96 707,-175.06 707,-157.13"/>
|
||||||
|
<polygon fill="black" stroke="black" points="710.5,-157.15 707,-147.15 703.5,-157.15 710.5,-157.15"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="722.75" y="-189.26" font-family="Helvetica,sans-Serif" font-size="9.00">Trigger</text>
|
||||||
|
</g>
|
||||||
|
<!-- woodpecker -->
|
||||||
|
<g id="node10" class="node">
|
||||||
|
<title>woodpecker</title>
|
||||||
|
<path fill="#ce93d8" stroke="black" d="M330,-587.8C330,-587.8 266,-587.8 266,-587.8 260,-587.8 254,-581.8 254,-575.8 254,-575.8 254,-563.8 254,-563.8 254,-557.8 260,-551.8 266,-551.8 266,-551.8 330,-551.8 330,-551.8 336,-551.8 342,-557.8 342,-563.8 342,-563.8 342,-575.8 342,-575.8 342,-581.8 336,-587.8 330,-587.8"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="298" y="-566.67" font-family="Helvetica,sans-Serif" font-size="10.00">Woodpecker CI</text>
|
||||||
|
</g>
|
||||||
|
<!-- registry -->
|
||||||
|
<g id="node11" class="node">
|
||||||
|
<title>registry</title>
|
||||||
|
<path fill="#ba68c8" stroke="black" d="M329.62,-448.89C329.62,-451.2 315.45,-453.08 298,-453.08 280.55,-453.08 266.38,-451.2 266.38,-448.89 266.38,-448.89 266.38,-411.21 266.38,-411.21 266.38,-408.89 280.55,-407.02 298,-407.02 315.45,-407.02 329.62,-408.89 329.62,-411.21 329.62,-411.21 329.62,-448.89 329.62,-448.89"/>
|
||||||
|
<path fill="none" stroke="black" d="M329.62,-448.89C329.62,-446.58 315.45,-444.71 298,-444.71 280.55,-444.71 266.38,-446.58 266.38,-448.89"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="298" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Container</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="298" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">Registry</text>
|
||||||
|
</g>
|
||||||
|
<!-- woodpecker->registry -->
|
||||||
|
<g id="edge2" class="edge">
|
||||||
|
<title>woodpecker->registry</title>
|
||||||
|
<path fill="none" stroke="black" d="M298,-551.35C298,-529.66 298,-492.15 298,-464.77"/>
|
||||||
|
<polygon fill="black" stroke="black" points="301.5,-464.88 298,-454.88 294.5,-464.88 301.5,-464.88"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="308.88" y="-525.25" font-family="Helvetica,sans-Serif" font-size="9.00">Push</text>
|
||||||
|
</g>
|
||||||
|
<!-- registry->k8s_local -->
|
||||||
|
<g id="edge4" class="edge">
|
||||||
|
<title>registry->k8s_local</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M265.9,-410.59C258.2,-406.51 249.91,-402.4 242,-399.02 204.6,-383.02 161.03,-368.81 127.1,-358.68"/>
|
||||||
|
<polygon fill="black" stroke="black" points="128.47,-355.44 117.89,-355.97 126.49,-362.15 128.47,-355.44"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="222.42" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">Pull</text>
|
||||||
|
</g>
|
||||||
|
<!-- registry->compose_ec2 -->
|
||||||
|
<g id="edge3" class="edge">
|
||||||
|
<title>registry->compose_ec2</title>
|
||||||
|
<path fill="none" stroke="black" d="M329.84,-409.93C337.55,-405.88 345.91,-401.95 354,-399.02 452.44,-363.35 574.46,-350.26 646.22,-345.49"/>
|
||||||
|
<polygon fill="black" stroke="black" points="646.02,-349.01 655.78,-344.88 645.58,-342.02 646.02,-349.01"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="427.09" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">Pull</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll1 -->
|
||||||
|
<g id="node12" class="node">
|
||||||
|
<title>coll1</title>
|
||||||
|
<path fill="#ffccbc" stroke="black" d="M521.88,-448.05C521.88,-448.05 472.12,-448.05 472.12,-448.05 466.12,-448.05 460.12,-442.05 460.12,-436.05 460.12,-436.05 460.12,-424.05 460.12,-424.05 460.12,-418.05 466.12,-412.05 472.12,-412.05 472.12,-412.05 521.88,-412.05 521.88,-412.05 527.88,-412.05 533.88,-418.05 533.88,-424.05 533.88,-424.05 533.88,-436.05 533.88,-436.05 533.88,-442.05 527.88,-448.05 521.88,-448.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="497" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Collector</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="497" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(Machine 1)</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll1->compose_ec2 -->
|
||||||
|
<g id="edge9" class="edge">
|
||||||
|
<title>coll1->compose_ec2</title>
|
||||||
|
<path fill="none" stroke="black" d="M521.16,-411.67C528.02,-407.19 535.63,-402.62 543,-399.02 576.02,-382.89 614.85,-369.35 646.44,-359.6"/>
|
||||||
|
<polygon fill="black" stroke="black" points="640.37,-365.52 648.58,-358.82 637.98,-358.94 640.37,-365.52"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="602.75" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">gRPC</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll2 -->
|
||||||
|
<g id="node13" class="node">
|
||||||
|
<title>coll2</title>
|
||||||
|
<path fill="#ffccbc" stroke="black" d="M613.88,-448.05C613.88,-448.05 564.12,-448.05 564.12,-448.05 558.12,-448.05 552.12,-442.05 552.12,-436.05 552.12,-436.05 552.12,-424.05 552.12,-424.05 552.12,-418.05 558.12,-412.05 564.12,-412.05 564.12,-412.05 613.88,-412.05 613.88,-412.05 619.88,-412.05 625.88,-418.05 625.88,-424.05 625.88,-424.05 625.88,-436.05 625.88,-436.05 625.88,-442.05 619.88,-448.05 613.88,-448.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="589" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Collector</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="589" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(Machine 2)</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll2->compose_ec2 -->
|
||||||
|
<g id="edge10" class="edge">
|
||||||
|
<title>coll2->compose_ec2</title>
|
||||||
|
<path fill="none" stroke="black" d="M612.88,-411.59C621.13,-405.55 630.83,-398.47 640.8,-391.17"/>
|
||||||
|
<polygon fill="black" stroke="black" points="642.77,-394.07 648.78,-385.34 638.64,-388.41 642.77,-394.07"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="670.19" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">gRPC</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll3 -->
|
||||||
|
<g id="node14" class="node">
|
||||||
|
<title>coll3</title>
|
||||||
|
<path fill="#ffccbc" stroke="black" d="M429.62,-448.05C429.62,-448.05 378.38,-448.05 378.38,-448.05 372.38,-448.05 366.38,-442.05 366.38,-436.05 366.38,-436.05 366.38,-424.05 366.38,-424.05 366.38,-418.05 372.38,-412.05 378.38,-412.05 378.38,-412.05 429.62,-412.05 429.62,-412.05 435.62,-412.05 441.62,-418.05 441.62,-424.05 441.62,-424.05 441.62,-436.05 441.62,-436.05 441.62,-442.05 435.62,-448.05 429.62,-448.05"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="404" y="-433.3" font-family="Helvetica,sans-Serif" font-size="10.00">Collector</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="404" y="-420.55" font-family="Helvetica,sans-Serif" font-size="10.00">(Machine N)</text>
|
||||||
|
</g>
|
||||||
|
<!-- coll3->compose_ec2 -->
|
||||||
|
<g id="edge11" class="edge">
|
||||||
|
<title>coll3->compose_ec2</title>
|
||||||
|
<path fill="none" stroke="black" d="M427.53,-411.82C434.78,-407.12 442.97,-402.41 451,-399.02 514.86,-372.07 593.36,-357.28 646.47,-349.71"/>
|
||||||
|
<polygon fill="black" stroke="black" points="639.16,-354.39 648.5,-349.4 638.08,-347.48 639.16,-354.39"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="516.54" y="-380.47" font-family="Helvetica,sans-Serif" font-size="9.00">gRPC</text>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 18 KiB |
67
docs/architecture/04-grpc-services.dot
Normal file
67
docs/architecture/04-grpc-services.dot
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
digraph GrpcServices {
|
||||||
|
rankdir=LR;
|
||||||
|
compound=true;
|
||||||
|
fontname="Helvetica";
|
||||||
|
node [fontname="Helvetica", fontsize=10];
|
||||||
|
edge [fontname="Helvetica", fontsize=9];
|
||||||
|
|
||||||
|
labelloc="t";
|
||||||
|
label="gRPC Service Definitions";
|
||||||
|
fontsize=14;
|
||||||
|
|
||||||
|
node [shape=record, style=filled];
|
||||||
|
|
||||||
|
// MetricsService
|
||||||
|
subgraph cluster_metrics {
|
||||||
|
label="MetricsService";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E8F5E9";
|
||||||
|
|
||||||
|
metrics_svc [label="{MetricsService|+ StreamMetrics(stream Metric) → StreamAck\l+ GetCurrentState(StateRequest) → MachineState\l+ GetAllStates(Empty) → AllMachinesState\l}", fillcolor="#C8E6C9"];
|
||||||
|
|
||||||
|
metric_msg [label="{Metric|machine_id: string\lhostname: string\ltimestamp_ms: int64\ltype: MetricType\lvalue: double\llabels: map\l}", fillcolor="#A5D6A7"];
|
||||||
|
|
||||||
|
machine_state [label="{MachineState|machine_id: string\lhostname: string\llast_seen_ms: int64\lcurrent_metrics: Metric[]\lhealth: HealthStatus\lmetadata: map\l}", fillcolor="#A5D6A7"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// ControlService
|
||||||
|
subgraph cluster_control {
|
||||||
|
label="ControlService";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#E3F2FD";
|
||||||
|
|
||||||
|
control_svc [label="{ControlService|+ Control(stream Command) → stream Response\l}", fillcolor="#90CAF9"];
|
||||||
|
|
||||||
|
commands [label="{ControlCommand|command_id: string\l|UpdateIntervalCommand\lRestartCollectionCommand\lShutdownCommand\l}", fillcolor="#64B5F6"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConfigService
|
||||||
|
subgraph cluster_config {
|
||||||
|
label="ConfigService";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#FFF3E0";
|
||||||
|
|
||||||
|
config_svc [label="{ConfigService|+ GetConfig(ConfigRequest) → CollectorConfig\l+ WatchConfig(ConfigRequest) → stream CollectorConfig\l}", fillcolor="#FFE0B2"];
|
||||||
|
|
||||||
|
collector_config [label="{CollectorConfig|collection_interval_seconds: int32\lenabled_metrics: MetricType[]\llabels: map\lthresholds: ThresholdConfig[]\l}", fillcolor="#FFCC80"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enums
|
||||||
|
subgraph cluster_enums {
|
||||||
|
label="Enums";
|
||||||
|
style=filled;
|
||||||
|
fillcolor="#F3E5F5";
|
||||||
|
|
||||||
|
metric_type [label="{MetricType|CPU_PERCENT\lMEMORY_PERCENT\lDISK_PERCENT\lNETWORK_*\lLOAD_AVG_*\l...}", fillcolor="#E1BEE7"];
|
||||||
|
|
||||||
|
health_status [label="{HealthStatus|HEALTHY\lWARNING\lCRITICAL\lUNKNOWN\lOFFLINE\l}", fillcolor="#CE93D8"];
|
||||||
|
}
|
||||||
|
|
||||||
|
// Relationships
|
||||||
|
metrics_svc -> metric_msg [style=dashed];
|
||||||
|
metrics_svc -> machine_state [style=dashed];
|
||||||
|
control_svc -> commands [style=dashed];
|
||||||
|
config_svc -> collector_config [style=dashed];
|
||||||
|
metric_msg -> metric_type [style=dotted];
|
||||||
|
machine_state -> health_status [style=dotted];
|
||||||
|
}
|
||||||
171
docs/architecture/04-grpc-services.svg
Normal file
171
docs/architecture/04-grpc-services.svg
Normal file
@@ -0,0 +1,171 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
|
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
|
||||||
|
"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
|
||||||
|
<!-- Generated by graphviz version 14.1.1 (0)
|
||||||
|
-->
|
||||||
|
<!-- Title: GrpcServices Pages: 1 -->
|
||||||
|
<svg width="1030pt" height="486pt"
|
||||||
|
viewBox="0.00 0.00 1030.00 486.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
|
||||||
|
<g id="graph0" class="graph" transform="scale(1 1) rotate(0) translate(4 482.25)">
|
||||||
|
<title>GrpcServices</title>
|
||||||
|
<polygon fill="white" stroke="none" points="-4,4 -4,-482.25 1026.25,-482.25 1026.25,4 -4,4"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="511.12" y="-460.95" font-family="Helvetica,sans-Serif" font-size="14.00">gRPC Service Definitions</text>
|
||||||
|
<g id="clust1" class="cluster">
|
||||||
|
<title>cluster_metrics</title>
|
||||||
|
<polygon fill="#e8f5e9" stroke="black" points="21.5,-8 21.5,-239 726.75,-239 726.75,-8 21.5,-8"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="374.12" y="-221.7" font-family="Helvetica,sans-Serif" font-size="14.00">MetricsService</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust2" class="cluster">
|
||||||
|
<title>cluster_control</title>
|
||||||
|
<polygon fill="#e3f2fd" stroke="black" points="23.38,-247 23.38,-336 799.25,-336 799.25,-247 23.38,-247"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="411.31" y="-318.7" font-family="Helvetica,sans-Serif" font-size="14.00">ControlService</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust3" class="cluster">
|
||||||
|
<title>cluster_config</title>
|
||||||
|
<polygon fill="#fff3e0" stroke="black" points="8,-344 8,-445 753,-445 753,-344 8,-344"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="380.5" y="-427.7" font-family="Helvetica,sans-Serif" font-size="14.00">ConfigService</text>
|
||||||
|
</g>
|
||||||
|
<g id="clust4" class="cluster">
|
||||||
|
<title>cluster_enums</title>
|
||||||
|
<polygon fill="#f3e5f5" stroke="black" points="819.25,-11 819.25,-229 1014.25,-229 1014.25,-11 819.25,-11"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="916.75" y="-211.7" font-family="Helvetica,sans-Serif" font-size="14.00">Enums</text>
|
||||||
|
</g>
|
||||||
|
<!-- metrics_svc -->
|
||||||
|
<g id="node1" class="node">
|
||||||
|
<title>metrics_svc</title>
|
||||||
|
<polygon fill="#c8e6c9" stroke="black" points="29.5,-87.88 29.5,-134.12 377.25,-134.12 377.25,-87.88 29.5,-87.88"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="73.5" y="-107.88" font-family="Helvetica,sans-Serif" font-size="10.00">MetricsService</text>
|
||||||
|
<polyline fill="none" stroke="black" points="117.5,-87.88 117.5,-134.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="125.5" y="-120.62" font-family="Helvetica,sans-Serif" font-size="10.00">+ StreamMetrics(stream Metric) → StreamAck</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="125.5" y="-107.88" font-family="Helvetica,sans-Serif" font-size="10.00">+ GetCurrentState(StateRequest) → MachineState</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="125.5" y="-95.12" font-family="Helvetica,sans-Serif" font-size="10.00">+ GetAllStates(Empty) → AllMachinesState</text>
|
||||||
|
</g>
|
||||||
|
<!-- metric_msg -->
|
||||||
|
<g id="node2" class="node">
|
||||||
|
<title>metric_msg</title>
|
||||||
|
<polygon fill="#a5d6a7" stroke="black" points="525.5,-16.75 525.5,-101.25 692.5,-101.25 692.5,-16.75 525.5,-16.75"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="548.88" y="-55.88" font-family="Helvetica,sans-Serif" font-size="10.00">Metric</text>
|
||||||
|
<polyline fill="none" stroke="black" points="572.25,-16.75 572.25,-101.25"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-87.75" font-family="Helvetica,sans-Serif" font-size="10.00">machine_id: string</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-75" font-family="Helvetica,sans-Serif" font-size="10.00">hostname: string</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-62.25" font-family="Helvetica,sans-Serif" font-size="10.00">timestamp_ms: int64</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-49.5" font-family="Helvetica,sans-Serif" font-size="10.00">type: MetricType</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-36.75" font-family="Helvetica,sans-Serif" font-size="10.00">value: double</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="580.25" y="-24" font-family="Helvetica,sans-Serif" font-size="10.00">labels: map</text>
|
||||||
|
</g>
|
||||||
|
<!-- metrics_svc->metric_msg -->
|
||||||
|
<g id="edge1" class="edge">
|
||||||
|
<title>metrics_svc->metric_msg</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M377.6,-88.68C424.41,-82.65 473.31,-76.35 513.96,-71.12"/>
|
||||||
|
<polygon fill="black" stroke="black" points="514.22,-74.61 523.69,-69.86 513.33,-67.67 514.22,-74.61"/>
|
||||||
|
</g>
|
||||||
|
<!-- machine_state -->
|
||||||
|
<g id="node3" class="node">
|
||||||
|
<title>machine_state</title>
|
||||||
|
<polygon fill="#a5d6a7" stroke="black" points="499.25,-120.75 499.25,-205.25 718.75,-205.25 718.75,-120.75 499.25,-120.75"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="540.62" y="-159.88" font-family="Helvetica,sans-Serif" font-size="10.00">MachineState</text>
|
||||||
|
<polyline fill="none" stroke="black" points="582,-120.75 582,-205.25"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-191.75" font-family="Helvetica,sans-Serif" font-size="10.00">machine_id: string</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-179" font-family="Helvetica,sans-Serif" font-size="10.00">hostname: string</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-166.25" font-family="Helvetica,sans-Serif" font-size="10.00">last_seen_ms: int64</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-153.5" font-family="Helvetica,sans-Serif" font-size="10.00">current_metrics: Metric[]</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-140.75" font-family="Helvetica,sans-Serif" font-size="10.00">health: HealthStatus</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="590" y="-128" font-family="Helvetica,sans-Serif" font-size="10.00">metadata: map</text>
|
||||||
|
</g>
|
||||||
|
<!-- metrics_svc->machine_state -->
|
||||||
|
<g id="edge2" class="edge">
|
||||||
|
<title>metrics_svc->machine_state</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M377.6,-133.32C414.74,-138.1 453.2,-143.06 487.8,-147.51"/>
|
||||||
|
<polygon fill="black" stroke="black" points="487.03,-150.94 497.4,-148.75 487.93,-144 487.03,-150.94"/>
|
||||||
|
</g>
|
||||||
|
<!-- metric_type -->
|
||||||
|
<g id="node8" class="node">
|
||||||
|
<title>metric_type</title>
|
||||||
|
<polygon fill="#e1bee7" stroke="black" points="827.25,-19.75 827.25,-104.25 1006.25,-104.25 1006.25,-19.75 827.25,-19.75"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="861.88" y="-58.88" font-family="Helvetica,sans-Serif" font-size="10.00">MetricType</text>
|
||||||
|
<polyline fill="none" stroke="black" points="896.5,-19.75 896.5,-104.25"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="904.5" y="-90.75" font-family="Helvetica,sans-Serif" font-size="10.00">CPU_PERCENT</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="904.5" y="-78" font-family="Helvetica,sans-Serif" font-size="10.00">MEMORY_PERCENT</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="904.5" y="-65.25" font-family="Helvetica,sans-Serif" font-size="10.00">DISK_PERCENT</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="904.5" y="-52.5" font-family="Helvetica,sans-Serif" font-size="10.00">NETWORK_*</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="904.5" y="-39.75" font-family="Helvetica,sans-Serif" font-size="10.00">LOAD_AVG_*</text>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="951.38" y="-27" font-family="Helvetica,sans-Serif" font-size="10.00">...</text>
|
||||||
|
</g>
|
||||||
|
<!-- metric_msg->metric_type -->
|
||||||
|
<g id="edge5" class="edge">
|
||||||
|
<title>metric_msg->metric_type</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="1,5" d="M692.74,-59.81C730.57,-60.18 775.71,-60.63 815.45,-61.02"/>
|
||||||
|
<polygon fill="black" stroke="black" points="815.23,-64.51 825.27,-61.11 815.3,-57.51 815.23,-64.51"/>
|
||||||
|
</g>
|
||||||
|
<!-- health_status -->
|
||||||
|
<g id="node9" class="node">
|
||||||
|
<title>health_status</title>
|
||||||
|
<polygon fill="#ce93d8" stroke="black" points="842.25,-123.12 842.25,-194.88 991.25,-194.88 991.25,-123.12 842.25,-123.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="881.75" y="-155.88" font-family="Helvetica,sans-Serif" font-size="10.00">HealthStatus</text>
|
||||||
|
<polyline fill="none" stroke="black" points="921.25,-123.12 921.25,-194.88"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="929.25" y="-181.38" font-family="Helvetica,sans-Serif" font-size="10.00">HEALTHY</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="929.25" y="-168.62" font-family="Helvetica,sans-Serif" font-size="10.00">WARNING</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="929.25" y="-155.88" font-family="Helvetica,sans-Serif" font-size="10.00">CRITICAL</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="929.25" y="-143.12" font-family="Helvetica,sans-Serif" font-size="10.00">UNKNOWN</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="929.25" y="-130.38" font-family="Helvetica,sans-Serif" font-size="10.00">OFFLINE</text>
|
||||||
|
</g>
|
||||||
|
<!-- machine_state->health_status -->
|
||||||
|
<g id="edge6" class="edge">
|
||||||
|
<title>machine_state->health_status</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="1,5" d="M719.09,-161.57C755.76,-161.09 796.1,-160.57 830.65,-160.11"/>
|
||||||
|
<polygon fill="black" stroke="black" points="830.67,-163.61 840.62,-159.98 830.58,-156.61 830.67,-163.61"/>
|
||||||
|
</g>
|
||||||
|
<!-- control_svc -->
|
||||||
|
<g id="node4" class="node">
|
||||||
|
<title>control_svc</title>
|
||||||
|
<polygon fill="#90caf9" stroke="black" points="31.38,-261 31.38,-297 375.38,-297 375.38,-261 31.38,-261"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="75" y="-276" font-family="Helvetica,sans-Serif" font-size="10.00">ControlService</text>
|
||||||
|
<polyline fill="none" stroke="black" points="118.62,-261.25 118.62,-297"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="126.62" y="-276" font-family="Helvetica,sans-Serif" font-size="10.00">+ Control(stream Command) → stream Response</text>
|
||||||
|
</g>
|
||||||
|
<!-- commands -->
|
||||||
|
<g id="node5" class="node">
|
||||||
|
<title>commands</title>
|
||||||
|
<polygon fill="#64b5f6" stroke="black" points="426.75,-255.88 426.75,-302.12 791.25,-302.12 791.25,-255.88 426.75,-255.88"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="477.5" y="-275.88" font-family="Helvetica,sans-Serif" font-size="10.00">ControlCommand</text>
|
||||||
|
<polyline fill="none" stroke="black" points="528.25,-255.88 528.25,-302.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="536.25" y="-275.88" font-family="Helvetica,sans-Serif" font-size="10.00">command_id: string</text>
|
||||||
|
<polyline fill="none" stroke="black" points="641,-255.88 641,-302.12"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="649" y="-288.62" font-family="Helvetica,sans-Serif" font-size="10.00">UpdateIntervalCommand</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="649" y="-275.88" font-family="Helvetica,sans-Serif" font-size="10.00">RestartCollectionCommand</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="649" y="-263.12" font-family="Helvetica,sans-Serif" font-size="10.00">ShutdownCommand</text>
|
||||||
|
</g>
|
||||||
|
<!-- control_svc->commands -->
|
||||||
|
<g id="edge3" class="edge">
|
||||||
|
<title>control_svc->commands</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M375.84,-279C388.79,-279 401.92,-279 414.99,-279"/>
|
||||||
|
<polygon fill="black" stroke="black" points="414.95,-282.5 424.95,-279 414.95,-275.5 414.95,-282.5"/>
|
||||||
|
</g>
|
||||||
|
<!-- config_svc -->
|
||||||
|
<g id="node6" class="node">
|
||||||
|
<title>config_svc</title>
|
||||||
|
<polygon fill="#ffe0b2" stroke="black" points="16,-364 16,-400 390.75,-400 390.75,-364 16,-364"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="57.38" y="-379.12" font-family="Helvetica,sans-Serif" font-size="10.00">ConfigService</text>
|
||||||
|
<polyline fill="none" stroke="black" points="98.75,-364.5 98.75,-400"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="106.75" y="-385.5" font-family="Helvetica,sans-Serif" font-size="10.00">+ GetConfig(ConfigRequest) → CollectorConfig</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="106.75" y="-372.75" font-family="Helvetica,sans-Serif" font-size="10.00">+ WatchConfig(ConfigRequest) → stream CollectorConfig</text>
|
||||||
|
</g>
|
||||||
|
<!-- collector_config -->
|
||||||
|
<g id="node7" class="node">
|
||||||
|
<title>collector_config</title>
|
||||||
|
<polygon fill="#ffcc80" stroke="black" points="473,-352.5 473,-411.5 745,-411.5 745,-352.5 473,-352.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="middle" x="518.12" y="-378.88" font-family="Helvetica,sans-Serif" font-size="10.00">CollectorConfig</text>
|
||||||
|
<polyline fill="none" stroke="black" points="563.25,-352.5 563.25,-411.5"/>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="571.25" y="-398" font-family="Helvetica,sans-Serif" font-size="10.00">collection_interval_seconds: int32</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="571.25" y="-385.25" font-family="Helvetica,sans-Serif" font-size="10.00">enabled_metrics: MetricType[]</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="571.25" y="-372.5" font-family="Helvetica,sans-Serif" font-size="10.00">labels: map</text>
|
||||||
|
<text xml:space="preserve" text-anchor="start" x="571.25" y="-359.75" font-family="Helvetica,sans-Serif" font-size="10.00">thresholds: ThresholdConfig[]</text>
|
||||||
|
</g>
|
||||||
|
<!-- config_svc->collector_config -->
|
||||||
|
<g id="edge4" class="edge">
|
||||||
|
<title>config_svc->collector_config</title>
|
||||||
|
<path fill="none" stroke="black" stroke-dasharray="5,2" d="M391.12,-382C414.61,-382 438.36,-382 461.11,-382"/>
|
||||||
|
<polygon fill="black" stroke="black" points="461.03,-385.5 471.03,-382 461.03,-378.5 461.03,-385.5"/>
|
||||||
|
</g>
|
||||||
|
</g>
|
||||||
|
</svg>
|
||||||
|
After Width: | Height: | Size: 13 KiB |
120
docs/architecture/graph.html
Normal file
120
docs/architecture/graph.html
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>Graph Viewer - System Monitor</title>
|
||||||
|
<link rel="stylesheet" href="styles.css">
|
||||||
|
</head>
|
||||||
|
<body class="graph-viewer">
|
||||||
|
<header class="graph-header">
|
||||||
|
<a href="index.html" class="back-link">← Index</a>
|
||||||
|
<div class="nav-controls">
|
||||||
|
<button onclick="navigate(-1)" id="btn-prev" title="Previous (←)">◀</button>
|
||||||
|
<span id="nav-position">1 / 4</span>
|
||||||
|
<button onclick="navigate(1)" id="btn-next" title="Next (→)">▶</button>
|
||||||
|
</div>
|
||||||
|
<h1 id="graph-title">Loading...</h1>
|
||||||
|
<div class="graph-controls">
|
||||||
|
<button onclick="setMode('fit')">Fit</button>
|
||||||
|
<button onclick="setMode('fit-width')">Width</button>
|
||||||
|
<button onclick="setMode('fit-height')">Height</button>
|
||||||
|
<button onclick="setMode('actual-size')">100%</button>
|
||||||
|
<button onclick="downloadSvg()">↓ SVG</button>
|
||||||
|
</div>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<div class="graph-container" id="graph-container">
|
||||||
|
<img id="graph-img" src="" alt="Graph">
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
|
// Diagram registry keyed by the value of the `g` query parameter.
// Each entry carries the heading to display and the SVG file to load.
const graphs = {
    '01-system-overview': { title: 'System Overview', file: '01-system-overview.svg' },
    '02-data-flow': { title: 'Data Flow Pipeline', file: '02-data-flow.svg' },
    '03-deployment': { title: 'Deployment Architecture', file: '03-deployment.svg' },
    '04-grpc-services': { title: 'gRPC Service Definitions', file: '04-grpc-services.svg' }
};

// Order used by the prev/next controls. Non-numeric string keys are
// enumerated in insertion order, so this matches the registry above.
const graphOrder = Object.keys(graphs);

// The initial diagram comes from ?g=<key>; without it the first diagram is used.
const params = new URLSearchParams(window.location.search);
let graphKey = params.get('g') || '01-system-overview';

// An unrecognised key gives indexOf() === -1, which is clamped to the first diagram.
let currentIndex = Math.max(graphOrder.indexOf(graphKey), 0);
|
||||||
|
|
||||||
|
/**
 * Display the diagram registered under `key`.
 *
 * Updates the heading, the image source and alt text, the document title
 * and the `?g=` query string (without adding a history entry), records the
 * key in `graphKey`, then refreshes the prev/next controls.
 *
 * @param {string} key - A key of the `graphs` registry. Unknown keys are
 *     ignored so the current diagram stays on screen instead of throwing.
 */
function loadGraph(key) {
    const graph = graphs[key];
    if (!graph) {
        // Nothing registered under this key: keep the current view untouched.
        return;
    }

    const img = document.getElementById('graph-img');
    document.getElementById('graph-title').textContent = graph.title;
    img.src = graph.file;
    // Describe the diagram actually shown rather than the generic "Graph".
    img.alt = graph.title;
    document.title = graph.title + ' - System Monitor';

    // Encode the key so it can never inject extra query parameters.
    history.replaceState(null, '', '?g=' + encodeURIComponent(key));

    graphKey = key;
    updateNavHints();
}
|
||||||
|
|
||||||
|
/**
 * Sync the prev/next buttons and the "n / total" label with `graphKey`.
 * The prev button is disabled on the first diagram, next on the last.
 */
function updateNavHints() {
    const position = graphOrder.indexOf(graphKey);
    const lastPosition = graphOrder.length - 1;

    document.getElementById('btn-prev').disabled = position === 0;
    document.getElementById('btn-next').disabled = position === lastPosition;
    document.getElementById('nav-position').textContent =
        `${position + 1} / ${graphOrder.length}`;
}
|
||||||
|
|
||||||
|
/**
 * Move `direction` steps through `graphOrder` from the current diagram.
 * Steps that would leave the list are ignored.
 *
 * @param {number} direction - Offset to apply (-1 for previous, 1 for next).
 */
function navigate(direction) {
    const target = graphOrder.indexOf(graphKey) + direction;
    const inRange = target >= 0 && target < graphOrder.length;
    if (!inRange) {
        return;
    }

    currentIndex = target;
    loadGraph(graphOrder[target]);
}
|
||||||
|
|
||||||
|
/**
 * Select the image sizing mode by replacing the container's class list
 * with `graph-container <mode>`.
 *
 * @param {string} mode - Class name of the sizing mode (the toolbar passes
 *     'fit', 'fit-width', 'fit-height' or 'actual-size').
 */
function setMode(mode) {
    document.getElementById('graph-container').className = `graph-container ${mode}`;
}
|
||||||
|
|
||||||
|
/**
 * Trigger a download of the current diagram's SVG by clicking a temporary
 * anchor whose `href` and `download` are both the diagram's file name.
 */
function downloadSvg() {
    const { file } = graphs[graphKey];
    const anchor = Object.assign(document.createElement('a'), {
        href: file,
        download: file
    });
    anchor.click();
}
|
||||||
|
|
||||||
|
// Keyboard shortcuts: left/right arrows step through the diagrams,
// Escape returns to the index page.
document.addEventListener('keydown', (event) => {
    switch (event.key) {
        case 'ArrowLeft':
            navigate(-1);
            break;
        case 'ArrowRight':
            navigate(1);
            break;
        case 'Escape':
            window.location.href = 'index.html';
            break;
    }
});
|
||||||
|
|
||||||
|
// Start-up: pick the default sizing mode, then show the diagram selected
// from the URL (the two steps touch different elements and are independent).
setMode('fit');
loadGraph(graphOrder[currentIndex]);
|
||||||
|
</script>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
207
docs/architecture/index.html
Normal file
207
docs/architecture/index.html
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
|
<title>System Monitor - Architecture Documentation</title>
|
||||||
|
<link rel="stylesheet" href="styles.css">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<header>
|
||||||
|
<h1>System Monitoring Platform</h1>
|
||||||
|
<p class="subtitle">Architecture & Design Documentation</p>
|
||||||
|
</header>
|
||||||
|
|
||||||
|
<main>
|
||||||
|
<section class="graph-section" id="overview">
|
||||||
|
<div class="graph-header-row">
|
||||||
|
<h2>System Overview</h2>
|
||||||
|
<a href="graph.html?g=01-system-overview" class="view-btn">View Full</a>
|
||||||
|
</div>
|
||||||
|
<a href="graph.html?g=01-system-overview" class="graph-preview">
|
||||||
|
<img src="01-system-overview.svg" alt="System Overview">
|
||||||
|
</a>
|
||||||
|
<div class="graph-details">
|
||||||
|
<p>High-level architecture showing all services, data stores, and communication patterns.</p>
|
||||||
|
<h4>Key Components</h4>
|
||||||
|
<ul>
|
||||||
|
<li><strong>Collector</strong>: Runs on each monitored machine, streams metrics via gRPC</li>
|
||||||
|
<li><strong>Aggregator</strong>: Central gRPC server, receives streams, normalizes data</li>
|
||||||
|
<li><strong>Gateway</strong>: FastAPI service, WebSocket for browser, REST for queries</li>
|
||||||
|
<li><strong>Alerts</strong>: Subscribes to events, evaluates thresholds, triggers actions</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="graph-section" id="data-flow">
|
||||||
|
<div class="graph-header-row">
|
||||||
|
<h2>Data Flow Pipeline</h2>
|
||||||
|
<a href="graph.html?g=02-data-flow" class="view-btn">View Full</a>
|
||||||
|
</div>
|
||||||
|
<a href="graph.html?g=02-data-flow" class="graph-preview">
|
||||||
|
<img src="02-data-flow.svg" alt="Data Flow">
|
||||||
|
</a>
|
||||||
|
<div class="graph-details">
|
||||||
|
<p>How metrics flow from collection through storage with different retention tiers.</p>
|
||||||
|
<h4>Storage Tiers</h4>
|
||||||
|
<table class="details-table">
|
||||||
|
<thead>
|
||||||
|
<tr><th>Tier</th><th>Resolution</th><th>Retention</th><th>Use Case</th></tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Hot (Redis)</td>
|
||||||
|
<td>5s</td>
|
||||||
|
<td>5 min</td>
|
||||||
|
<td>Current state, live dashboard</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>Raw (TimescaleDB)</td>
|
||||||
|
<td>5s</td>
|
||||||
|
<td>24h</td>
|
||||||
|
<td>Recent detailed analysis</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>1-min Aggregates</td>
|
||||||
|
<td>1m</td>
|
||||||
|
<td>7d</td>
|
||||||
|
<td>Week view, trends</td>
|
||||||
|
</tr>
|
||||||
|
<tr>
|
||||||
|
<td>1-hour Aggregates</td>
|
||||||
|
<td>1h</td>
|
||||||
|
<td>90d</td>
|
||||||
|
<td>Long-term analysis</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="graph-section" id="deployment">
|
||||||
|
<div class="graph-header-row">
|
||||||
|
<h2>Deployment Architecture</h2>
|
||||||
|
<a href="graph.html?g=03-deployment" class="view-btn">View Full</a>
|
||||||
|
</div>
|
||||||
|
<a href="graph.html?g=03-deployment" class="graph-preview">
|
||||||
|
<img src="03-deployment.svg" alt="Deployment">
|
||||||
|
</a>
|
||||||
|
<div class="graph-details">
|
||||||
|
<p>Deployment options from local development to AWS production.</p>
|
||||||
|
<h4>Environments</h4>
|
||||||
|
<ul>
|
||||||
|
<li><strong>Local Dev</strong>: Kind + Tilt for K8s, or Docker Compose</li>
|
||||||
|
<li><strong>Demo (EC2)</strong>: Docker Compose on t2.small at sysmonstm.mcrn.ar</li>
|
||||||
|
<li><strong>Lambda Pipeline</strong>: SQS-triggered aggregation for data processing experience</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="graph-section" id="grpc">
|
||||||
|
<div class="graph-header-row">
|
||||||
|
<h2>gRPC Service Definitions</h2>
|
||||||
|
<a href="graph.html?g=04-grpc-services" class="view-btn">View Full</a>
|
||||||
|
</div>
|
||||||
|
<a href="graph.html?g=04-grpc-services" class="graph-preview">
|
||||||
|
<img src="04-grpc-services.svg" alt="gRPC Services">
|
||||||
|
</a>
|
||||||
|
<div class="graph-details">
|
||||||
|
<p>Protocol Buffer service and message definitions.</p>
|
||||||
|
<h4>Services</h4>
|
||||||
|
<ul>
|
||||||
|
<li><strong>MetricsService</strong>: Client-side streaming for metrics ingestion</li>
|
||||||
|
<li><strong>ControlService</strong>: Bidirectional streaming for collector control</li>
|
||||||
|
<li><strong>ConfigService</strong>: Server-side streaming for config updates</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="findings-section">
|
||||||
|
<h2>Interview Talking Points</h2>
|
||||||
|
<div class="findings-grid">
|
||||||
|
<article class="finding-card">
|
||||||
|
<h3>Domain Mapping</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Machine = Payment Processor</li>
|
||||||
|
<li>Metrics Stream = Transaction Stream</li>
|
||||||
|
<li>Thresholds = Fraud Detection</li>
|
||||||
|
<li>Aggregator = Payment Hub</li>
|
||||||
|
</ul>
|
||||||
|
</article>
|
||||||
|
<article class="finding-card">
|
||||||
|
<h3>gRPC Patterns</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Client streaming (metrics)</li>
|
||||||
|
<li>Server streaming (config)</li>
|
||||||
|
<li>Bidirectional (control)</li>
|
||||||
|
<li>Health checking</li>
|
||||||
|
</ul>
|
||||||
|
</article>
|
||||||
|
<article class="finding-card">
|
||||||
|
<h3>Event-Driven</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Redis Pub/Sub (current)</li>
|
||||||
|
<li>Abstraction for Kafka switch</li>
|
||||||
|
<li>Decoupled alert processing</li>
|
||||||
|
<li>Real-time WebSocket push</li>
|
||||||
|
</ul>
|
||||||
|
</article>
|
||||||
|
<article class="finding-card">
|
||||||
|
<h3>Resilience</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Collectors are independent</li>
|
||||||
|
<li>Graceful degradation</li>
|
||||||
|
<li>Retry with backoff</li>
|
||||||
|
<li>Health checks everywhere</li>
|
||||||
|
</ul>
|
||||||
|
</article>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
|
||||||
|
<section class="tech-section">
|
||||||
|
<h2>Technology Stack</h2>
|
||||||
|
<div class="tech-grid">
|
||||||
|
<div class="tech-column">
|
||||||
|
<h3>Core</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Python 3.11+</li>
|
||||||
|
<li>FastAPI</li>
|
||||||
|
<li>gRPC / protobuf</li>
|
||||||
|
<li>asyncio</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="tech-column">
|
||||||
|
<h3>Data</h3>
|
||||||
|
<ul>
|
||||||
|
<li>TimescaleDB</li>
|
||||||
|
<li>Redis</li>
|
||||||
|
<li>Redis Pub/Sub</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="tech-column">
|
||||||
|
<h3>Infrastructure</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Docker</li>
|
||||||
|
<li>Kubernetes</li>
|
||||||
|
<li>Kind + Tilt</li>
|
||||||
|
<li>Terraform</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="tech-column">
|
||||||
|
<h3>CI/CD</h3>
|
||||||
|
<ul>
|
||||||
|
<li>Woodpecker CI</li>
|
||||||
|
<li>Kustomize</li>
|
||||||
|
<li>Container Registry</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
</section>
|
||||||
|
</main>
|
||||||
|
|
||||||
|
<footer>
|
||||||
|
<p>System Monitoring Platform - Architecture Documentation</p>
|
||||||
|
<p class="date">Generated: <time datetime="2025-12-29">December 2025</time></p>
|
||||||
|
</footer>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
343
docs/architecture/styles.css
Normal file
343
docs/architecture/styles.css
Normal file
@@ -0,0 +1,343 @@
|
|||||||
|
:root {
|
||||||
|
--bg-primary: #1a1a2e;
|
||||||
|
--bg-secondary: #16213e;
|
||||||
|
--bg-card: #0f3460;
|
||||||
|
--text-primary: #eee;
|
||||||
|
--text-secondary: #a0a0a0;
|
||||||
|
--accent: #e94560;
|
||||||
|
--accent-secondary: #533483;
|
||||||
|
--border: #2a2a4a;
|
||||||
|
}
|
||||||
|
|
||||||
|
* {
|
||||||
|
box-sizing: border-box;
|
||||||
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: 'Segoe UI', system-ui, -apple-system, sans-serif;
|
||||||
|
background: var(--bg-primary);
|
||||||
|
color: var(--text-primary);
|
||||||
|
line-height: 1.6;
|
||||||
|
}
|
||||||
|
|
||||||
|
header {
|
||||||
|
background: linear-gradient(135deg, var(--bg-secondary), var(--accent-secondary));
|
||||||
|
padding: 2rem;
|
||||||
|
text-align: center;
|
||||||
|
border-bottom: 2px solid var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
|
header h1 {
|
||||||
|
font-size: 2rem;
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
header .subtitle {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
main {
|
||||||
|
max-width: 1400px;
|
||||||
|
margin: 0 auto;
|
||||||
|
padding: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Graph sections */
|
||||||
|
.graph-section {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 1.5rem;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-header-row {
|
||||||
|
display: flex;
|
||||||
|
justify-content: space-between;
|
||||||
|
align-items: center;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-header-row h2 {
|
||||||
|
font-size: 1.25rem;
|
||||||
|
color: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
|
.view-btn {
|
||||||
|
background: var(--accent);
|
||||||
|
color: white;
|
||||||
|
padding: 0.5rem 1rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
text-decoration: none;
|
||||||
|
font-size: 0.875rem;
|
||||||
|
transition: opacity 0.2s;
|
||||||
|
}
|
||||||
|
|
||||||
|
.view-btn:hover {
|
||||||
|
opacity: 0.8;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-preview {
|
||||||
|
display: block;
|
||||||
|
background: white;
|
||||||
|
border-radius: 4px;
|
||||||
|
padding: 1rem;
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
overflow: auto;
|
||||||
|
max-height: 400px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-preview img {
|
||||||
|
max-width: 100%;
|
||||||
|
height: auto;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-details {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.9rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-details h4 {
|
||||||
|
color: var(--text-primary);
|
||||||
|
margin: 1rem 0 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-details ul {
|
||||||
|
margin-left: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-details li {
|
||||||
|
margin-bottom: 0.25rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Tech section */
|
||||||
|
.tech-section {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 1.5rem;
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tech-section h2 {
|
||||||
|
color: var(--accent);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tech-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
||||||
|
gap: 1.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tech-column h3 {
|
||||||
|
color: var(--text-primary);
|
||||||
|
font-size: 1rem;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
padding-bottom: 0.5rem;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.tech-column ul {
|
||||||
|
list-style: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.tech-column li {
|
||||||
|
padding: 0.25rem 0;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Findings */
|
||||||
|
.findings-section {
|
||||||
|
margin-bottom: 2rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.findings-section h2 {
|
||||||
|
color: var(--accent);
|
||||||
|
margin-bottom: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.findings-grid {
|
||||||
|
display: grid;
|
||||||
|
grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
|
||||||
|
gap: 1rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.finding-card {
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-radius: 8px;
|
||||||
|
padding: 1.25rem;
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.finding-card h3 {
|
||||||
|
color: var(--accent);
|
||||||
|
font-size: 1rem;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.finding-card ul {
|
||||||
|
margin-left: 1rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.finding-card code {
|
||||||
|
background: var(--bg-primary);
|
||||||
|
padding: 0.125rem 0.375rem;
|
||||||
|
border-radius: 3px;
|
||||||
|
font-size: 0.85em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Footer */
|
||||||
|
footer {
|
||||||
|
text-align: center;
|
||||||
|
padding: 2rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
border-top: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
footer .date {
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Graph viewer page */
|
||||||
|
body.graph-viewer {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
height: 100vh;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-header {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 1rem;
|
||||||
|
padding: 0.75rem 1rem;
|
||||||
|
background: var(--bg-secondary);
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
flex-wrap: wrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
.back-link {
|
||||||
|
color: var(--accent);
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-controls {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-controls button {
|
||||||
|
background: var(--bg-card);
|
||||||
|
color: var(--text-primary);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
padding: 0.25rem 0.75rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
|
.nav-controls button:disabled {
|
||||||
|
opacity: 0.3;
|
||||||
|
cursor: not-allowed;
|
||||||
|
}
|
||||||
|
|
||||||
|
#nav-position {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-header h1 {
|
||||||
|
flex: 1;
|
||||||
|
font-size: 1rem;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls {
|
||||||
|
display: flex;
|
||||||
|
gap: 0.5rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls button {
|
||||||
|
background: var(--bg-card);
|
||||||
|
color: var(--text-primary);
|
||||||
|
border: 1px solid var(--border);
|
||||||
|
padding: 0.375rem 0.75rem;
|
||||||
|
border-radius: 4px;
|
||||||
|
cursor: pointer;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.graph-controls button:hover {
|
||||||
|
background: var(--accent);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Scrollable white canvas that fills the space below the viewer header. */
.graph-container {
    flex: 1;
    overflow: auto;
    background: white;
    display: flex;
    align-items: flex-start;
    padding: 1rem;
}

/*
 * Centre the image with auto margins rather than justify-content: center.
 * With justify-content, an image wider than the container overflows equally
 * to both sides and the left part cannot be scrolled into view; auto margins
 * collapse to zero on overflow, so the image starts at the scroll origin.
 */
.graph-container img {
    margin: 0 auto;
}

/* Fit: scale down to fit both dimensions. */
.graph-container.fit img {
    max-width: 100%;
    max-height: calc(100vh - 60px);
    object-fit: contain;
}

/* Width: stretch to the container width, height follows. */
.graph-container.fit-width img {
    width: 100%;
    height: auto;
}

/* Height: fill the viewport height minus 60px, width follows. */
.graph-container.fit-height img {
    height: calc(100vh - 60px);
    width: auto;
}

/* 100%: intrinsic SVG size, scroll as needed. */
.graph-container.actual-size img {
    /* No constraints */
}
|
||||||
|
|
||||||
|
/* Tables */
|
||||||
|
.details-table {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
margin: 1rem 0;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-table th,
|
||||||
|
.details-table td {
|
||||||
|
padding: 0.5rem;
|
||||||
|
text-align: left;
|
||||||
|
border-bottom: 1px solid var(--border);
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-table th {
|
||||||
|
color: var(--text-primary);
|
||||||
|
background: var(--bg-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-table td {
|
||||||
|
color: var(--text-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.details-table code {
|
||||||
|
background: var(--bg-primary);
|
||||||
|
padding: 0.125rem 0.375rem;
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.note {
|
||||||
|
font-style: italic;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: var(--text-secondary);
|
||||||
|
margin-top: 0.5rem;
|
||||||
|
}
|
||||||
1
infra/aws/lambdas/aggregator/placeholder.txt
Normal file
1
infra/aws/lambdas/aggregator/placeholder.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
placeholder
|
||||||
BIN
infra/aws/lambdas/aggregator/placeholder.zip
Normal file
BIN
infra/aws/lambdas/aggregator/placeholder.zip
Normal file
Binary file not shown.
1
infra/aws/lambdas/compactor/placeholder.txt
Normal file
1
infra/aws/lambdas/compactor/placeholder.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
placeholder
|
||||||
BIN
infra/aws/lambdas/compactor/placeholder.zip
Normal file
BIN
infra/aws/lambdas/compactor/placeholder.zip
Normal file
Binary file not shown.
148
infra/aws/terraform/ec2.tf
Normal file
148
infra/aws/terraform/ec2.tf
Normal file
@@ -0,0 +1,148 @@
|
|||||||
|
# EC2 Instance for Docker Compose deployment
|
||||||
|
|
||||||
|
# Network rules for the Docker Compose host: public HTTP/HTTPS and gRPC
# ingress, optional SSH ingress, and unrestricted egress.
resource "aws_security_group" "sysmonstm" {
  name_prefix = "${var.project_name}-"
  description = "Security group for System Monitor Platform"

  # HTTP/HTTPS
  ingress {
    from_port   = 80
    to_port     = 80
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "HTTP"
  }

  ingress {
    from_port   = 443
    to_port     = 443
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "HTTPS"
  }

  # gRPC for collectors
  # NOTE(review): reachable from any address; confirm collectors cannot be
  # limited to known CIDR ranges.
  ingress {
    from_port   = 50051
    to_port     = 50051
    protocol    = "tcp"
    cidr_blocks = ["0.0.0.0/0"]
    description = "gRPC Aggregator"
  }

  # SSH (restricted): the rule exists only when at least one CIDR is supplied.
  dynamic "ingress" {
    for_each = length(var.allowed_ssh_cidrs) > 0 ? [1] : []
    content {
      from_port   = 22
      to_port     = 22
      protocol    = "tcp"
      cidr_blocks = var.allowed_ssh_cidrs
      description = "SSH"
    }
  }

  egress {
    from_port   = 0
    to_port     = 0
    protocol    = "-1"
    cidr_blocks = ["0.0.0.0/0"]
    description = "Allow all outbound"
  }

  tags = {
    Name = "${var.project_name}-sg"
  }

  # name_prefix gives every replacement a fresh name, so the new group can be
  # created and attached before the old one is deleted. Without this, a
  # replacement tries to destroy a group the instance still references.
  lifecycle {
    create_before_destroy = true
  }
}
|
||||||
|
|
||||||
|
# Role assumed by the EC2 service on behalf of the instance.
resource "aws_iam_role" "ec2" {
  name_prefix = "${var.project_name}-ec2-"

  # Trust policy: only ec2.amazonaws.com may assume this role.
  assume_role_policy = jsonencode({
    Version = "2012-10-17"
    Statement = [{
      Action    = "sts:AssumeRole"
      Effect    = "Allow"
      Principal = { Service = "ec2.amazonaws.com" }
    }]
  })
}
|
||||||
|
|
||||||
|
# Attach the AWS-managed AmazonSSMManagedInstanceCore policy to the EC2 role.
resource "aws_iam_role_policy_attachment" "ec2_ssm" {
  policy_arn = "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
  role       = aws_iam_role.ec2.name
}
|
||||||
|
|
||||||
|
# Instance profile that exposes the EC2 role to the instance.
resource "aws_iam_instance_profile" "ec2" {
  role        = aws_iam_role.ec2.name
  name_prefix = "${var.project_name}-"
}
|
||||||
|
|
||||||
|
# Single host that runs the whole stack with Docker Compose.
resource "aws_instance" "sysmonstm" {
  ami           = data.aws_ami.amazon_linux_2023.id
  instance_type = var.ec2_instance_type

  # An empty key name means "no key pair".
  key_name = var.ec2_key_name != "" ? var.ec2_key_name : null

  vpc_security_group_ids = [aws_security_group.sysmonstm.id]
  iam_instance_profile   = aws_iam_instance_profile.ec2.name

  root_block_device {
    volume_size = 20
    volume_type = "gp3"
    encrypted   = true
  }

  # First-boot provisioning: install Docker and Docker Compose, fetch the
  # repository, write a minimal .env and start the services.
  user_data = <<-EOF
    #!/bin/bash
    set -e

    # Install Docker
    dnf update -y
    dnf install -y docker git
    systemctl enable docker
    systemctl start docker

    # Install Docker Compose
    # -f makes curl exit non-zero on an HTTP error so "set -e" aborts here
    # instead of an error page being saved and marked executable.
    curl -fL "https://github.com/docker/compose/releases/latest/download/docker-compose-$(uname -s)-$(uname -m)" \
      -o /usr/local/bin/docker-compose
    chmod +x /usr/local/bin/docker-compose

    # Add ec2-user to docker group
    usermod -aG docker ec2-user

    # Clone and start the application
    cd /home/ec2-user
    # "|| true" tolerates an already existing checkout; a genuinely failed
    # clone still stops the script at the following cd.
    git clone https://github.com/yourusername/sysmonstm.git || true
    cd sysmonstm

    # Create .env file
    cat > .env <<EOL
    LOG_LEVEL=INFO
    MACHINE_ID=aws-demo
    EOL

    # Start services
    docker-compose up -d
  EOF

  tags = {
    Name = "${var.project_name}-server"
  }

  # A newer AMI from the data source must not force instance replacement.
  lifecycle {
    ignore_changes = [ami]
  }
}
|
||||||
|
|
||||||
|
# Elastic IP attached to the instance so its public address stays stable.
resource "aws_eip" "sysmonstm" {
  domain   = "vpc"
  instance = aws_instance.sysmonstm.id

  tags = {
    Name = "${var.project_name}-eip"
  }
}
|
||||||
203
infra/aws/terraform/lambda.tf
Normal file
203
infra/aws/terraform/lambda.tf
Normal file
@@ -0,0 +1,203 @@
|
|||||||
|
# Lambda Functions for Data Processing Pipeline
|
||||||
|
# These are optional: the SQS/Lambda resources are enabled via the enable_lambda_pipeline variable, the S3 bucket via enable_s3_backup
|
||||||
|
|
||||||
|
# SQS Queue for buffering metrics
|
||||||
|
# Main queue that buffers metrics before the aggregator Lambda consumes them.
resource "aws_sqs_queue" "metrics" {
  count = var.enable_lambda_pipeline ? 1 : 0

  name = "${var.project_name}-metrics"

  # AWS recommends a visibility timeout of at least six times the consuming
  # function's timeout, so throttled or retried batches do not become visible
  # again (and count toward maxReceiveCount) while still being processed.
  visibility_timeout_seconds = var.lambda_timeout * 6
  message_retention_seconds  = 86400 # 24 hours

  # Messages that fail processing three times are moved to the dead-letter queue.
  redrive_policy = jsonencode({
    deadLetterTargetArn = aws_sqs_queue.metrics_dlq[0].arn
    maxReceiveCount     = 3
  })
}
|
||||||
|
|
||||||
|
resource "aws_sqs_queue" "metrics_dlq" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
name = "${var.project_name}-metrics-dlq"
|
||||||
|
message_retention_seconds = 1209600 # 14 days
|
||||||
|
}
|
||||||
|
|
||||||
|
# S3 Bucket for metric backups
|
||||||
|
resource "aws_s3_bucket" "metrics" {
|
||||||
|
count = var.enable_s3_backup ? 1 : 0
|
||||||
|
|
||||||
|
bucket_prefix = "${var.project_name}-metrics-"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Tiering and expiry for backed-up metrics: Standard-IA after 30 days,
# Glacier after 90 days, deleted after 365 days.
resource "aws_s3_bucket_lifecycle_configuration" "metrics" {
  count  = var.enable_s3_backup ? 1 : 0
  bucket = aws_s3_bucket.metrics[0].id

  rule {
    id     = "archive-old-metrics"
    status = "Enabled"

    # Explicit empty filter: the rule applies to every object in the bucket.
    # AWS provider v5 warns when a rule specifies neither `filter` nor `prefix`.
    filter {}

    transition {
      days          = 30
      storage_class = "STANDARD_IA"
    }

    transition {
      days          = 90
      storage_class = "GLACIER"
    }

    expiration {
      days = 365
    }
  }
}
|
||||||
|
|
||||||
|
# IAM Role for Lambda
|
||||||
|
resource "aws_iam_role" "lambda" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
name_prefix = "${var.project_name}-lambda-"
|
||||||
|
|
||||||
|
assume_role_policy = jsonencode({
|
||||||
|
Version = "2012-10-17"
|
||||||
|
Statement = [
|
||||||
|
{
|
||||||
|
Action = "sts:AssumeRole"
|
||||||
|
Effect = "Allow"
|
||||||
|
Principal = {
|
||||||
|
Service = "lambda.amazonaws.com"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
# Inline policy for the pipeline Lambda role: CloudWatch Logs, consuming the
# metrics queue, and (only when S3 backup is enabled) object access to the
# metrics bucket.
resource "aws_iam_role_policy" "lambda" {
  count = var.enable_lambda_pipeline ? 1 : 0
  name  = "lambda-policy"
  role  = aws_iam_role.lambda[0].id

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = concat(
      [
        {
          Effect = "Allow"
          Action = [
            "logs:CreateLogGroup",
            "logs:CreateLogStream",
            "logs:PutLogEvents"
          ]
          Resource = "arn:aws:logs:*:*:*"
        },
        {
          Effect = "Allow"
          Action = [
            "sqs:ReceiveMessage",
            "sqs:DeleteMessage",
            "sqs:GetQueueAttributes"
          ]
          Resource = aws_sqs_queue.metrics[0].arn
        }
      ],
      # Grant S3 access only when the backup bucket exists. The previous
      # fallback of Resource = "*" allowed Put/GetObject on every bucket in
      # the account whenever enable_s3_backup was false.
      var.enable_s3_backup ? [
        {
          Effect = "Allow"
          Action = [
            "s3:PutObject",
            "s3:GetObject"
          ]
          Resource = "${aws_s3_bucket.metrics[0].arn}/*"
        }
      ] : []
    )
  })
}
|
||||||
|
|
||||||
|
# Lambda function for metric aggregation
|
||||||
|
resource "aws_lambda_function" "aggregator" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
function_name = "${var.project_name}-aggregator"
|
||||||
|
role = aws_iam_role.lambda[0].arn
|
||||||
|
handler = "main.handler"
|
||||||
|
runtime = "python3.11"
|
||||||
|
timeout = var.lambda_timeout
|
||||||
|
memory_size = var.lambda_memory_size
|
||||||
|
|
||||||
|
# Placeholder - will be deployed via CI/CD
|
||||||
|
filename = "${path.module}/../lambdas/aggregator/placeholder.zip"
|
||||||
|
source_code_hash = filebase64sha256("${path.module}/../lambdas/aggregator/placeholder.zip")
|
||||||
|
|
||||||
|
environment {
|
||||||
|
variables = {
|
||||||
|
TIMESCALE_HOST = aws_instance.sysmonstm.private_ip
|
||||||
|
LOG_LEVEL = "INFO"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [filename, source_code_hash]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Wires the metrics queue to the aggregator Lambda.
resource "aws_lambda_event_source_mapping" "sqs_trigger" {
  count = var.enable_lambda_pipeline ? 1 : 0

  event_source_arn = aws_sqs_queue.metrics[0].arn
  function_name    = aws_lambda_function.aggregator[0].arn
  batch_size       = 100

  # For SQS sources, a batch size above 10 is only accepted together with a
  # batching window of at least 1 second; without it the mapping is rejected.
  maximum_batching_window_in_seconds = 1

  # Cap concurrent invocations driven by this queue.
  scaling_config {
    maximum_concurrency = 5
  }
}
|
||||||
|
|
||||||
|
# CloudWatch Event for scheduled compaction
|
||||||
|
resource "aws_cloudwatch_event_rule" "compactor" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
name = "${var.project_name}-compactor-schedule"
|
||||||
|
description = "Trigger metric compaction every hour"
|
||||||
|
schedule_expression = "rate(1 hour)"
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_lambda_function" "compactor" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
function_name = "${var.project_name}-compactor"
|
||||||
|
role = aws_iam_role.lambda[0].arn
|
||||||
|
handler = "main.handler"
|
||||||
|
runtime = "python3.11"
|
||||||
|
timeout = 300
|
||||||
|
memory_size = 512
|
||||||
|
|
||||||
|
filename = "${path.module}/../lambdas/compactor/placeholder.zip"
|
||||||
|
source_code_hash = filebase64sha256("${path.module}/../lambdas/compactor/placeholder.zip")
|
||||||
|
|
||||||
|
environment {
|
||||||
|
variables = {
|
||||||
|
TIMESCALE_HOST = aws_instance.sysmonstm.private_ip
|
||||||
|
S3_BUCKET = var.enable_s3_backup ? aws_s3_bucket.metrics[0].bucket : ""
|
||||||
|
LOG_LEVEL = "INFO"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
lifecycle {
|
||||||
|
ignore_changes = [filename, source_code_hash]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_cloudwatch_event_target" "compactor" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
rule = aws_cloudwatch_event_rule.compactor[0].name
|
||||||
|
target_id = "compactor-lambda"
|
||||||
|
arn = aws_lambda_function.compactor[0].arn
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_lambda_permission" "compactor_cloudwatch" {
|
||||||
|
count = var.enable_lambda_pipeline ? 1 : 0
|
||||||
|
|
||||||
|
statement_id = "AllowCloudWatchInvoke"
|
||||||
|
action = "lambda:InvokeFunction"
|
||||||
|
function_name = aws_lambda_function.compactor[0].function_name
|
||||||
|
principal = "events.amazonaws.com"
|
||||||
|
source_arn = aws_cloudwatch_event_rule.compactor[0].arn
|
||||||
|
}
|
||||||
58
infra/aws/terraform/main.tf
Normal file
58
infra/aws/terraform/main.tf
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
# System Monitor Platform - AWS Infrastructure
|
||||||
|
#
|
||||||
|
# This Terraform configuration sets up:
|
||||||
|
# - EC2 instance for running Docker Compose (demo/staging)
|
||||||
|
# - Lambda functions for data processing pipeline
|
||||||
|
# - SQS queue for buffering metrics
|
||||||
|
# - S3 bucket for metric backups
|
||||||
|
# - Security groups and IAM roles
|
||||||
|
|
||||||
|
terraform {
|
||||||
|
required_version = ">= 1.0"
|
||||||
|
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = "~> 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Uncomment for remote state
|
||||||
|
# backend "s3" {
|
||||||
|
# bucket = "your-terraform-state-bucket"
|
||||||
|
# key = "sysmonstm/terraform.tfstate"
|
||||||
|
# region = "us-east-1"
|
||||||
|
# }
|
||||||
|
}
|
||||||
|
|
||||||
|
provider "aws" {
|
||||||
|
region = var.aws_region
|
||||||
|
|
||||||
|
default_tags {
|
||||||
|
tags = {
|
||||||
|
Project = "sysmonstm"
|
||||||
|
Environment = var.environment
|
||||||
|
ManagedBy = "terraform"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Data sources
|
||||||
|
data "aws_availability_zones" "available" {
|
||||||
|
state = "available"
|
||||||
|
}
|
||||||
|
|
||||||
|
data "aws_ami" "amazon_linux_2023" {
|
||||||
|
most_recent = true
|
||||||
|
owners = ["amazon"]
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "name"
|
||||||
|
values = ["al2023-ami-*-x86_64"]
|
||||||
|
}
|
||||||
|
|
||||||
|
filter {
|
||||||
|
name = "virtualization-type"
|
||||||
|
values = ["hvm"]
|
||||||
|
}
|
||||||
|
}
|
||||||
36
infra/aws/terraform/outputs.tf
Normal file
36
infra/aws/terraform/outputs.tf
Normal file
@@ -0,0 +1,36 @@
|
|||||||
|
# Outputs
|
||||||
|
|
||||||
|
output "ec2_public_ip" {
|
||||||
|
description = "Public IP of the EC2 instance"
|
||||||
|
value = aws_eip.sysmonstm.public_ip
|
||||||
|
}
|
||||||
|
|
||||||
|
output "ec2_instance_id" {
|
||||||
|
description = "EC2 instance ID"
|
||||||
|
value = aws_instance.sysmonstm.id
|
||||||
|
}
|
||||||
|
|
||||||
|
output "dashboard_url" {
|
||||||
|
description = "URL for the monitoring dashboard"
|
||||||
|
value = "http://${aws_eip.sysmonstm.public_ip}:8000"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "grpc_endpoint" {
|
||||||
|
description = "gRPC endpoint for collectors"
|
||||||
|
value = "${aws_eip.sysmonstm.public_ip}:50051"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "sqs_queue_url" {
|
||||||
|
description = "SQS queue URL for metrics"
|
||||||
|
value = var.enable_lambda_pipeline ? aws_sqs_queue.metrics[0].url : null
|
||||||
|
}
|
||||||
|
|
||||||
|
output "s3_bucket" {
|
||||||
|
description = "S3 bucket for metric backups"
|
||||||
|
value = var.enable_s3_backup ? aws_s3_bucket.metrics[0].bucket : null
|
||||||
|
}
|
||||||
|
|
||||||
|
output "ssh_command" {
|
||||||
|
description = "SSH command to connect to the instance"
|
||||||
|
value = var.ec2_key_name != "" ? "ssh -i ${var.ec2_key_name}.pem ec2-user@${aws_eip.sysmonstm.public_ip}" : "Use SSM Session Manager"
|
||||||
|
}
|
||||||
16
infra/aws/terraform/terraform.tfvars.example
Normal file
16
infra/aws/terraform/terraform.tfvars.example
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
# Example Terraform variables
|
||||||
|
# Copy to terraform.tfvars and fill in your values
|
||||||
|
|
||||||
|
aws_region = "us-east-1"
|
||||||
|
environment = "staging"
|
||||||
|
project_name = "sysmonstm"
|
||||||
|
domain_name = "sysmonstm.mcrn.ar"
|
||||||
|
|
||||||
|
# EC2
|
||||||
|
ec2_instance_type = "t2.small"
|
||||||
|
ec2_key_name = "your-key-pair-name"
|
||||||
|
allowed_ssh_cidrs = ["YOUR.IP.ADDRESS/32"]
|
||||||
|
|
||||||
|
# Feature flags
|
||||||
|
enable_lambda_pipeline = false
|
||||||
|
enable_s3_backup = false
|
||||||
70
infra/aws/terraform/variables.tf
Normal file
70
infra/aws/terraform/variables.tf
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# Variables for System Monitor Platform
|
||||||
|
|
||||||
|
variable "aws_region" {
|
||||||
|
description = "AWS region to deploy to"
|
||||||
|
type = string
|
||||||
|
default = "us-east-1"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "environment" {
|
||||||
|
description = "Environment name (dev, staging, prod)"
|
||||||
|
type = string
|
||||||
|
default = "staging"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "project_name" {
|
||||||
|
description = "Project name for resource naming"
|
||||||
|
type = string
|
||||||
|
default = "sysmonstm"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "domain_name" {
|
||||||
|
description = "Domain name for the service"
|
||||||
|
type = string
|
||||||
|
default = "sysmonstm.mcrn.ar"
|
||||||
|
}
|
||||||
|
|
||||||
|
# EC2 Configuration
|
||||||
|
variable "ec2_instance_type" {
|
||||||
|
description = "EC2 instance type"
|
||||||
|
type = string
|
||||||
|
default = "t2.small"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "ec2_key_name" {
|
||||||
|
description = "SSH key pair name"
|
||||||
|
type = string
|
||||||
|
default = ""
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "allowed_ssh_cidrs" {
|
||||||
|
description = "CIDR blocks allowed to SSH"
|
||||||
|
type = list(string)
|
||||||
|
default = [] # Set to your IP for security
|
||||||
|
}
|
||||||
|
|
||||||
|
# Lambda Configuration
|
||||||
|
variable "lambda_memory_size" {
|
||||||
|
description = "Lambda function memory in MB"
|
||||||
|
type = number
|
||||||
|
default = 256
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "lambda_timeout" {
|
||||||
|
description = "Lambda function timeout in seconds"
|
||||||
|
type = number
|
||||||
|
default = 60
|
||||||
|
}
|
||||||
|
|
||||||
|
# Feature flags
|
||||||
|
variable "enable_lambda_pipeline" {
|
||||||
|
description = "Enable Lambda data processing pipeline"
|
||||||
|
type = bool
|
||||||
|
default = false
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "enable_s3_backup" {
|
||||||
|
description = "Enable S3 backup for metrics"
|
||||||
|
type = bool
|
||||||
|
default = false
|
||||||
|
}
|
||||||
15
k8s/base/aggregator/configmap.yaml
Normal file
15
k8s/base/aggregator/configmap.yaml
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: aggregator-config
|
||||||
|
data:
|
||||||
|
REDIS_URL: "redis://redis:6379"
|
||||||
|
TIMESCALE_HOST: "timescaledb"
|
||||||
|
TIMESCALE_PORT: "5432"
|
||||||
|
TIMESCALE_USER: "monitor"
|
||||||
|
TIMESCALE_DB: "monitor"
|
||||||
|
GRPC_PORT: "50051"
|
||||||
|
SERVICE_NAME: "aggregator"
|
||||||
|
EVENTS_BACKEND: "redis_pubsub"
|
||||||
|
LOG_LEVEL: "INFO"
|
||||||
|
LOG_FORMAT: "json"
|
||||||
46
k8s/base/aggregator/deployment.yaml
Normal file
46
k8s/base/aggregator/deployment.yaml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: aggregator
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: aggregator
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: aggregator
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: aggregator
|
||||||
|
image: sysmonstm/aggregator:latest
|
||||||
|
ports:
|
||||||
|
- containerPort: 50051
|
||||||
|
name: grpc
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: aggregator-config
|
||||||
|
env:
|
||||||
|
- name: TIMESCALE_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: timescaledb-secret
|
||||||
|
key: password
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "100m"
|
||||||
|
limits:
|
||||||
|
memory: "256Mi"
|
||||||
|
cpu: "500m"
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["/bin/grpc_health_probe", "-addr=:50051"]
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["/bin/grpc_health_probe", "-addr=:50051"]
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
11
k8s/base/aggregator/kustomization.yaml
Normal file
11
k8s/base/aggregator/kustomization.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/name: aggregator
|
||||||
|
app.kubernetes.io/component: backend
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
|
- configmap.yaml
|
||||||
11
k8s/base/aggregator/service.yaml
Normal file
11
k8s/base/aggregator/service.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: aggregator
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: aggregator
|
||||||
|
ports:
|
||||||
|
- port: 50051
|
||||||
|
targetPort: grpc
|
||||||
|
name: grpc
|
||||||
14
k8s/base/alerts/configmap.yaml
Normal file
14
k8s/base/alerts/configmap.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: alerts-config
|
||||||
|
data:
|
||||||
|
REDIS_URL: "redis://redis:6379"
|
||||||
|
TIMESCALE_HOST: "timescaledb"
|
||||||
|
TIMESCALE_PORT: "5432"
|
||||||
|
TIMESCALE_USER: "monitor"
|
||||||
|
TIMESCALE_DB: "monitor"
|
||||||
|
SERVICE_NAME: "alerts"
|
||||||
|
EVENTS_BACKEND: "redis_pubsub"
|
||||||
|
LOG_LEVEL: "INFO"
|
||||||
|
LOG_FORMAT: "json"
|
||||||
33
k8s/base/alerts/deployment.yaml
Normal file
33
k8s/base/alerts/deployment.yaml
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: alerts
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: alerts
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: alerts
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: alerts
|
||||||
|
image: sysmonstm/alerts:latest
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: alerts-config
|
||||||
|
env:
|
||||||
|
- name: TIMESCALE_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: timescaledb-secret
|
||||||
|
key: password
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "64Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "200m"
|
||||||
10
k8s/base/alerts/kustomization.yaml
Normal file
10
k8s/base/alerts/kustomization.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/name: alerts
|
||||||
|
app.kubernetes.io/component: backend
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- configmap.yaml
|
||||||
16
k8s/base/gateway/configmap.yaml
Normal file
16
k8s/base/gateway/configmap.yaml
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: gateway-config
|
||||||
|
data:
|
||||||
|
REDIS_URL: "redis://redis:6379"
|
||||||
|
TIMESCALE_HOST: "timescaledb"
|
||||||
|
TIMESCALE_PORT: "5432"
|
||||||
|
TIMESCALE_USER: "monitor"
|
||||||
|
TIMESCALE_DB: "monitor"
|
||||||
|
AGGREGATOR_URL: "aggregator:50051"
|
||||||
|
HTTP_PORT: "8000"
|
||||||
|
SERVICE_NAME: "gateway"
|
||||||
|
EVENTS_BACKEND: "redis_pubsub"
|
||||||
|
LOG_LEVEL: "INFO"
|
||||||
|
LOG_FORMAT: "json"
|
||||||
48
k8s/base/gateway/deployment.yaml
Normal file
48
k8s/base/gateway/deployment.yaml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: gateway
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: gateway
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: gateway
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: gateway
|
||||||
|
image: sysmonstm/gateway:latest
|
||||||
|
ports:
|
||||||
|
- containerPort: 8000
|
||||||
|
name: http
|
||||||
|
envFrom:
|
||||||
|
- configMapRef:
|
||||||
|
name: gateway-config
|
||||||
|
env:
|
||||||
|
- name: TIMESCALE_PASSWORD
|
||||||
|
valueFrom:
|
||||||
|
secretKeyRef:
|
||||||
|
name: timescaledb-secret
|
||||||
|
key: password
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "100m"
|
||||||
|
limits:
|
||||||
|
memory: "256Mi"
|
||||||
|
cpu: "500m"
|
||||||
|
livenessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 10
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
httpGet:
|
||||||
|
path: /health
|
||||||
|
port: http
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
11
k8s/base/gateway/kustomization.yaml
Normal file
11
k8s/base/gateway/kustomization.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/name: gateway
|
||||||
|
app.kubernetes.io/component: frontend
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
|
- configmap.yaml
|
||||||
11
k8s/base/gateway/service.yaml
Normal file
11
k8s/base/gateway/service.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: gateway
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: gateway
|
||||||
|
ports:
|
||||||
|
- port: 8000
|
||||||
|
targetPort: http
|
||||||
|
name: http
|
||||||
17
k8s/base/kustomization.yaml
Normal file
17
k8s/base/kustomization.yaml
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
namespace: sysmonstm
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/part-of: sysmonstm
|
||||||
|
app.kubernetes.io/managed-by: kustomize
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- namespace.yaml
|
||||||
|
- redis/
|
||||||
|
- timescaledb/
|
||||||
|
- aggregator/
|
||||||
|
- gateway/
|
||||||
|
- alerts/
|
||||||
|
# collector is deployed separately on each machine
|
||||||
6
k8s/base/namespace.yaml
Normal file
6
k8s/base/namespace.yaml
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Namespace
|
||||||
|
metadata:
|
||||||
|
name: sysmonstm
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: sysmonstm
|
||||||
37
k8s/base/redis/deployment.yaml
Normal file
37
k8s/base/redis/deployment.yaml
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: redis
|
||||||
|
spec:
|
||||||
|
replicas: 1
|
||||||
|
selector:
|
||||||
|
matchLabels:
|
||||||
|
app.kubernetes.io/name: redis
|
||||||
|
template:
|
||||||
|
metadata:
|
||||||
|
labels:
|
||||||
|
app.kubernetes.io/name: redis
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: redis
|
||||||
|
image: redis:7-alpine
|
||||||
|
ports:
|
||||||
|
- containerPort: 6379
|
||||||
|
name: redis
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "64Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "200m"
|
||||||
|
livenessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["redis-cli", "ping"]
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 10
|
||||||
|
readinessProbe:
|
||||||
|
exec:
|
||||||
|
command: ["redis-cli", "ping"]
|
||||||
|
initialDelaySeconds: 5
|
||||||
|
periodSeconds: 5
|
||||||
10
k8s/base/redis/kustomization.yaml
Normal file
10
k8s/base/redis/kustomization.yaml
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/name: redis
|
||||||
|
app.kubernetes.io/component: cache
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- deployment.yaml
|
||||||
|
- service.yaml
|
||||||
11
k8s/base/redis/service.yaml
Normal file
11
k8s/base/redis/service.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: redis
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: redis
|
||||||
|
ports:
|
||||||
|
- port: 6379
|
||||||
|
targetPort: redis
|
||||||
|
name: redis
|
||||||
94
k8s/base/timescaledb/configmap.yaml
Normal file
94
k8s/base/timescaledb/configmap.yaml
Normal file
@@ -0,0 +1,94 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: ConfigMap
|
||||||
|
metadata:
|
||||||
|
name: timescaledb-init
|
||||||
|
data:
|
||||||
|
init.sql: |
|
||||||
|
-- TimescaleDB initialization script
|
||||||
|
CREATE EXTENSION IF NOT EXISTS timescaledb;
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS metrics_raw (
|
||||||
|
time TIMESTAMPTZ NOT NULL,
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
value DOUBLE PRECISION NOT NULL,
|
||||||
|
labels JSONB DEFAULT '{}'::jsonb
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('metrics_raw', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 hour',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_raw_machine
|
||||||
|
ON metrics_raw (machine_id, time DESC);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_raw_type
|
||||||
|
ON metrics_raw (metric_type, time DESC);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS metrics_1m (
|
||||||
|
time TIMESTAMPTZ NOT NULL,
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
avg_value DOUBLE PRECISION NOT NULL,
|
||||||
|
min_value DOUBLE PRECISION NOT NULL,
|
||||||
|
max_value DOUBLE PRECISION NOT NULL,
|
||||||
|
sample_count INTEGER NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('metrics_1m', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 day',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS machines (
|
||||||
|
machine_id TEXT PRIMARY KEY,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
first_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
last_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
metadata JSONB DEFAULT '{}'::jsonb,
|
||||||
|
health TEXT NOT NULL DEFAULT 'UNKNOWN'
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS alert_rules (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
operator TEXT NOT NULL,
|
||||||
|
threshold DOUBLE PRECISION NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE TABLE IF NOT EXISTS alerts (
|
||||||
|
id SERIAL,
|
||||||
|
time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
rule_id INTEGER REFERENCES alert_rules(id),
|
||||||
|
rule_name TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
value DOUBLE PRECISION NOT NULL,
|
||||||
|
threshold DOUBLE PRECISION NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
resolved_at TIMESTAMPTZ,
|
||||||
|
PRIMARY KEY (id, time)
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('alerts', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 day',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT add_retention_policy('metrics_raw', INTERVAL '24 hours', if_not_exists => TRUE);
|
||||||
|
SELECT add_retention_policy('alerts', INTERVAL '30 days', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
INSERT INTO alert_rules (name, metric_type, operator, threshold, severity)
|
||||||
|
VALUES
|
||||||
|
('High CPU Usage', 'CPU_PERCENT', 'gt', 80.0, 'warning'),
|
||||||
|
('Critical CPU Usage', 'CPU_PERCENT', 'gt', 95.0, 'critical'),
|
||||||
|
('High Memory Usage', 'MEMORY_PERCENT', 'gt', 85.0, 'warning'),
|
||||||
|
('Critical Memory Usage', 'MEMORY_PERCENT', 'gt', 95.0, 'critical')
|
||||||
|
ON CONFLICT (name) DO NOTHING;
|
||||||
11
k8s/base/timescaledb/kustomization.yaml
Normal file
11
k8s/base/timescaledb/kustomization.yaml
Normal file
@@ -0,0 +1,11 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
commonLabels:
|
||||||
|
app.kubernetes.io/name: timescaledb
|
||||||
|
app.kubernetes.io/component: database
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- statefulset.yaml
|
||||||
|
- service.yaml
|
||||||
|
- configmap.yaml
|
||||||
12
k8s/base/timescaledb/service.yaml
Normal file
12
k8s/base/timescaledb/service.yaml
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Service
|
||||||
|
metadata:
|
||||||
|
name: timescaledb
|
||||||
|
spec:
|
||||||
|
selector:
|
||||||
|
app.kubernetes.io/name: timescaledb
|
||||||
|
ports:
|
||||||
|
- port: 5432
|
||||||
|
targetPort: postgres
|
||||||
|
name: postgres
|
||||||
|
clusterIP: None # Headless for StatefulSet
|
||||||
65
k8s/base/timescaledb/statefulset.yaml
Normal file
65
k8s/base/timescaledb/statefulset.yaml
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# Single-replica TimescaleDB backed by a per-pod PersistentVolumeClaim.
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: timescaledb
spec:
  serviceName: timescaledb
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: timescaledb
  template:
    metadata:
      labels:
        app.kubernetes.io/name: timescaledb
    spec:
      containers:
        - name: timescaledb
          image: timescale/timescaledb:latest-pg15
          ports:
            - containerPort: 5432
              name: postgres
          env:
            - name: POSTGRES_USER
              value: monitor
            - name: POSTGRES_PASSWORD
              valueFrom:
                secretKeyRef:
                  name: timescaledb-secret
                  key: password
            - name: POSTGRES_DB
              value: monitor
            # Keep the cluster in a subdirectory of the mounted volume: initdb
            # refuses a non-empty data directory, and many provisioned volumes
            # contain a lost+found entry at their root.
            - name: PGDATA
              value: /var/lib/postgresql/data/pgdata
          resources:
            requests:
              memory: "256Mi"
              cpu: "100m"
            limits:
              memory: "512Mi"
              cpu: "500m"
          volumeMounts:
            - name: data
              mountPath: /var/lib/postgresql/data
            # init.sql from the timescaledb-init ConfigMap is mounted here
            - name: init-scripts
              mountPath: /docker-entrypoint-initdb.d
          livenessProbe:
            exec:
              command: ["pg_isready", "-U", "monitor", "-d", "monitor"]
            initialDelaySeconds: 30
            periodSeconds: 10
          readinessProbe:
            exec:
              command: ["pg_isready", "-U", "monitor", "-d", "monitor"]
            initialDelaySeconds: 5
            periodSeconds: 5
      volumes:
        - name: init-scripts
          configMap:
            name: timescaledb-init
  volumeClaimTemplates:
    - metadata:
        name: data
      spec:
        accessModes: ["ReadWriteOnce"]
        resources:
          requests:
            storage: 5Gi
|
||||||
22
k8s/overlays/local/kustomization.yaml
Normal file
22
k8s/overlays/local/kustomization.yaml
Normal file
@@ -0,0 +1,22 @@
|
|||||||
|
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||||
|
kind: Kustomization
|
||||||
|
|
||||||
|
namespace: sysmonstm
|
||||||
|
|
||||||
|
resources:
|
||||||
|
- ../../base
|
||||||
|
- secrets.yaml
|
||||||
|
|
||||||
|
patches:
|
||||||
|
- path: patches/reduce-resources.yaml
|
||||||
|
|
||||||
|
images:
|
||||||
|
- name: sysmonstm/aggregator
|
||||||
|
newName: sysmonstm-aggregator
|
||||||
|
newTag: dev
|
||||||
|
- name: sysmonstm/gateway
|
||||||
|
newName: sysmonstm-gateway
|
||||||
|
newTag: dev
|
||||||
|
- name: sysmonstm/alerts
|
||||||
|
newName: sysmonstm-alerts
|
||||||
|
newTag: dev
|
||||||
50
k8s/overlays/local/patches/reduce-resources.yaml
Normal file
50
k8s/overlays/local/patches/reduce-resources.yaml
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: aggregator
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: aggregator
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "64Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "200m"
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: Deployment
|
||||||
|
metadata:
|
||||||
|
name: gateway
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: gateway
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "64Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "200m"
|
||||||
|
---
|
||||||
|
apiVersion: apps/v1
|
||||||
|
kind: StatefulSet
|
||||||
|
metadata:
|
||||||
|
name: timescaledb
|
||||||
|
spec:
|
||||||
|
template:
|
||||||
|
spec:
|
||||||
|
containers:
|
||||||
|
- name: timescaledb
|
||||||
|
resources:
|
||||||
|
requests:
|
||||||
|
memory: "128Mi"
|
||||||
|
cpu: "50m"
|
||||||
|
limits:
|
||||||
|
memory: "256Mi"
|
||||||
|
cpu: "200m"
|
||||||
8
k8s/overlays/local/secrets.yaml
Normal file
8
k8s/overlays/local/secrets.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
apiVersion: v1
|
||||||
|
kind: Secret
|
||||||
|
metadata:
|
||||||
|
name: timescaledb-secret
|
||||||
|
namespace: sysmonstm
|
||||||
|
type: Opaque
|
||||||
|
stringData:
|
||||||
|
password: "monitor" # Only for local dev!
|
||||||
159
proto/metrics.proto
Normal file
159
proto/metrics.proto
Normal file
@@ -0,0 +1,159 @@
|
|||||||
|
syntax = "proto3";
|
||||||
|
|
||||||
|
package monitoring;
|
||||||
|
|
||||||
|
option go_package = "github.com/your-org/sysmonstm/proto";
|
||||||
|
|
||||||
|
// MetricsService handles streaming metrics from collectors to aggregator
|
||||||
|
service MetricsService {
|
||||||
|
// Client-side streaming: collector streams metrics to aggregator
|
||||||
|
rpc StreamMetrics(stream Metric) returns (StreamAck) {}
|
||||||
|
|
||||||
|
// Get current state of a machine
|
||||||
|
rpc GetCurrentState(StateRequest) returns (MachineState) {}
|
||||||
|
|
||||||
|
// Get current state of all machines
|
||||||
|
rpc GetAllStates(Empty) returns (AllMachinesState) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ControlService handles bidirectional control commands
|
||||||
|
service ControlService {
|
||||||
|
// Bidirectional streaming for commands and responses
|
||||||
|
rpc Control(stream ControlCommand) returns (stream ControlResponse) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ConfigService handles dynamic configuration
|
||||||
|
service ConfigService {
|
||||||
|
// Get current configuration for a collector
|
||||||
|
rpc GetConfig(ConfigRequest) returns (CollectorConfig) {}
|
||||||
|
|
||||||
|
// Stream configuration updates
|
||||||
|
rpc WatchConfig(ConfigRequest) returns (stream CollectorConfig) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty message for requests with no parameters
|
||||||
|
message Empty {}
|
||||||
|
|
||||||
|
// Basic metric message
|
||||||
|
message Metric {
|
||||||
|
string machine_id = 1;
|
||||||
|
string hostname = 2;
|
||||||
|
int64 timestamp_ms = 3;
|
||||||
|
MetricType type = 4;
|
||||||
|
double value = 5;
|
||||||
|
map<string, string> labels = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Batch of metrics for efficient transmission
|
||||||
|
message MetricBatch {
|
||||||
|
string machine_id = 1;
|
||||||
|
string hostname = 2;
|
||||||
|
int64 timestamp_ms = 3;
|
||||||
|
repeated MetricPoint metrics = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message MetricPoint {
|
||||||
|
MetricType type = 1;
|
||||||
|
double value = 2;
|
||||||
|
map<string, string> labels = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum MetricType {
|
||||||
|
METRIC_TYPE_UNSPECIFIED = 0;
|
||||||
|
CPU_PERCENT = 1;
|
||||||
|
CPU_PERCENT_PER_CORE = 2;
|
||||||
|
MEMORY_PERCENT = 3;
|
||||||
|
MEMORY_USED_BYTES = 4;
|
||||||
|
MEMORY_AVAILABLE_BYTES = 5;
|
||||||
|
DISK_PERCENT = 6;
|
||||||
|
DISK_USED_BYTES = 7;
|
||||||
|
DISK_READ_BYTES_SEC = 8;
|
||||||
|
DISK_WRITE_BYTES_SEC = 9;
|
||||||
|
NETWORK_SENT_BYTES_SEC = 10;
|
||||||
|
NETWORK_RECV_BYTES_SEC = 11;
|
||||||
|
NETWORK_CONNECTIONS = 12;
|
||||||
|
PROCESS_COUNT = 13;
|
||||||
|
LOAD_AVG_1M = 14;
|
||||||
|
LOAD_AVG_5M = 15;
|
||||||
|
LOAD_AVG_15M = 16;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Acknowledgment for streamed metrics
|
||||||
|
message StreamAck {
|
||||||
|
bool success = 1;
|
||||||
|
int64 metrics_received = 2;
|
||||||
|
string message = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Request for machine state
|
||||||
|
message StateRequest {
|
||||||
|
string machine_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Current state of a single machine
|
||||||
|
message MachineState {
|
||||||
|
string machine_id = 1;
|
||||||
|
string hostname = 2;
|
||||||
|
int64 last_seen_ms = 3;
|
||||||
|
repeated Metric current_metrics = 4;
|
||||||
|
HealthStatus health = 5;
|
||||||
|
map<string, string> metadata = 6;
|
||||||
|
}
|
||||||
|
|
||||||
|
// State of all machines
|
||||||
|
message AllMachinesState {
|
||||||
|
repeated MachineState machines = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
enum HealthStatus {
|
||||||
|
HEALTH_STATUS_UNSPECIFIED = 0;
|
||||||
|
HEALTHY = 1;
|
||||||
|
WARNING = 2;
|
||||||
|
CRITICAL = 3;
|
||||||
|
UNKNOWN = 4;
|
||||||
|
OFFLINE = 5;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Control commands for collectors
|
||||||
|
message ControlCommand {
|
||||||
|
string command_id = 1;
|
||||||
|
oneof command {
|
||||||
|
UpdateIntervalCommand update_interval = 2;
|
||||||
|
RestartCollectionCommand restart = 3;
|
||||||
|
ShutdownCommand shutdown = 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
message UpdateIntervalCommand {
|
||||||
|
int32 interval_seconds = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message RestartCollectionCommand {}
|
||||||
|
|
||||||
|
message ShutdownCommand {
|
||||||
|
bool graceful = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message ControlResponse {
|
||||||
|
string command_id = 1;
|
||||||
|
bool success = 2;
|
||||||
|
string message = 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Configuration messages
|
||||||
|
message ConfigRequest {
|
||||||
|
string machine_id = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
message CollectorConfig {
|
||||||
|
int32 collection_interval_seconds = 1;
|
||||||
|
repeated MetricType enabled_metrics = 2;
|
||||||
|
map<string, string> labels = 3;
|
||||||
|
repeated ThresholdConfig thresholds = 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
message ThresholdConfig {
|
||||||
|
MetricType metric_type = 1;
|
||||||
|
double warning_threshold = 2;
|
||||||
|
double critical_threshold = 3;
|
||||||
|
}
|
||||||
22
scripts/generate-diagrams.sh
Executable file
22
scripts/generate-diagrams.sh
Executable file
@@ -0,0 +1,22 @@
|
|||||||
|
#!/bin/bash
# Generate SVG diagrams from Graphviz DOT files
# Requires: graphviz (apt install graphviz)
#
# Renders every *.dot file in docs/architecture (resolved relative to this
# script, so it can be run from any working directory) to an .svg file with
# the same base name, written next to the source file.

# Abort on command failure, on use of an unset variable, and on a failure
# anywhere inside a pipeline.
set -euo pipefail

# Fail early with an actionable message instead of a bare
# "dot: command not found" from inside the loop.
if ! command -v dot >/dev/null 2>&1; then
    echo "Error: 'dot' not found. Install graphviz (apt install graphviz)." >&2
    exit 1
fi

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
ARCH_DIR="$SCRIPT_DIR/../docs/architecture"

cd "$ARCH_DIR"

echo "Generating architecture diagrams..."

# With nullglob an unmatched pattern expands to nothing, so the loop simply
# does not run when there are no .dot files.
shopt -s nullglob
for dotfile in *.dot; do
    # Skip anything that matches the pattern but is not a regular file.
    [ -f "$dotfile" ] || continue
    svgfile="${dotfile%.dot}.svg"
    echo " $dotfile -> $svgfile"
    dot -Tsvg "$dotfile" -o "$svgfile"
done

echo "Done! Open docs/architecture/index.html in a browser."
|
||||||
158
scripts/init-db.sql
Normal file
158
scripts/init-db.sql
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
-- TimescaleDB initialization script
|
||||||
|
-- Creates hypertables for time-series metrics storage
|
||||||
|
|
||||||
|
-- Enable TimescaleDB extension
|
||||||
|
CREATE EXTENSION IF NOT EXISTS timescaledb;
|
||||||
|
|
||||||
|
-- Raw metrics table (high resolution, short retention)
|
||||||
|
CREATE TABLE IF NOT EXISTS metrics_raw (
|
||||||
|
time TIMESTAMPTZ NOT NULL,
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
value DOUBLE PRECISION NOT NULL,
|
||||||
|
labels JSONB DEFAULT '{}'::jsonb
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Convert to hypertable with 1-hour chunks
|
||||||
|
SELECT create_hypertable('metrics_raw', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 hour',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Create indexes for common queries
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_raw_machine
|
||||||
|
ON metrics_raw (machine_id, time DESC);
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_raw_type
|
||||||
|
ON metrics_raw (metric_type, time DESC);
|
||||||
|
|
||||||
|
-- Aggregated metrics table (1-minute resolution, longer retention)
|
||||||
|
CREATE TABLE IF NOT EXISTS metrics_1m (
|
||||||
|
time TIMESTAMPTZ NOT NULL,
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
avg_value DOUBLE PRECISION NOT NULL,
|
||||||
|
min_value DOUBLE PRECISION NOT NULL,
|
||||||
|
max_value DOUBLE PRECISION NOT NULL,
|
||||||
|
sample_count INTEGER NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('metrics_1m', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 day',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_1m_machine
|
||||||
|
ON metrics_1m (machine_id, time DESC);
|
||||||
|
|
||||||
|
-- Aggregated metrics table (1-hour resolution, long retention)
|
||||||
|
CREATE TABLE IF NOT EXISTS metrics_1h (
|
||||||
|
time TIMESTAMPTZ NOT NULL,
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
avg_value DOUBLE PRECISION NOT NULL,
|
||||||
|
min_value DOUBLE PRECISION NOT NULL,
|
||||||
|
max_value DOUBLE PRECISION NOT NULL,
|
||||||
|
sample_count INTEGER NOT NULL
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('metrics_1h', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 week',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
CREATE INDEX IF NOT EXISTS idx_metrics_1h_machine
|
||||||
|
ON metrics_1h (machine_id, time DESC);
|
||||||
|
|
||||||
|
-- Machines registry
|
||||||
|
CREATE TABLE IF NOT EXISTS machines (
|
||||||
|
machine_id TEXT PRIMARY KEY,
|
||||||
|
hostname TEXT NOT NULL,
|
||||||
|
first_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
last_seen TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
metadata JSONB DEFAULT '{}'::jsonb,
|
||||||
|
health TEXT NOT NULL DEFAULT 'UNKNOWN'
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Alert rules configuration
|
||||||
|
CREATE TABLE IF NOT EXISTS alert_rules (
|
||||||
|
id SERIAL PRIMARY KEY,
|
||||||
|
name TEXT NOT NULL UNIQUE,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
operator TEXT NOT NULL CHECK (operator IN ('gt', 'lt', 'gte', 'lte', 'eq')),
|
||||||
|
threshold DOUBLE PRECISION NOT NULL,
|
||||||
|
severity TEXT NOT NULL CHECK (severity IN ('warning', 'critical')),
|
||||||
|
enabled BOOLEAN NOT NULL DEFAULT TRUE,
|
||||||
|
created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW()
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Alert history
|
||||||
|
CREATE TABLE IF NOT EXISTS alerts (
|
||||||
|
id SERIAL,
|
||||||
|
time TIMESTAMPTZ NOT NULL DEFAULT NOW(),
|
||||||
|
machine_id TEXT NOT NULL,
|
||||||
|
rule_id INTEGER REFERENCES alert_rules(id),
|
||||||
|
rule_name TEXT NOT NULL,
|
||||||
|
metric_type TEXT NOT NULL,
|
||||||
|
value DOUBLE PRECISION NOT NULL,
|
||||||
|
threshold DOUBLE PRECISION NOT NULL,
|
||||||
|
severity TEXT NOT NULL,
|
||||||
|
resolved_at TIMESTAMPTZ,
|
||||||
|
PRIMARY KEY (id, time)
|
||||||
|
);
|
||||||
|
|
||||||
|
SELECT create_hypertable('alerts', 'time',
|
||||||
|
chunk_time_interval => INTERVAL '1 day',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Retention policies
|
||||||
|
-- Raw data: 24 hours
|
||||||
|
SELECT add_retention_policy('metrics_raw', INTERVAL '24 hours', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
-- 1-minute aggregates: 7 days
|
||||||
|
SELECT add_retention_policy('metrics_1m', INTERVAL '7 days', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
-- 1-hour aggregates: 90 days
|
||||||
|
SELECT add_retention_policy('metrics_1h', INTERVAL '90 days', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
-- Alerts: 30 days
|
||||||
|
SELECT add_retention_policy('alerts', INTERVAL '30 days', if_not_exists => TRUE);
|
||||||
|
|
||||||
|
-- Continuous aggregates for automatic downsampling
|
||||||
|
CREATE MATERIALIZED VIEW IF NOT EXISTS metrics_1m_agg
|
||||||
|
WITH (timescaledb.continuous) AS
|
||||||
|
SELECT
|
||||||
|
time_bucket('1 minute', time) AS time,
|
||||||
|
machine_id,
|
||||||
|
hostname,
|
||||||
|
metric_type,
|
||||||
|
AVG(value) AS avg_value,
|
||||||
|
MIN(value) AS min_value,
|
||||||
|
MAX(value) AS max_value,
|
||||||
|
COUNT(*) AS sample_count
|
||||||
|
FROM metrics_raw
|
||||||
|
GROUP BY time_bucket('1 minute', time), machine_id, hostname, metric_type
|
||||||
|
WITH NO DATA;
|
||||||
|
|
||||||
|
-- Refresh policy for continuous aggregate
|
||||||
|
SELECT add_continuous_aggregate_policy('metrics_1m_agg',
|
||||||
|
start_offset => INTERVAL '1 hour',
|
||||||
|
end_offset => INTERVAL '1 minute',
|
||||||
|
schedule_interval => INTERVAL '1 minute',
|
||||||
|
if_not_exists => TRUE
|
||||||
|
);
|
||||||
|
|
||||||
|
-- Insert default alert rules
|
||||||
|
INSERT INTO alert_rules (name, metric_type, operator, threshold, severity)
|
||||||
|
VALUES
|
||||||
|
('High CPU Usage', 'CPU_PERCENT', 'gt', 80.0, 'warning'),
|
||||||
|
('Critical CPU Usage', 'CPU_PERCENT', 'gt', 95.0, 'critical'),
|
||||||
|
('High Memory Usage', 'MEMORY_PERCENT', 'gt', 85.0, 'warning'),
|
||||||
|
('Critical Memory Usage', 'MEMORY_PERCENT', 'gt', 95.0, 'critical'),
|
||||||
|
('High Disk Usage', 'DISK_PERCENT', 'gt', 80.0, 'warning'),
|
||||||
|
('Critical Disk Usage', 'DISK_PERCENT', 'gt', 90.0, 'critical')
|
||||||
|
ON CONFLICT (name) DO NOTHING;
|
||||||
47
services/aggregator/Dockerfile
Normal file
47
services/aggregator/Dockerfile
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
# Multi-stage Dockerfile for Aggregator service
|
||||||
|
|
||||||
|
FROM python:3.11-slim as base
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies including grpc_health_probe
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& curl -fsSL https://github.com/grpc-ecosystem/grpc-health-probe/releases/download/v0.4.24/grpc_health_probe-linux-amd64 \
|
||||||
|
-o /bin/grpc_health_probe \
|
||||||
|
&& chmod +x /bin/grpc_health_probe \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY services/aggregator/requirements.txt /app/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY shared /app/shared
|
||||||
|
COPY proto /app/proto
|
||||||
|
|
||||||
|
RUN python -m grpc_tools.protoc \
|
||||||
|
-I/app/proto \
|
||||||
|
--python_out=/app/shared \
|
||||||
|
--grpc_python_out=/app/shared \
|
||||||
|
/app/proto/metrics.proto
|
||||||
|
|
||||||
|
COPY services/aggregator /app/services/aggregator
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as development
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir watchfiles
|
||||||
|
|
||||||
|
CMD ["python", "-m", "watchfiles", "python services/aggregator/main.py", "/app/services/aggregator"]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as production
|
||||||
|
|
||||||
|
RUN useradd --create-home --shell /bin/bash appuser
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
EXPOSE 50051
|
||||||
|
|
||||||
|
CMD ["python", "services/aggregator/main.py"]
|
||||||
9
services/aggregator/requirements.txt
Normal file
9
services/aggregator/requirements.txt
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
grpcio>=1.60.0
|
||||||
|
grpcio-tools>=1.60.0
|
||||||
|
grpcio-health-checking>=1.60.0
|
||||||
|
redis>=5.0.0
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
structlog>=23.2.0
|
||||||
|
python-json-logger>=2.0.7
|
||||||
|
pydantic>=2.5.0
|
||||||
|
pydantic-settings>=2.1.0
|
||||||
35
services/alerts/Dockerfile
Normal file
35
services/alerts/Dockerfile
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
# Multi-stage Dockerfile for Alerts service
|
||||||
|
|
||||||
|
FROM python:3.11-slim as base
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY services/alerts/requirements.txt /app/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY shared /app/shared
|
||||||
|
COPY proto /app/proto
|
||||||
|
|
||||||
|
COPY services/alerts /app/services/alerts
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as development
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir watchfiles
|
||||||
|
|
||||||
|
CMD ["python", "-m", "watchfiles", "python services/alerts/main.py", "/app/services/alerts"]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as production
|
||||||
|
|
||||||
|
RUN useradd --create-home --shell /bin/bash appuser
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
CMD ["python", "services/alerts/main.py"]
|
||||||
6
services/alerts/requirements.txt
Normal file
6
services/alerts/requirements.txt
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
redis>=5.0.0
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
structlog>=23.2.0
|
||||||
|
python-json-logger>=2.0.7
|
||||||
|
pydantic>=2.5.0
|
||||||
|
pydantic-settings>=2.1.0
|
||||||
55
services/collector/Dockerfile
Normal file
55
services/collector/Dockerfile
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
# Multi-stage Dockerfile for Collector service
|
||||||
|
# Stages: base -> development, base -> production
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Base stage - common dependencies
|
||||||
|
# =============================================================================
|
||||||
|
FROM python:3.11-slim as base
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Install system dependencies
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
# Install Python dependencies
|
||||||
|
COPY services/collector/requirements.txt /app/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
# Copy shared code and proto
|
||||||
|
COPY shared /app/shared
|
||||||
|
COPY proto /app/proto
|
||||||
|
|
||||||
|
# Generate gRPC code from proto
|
||||||
|
RUN python -m grpc_tools.protoc \
|
||||||
|
-I/app/proto \
|
||||||
|
--python_out=/app/shared \
|
||||||
|
--grpc_python_out=/app/shared \
|
||||||
|
/app/proto/metrics.proto
|
||||||
|
|
||||||
|
# Copy service code
|
||||||
|
COPY services/collector /app/services/collector
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Development stage - with hot reload
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as development
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir watchfiles
|
||||||
|
|
||||||
|
CMD ["python", "-m", "watchfiles", "python services/collector/main.py", "/app/services/collector"]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
# Production stage - optimized
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as production
|
||||||
|
|
||||||
|
# Run as non-root user
|
||||||
|
RUN useradd --create-home --shell /bin/bash appuser
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
CMD ["python", "services/collector/main.py"]
|
||||||
7
services/collector/requirements.txt
Normal file
7
services/collector/requirements.txt
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
grpcio>=1.60.0
|
||||||
|
grpcio-tools>=1.60.0
|
||||||
|
psutil>=5.9.0
|
||||||
|
structlog>=23.2.0
|
||||||
|
python-json-logger>=2.0.7
|
||||||
|
pydantic>=2.5.0
|
||||||
|
pydantic-settings>=2.1.0
|
||||||
44
services/gateway/Dockerfile
Normal file
44
services/gateway/Dockerfile
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Multi-stage Dockerfile for Gateway service (FastAPI)
|
||||||
|
|
||||||
|
FROM python:3.11-slim as base
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
curl \
|
||||||
|
&& rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
COPY services/gateway/requirements.txt /app/requirements.txt
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY shared /app/shared
|
||||||
|
COPY proto /app/proto
|
||||||
|
|
||||||
|
RUN python -m grpc_tools.protoc \
|
||||||
|
-I/app/proto \
|
||||||
|
--python_out=/app/shared \
|
||||||
|
--grpc_python_out=/app/shared \
|
||||||
|
/app/proto/metrics.proto
|
||||||
|
|
||||||
|
COPY services/gateway /app/services/gateway
|
||||||
|
COPY web /app/web
|
||||||
|
|
||||||
|
ENV PYTHONPATH=/app
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as development
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir watchfiles
|
||||||
|
|
||||||
|
CMD ["uvicorn", "services.gateway.main:app", "--host", "0.0.0.0", "--port", "8000", "--reload"]
|
||||||
|
|
||||||
|
# =============================================================================
|
||||||
|
FROM base as production
|
||||||
|
|
||||||
|
RUN useradd --create-home --shell /bin/bash appuser
|
||||||
|
USER appuser
|
||||||
|
|
||||||
|
EXPOSE 8000
|
||||||
|
|
||||||
|
CMD ["uvicorn", "services.gateway.main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]
|
||||||
13
services/gateway/requirements.txt
Normal file
13
services/gateway/requirements.txt
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
fastapi>=0.109.0
|
||||||
|
uvicorn[standard]>=0.27.0
|
||||||
|
grpcio>=1.60.0
|
||||||
|
grpcio-tools>=1.60.0
|
||||||
|
redis>=5.0.0
|
||||||
|
asyncpg>=0.29.0
|
||||||
|
websockets>=12.0
|
||||||
|
jinja2>=3.1.2
|
||||||
|
structlog>=23.2.0
|
||||||
|
python-json-logger>=2.0.7
|
||||||
|
pydantic>=2.5.0
|
||||||
|
pydantic-settings>=2.1.0
|
||||||
|
httpx>=0.26.0
|
||||||
34
shared/events/__init__.py
Normal file
34
shared/events/__init__.py
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
"""
|
||||||
|
Event publishing/subscribing abstraction layer.
|
||||||
|
|
||||||
|
Supports:
|
||||||
|
- Redis Pub/Sub (default, simple)
|
||||||
|
- Redis Streams (with consumer groups, persistence)
|
||||||
|
- Kafka (future, for high-throughput)
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
from shared.events import get_publisher, get_subscriber
|
||||||
|
|
||||||
|
# Publishing
|
||||||
|
async with get_publisher() as pub:
|
||||||
|
await pub.publish("metrics.raw", {"machine_id": "m1", ...})
|
||||||
|
|
||||||
|
# Subscribing
|
||||||
|
async with get_subscriber(["metrics.raw", "alerts.*"]) as sub:
|
||||||
|
async for event in sub.consume():
|
||||||
|
process(event.topic, event.payload)
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .base import EventPublisher, EventSubscriber, Event
|
||||||
|
from .redis_pubsub import RedisPubSubPublisher, RedisPubSubSubscriber
|
||||||
|
from .factory import get_publisher, get_subscriber
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"EventPublisher",
|
||||||
|
"EventSubscriber",
|
||||||
|
"Event",
|
||||||
|
"RedisPubSubPublisher",
|
||||||
|
"RedisPubSubSubscriber",
|
||||||
|
"get_publisher",
|
||||||
|
"get_subscriber",
|
||||||
|
]
|
||||||
117
shared/events/base.py
Normal file
117
shared/events/base.py
Normal file
@@ -0,0 +1,117 @@
|
|||||||
|
"""Abstract base classes for event publishing and subscribing."""
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, AsyncIterator
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
def _naive_utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stand-in for ``datetime.utcnow()``, which is deprecated since Python 3.12.
    The tzinfo is stripped on purpose so that timestamps (and therefore their
    ``isoformat()`` output) keep exactly the shape ``utcnow()`` produced.
    """
    # Function-scope import: this module only imports ``datetime`` itself.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class Event:
    """Standard event envelope.

    Attributes are documented inline below. ``event_id`` and ``timestamp``
    are generated automatically when not supplied.
    """

    # Topic/channel the event belongs to.
    topic: str
    # Event data; passed through unchanged by to_dict()/from_dict().
    payload: dict[str, Any]
    # Unique identifier; defaults to a freshly generated UUID4 string.
    event_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Creation time as naive UTC.
    timestamp: datetime = field(default_factory=_naive_utcnow)
    # Identifier of the emitting service; empty when unknown.
    source: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the event as a plain dict with an ISO-8601 timestamp string."""
        return {
            "event_id": self.event_id,
            "topic": self.topic,
            "timestamp": self.timestamp.isoformat(),
            "source": self.source,
            "payload": self.payload,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Event":
        """Build an Event from a dict shaped like the output of to_dict().

        Only ``topic`` is required. A missing ``event_id`` or ``timestamp`` is
        regenerated; a missing ``source`` or ``payload`` falls back to ``""``
        and ``{}`` respectively.

        Raises:
            KeyError: If ``data`` has no ``"topic"`` key.
            ValueError: If ``data["timestamp"]`` is not a valid ISO-8601 string.
        """
        if "timestamp" in data:
            timestamp = datetime.fromisoformat(data["timestamp"])
        else:
            timestamp = _naive_utcnow()

        return cls(
            event_id=data.get("event_id", str(uuid.uuid4())),
            topic=data["topic"],
            timestamp=timestamp,
            source=data.get("source", ""),
            payload=data.get("payload", {}),
        )
|
||||||
|
|
||||||
|
|
||||||
|
class EventPublisher(ABC):
    """Interface that every event-publishing backend implements.

    Instances are async context managers: entering awaits :meth:`connect`
    and leaving awaits :meth:`disconnect`.
    """

    @abstractmethod
    async def connect(self) -> None:
        """Open the connection to the message broker."""

    @abstractmethod
    async def disconnect(self) -> None:
        """Shut down the connection to the message broker."""

    @abstractmethod
    async def publish(self, topic: str, payload: dict[str, Any], **kwargs) -> str:
        """
        Send one event to a topic.

        Args:
            topic: Topic/channel the event is published to
            payload: Event data
            **kwargs: Extra backend options (e.g., headers, partition key)

        Returns:
            ID of the published event
        """

    async def publish_event(self, event: Event) -> str:
        """Publish an already-built Event and return its ID.

        Only the event's topic, payload and event_id are handed to
        :meth:`publish`; its timestamp and source are not forwarded.
        """
        published_id = await self.publish(
            event.topic,
            event.payload,
            event_id=event.event_id,
        )
        return published_id

    async def __aenter__(self) -> "EventPublisher":
        # Connect on entry so the body of ``async with`` can publish at once.
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        # Always disconnect; returning None lets any exception propagate.
        await self.disconnect()
|
||||||
|
|
||||||
|
|
||||||
|
class EventSubscriber(ABC):
    """Abstract base for event subscribers.

    Instances are async context managers: entering awaits :meth:`connect`
    and leaving awaits :meth:`disconnect`.
    """

    @abstractmethod
    async def connect(self) -> None:
        """Establish connection to the message broker."""

    @abstractmethod
    async def disconnect(self) -> None:
        """Close connection and unsubscribe."""

    @abstractmethod
    async def subscribe(self, topics: list[str]) -> None:
        """
        Subscribe to one or more topics.

        Args:
            topics: List of topics/patterns to subscribe to
        """

    @abstractmethod
    async def consume(self) -> AsyncIterator[Event]:
        """
        Async generator that yields events as they arrive.

        Intended use is ``async for event in subscriber.consume(): ...``.

        Yields:
            Event objects
        """
        # The unreachable ``yield`` turns this declaration into an async
        # generator function. Without it, an ``async def`` annotated with
        # ``AsyncIterator`` is typed as a coroutine *returning* an iterator,
        # which does not match the async-generator overrides or the
        # ``async for`` usage described above.
        if False:  # pragma: no cover
            yield

    async def __aenter__(self) -> "EventSubscriber":
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb) -> None:
        # Returning None lets any exception from the ``async with`` body propagate.
        await self.disconnect()
|
||||||
101
shared/events/factory.py
Normal file
101
shared/events/factory.py
Normal file
@@ -0,0 +1,101 @@
|
|||||||
|
"""Factory functions for creating event publishers and subscribers."""
|
||||||
|
|
||||||
|
import os
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
from .base import EventPublisher, EventSubscriber
|
||||||
|
from .redis_pubsub import RedisPubSubPublisher, RedisPubSubSubscriber
|
||||||
|
|
||||||
|
|
||||||
|
class EventBackend(str, Enum):
    """Names of the event backends the factory functions recognise.

    Members are also plain strings, so they compare equal to their value.
    """

    REDIS_PUBSUB = "redis_pubsub"
    # Reserved for future backends.
    REDIS_STREAMS = "redis_streams"
    KAFKA = "kafka"
|
||||||
|
|
||||||
|
|
||||||
|
def get_publisher(
    backend: EventBackend | str | None = None,
    source: str = "",
    **kwargs,
) -> EventPublisher:
    """
    Factory function to get an event publisher.

    Args:
        backend: The event backend to use (default: from EVENTS_BACKEND env var or redis_pubsub)
        source: Identifier for the source service
        **kwargs: Backend-specific options. Only ``redis_url`` is read, and
            only by the redis_pubsub backend.

    Returns:
        An EventPublisher instance (not yet connected)

    Raises:
        ValueError: If ``backend`` does not name a known backend.
        NotImplementedError: If the backend is redis_streams or kafka.

    Environment variables:
        EVENTS_BACKEND: Default backend (redis_pubsub, redis_streams, kafka)
        REDIS_URL: Redis connection URL
        KAFKA_BOOTSTRAP_SERVERS: Kafka bootstrap servers (future)
    """
    if backend is None:
        # ``or`` rather than a getenv default: a variable that is set but
        # empty should select the default backend too, instead of failing
        # the EventBackend("") lookup below.
        backend = os.getenv("EVENTS_BACKEND") or EventBackend.REDIS_PUBSUB

    if isinstance(backend, str):
        # Raises ValueError for strings that do not name a backend.
        backend = EventBackend(backend)

    if backend == EventBackend.REDIS_PUBSUB:
        # Precedence: explicit keyword, then non-empty REDIS_URL, then localhost.
        redis_url = (
            kwargs.get("redis_url")
            or os.getenv("REDIS_URL")
            or "redis://localhost:6379"
        )
        return RedisPubSubPublisher(redis_url=redis_url, source=source)

    if backend == EventBackend.REDIS_STREAMS:
        raise NotImplementedError("Redis Streams backend not yet implemented")

    if backend == EventBackend.KAFKA:
        raise NotImplementedError("Kafka backend not yet implemented")

    # Reached for non-string values, or for an enum member without a branch above.
    raise ValueError(f"Unknown event backend: {backend}")
|
||||||
|
|
||||||
|
|
||||||
|
def get_subscriber(
    topics: list[str] | None = None,
    backend: EventBackend | str | None = None,
    **kwargs,
) -> EventSubscriber:
    """
    Factory function to get an event subscriber.

    Args:
        topics: Topics to subscribe to
        backend: The event backend to use (default: from EVENTS_BACKEND env var or redis_pubsub)
        **kwargs: Backend-specific options. Only ``redis_url`` is read, and
            only by the redis_pubsub backend.

    Returns:
        An EventSubscriber instance (not yet connected)

    Raises:
        ValueError: If ``backend`` does not name a known backend.
        NotImplementedError: If the backend is redis_streams or kafka.

    Environment variables:
        EVENTS_BACKEND: Default backend (redis_pubsub, redis_streams, kafka)
        REDIS_URL: Redis connection URL
        KAFKA_BOOTSTRAP_SERVERS: Kafka bootstrap servers (future)
    """
    if backend is None:
        # ``or`` rather than a getenv default: a variable that is set but
        # empty should select the default backend too, instead of failing
        # the EventBackend("") lookup below.
        backend = os.getenv("EVENTS_BACKEND") or EventBackend.REDIS_PUBSUB

    if isinstance(backend, str):
        # Raises ValueError for strings that do not name a backend.
        backend = EventBackend(backend)

    if backend == EventBackend.REDIS_PUBSUB:
        # Precedence: explicit keyword, then non-empty REDIS_URL, then localhost.
        redis_url = (
            kwargs.get("redis_url")
            or os.getenv("REDIS_URL")
            or "redis://localhost:6379"
        )
        return RedisPubSubSubscriber(redis_url=redis_url, topics=topics)

    if backend == EventBackend.REDIS_STREAMS:
        raise NotImplementedError("Redis Streams backend not yet implemented")

    if backend == EventBackend.KAFKA:
        raise NotImplementedError("Kafka backend not yet implemented")

    # Reached for non-string values, or for an enum member without a branch above.
    raise ValueError(f"Unknown event backend: {backend}")
|
||||||
142
shared/events/redis_pubsub.py
Normal file
142
shared/events/redis_pubsub.py
Normal file
@@ -0,0 +1,142 @@
|
|||||||
|
"""Redis Pub/Sub implementation of event publishing/subscribing."""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Any, AsyncIterator
|
||||||
|
|
||||||
|
import redis.asyncio as redis
|
||||||
|
|
||||||
|
from .base import Event, EventPublisher, EventSubscriber
|
||||||
|
|
||||||
|
# Module-level logger, named after this module so log records can be filtered per module.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class RedisPubSubPublisher(EventPublisher):
    """Event publisher backed by Redis Pub/Sub.

    Each event is serialised to JSON and sent with ``PUBLISH`` on the channel
    named after its topic. :meth:`connect` must be awaited before
    :meth:`publish`.
    """

    def __init__(
        self,
        redis_url: str = "redis://localhost:6379",
        source: str = "",
    ):
        """Store connection settings; no network I/O happens here.

        Args:
            redis_url: URL handed to ``redis.from_url`` by :meth:`connect`.
            source: Value set as ``source`` on every published event.
        """
        self.redis_url = redis_url
        self.source = source
        self._client: redis.Redis | None = None

    async def connect(self) -> None:
        """Create the Redis client and verify the connection with ``ping()``.

        The client is only stored on ``self`` once the ping succeeded, so a
        failed connect never leaves the publisher looking connected and never
        leaks the half-open client.

        Raises:
            Whatever ``ping()`` raises when the server is unreachable.
        """
        client = redis.from_url(self.redis_url, decode_responses=True)
        try:
            await client.ping()
        except BaseException:
            # Release the connection pool before propagating the failure
            # (BaseException so cancellation during the ping is covered too).
            await client.close()
            raise
        self._client = client
        # NOTE(review): the URL is logged verbatim; if it embeds credentials
        # they will end up in the logs.
        logger.info("Connected to Redis at %s", self.redis_url)

    async def disconnect(self) -> None:
        """Close the Redis client if one is open; safe to call repeatedly."""
        # Detach first so the publisher is "not connected" even if close() raises.
        client, self._client = self._client, None
        if client is None:
            return
        await client.close()
        logger.info("Disconnected from Redis")

    async def publish(self, topic: str, payload: dict[str, Any], **kwargs: Any) -> str:
        """Publish ``payload`` as an event on ``topic``.

        Args:
            topic: Redis channel to publish on; also stored on the event.
            payload: Event body; must be JSON-serialisable via ``Event.to_dict``.
            **kwargs: Only ``event_id`` is read. A missing or falsy value lets
                ``Event`` generate its own default id.

        Returns:
            The id of the published event.

        Raises:
            RuntimeError: If :meth:`connect` has not been called successfully.
        """
        if self._client is None:
            raise RuntimeError("Publisher not connected")

        event_fields: dict[str, Any] = {
            "topic": topic,
            "payload": payload,
            "source": self.source,
        }
        # Only override the id when the caller supplied one; otherwise rely on
        # Event's own default instead of building a throwaway Event for it.
        event_id = kwargs.get("event_id")
        if event_id:
            event_fields["event_id"] = event_id
        event = Event(**event_fields)

        message = json.dumps(event.to_dict())
        await self._client.publish(topic, message)

        logger.debug("Published event %s to %s", event.event_id, topic)
        return event.event_id
|
||||||
|
|
||||||
|
|
||||||
|
class RedisPubSubSubscriber(EventSubscriber):
    """Event subscriber backed by Redis Pub/Sub.

    Topics containing ``*`` are registered as pattern subscriptions
    (``PSUBSCRIBE``); all others as plain channel subscriptions
    (``SUBSCRIBE``). :meth:`connect` must be awaited before
    :meth:`subscribe` or :meth:`consume`.
    """

    def __init__(
        self,
        redis_url: str = "redis://localhost:6379",
        topics: list[str] | None = None,
    ):
        """Store connection settings; no network I/O happens here.

        Args:
            redis_url: URL handed to ``redis.from_url`` by :meth:`connect`.
            topics: Topics to subscribe to automatically on :meth:`connect`.
        """
        self.redis_url = redis_url
        # Copy, so later bookkeeping never mutates the caller's list.
        self._topics: list[str] = list(topics) if topics else []
        self._client: redis.Redis | None = None
        self._pubsub: redis.client.PubSub | None = None
        self._running = False

    async def connect(self) -> None:
        """Create the Redis client, verify it with ``ping()`` and subscribe.

        The client is only stored on ``self`` once the ping succeeded, so a
        failed connect does not leak the half-open client.

        Raises:
            Whatever ``ping()`` raises when the server is unreachable.
        """
        client = redis.from_url(self.redis_url, decode_responses=True)
        try:
            await client.ping()
        except BaseException:
            # Release the connection pool before propagating the failure
            # (BaseException so cancellation during the ping is covered too).
            await client.close()
            raise
        self._client = client
        self._pubsub = client.pubsub()
        # NOTE(review): the URL is logged verbatim; if it embeds credentials
        # they will end up in the logs.
        logger.info("Connected to Redis at %s", self.redis_url)

        if self._topics:
            await self.subscribe(self._topics)

    async def disconnect(self) -> None:
        """Stop consuming and release the pub/sub object and the client.

        Every resource is closed even if an earlier cleanup step raises.
        """
        self._running = False
        # Detach first so the subscriber is "not connected" whatever happens below.
        pubsub, self._pubsub = self._pubsub, None
        client, self._client = self._client, None
        try:
            if pubsub is not None:
                try:
                    await pubsub.unsubscribe()
                finally:
                    await pubsub.close()
        finally:
            if client is not None:
                await client.close()
                logger.info("Disconnected from Redis")

    async def subscribe(self, topics: list[str]) -> None:
        """Subscribe to additional topics.

        Args:
            topics: Topic names; entries containing ``*`` are treated as
                patterns.

        Raises:
            RuntimeError: If :meth:`connect` has not been called successfully.
        """
        if self._pubsub is None:
            raise RuntimeError("Subscriber not connected")

        # Separate pattern subscriptions from regular ones
        patterns = [t for t in topics if "*" in t]
        channels = [t for t in topics if "*" not in t]

        if channels:
            await self._pubsub.subscribe(*channels)
            logger.info("Subscribed to channels: %s", channels)

        if patterns:
            await self._pubsub.psubscribe(*patterns)
            logger.info("Subscribed to patterns: %s", patterns)

        # Record only topics not tracked yet. connect() passes self._topics
        # itself, and a blind extend() would duplicate every entry.
        for topic in topics:
            if topic not in self._topics:
                self._topics.append(topic)

    async def consume(self) -> AsyncIterator[Event]:
        """Yield events as they arrive until stopped.

        Polls the pub/sub connection with a one second timeout so that
        :meth:`disconnect` (which clears the running flag) is noticed
        promptly. Messages that cannot be decoded are logged and skipped.
        Other errors are logged and retried after a one second pause.

        Raises:
            RuntimeError: If :meth:`connect` has not been called successfully.
        """
        if self._pubsub is None:
            raise RuntimeError("Subscriber not connected")

        self._running = True

        while self._running:
            try:
                message = await self._pubsub.get_message(
                    ignore_subscribe_messages=True,
                    timeout=1.0,
                )
                if message is None:
                    await asyncio.sleep(0.01)
                    continue
                if message.get("type") not in ("message", "pmessage"):
                    continue
                event = self._decode_event(message)
            except asyncio.CancelledError:
                # NOTE(review): cancellation is absorbed here and the generator
                # just ends, which hides it from the cancelling task. Kept for
                # backward compatibility; consider re-raising.
                self._running = False
                break
            except Exception as exc:
                if not self._running:
                    # disconnect() ran while we were polling; the error is
                    # just the connection going away, so stop quietly.
                    break
                logger.error("Error consuming events: %s", exc)
                await asyncio.sleep(1.0)
                continue

            # Yield outside the try block so an exception thrown into the
            # generator at this point is not mistaken for a transport error.
            if event is not None:
                yield event

    @staticmethod
    def _decode_event(message: dict[str, Any]) -> Event | None:
        """Turn a raw pub/sub message into an Event, or None if malformed."""
        try:
            data = json.loads(message["data"])
            if not isinstance(data, dict):
                raise TypeError(
                    f"expected a JSON object, got {type(data).__name__}"
                )
            return Event.from_dict(data)
        except (KeyError, TypeError, ValueError) as exc:
            # json.JSONDecodeError is a ValueError subclass.
            logger.warning("Failed to parse event: %s", exc)
            return None
|
||||||
Reference in New Issue
Block a user