add tester ui, and restructure folders

2026-05-13 17:00:00 -03:00
parent 7c5aa14409
commit 6652cb26e6
17 changed files with 2656 additions and 1251 deletions
--- a/ctrl/Dockerfile.lambda
+++ b/ctrl/Dockerfile.lambda
@@ -2,9 +2,27 @@ FROM python:3.13-slim

 WORKDIR /app

+# Function-specific deps first. Each function carries its own requirements.txt
+# (so a real AWS deploy zips the function folder verbatim). Locally, the pod
+# installs the union of all of them.
+COPY functions/ ./functions/
+RUN set -e; \
+    for r in functions/*/requirements.txt; do \
+        if [ -f "$r" ]; then pip install --no-cache-dir -r "$r"; fi; \
+    done
+
+# Runner deps (FastAPI + uvicorn). Lives only in the runner pod; NOT bundled
+# with any function zip for AWS.
 COPY requirements.txt ./
 RUN pip install --no-cache-dir -r requirements.txt

-COPY lambda_function.py invoke.py seed.py ./
+# Shared modules (Lambda-Layer equivalent) and generic tooling.
+COPY shared/ ./shared/
+COPY invoke.py runner.py ./

-CMD ["sleep", "infinity"]
+# uvicorn --reload restarts on .py change → every code edit produces a fresh
+# cold start, matching how AWS Lambda's container lifecycle works when you
+# redeploy. Watching the whole /app tree picks up function-folder edits too.
+CMD ["uvicorn", "runner:app", \
+     "--host", "0.0.0.0", "--port", "8000", \
+     "--reload", "--reload-dir", "/app"]
--- a/ctrl/Tiltfile
+++ b/ctrl/Tiltfile
@@ -23,9 +23,16 @@ docker_build(
    dockerfile='Dockerfile.lambda',
    ignore=['.git', 'def', '.venv', 'docs', '__pycache__', '.pytest_cache'],
    live_update=[
-        sync('../lambda_function.py', '/app/lambda_function.py'),
+        # Whole functions/ directory — new files appear in the tester's
+        # function list automatically; a .py edit makes uvicorn --reload
+        # restart the runner, emptying the warm cache so the next invoke is
+        # cold (`POST /reset` also lets you force that manually).
+        sync('../functions', '/app/functions'),
        sync('../invoke.py', '/app/invoke.py'),
        sync('../seed.py', '/app/seed.py'),
+        # runner.py change → uvicorn --reload restarts the process → all
+        # function modules drop out of the cache, next invocation cold.
+        sync('../runner.py', '/app/runner.py'),
    ],
 )

@@ -45,7 +52,18 @@ k8s_resource('lambda', resource_deps=['minio'])
 k8s_resource('docs')
 k8s_resource('gateway', resource_deps=['docs', 'minio'])

+# Hot-reload gateway Caddy on Caddyfile edit. configMapGenerator uses
+# disableNameSuffixHash so the Deployment template doesn't change → kustomize
+# won't roll the pod on its own. This local_resource closes the loop.
+local_resource(
+    'gateway-reload',
+    cmd='kubectl --context kind-eth -n eth rollout restart deployment/gateway',
+    deps=['k8s/base/Caddyfile'],
+    resource_deps=['gateway'],
+    auto_init=False,
+)
+
 k8s_resource(
-    objects=['eth:namespace', 'eth-config:configmap'],
+    objects=['eth:namespace', 'eth-config:configmap', 'gateway-config:configmap'],
    new_name='infra',
 )
--- a/ctrl/invoke.sh
+++ b/ctrl/invoke.sh
@@ -1,3 +1,12 @@
 #!/usr/bin/env bash
+# Invoke a function from functions/ directly via `python invoke.py`.
+# Usage:
+#   ctrl/invoke.sh                                  # default: first function found
+#   ctrl/invoke.sh sign_pdfs                        # specific function
+#   ctrl/invoke.sh sign_pdfs '{"key":"val"}'        # with event payload
+#
+# For the same invocation through the FastAPI tester (with cold/warm + memory
+# metrics) use the Lambda Tester tab at http://eth.local.ar or POST to
+# http://eth.local.ar/runner/invoke/<name>.
 set -euo pipefail
 kubectl --context kind-eth -n eth exec -i deploy/lambda -- python invoke.py "$@"
--- a/ctrl/k8s/base/Caddyfile
+++ b/ctrl/k8s/base/Caddyfile
@@ -4,6 +4,22 @@
 }

 eth.local.ar:80 {
+    # API surface for the local Lambda tester (FastAPI in the lambda pod).
+    # Path-based is fine for our own API — no SPA assumes ownership of `/`,
+    # and a single origin means no CORS headaches with the frontend at /.
+    handle_path /runner/* {
+        reverse_proxy lambda:8000
+    }
+
+    # Everything else → static docs viewer (the frontend lives here too).
+    handle {
+        reverse_proxy docs:80
+    }
+}
+
+docs.eth.local.ar:80 {
+    # Serve /docs.html for the root path; everything else (graphs, viewer.html) passes through.
+    rewrite / /docs.html
    reverse_proxy docs:80
 }

--- a/ctrl/k8s/base/kustomization.yaml
+++ b/ctrl/k8s/base/kustomization.yaml
@@ -12,10 +12,14 @@ resources:
  - gateway.yaml

 # Generate the gateway Caddyfile ConfigMap from the standalone file.
-# Hash suffix is on by default — when Caddyfile changes, the ConfigMap gets
-# a new hashed name, kustomize rewrites the Deployment volume reference,
-# and the gateway pod restarts automatically with the new config.
+# Hash suffix disabled so the name stays static — lets Tilt group it under
+# the 'infra' resource (no "uncategorized" pill). Trade-off: pod doesn't
+# auto-restart on Caddyfile change; the Tiltfile has a local_resource
+# 'gateway-reload' that does `kubectl rollout restart` whenever Caddyfile
+# is edited, so the experience is the same in practice.
 configMapGenerator:
  - name: gateway-config
    files:
      - Caddyfile
+    options:
+      disableNameSuffixHash: true
--- a/ctrl/k8s/base/lambda.yaml
+++ b/ctrl/k8s/base/lambda.yaml
@@ -16,8 +16,14 @@ spec:
      containers:
        - name: lambda
          image: eth-lambda
-          command: ["sleep", "infinity"]
+          # The container runs the FastAPI runner (see runner.py + Dockerfile).
+          # The CMD comes from the Dockerfile (uvicorn). Container also stays
+          # exec-able for `bash ctrl/seed.sh` / `bash ctrl/invoke.sh` which
+          # spawn separate python processes alongside uvicorn.
          workingDir: /app
+          ports:
+            - name: http
+              containerPort: 8000
          envFrom:
            - configMapRef:
                name: eth-config
@@ -25,14 +31,33 @@ spec:
            - name: documents
              mountPath: /mnt/documents
              readOnly: true
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 8000
+            initialDelaySeconds: 3
+            periodSeconds: 5
          resources:
            requests:
              memory: 128Mi
              cpu: 100m
            limits:
-              memory: 512Mi
+              memory: 1Gi
      volumes:
        - name: documents
          hostPath:
            path: /mnt/documents
            type: Directory
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: lambda
+  namespace: eth
+spec:
+  selector:
+    app: lambda
+  ports:
+    - name: http
+      port: 8000
+      targetPort: 8000
--- a/ctrl/seed.sh
+++ b/ctrl/seed.sh
@@ -1,3 +1,8 @@
 #!/usr/bin/env bash
+# Uploads the host's /mnt/documents tree into the in-cluster MinIO. The seed
+# script itself lives in functions/sign_pdfs/ because it's specific to *that*
+# function's data shape (bucket "my-company-reports-bucket", prefix "2026/04/").
+# Other functions will have their own seed scripts in their own folders.
 set -euo pipefail
-kubectl --context kind-eth -n eth exec -i deploy/lambda -- python seed.py /mnt/documents
+kubectl --context kind-eth -n eth exec -i deploy/lambda -- \
+  python functions/sign_pdfs/seed.py /mnt/documents
--- a/docs/docs.html
+++ b/docs/docs.html
--- a/docs/index.html
+++ b/docs/index.html
--- a/functions/sign_pdfs/events/default.json
+++ b/functions/sign_pdfs/events/default.json
@@ -0,0 +1 @@
+{}
--- a/functions/sign_pdfs/handler.py
+++ b/functions/sign_pdfs/handler.py
@@ -23,11 +23,13 @@ async def _run():

        async def producer():
            paginator = s3.get_paginator("list_objects_v2")
+            try:
                async for page in paginator.paginate(Bucket=BUCKET, Prefix=PREFIX, PaginationConfig={"PageSize": 100}):
                    for obj in page.get("Contents", []) or []:
                        key = obj["Key"]
                        if key.lower().endswith(".pdf"):
                            await queue.put(key)
+            finally:
                await queue.put(_DONE)

        async def consumer():
--- a/functions/sign_pdfs/requirements.txt
+++ b/functions/sign_pdfs/requirements.txt
@@ -0,0 +1,6 @@
+# Deps for the sign_pdfs lambda. Bundled into its deployment zip when
+# uploading to AWS; locally, the runner pod installs the union of all
+# per-function requirements (see Dockerfile.lambda).
+aioboto3>=15.0    # async S3 client used in handler.py
+aiofiles>=23.2    # async file I/O for the JSONL manifest in /tmp
+boto3>=1.40       # sync S3 client used by seed.py (data setup utility)
--- a/functions/sign_pdfs/seed.py
+++ b/functions/sign_pdfs/seed.py
--- a/invoke.py
+++ b/invoke.py
@@ -1,15 +1,65 @@
+"""Shared CLI invoker for any function in functions/.
+
+Usage (inside the lambda pod, via `bash ctrl/invoke.sh [name [event_json]]`):
+    python invoke.py                       # invokes the only function, or the first found
+    python invoke.py sign_pdfs             # specific function
+    python invoke.py sign_pdfs '{}'        # with event payload
+"""
+
+import importlib.util
 import json
 import os
+import sys
+from pathlib import Path

+# Defaults match the in-cluster configmap so behavior is identical whether
+# this script is invoked directly or via the FastAPI runner. `setdefault`
+# never overrides an existing env var — the configmap wins in the pod.
 os.environ.setdefault("BUCKET_NAME", "my-company-reports-bucket")
 os.environ.setdefault("PREFIX", "2026/04/")
-os.environ.setdefault("S3_ENDPOINT_URL", "http://localhost:9000")
+os.environ.setdefault("S3_ENDPOINT_URL", "http://minio:9000")
 os.environ.setdefault("AWS_ACCESS_KEY_ID", "minioadmin")
 os.environ.setdefault("AWS_SECRET_ACCESS_KEY", "minioadmin")
 os.environ.setdefault("AWS_REGION", "us-east-1")

-from lambda_function import handler  # noqa: E402
+REPO_ROOT = Path(__file__).parent
+FUNCTIONS_DIR = Path(os.environ.get("FUNCTIONS_DIR", str(REPO_ROOT / "functions")))
+SHARED_DIR = REPO_ROOT / "shared"
+
+# Make shared/ importable from any handler ("from shared import ..."). Matches
+# how a Lambda Layer would expose code on PYTHONPATH at runtime.
+if SHARED_DIR.exists() and str(REPO_ROOT) not in sys.path:
+    sys.path.insert(0, str(REPO_ROOT))
+
+
+def _pick_default_function() -> str:
+    """Return the alphabetically first function folder; exit(2) when there is
+    none — a missing FUNCTIONS_DIR counts as "no functions", not a traceback."""
+    dirs = FUNCTIONS_DIR.iterdir() if FUNCTIONS_DIR.is_dir() else ()
+    names = (d.name for d in dirs if d.is_dir() and (d / "handler.py").exists())
+    candidates = sorted(n for n in names if not n.startswith("_"))
+    if not candidates:
+        print(f"no function folders with handler.py in {FUNCTIONS_DIR}", file=sys.stderr)
+        sys.exit(2)
+    return candidates[0]
+
+
+def _load(name: str):  # import functions/<name>/handler.py and return the module; exit(2) on failure
+    path = FUNCTIONS_DIR / name / "handler.py"
+    if not path.exists():
+        print(f"function not found: {path}", file=sys.stderr)
+        sys.exit(2)
+    spec = importlib.util.spec_from_file_location(f"functions.{name}.handler", path)  # same module name runner.py uses
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)  # runs the file's module-level code
+    if not hasattr(module, "handler"):
+        print(f"{path} has no handler(event, context)", file=sys.stderr)
+        sys.exit(2)
+    return module
+

 if __name__ == "__main__":
-    response = handler({}, None)
+    name = sys.argv[1] if len(sys.argv) > 1 else _pick_default_function()
+    event = json.loads(sys.argv[2]) if len(sys.argv) > 2 else {}
+    module = _load(name)
+    response = module.handler(event, None)
    print(json.dumps(response, indent=2))
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
-aioboto3>=15.0
-aiofiles>=23.2
-boto3>=1.40
+# Root requirements = runner + invoker deps only. Per-function deps live
+# alongside each function (functions/<name>/requirements.txt) so they're
+# bundled with the function's deployment zip for AWS.
+fastapi>=0.115
+uvicorn[standard]>=0.32
--- a/runner.py
+++ b/runner.py
@@ -0,0 +1,338 @@
+"""Local Lambda runner — FastAPI wrapper that invokes any `handler(event, context)`
+in FUNCTIONS_DIR/<name>/handler.py and reports AWS-style metrics. Nothing in this
+file is touched by the lambda function itself; functions stay verbatim-uploadable to AWS.
+
+Features that are scaffolded now and "light up" later when matching improvements
+land in the function:
+  - event payload pass-through  (improvement #1: BUCKET/PREFIX from event)
+  - structured JSON log capture (improvement #2: JSON logging to stdout)
+  - EMF metric extraction       (improvement #3: CloudWatch EMF embedded metrics)
+Until the function emits those, the corresponding output fields are empty.
+"""
+
+import asyncio
+import importlib.util
+import io
+import json
+import math
+import os
+import resource
+import subprocess
+import sys
+import time
+import traceback
+import uuid
+from contextlib import redirect_stderr, redirect_stdout
+from pathlib import Path
+
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel, Field
+
+FUNCTIONS_DIR = Path(os.environ.get("FUNCTIONS_DIR", "/app/functions"))  # one sub-folder per function
+SHARED_DIR = Path(os.environ.get("SHARED_DIR", "/app/shared"))  # cross-function modules
+MAX_INVOCATIONS = int(os.environ.get("RUNNER_MAX_INVOCATIONS", "200"))  # history cap applied in _record()
+
+# Make `from shared import ...` work in any function by putting the PARENT of
+# shared/ on sys.path — the local stand-in for attaching an AWS Lambda Layer.
+if SHARED_DIR.exists():
+    _repo_root = str(SHARED_DIR.parent)
+    if _repo_root not in sys.path:
+        sys.path.insert(0, _repo_root)
+
+app = FastAPI(title="Lambda Local Runner")
+
+_modules: dict = {}             # name -> imported module (cache; presence = warm)
+_invocations: list[dict] = []   # newest last; capped at MAX_INVOCATIONS
+
+
+class LambdaContext:
+    """Minimal stand-in for the AWS Lambda context object; `invoke()` builds one
+    per invocation. The function file doesn't use any of this today, but the shape
+    is right for when improvements add `context.aws_request_id` to structured logs."""
+
+    def __init__(self, request_id: str, function_name: str, memory_mb: int, timeout_ms: int):
+        self.aws_request_id = request_id
+        self.function_name = function_name
+        self.function_version = "$LATEST"
+        self.invoked_function_arn = f"arn:aws:lambda:local:000000000000:function:{function_name}"
+        self.memory_limit_in_mb = memory_mb  # stored for the handler to read; nothing here enforces it
+        self.log_group_name = f"/aws/lambda/{function_name}"
+        self.log_stream_name = f"local/{time.strftime('%Y/%m/%d')}/[$LATEST]{request_id}"
+        self._deadline_ms = time.monotonic() * 1000 + timeout_ms  # absolute deadline on the monotonic clock, in ms
+
+    def get_remaining_time_in_millis(self) -> int:  # never negative: clamps to 0 once the deadline has passed
+        return max(0, int(self._deadline_ms - time.monotonic() * 1000))
+
+
+class InvokeRequest(BaseModel):  # JSON body accepted by POST /invoke/{name}
+    event: dict = Field(default_factory=dict)
+    # AWS Lambda memory: any whole MB from 128 to 10240. Not enforced locally.
+    memory_mb: int = 128
+    # AWS Lambda timeout: 1-900 seconds. Locally we record it but don't kill
+    # the handler (matches "function works verbatim" — no signal interruption).
+    timeout_ms: int = 30_000
+
+
+class ScriptRequest(BaseModel):  # JSON body accepted by POST /scripts/{fn_name}/{script_name}
+    args: list[str] = Field(default_factory=list)  # appended to the script's command line by run_script()
+
+
+@app.get("/functions")
+def list_functions():
+    """Scan FUNCTIONS_DIR for subfolders whose handler.py contains the text
+    `def handler(event, context)`; folders starting with "_" are skipped.
+    Returns {"functions": [{"name", "events"}, ...], "functions_dir": str}."""
+    funcs: list[dict] = []
+    if not FUNCTIONS_DIR.exists():
+        return {"functions": [], "functions_dir": str(FUNCTIONS_DIR), "error": "directory not found"}
+    for d in sorted(FUNCTIONS_DIR.iterdir()):
+        if not d.is_dir() or d.name.startswith("_"):
+            continue
+        handler = d / "handler.py"
+        if not handler.exists():
+            continue
+        try:
+            text = handler.read_text()
+        except Exception:  # unreadable handler: leave the function out of the list, no error reported
+            continue
+        if "def handler(event, context)" not in text and "def handler(event,context)" not in text:  # plain-text match only
+            continue
+        # Discover sample events (events/*.json) so the UI can populate a dropdown.
+        events = sorted(p.name for p in (d / "events").glob("*.json")) if (d / "events").is_dir() else []
+        funcs.append({"name": d.name, "events": events})
+    return {"functions": funcs, "functions_dir": str(FUNCTIONS_DIR)}
+
+
+@app.get("/functions/{name}/events/{filename}")
+def get_event(name: str, filename: str):
+    """Return the parsed JSON of a sample event file (404 if missing, 400 if not valid JSON)."""
+    path = FUNCTIONS_DIR / name / "events" / filename  # name/filename are joined as given, without further checks
+    if not path.exists() or not path.is_file():
+        raise HTTPException(status_code=404, detail="event file not found")
+    try:
+        return json.loads(path.read_text())
+    except json.JSONDecodeError as e:
+        raise HTTPException(status_code=400, detail=f"invalid JSON in {path}: {e}")
+
+
+@app.post("/invoke/{name}")
+def invoke(name: str, req: InvokeRequest):
+    """Invoke a handler and return the recorded invocation (404: no handler.py,
+    400: module lacks `handler`). Sync def so FastAPI runs us in a thread — the
+    function can then call `asyncio.run(...)` itself without nested-loop errors."""
+    target = FUNCTIONS_DIR / name / "handler.py"
+    if not target.exists():
+        raise HTTPException(status_code=404, detail=f"{target} not found")
+
+    invocation_id = str(uuid.uuid4())
+    cold_start = name not in _modules
+    record: dict = {
+        "invocation_id": invocation_id,
+        "function": name,
+        "timestamp": time.time(),
+        "event": req.event,
+        "result": None,
+        "error": None,
+        "stdout": "",
+        "stderr": "",
+        "structured_logs": [],
+        "emf_metrics": [],
+        "metrics": {
+            "cold_start": cold_start,
+            "init_duration_ms": None,
+            "duration_ms": 0.0,
+            "billed_duration_ms": 0,
+            "memory_size_mb": req.memory_mb,
+            "max_memory_used_mb": 0.0,
+        },
+    }
+
+    # Cold-start: import the module and time the import. This matches AWS's
+    # "Init Duration" — time to load module-level code (imports + module-scope
+    # statements). On warm invocations this whole block is skipped.
+    if cold_start:
+        spec = importlib.util.spec_from_file_location(
+            f"functions.{name}.handler", target,
+        )
+        module = importlib.util.module_from_spec(spec)
+        t0 = time.monotonic()
+        try:
+            spec.loader.exec_module(module)
+        except Exception as e:
+            init_duration_ms = (time.monotonic() - t0) * 1000
+            record["error"] = _format_exception(e)
+            record["metrics"]["init_duration_ms"] = round(init_duration_ms, 2)
+            return _record(record)
+        init_duration_ms = (time.monotonic() - t0) * 1000
+        _modules[name] = module
+        record["metrics"]["init_duration_ms"] = round(init_duration_ms, 2)
+    module = _modules[name]
+
+    if not hasattr(module, "handler"):
+        _modules.pop(name, None)  # don't keep a handler-less module cached as "warm"
+        raise HTTPException(status_code=400, detail=f"{target} has no handler() function")
+
+    context = LambdaContext(
+        request_id=invocation_id,
+        function_name=name,
+        memory_mb=req.memory_mb,
+        timeout_ms=req.timeout_ms,
+    )
+
+    stdout_buf = io.StringIO()
+    stderr_buf = io.StringIO()
+    t_handler = time.monotonic()
+    try:
+        with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf):  # NOTE: swaps sys.stdout/stderr process-wide
+            result = module.handler(req.event, context)
+            # Defensive: if a future async handler ever returns a coroutine
+            # (AWS doesn't support that natively, but we might), run it.
+            if asyncio.iscoroutine(result):
+                result = asyncio.run(result)
+        record["result"] = result
+    except Exception as e:
+        record["error"] = _format_exception(e)
+    duration_ms = (time.monotonic() - t_handler) * 1000
+
+    # ru_maxrss: KiB on Linux (bytes on macOS); peak for the whole process so far.
+    rusage = resource.getrusage(resource.RUSAGE_SELF)
+    max_memory_mb = rusage.ru_maxrss / 1024
+
+    record["stdout"] = stdout_buf.getvalue()
+    record["stderr"] = stderr_buf.getvalue()
+    record["structured_logs"] = _extract_json_logs(record["stdout"])
+    record["emf_metrics"] = _extract_emf_metrics(record["stdout"])
+    record["metrics"]["duration_ms"] = round(duration_ms, 2)
+    record["metrics"]["billed_duration_ms"] = int(math.ceil(duration_ms))
+    record["metrics"]["max_memory_used_mb"] = round(max_memory_mb, 2)
+    return _record(record)
+
+
+@app.get("/invocations")
+def list_invocations(limit: int = 50):
+    """Index of the last `limit` invocations, newest first (none if limit <= 0).
+    Lightweight summary only — use /invocations/{id} for the full record."""
+    items = []
+    for r in reversed(_invocations[-limit:] if limit > 0 else []):  # [-0:] would mean "everything"
+        items.append({
+            "invocation_id": r["invocation_id"],
+            "function": r["function"],
+            "timestamp": r["timestamp"],
+            "cold_start": r["metrics"]["cold_start"],
+            "duration_ms": r["metrics"]["duration_ms"],
+            "init_duration_ms": r["metrics"]["init_duration_ms"],
+            "max_memory_used_mb": r["metrics"]["max_memory_used_mb"],
+            "ok": r["error"] is None,
+        })
+    return {"invocations": items, "total": len(_invocations)}
+
+
+@app.get("/invocations/{invocation_id}")
+def get_invocation(invocation_id: str):  # full stored record for one invocation, else 404
+    for r in _invocations:  # linear scan; _record() caps the list at MAX_INVOCATIONS
+        if r["invocation_id"] == invocation_id:
+            return r
+    raise HTTPException(status_code=404, detail="invocation not found")
+
+
+@app.delete("/invocations")
+def clear_invocations():  # wipes the history only; the module cache (_modules) is left alone
+    n = len(_invocations)
+    _invocations.clear()
+    return {"cleared": n}  # how many records were dropped
+
+
+@app.post("/reset")
+def reset_modules():
+    """Clear the module cache so the next invocation is cold. Useful for
+    A/B-ing cold-start cost without restarting the FastAPI process."""
+    cleared = list(_modules.keys())
+    _modules.clear()
+    for name in cleared:
+        sys.modules.pop(f"functions.{name}.handler", None)  # the module name invoke() loads it under, never the bare folder name
+    return {"cleared": cleared}
+
+
+@app.get("/functions/{name}/scripts")
+def list_scripts(name: str):
+    """List a function's support scripts: top-level *.py except handler.py and __init__.py."""
+    func_dir = FUNCTIONS_DIR / name
+    if not func_dir.is_dir():
+        raise HTTPException(status_code=404, detail=f"function {name!r} not found")
+    scripts = [
+        p.name for p in sorted(func_dir.glob("*.py"))  # non-recursive: sub-folders are not searched
+        if p.name not in ("handler.py", "__init__.py")
+    ]
+    return {"scripts": scripts, "function": name}
+
+
+@app.post("/scripts/{fn_name}/{script_name}")
+def run_script(fn_name: str, script_name: str, req: ScriptRequest):
+    """Run functions/<fn_name>/<script_name> in a subprocess (300 s cap) and return its output."""
+    if fn_name in (".", "..") or ".." in script_name or "/" in script_name:  # neither part may step outside FUNCTIONS_DIR
+        raise HTTPException(status_code=400, detail="invalid function or script name")
+    if not script_name.endswith(".py"):
+        raise HTTPException(status_code=400, detail="only .py scripts allowed")
+    script_path = FUNCTIONS_DIR / fn_name / script_name
+    if not script_path.exists():
+        raise HTTPException(status_code=404, detail=f"{script_path} not found")
+    cmd = [sys.executable, str(script_path)] + list(req.args)  # argv list, no shell involved
+    t0 = time.monotonic()
+    try:
+        result = subprocess.run(cmd, capture_output=True, text=True, timeout=300)
+    except subprocess.TimeoutExpired:
+        return {"returncode": -1, "stdout": "", "stderr": "timed out after 300 s",  # -1 marks "killed by timeout"
+                "duration_ms": 300_000.0}
+    return {
+        "returncode": result.returncode,
+        "stdout": result.stdout,
+        "stderr": result.stderr,
+        "duration_ms": round((time.monotonic() - t0) * 1000, 2),
+    }
+
+
+@app.get("/health")
+def health():  # polled by the lambda pod's readinessProbe (httpGet /health on port 8000)
+    return {"ok": True, "loaded_modules": list(_modules.keys()), "invocations": len(_invocations)}
+
+
+def _format_exception(e: BaseException) -> dict:  # JSON-friendly {"type", "message", "traceback"} view of `e`
+    return {
+        "type": type(e).__name__,
+        "message": str(e),
+        "traceback": "".join(traceback.format_exception(type(e), e, e.__traceback__)),  # built from `e`, so valid outside `except` too
+    }
+
+
+def _extract_json_logs(stdout_text: str) -> list[dict]:
+    """Collect every stdout line that is a JSON object, in order of appearance.
+    Lines that are not `{...}` or fail to parse are skipped without error, so
+    output from a function that logs plain text simply yields []."""
+    parsed: list[dict] = []
+    for raw in stdout_text.splitlines():
+        candidate = raw.strip()
+        if candidate.startswith("{") and candidate.endswith("}"):
+            try:
+                parsed.append(json.loads(candidate))
+            except json.JSONDecodeError:
+                pass  # looked like an object but isn't valid JSON — ignore it
+    return parsed
+
+
+def _extract_emf_metrics(stdout_text: str) -> list[dict]:
+    """Pick the CloudWatch EMF records out of stdout. An EMF record looks like
+        {"_aws": {"CloudWatchMetrics": [...], "Timestamp": ...}, "<metric>": value, ...}
+    i.e. a JSON log line whose "_aws" object carries "CloudWatchMetrics"."""
+    def is_emf(entry: dict) -> bool:
+        envelope = entry.get("_aws")
+        return isinstance(envelope, dict) and "CloudWatchMetrics" in envelope
+
+    # Whole entries are kept, so the metric values stay next to their metadata.
+    return [entry for entry in _extract_json_logs(stdout_text) if is_emf(entry)]
+
+
+def _record(rec: dict) -> dict:  # append `rec` to the history and hand the same dict back
+    _invocations.append(rec)
+    if len(_invocations) > MAX_INVOCATIONS:
+        del _invocations[: len(_invocations) - MAX_INVOCATIONS]  # drop the oldest entries from the front
+    return rec
--- a/shared/README.md
+++ b/shared/README.md
@@ -0,0 +1,21 @@
+# shared/
+
+Cross-function Python modules — the local equivalent of an AWS Lambda Layer.
+
+Anything in this directory is available to every function under
+`functions/<name>/handler.py` as a regular import:
+
+```python
+# functions/sign_pdfs/handler.py
+from shared import common_utils
+```
+
+The parent of `shared/` is put on `sys.path` by the runner and `invoke.py`, so
+`shared` imports as a package. For an AWS deploy, you'd either:
+
+1. Package it as a real Lambda Layer (preferred), and reference the layer ARN
+   from each function's deploy spec, **or**
+2. Vendor a copy into each function's deployment zip (simpler, less DRY).
+
+Currently empty — no cross-function code yet. First candidate would probably
+be a structured-logging helper once multiple functions need it.