real handler, sign_pdfs CodeUri, env vars, S3 IAM, fail-loud config
This commit is contained in:
@@ -22,6 +22,7 @@ Production refinements vs sign_pdfs_v1 (see docs/lambdas-md/lambda-20-sign-pdfs.
|
||||
8. N concurrent consumers via asyncio.gather — presign throughput scales with
|
||||
the concurrency knob in the event.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
@@ -35,12 +36,17 @@ _DONE = object()
|
||||
|
||||
|
||||
def _cfg(event: dict) -> dict:
    """Build the run configuration from the invocation event and environment.

    Where both sources are consulted, a truthy event value wins over the
    environment variable. ``bucket`` and ``prefix`` are required: there is
    deliberately no baked-in fallback (no "2026/04/" default) because that is
    a deployment-time concern, not handler code, so a missing value fails loud.

    Args:
        event: Invocation payload. Optional keys read here: ``bucket``,
            ``prefix``, ``expiry_seconds``, ``page_size``, ``concurrency``.

    Returns:
        A dict with the keys ``bucket``, ``prefix``, ``expiry``, ``endpoint``,
        ``page_size``, ``concurrency`` and ``queue_max``.

    Raises:
        KeyError: ``bucket`` (or ``prefix``) is missing/falsy in the event and
            ``BUCKET_NAME`` (or ``PREFIX``) is not set in the environment.
        ValueError: a numeric setting is not a valid integer string.
    """
    return {
        # Required: os.environ[...] (not .get) so a missing value raises.
        "bucket": event.get("bucket") or os.environ["BUCKET_NAME"],
        "prefix": event.get("prefix") or os.environ["PREFIX"],  # e.g. "2026/04/"
        "expiry": int(
            event.get("expiry_seconds") or os.environ.get("URL_EXPIRY_SECONDS", "900")
        ),
        # An unset or empty S3_ENDPOINT_URL collapses to None.
        "endpoint": os.environ.get("S3_ENDPOINT_URL") or None,
        "page_size": int(event.get("page_size") or 100),
        "concurrency": int(event.get("concurrency") or 4),
        "queue_max": int(os.environ.get("QUEUE_MAX", "2000")),
    }
|
||||
@@ -51,26 +57,42 @@ def _log(event_type: str, **fields):
|
||||
|
||||
|
||||
def _emit_emf(metrics: dict, **dims):
    """Print one CloudWatch Embedded Metric Format (EMF) record to stdout.

    The record is a single JSON line: an ``_aws`` metadata envelope followed
    by the dimension values and the metric values as top-level members.

    Args:
        metrics: Mapping of metric name to value. A name ending in ``"Bytes"``
            is declared with unit ``Bytes``; every other name with ``Count``.
        **dims: Dimension name/value pairs. Together they form the record's
            single dimension set.

    Exactly one line is written per call, so each metric is recorded once.
    """
    metric_defs = [
        {"Name": name, "Unit": "Bytes" if name.endswith("Bytes") else "Count"}
        for name in metrics
    ]
    record = {
        "_aws": {
            # Epoch time in milliseconds.
            "Timestamp": int(time.time() * 1000),
            "CloudWatchMetrics": [
                {
                    "Namespace": "eth/sign_pdfs",
                    # One dimension set made of all supplied dimension names.
                    "Dimensions": [list(dims)],
                    "Metrics": metric_defs,
                }
            ],
        },
        # Values sit at the top level, keyed by the names declared above.
        **dims,
        **metrics,
    }
    print(json.dumps(record))
|
||||
|
||||
|
||||
async def _run(event: dict, request_id: str):
|
||||
cfg = _cfg(event)
|
||||
_log("start", request_id=request_id, bucket=cfg["bucket"], prefix=cfg["prefix"],
|
||||
page_size=cfg["page_size"], concurrency=cfg["concurrency"])
|
||||
_log(
|
||||
"start",
|
||||
request_id=request_id,
|
||||
bucket=cfg["bucket"],
|
||||
prefix=cfg["prefix"],
|
||||
page_size=cfg["page_size"],
|
||||
concurrency=cfg["concurrency"],
|
||||
)
|
||||
t0 = time.monotonic()
|
||||
|
||||
pages = 0
|
||||
@@ -85,7 +107,8 @@ async def _run(event: dict, request_id: str):
|
||||
try:
|
||||
paginator = s3.get_paginator("list_objects_v2")
|
||||
async for page in paginator.paginate(
|
||||
Bucket=cfg["bucket"], Prefix=cfg["prefix"],
|
||||
Bucket=cfg["bucket"],
|
||||
Prefix=cfg["prefix"],
|
||||
PaginationConfig={"PageSize": cfg["page_size"]},
|
||||
):
|
||||
pages += 1
|
||||
@@ -120,10 +143,17 @@ async def _run(event: dict, request_id: str):
|
||||
local += 1
|
||||
except Exception as exc:
|
||||
errors += 1
|
||||
_log("presign_error", request_id=request_id, key=item, error=str(exc))
|
||||
_log(
|
||||
"presign_error",
|
||||
request_id=request_id,
|
||||
key=item,
|
||||
error=str(exc),
|
||||
)
|
||||
|
||||
prod_task = asyncio.create_task(producer())
|
||||
counts = await asyncio.gather(*(consumer() for _ in range(cfg["concurrency"])))
|
||||
counts = await asyncio.gather(
|
||||
*(consumer() for _ in range(cfg["concurrency"]))
|
||||
)
|
||||
await prod_task
|
||||
|
||||
count = sum(counts)
|
||||
@@ -134,7 +164,9 @@ async def _run(event: dict, request_id: str):
|
||||
# the whole manifest in memory like `body = await f.read()` would.
|
||||
with open(manifest_path, "rb") as f:
|
||||
await s3.put_object(
|
||||
Bucket=cfg["bucket"], Key=manifest_key, Body=f,
|
||||
Bucket=cfg["bucket"],
|
||||
Key=manifest_key,
|
||||
Body=f,
|
||||
ContentType="application/x-ndjson",
|
||||
)
|
||||
|
||||
@@ -154,13 +186,24 @@ async def _run(event: dict, request_id: str):
|
||||
response_bytes = len(json.dumps(result))
|
||||
duration_ms = (time.monotonic() - t0) * 1000
|
||||
|
||||
_log("complete", request_id=request_id, count=count, errors=errors,
|
||||
pages=pages, duration_ms=round(duration_ms, 2))
|
||||
_emit_emf({
|
||||
"PDFsProcessed": count, "S3ListPages": pages,
|
||||
"PresignCount": count, "ManifestBytes": manifest_bytes,
|
||||
"ResponseBytes": response_bytes,
|
||||
}, Function="sign_pdfs")
|
||||
_log(
|
||||
"complete",
|
||||
request_id=request_id,
|
||||
count=count,
|
||||
errors=errors,
|
||||
pages=pages,
|
||||
duration_ms=round(duration_ms, 2),
|
||||
)
|
||||
_emit_emf(
|
||||
{
|
||||
"PDFsProcessed": count,
|
||||
"S3ListPages": pages,
|
||||
"PresignCount": count,
|
||||
"ManifestBytes": manifest_bytes,
|
||||
"ResponseBytes": response_bytes,
|
||||
},
|
||||
Function="sign_pdfs",
|
||||
)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
Reference in New Issue
Block a user