run idempotency

This commit is contained in:
2026-05-18 06:48:51 -03:00
parent 84fef80339
commit d3008676e0
2 changed files with 80 additions and 1 deletions

View File

@@ -24,6 +24,7 @@ Production refinements vs sign_pdfs_v1 (see docs/lambdas-md/lambda-20-sign-pdfs.
"""
import asyncio
import hashlib
import json
import os
import time
@@ -35,6 +36,14 @@ import aiofiles
_DONE = object()
def _dedup_key(cfg: dict) -> str:
# Identity of a logical job: same (bucket, prefix) = same job. Hashing keeps
# the DDB key bounded regardless of prefix length and avoids leaking the
# bucket/prefix into the dedup key in cleartext.
raw = f"{cfg['bucket']}#{cfg['prefix']}"
return hashlib.sha256(raw.encode()).hexdigest()[:32]
def _cfg(event: dict) -> dict:
# bucket and prefix are required — fail loud if neither event nor env supplies
# them. The function deliberately has no fallback default (no "2026/04/" baked
@@ -95,9 +104,41 @@ async def _run(event: dict, request_id: str):
)
t0 = time.monotonic()
session = aioboto3.Session()
dedup_table = os.environ.get("DEDUP_TABLE")
dedup_key = _dedup_key(cfg)
# Idempotency check — return the cached result if (bucket, prefix) was
# processed within the TTL window. Regenerate the manifest URL so the
# cached response is always usable, even after the original URL expired.
if dedup_table:
async with session.resource("dynamodb") as ddb:
table = await ddb.Table(dedup_table)
try:
resp = await table.get_item(Key={"id": dedup_key})
except Exception as exc:
_log("dedup_lookup_error", request_id=request_id, error=str(exc))
resp = {}
if "Item" in resp:
cached = json.loads(resp["Item"]["result"])
async with session.client("s3", endpoint_url=cfg["endpoint"]) as s3:
cached["manifest_url"] = await s3.generate_presigned_url(
"get_object",
Params={"Bucket": cfg["bucket"], "Key": cached["manifest_key"]},
ExpiresIn=cfg["expiry"],
)
cached["idempotent"] = True
_log(
"cache_hit",
request_id=request_id,
dedup_key=dedup_key,
count=cached["count"],
duration_ms=round((time.monotonic() - t0) * 1000, 2),
)
return cached
pages = 0
errors = 0
session = aioboto3.Session()
async with session.client("s3", endpoint_url=cfg["endpoint"]) as s3:
queue: asyncio.Queue = asyncio.Queue(maxsize=cfg["queue_max"])
manifest_path = f"/tmp/{uuid.uuid4()}.jsonl"
@@ -205,6 +246,22 @@ async def _run(event: dict, request_id: str):
Function="sign_pdfs",
)
if dedup_table:
async with session.resource("dynamodb") as ddb:
table = await ddb.Table(dedup_table)
try:
await table.put_item(
Item={
"id": dedup_key,
"result": json.dumps(result),
"ttl": int(time.time()) + 86400, # 24h
}
)
except Exception as exc:
# cache write failure is non-critical — the actual work
# succeeded. Log it so we know if dedup is broken.
_log("dedup_write_error", request_id=request_id, error=str(exc))
return result