run idempotency
This commit is contained in:
@@ -24,6 +24,7 @@ Production refinements vs sign_pdfs_v1 (see docs/lambdas-md/lambda-20-sign-pdfs.
|
|||||||
"""
|
"""
|
||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import hashlib
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
@@ -35,6 +36,14 @@ import aiofiles
|
|||||||
_DONE = object()
|
_DONE = object()
|
||||||
|
|
||||||
|
|
||||||
|
def _dedup_key(cfg: dict) -> str:
|
||||||
|
# Identity of a logical job: same (bucket, prefix) = same job. Hashing keeps
|
||||||
|
# the DDB key bounded regardless of prefix length and avoids leaking the
|
||||||
|
# bucket/prefix into the dedup key in cleartext.
|
||||||
|
raw = f"{cfg['bucket']}#{cfg['prefix']}"
|
||||||
|
return hashlib.sha256(raw.encode()).hexdigest()[:32]
|
||||||
|
|
||||||
|
|
||||||
def _cfg(event: dict) -> dict:
|
def _cfg(event: dict) -> dict:
|
||||||
# bucket and prefix are required — fail loud if neither event nor env supplies
|
# bucket and prefix are required — fail loud if neither event nor env supplies
|
||||||
# them. The function deliberately has no fallback default (no "2026/04/" baked
|
# them. The function deliberately has no fallback default (no "2026/04/" baked
|
||||||
@@ -95,9 +104,41 @@ async def _run(event: dict, request_id: str):
|
|||||||
)
|
)
|
||||||
t0 = time.monotonic()
|
t0 = time.monotonic()
|
||||||
|
|
||||||
|
session = aioboto3.Session()
|
||||||
|
dedup_table = os.environ.get("DEDUP_TABLE")
|
||||||
|
dedup_key = _dedup_key(cfg)
|
||||||
|
|
||||||
|
# Idempotency check — return the cached result if (bucket, prefix) was
|
||||||
|
# processed within the TTL window. Regenerate the manifest URL so the
|
||||||
|
# cached response is always usable, even after the original URL expired.
|
||||||
|
if dedup_table:
|
||||||
|
async with session.resource("dynamodb") as ddb:
|
||||||
|
table = await ddb.Table(dedup_table)
|
||||||
|
try:
|
||||||
|
resp = await table.get_item(Key={"id": dedup_key})
|
||||||
|
except Exception as exc:
|
||||||
|
_log("dedup_lookup_error", request_id=request_id, error=str(exc))
|
||||||
|
resp = {}
|
||||||
|
if "Item" in resp:
|
||||||
|
cached = json.loads(resp["Item"]["result"])
|
||||||
|
async with session.client("s3", endpoint_url=cfg["endpoint"]) as s3:
|
||||||
|
cached["manifest_url"] = await s3.generate_presigned_url(
|
||||||
|
"get_object",
|
||||||
|
Params={"Bucket": cfg["bucket"], "Key": cached["manifest_key"]},
|
||||||
|
ExpiresIn=cfg["expiry"],
|
||||||
|
)
|
||||||
|
cached["idempotent"] = True
|
||||||
|
_log(
|
||||||
|
"cache_hit",
|
||||||
|
request_id=request_id,
|
||||||
|
dedup_key=dedup_key,
|
||||||
|
count=cached["count"],
|
||||||
|
duration_ms=round((time.monotonic() - t0) * 1000, 2),
|
||||||
|
)
|
||||||
|
return cached
|
||||||
|
|
||||||
pages = 0
|
pages = 0
|
||||||
errors = 0
|
errors = 0
|
||||||
session = aioboto3.Session()
|
|
||||||
async with session.client("s3", endpoint_url=cfg["endpoint"]) as s3:
|
async with session.client("s3", endpoint_url=cfg["endpoint"]) as s3:
|
||||||
queue: asyncio.Queue = asyncio.Queue(maxsize=cfg["queue_max"])
|
queue: asyncio.Queue = asyncio.Queue(maxsize=cfg["queue_max"])
|
||||||
manifest_path = f"/tmp/{uuid.uuid4()}.jsonl"
|
manifest_path = f"/tmp/{uuid.uuid4()}.jsonl"
|
||||||
@@ -205,6 +246,22 @@ async def _run(event: dict, request_id: str):
|
|||||||
Function="sign_pdfs",
|
Function="sign_pdfs",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if dedup_table:
|
||||||
|
async with session.resource("dynamodb") as ddb:
|
||||||
|
table = await ddb.Table(dedup_table)
|
||||||
|
try:
|
||||||
|
await table.put_item(
|
||||||
|
Item={
|
||||||
|
"id": dedup_key,
|
||||||
|
"result": json.dumps(result),
|
||||||
|
"ttl": int(time.time()) + 86400, # 24h
|
||||||
|
}
|
||||||
|
)
|
||||||
|
except Exception as exc:
|
||||||
|
# cache write failure is non-critical — the actual work
|
||||||
|
# succeeded. Log it so we know if dedup is broken.
|
||||||
|
_log("dedup_write_error", request_id=request_id, error=str(exc))
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -41,6 +41,21 @@ Resources:
|
|||||||
LogGroupName: /aws/lambda/eth-demo-sign-pdfs
|
LogGroupName: /aws/lambda/eth-demo-sign-pdfs
|
||||||
RetentionInDays: 7
|
RetentionInDays: 7
|
||||||
|
|
||||||
|
DedupTable:
|
||||||
|
Type: AWS::DynamoDB::Table
|
||||||
|
Properties:
|
||||||
|
TableName: !Sub ${AWS::StackName}-sign-pdfs-dedup
|
||||||
|
BillingMode: PAY_PER_REQUEST
|
||||||
|
AttributeDefinitions:
|
||||||
|
- AttributeName: id
|
||||||
|
AttributeType: S
|
||||||
|
KeySchema:
|
||||||
|
- AttributeName: id
|
||||||
|
KeyType: HASH
|
||||||
|
TimeToLiveSpecification:
|
||||||
|
AttributeName: ttl
|
||||||
|
Enabled: true
|
||||||
|
|
||||||
SignPdfsFunction:
|
SignPdfsFunction:
|
||||||
Type: AWS::Serverless::Function
|
Type: AWS::Serverless::Function
|
||||||
Properties:
|
Properties:
|
||||||
@@ -55,6 +70,7 @@ Resources:
|
|||||||
BUCKET_NAME: !Ref ReportsBucket
|
BUCKET_NAME: !Ref ReportsBucket
|
||||||
PREFIX: !Ref Prefix
|
PREFIX: !Ref Prefix
|
||||||
URL_EXPIRY_SECONDS: "900"
|
URL_EXPIRY_SECONDS: "900"
|
||||||
|
DEDUP_TABLE: !Ref DedupTable
|
||||||
Policies:
|
Policies:
|
||||||
- Statement:
|
- Statement:
|
||||||
- Sid: ListReportsBucket
|
- Sid: ListReportsBucket
|
||||||
@@ -69,6 +85,12 @@ Resources:
|
|||||||
Effect: Allow
|
Effect: Allow
|
||||||
Action: s3:PutObject
|
Action: s3:PutObject
|
||||||
Resource: !Sub "${ReportsBucket.Arn}/manifests/*"
|
Resource: !Sub "${ReportsBucket.Arn}/manifests/*"
|
||||||
|
- Sid: DedupTableAccess
|
||||||
|
Effect: Allow
|
||||||
|
Action:
|
||||||
|
- dynamodb:GetItem
|
||||||
|
- dynamodb:PutItem
|
||||||
|
Resource: !GetAtt DedupTable.Arn
|
||||||
|
|
||||||
Outputs:
|
Outputs:
|
||||||
ReportsBucketName:
|
ReportsBucketName:
|
||||||
|
|||||||
Reference in New Issue
Block a user