"""List every .pdf key under the configured bucket+prefix. Output: {"keys": ["2026/04/a.pdf", ...], "count": N, "pages": N} Used as the first state in the metadata-index pipeline. The "keys" array feeds an SFN Map state that runs ExtractMetadata in parallel per key. """ import os import boto3 _s3 = boto3.client("s3", endpoint_url=os.environ.get("S3_ENDPOINT_URL") or None) def handler(event, context): bucket = event.get("bucket") or os.environ["BUCKET_NAME"] prefix = event.get("prefix") or os.environ["PREFIX"] keys = [] pages = 0 paginator = _s3.get_paginator("list_objects_v2") for page in paginator.paginate( Bucket=bucket, Prefix=prefix, PaginationConfig={"PageSize": 1000} ): pages += 1 for obj in page.get("Contents", []) or []: key = obj["Key"] if key.lower().endswith(".pdf"): keys.append(key) return {"keys": keys, "count": len(keys), "pages": pages}