72 lines
2.0 KiB
JSON
72 lines
2.0 KiB
JSON
{
  "Comment": "PDF metadata index pipeline — list keys, then per-key extract + DDB write in parallel.",
  "StartAt": "ListPdfs",
  "States": {
    "ListPdfs": {
      "Type": "Task",
      "Comment": "Invokes the list Lambda with an empty payload and stores the returned keys and count under $.list. Transient Lambda service errors and throttles are retried with exponential backoff.",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${ListPdfsFunctionArn}",
        "Payload": {}
      },
      "ResultSelector": {
        "keys.$": "$.Payload.keys",
        "count.$": "$.Payload.count"
      },
      "ResultPath": "$.list",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 1,
          "MaxAttempts": 3,
          "BackoffRate": 2.0
        }
      ],
      "Next": "ExtractAndIndex"
    },
    "ExtractAndIndex": {
      "Type": "Map",
      "Comment": "Runs the extract-then-write sub-workflow once per entry in $.list.keys, with at most 10 iterations in flight at a time.",
      "ItemsPath": "$.list.keys",
      "MaxConcurrency": 10,
      "ItemProcessor": {
        "ProcessorConfig": {
          "Mode": "INLINE"
        },
        "StartAt": "ExtractMetadata",
        "States": {
          "ExtractMetadata": {
            "Type": "Task",
            "Comment": "Invokes the extract Lambda with the current item as 'key' and keeps only key, pages and size_bytes from the response payload.",
            "Resource": "arn:aws:states:::lambda:invoke",
            "Parameters": {
              "FunctionName": "${ExtractMetadataFunctionArn}",
              "Payload": {
                "key.$": "$"
              }
            },
            "ResultSelector": {
              "key.$": "$.Payload.key",
              "pages.$": "$.Payload.pages",
              "size_bytes.$": "$.Payload.size_bytes"
            },
            "Retry": [
              {
                "ErrorEquals": ["States.ALL"],
                "IntervalSeconds": 1,
                "MaxAttempts": 2,
                "BackoffRate": 2.0
              }
            ],
            "Next": "WriteToIndex"
          },
          "WriteToIndex": {
            "Type": "Task",
            "Comment": "Writes key (S), pages (N) and size_bytes (N) to the index table; States.Format renders the numeric fields as strings for the N attributes. Failed writes are retried with the same policy as ExtractMetadata, since up to 10 iterations write concurrently and re-putting the same item overwrites it.",
            "Resource": "arn:aws:states:::dynamodb:putItem",
            "Parameters": {
              "TableName": "${PdfIndexTableName}",
              "Item": {
                "key": { "S.$": "$.key" },
                "pages": { "N.$": "States.Format('{}', $.pages)" },
                "size_bytes": { "N.$": "States.Format('{}', $.size_bytes)" }
              }
            },
            "Retry": [
              {
                "ErrorEquals": ["States.ALL"],
                "IntervalSeconds": 1,
                "MaxAttempts": 2,
                "BackoffRate": 2.0
              }
            ],
            "End": true
          }
        }
      },
      "End": true
    }
  }
}
|