Files
lambda_studio/statemachines/pdf-index/definition.asl.json

72 lines
2.0 KiB
JSON

{
"Comment": "PDF metadata index pipeline — list keys, then per-key extract + DDB write in parallel.",
"StartAt": "ListPdfs",
"States": {
"ListPdfs": {
  "Comment": "Invokes the list Lambda with an empty payload and stores only its keys and count under $.list. Transient Lambda service errors are retried with exponential backoff so a brief throttle or service fault does not abort the run.",
  "Type": "Task",
  "Resource": "arn:aws:states:::lambda:invoke",
  "Parameters": {
    "FunctionName": "${ListPdfsFunctionArn}",
    "Payload": {}
  },
  "ResultSelector": {
    "keys.$": "$.Payload.keys",
    "count.$": "$.Payload.count"
  },
  "ResultPath": "$.list",
  "Retry": [
    {
      "ErrorEquals": [
        "Lambda.ServiceException",
        "Lambda.AWSLambdaException",
        "Lambda.SdkClientException",
        "Lambda.TooManyRequestsException"
      ],
      "IntervalSeconds": 1,
      "MaxAttempts": 3,
      "BackoffRate": 2.0
    }
  ],
  "Next": "ExtractAndIndex"
},
"ExtractAndIndex": {
  "Comment": "Runs the extract-then-write sub-workflow once per entry of $.list.keys, with at most 10 iterations in flight. One iteration that fails after its retries fails the whole Map state.",
  "Type": "Map",
  "ItemsPath": "$.list.keys",
  "MaxConcurrency": 10,
  "ItemProcessor": {
    "ProcessorConfig": {
      "Mode": "INLINE"
    },
    "StartAt": "ExtractMetadata",
    "States": {
      "ExtractMetadata": {
        "Comment": "Invokes the extract Lambda with the current Map item as 'key' and keeps key, pages and size_bytes from the returned payload. Any error is retried up to 2 times with backoff.",
        "Type": "Task",
        "Resource": "arn:aws:states:::lambda:invoke",
        "Parameters": {
          "FunctionName": "${ExtractMetadataFunctionArn}",
          "Payload": {
            "key.$": "$"
          }
        },
        "ResultSelector": {
          "key.$": "$.Payload.key",
          "pages.$": "$.Payload.pages",
          "size_bytes.$": "$.Payload.size_bytes"
        },
        "Retry": [
          {
            "ErrorEquals": ["States.ALL"],
            "IntervalSeconds": 1,
            "MaxAttempts": 2,
            "BackoffRate": 2.0
          }
        ],
        "Next": "WriteToIndex"
      },
      "WriteToIndex": {
        "Comment": "Puts one item (key, pages, size_bytes) into the index table; pages and size_bytes are rendered to strings with States.Format for the N attributes. Retried with the same policy as ExtractMetadata: the put is unconditional and writes the same item, so repeating it is safe, and a throttled write would otherwise fail the whole Map.",
        "Type": "Task",
        "Resource": "arn:aws:states:::dynamodb:putItem",
        "Parameters": {
          "TableName": "${PdfIndexTableName}",
          "Item": {
            "key": { "S.$": "$.key" },
            "pages": { "N.$": "States.Format('{}', $.pages)" },
            "size_bytes": { "N.$": "States.Format('{}', $.size_bytes)" }
          }
        },
        "Retry": [
          {
            "ErrorEquals": ["States.ALL"],
            "IntervalSeconds": 1,
            "MaxAttempts": 2,
            "BackoffRate": 2.0
          }
        ],
        "End": true
      }
    }
  },
  "End": true
}
}
}