metadata pipeline via Step Functions
This commit is contained in:
71
statemachines/pdf-index/definition.asl.json
Normal file
71
statemachines/pdf-index/definition.asl.json
Normal file
@@ -0,0 +1,71 @@
|
||||
{
  "Comment": "PDF metadata index pipeline — list keys, then per-key extract + DDB write in parallel.",
  "StartAt": "ListPdfs",
  "States": {
    "ListPdfs": {
      "Comment": "Invoke the list Lambda and keep only keys and count from its payload under $.list. Transient Lambda service errors are retried with backoff.",
      "Type": "Task",
      "Resource": "arn:aws:states:::lambda:invoke",
      "Parameters": {
        "FunctionName": "${ListPdfsFunctionArn}",
        "Payload": {}
      },
      "ResultSelector": {
        "keys.$": "$.Payload.keys",
        "count.$": "$.Payload.count"
      },
      "ResultPath": "$.list",
      "Retry": [
        {
          "ErrorEquals": [
            "Lambda.ServiceException",
            "Lambda.AWSLambdaException",
            "Lambda.SdkClientException",
            "Lambda.TooManyRequestsException"
          ],
          "IntervalSeconds": 1,
          "MaxAttempts": 3,
          "BackoffRate": 2.0
        }
      ],
      "Next": "ExtractAndIndex"
    },
    "ExtractAndIndex": {
      "Comment": "Run the extract-then-write sub-workflow once per entry of $.list.keys, at most 10 entries at a time.",
      "Type": "Map",
      "ItemsPath": "$.list.keys",
      "MaxConcurrency": 10,
      "ItemProcessor": {
        "ProcessorConfig": {
          "Mode": "INLINE"
        },
        "StartAt": "ExtractMetadata",
        "States": {
          "ExtractMetadata": {
            "Comment": "Invoke the extract Lambda with the current key; the state output is reduced to key, pages and size_bytes.",
            "Type": "Task",
            "Resource": "arn:aws:states:::lambda:invoke",
            "Parameters": {
              "FunctionName": "${ExtractMetadataFunctionArn}",
              "Payload": {
                "key.$": "$"
              }
            },
            "ResultSelector": {
              "key.$": "$.Payload.key",
              "pages.$": "$.Payload.pages",
              "size_bytes.$": "$.Payload.size_bytes"
            },
            "Retry": [
              {
                "ErrorEquals": ["States.ALL"],
                "IntervalSeconds": 1,
                "MaxAttempts": 2,
                "BackoffRate": 2.0
              }
            ],
            "Next": "WriteToIndex"
          },
          "WriteToIndex": {
            "Comment": "Put one item per key. Numeric fields go through States.Format because DynamoDB N attribute values are passed as strings. Throttling and transient service errors are retried with backoff.",
            "Type": "Task",
            "Resource": "arn:aws:states:::dynamodb:putItem",
            "Parameters": {
              "TableName": "${PdfIndexTableName}",
              "Item": {
                "key": { "S.$": "$.key" },
                "pages": { "N.$": "States.Format('{}', $.pages)" },
                "size_bytes": { "N.$": "States.Format('{}', $.size_bytes)" }
              }
            },
            "Retry": [
              {
                "ErrorEquals": [
                  "DynamoDB.ProvisionedThroughputExceededException",
                  "DynamoDB.RequestLimitExceededException",
                  "DynamoDB.InternalServerErrorException",
                  "DynamoDB.SdkClientException"
                ],
                "IntervalSeconds": 1,
                "MaxAttempts": 3,
                "BackoffRate": 2.0
              }
            ],
            "End": true
          }
        }
      },
      "End": true
    }
  }
}
|
||||
Reference in New Issue
Block a user