metadata pipeline via Step Functions

This commit is contained in:
2026-05-18 07:59:13 -03:00
parent d3008676e0
commit e297f97e18
6 changed files with 280 additions and 0 deletions

View File

@@ -56,6 +56,114 @@ Resources:
AttributeName: ttl
Enabled: true
# DynamoDB table backing the PDF index; its name is derived from the stack name.
PdfIndexTable:
  Type: AWS::DynamoDB::Table
  Properties:
    # On-demand billing: no provisioned throughput is declared for this table.
    BillingMode: PAY_PER_REQUEST
    TableName:
      Fn::Sub: "${AWS::StackName}-pdf-index"
    # Primary key is a single string-typed partition key named "key".
    KeySchema:
      - KeyType: HASH
        AttributeName: key
    AttributeDefinitions:
      - AttributeType: S
        AttributeName: key
# Explicitly managed log group referenced by ExtractMetadataFunction's
# LoggingConfig; log events are retained for 7 days.
ExtractMetadataLogGroup:
  Type: AWS::Logs::LogGroup
  Properties:
    RetentionInDays: 7
    LogGroupName: "/aws/lambda/eth-demo-extract-metadata"
# Lambda function packaged from functions/extract_metadata/. It receives the
# reports bucket name through BUCKET_NAME and is allowed to read objects from
# that bucket.
ExtractMetadataFunction:
  Type: AWS::Serverless::Function
  Properties:
    FunctionName: eth-demo-extract-metadata
    Handler: handler.handler
    CodeUri: functions/extract_metadata/
    MemorySize: 512
    Timeout: 60
    Environment:
      Variables:
        BUCKET_NAME:
          Ref: ReportsBucket
    # JSON-formatted logs are sent to the log group declared in this template.
    LoggingConfig:
      LogGroup:
        Ref: ExtractMetadataLogGroup
      LogFormat: JSON
    Policies:
      - Statement:
          - Sid: ReadPdf
            Effect: Allow
            Action: "s3:GetObject"
            # Object-level ARN pattern: covers every key in ReportsBucket.
            Resource:
              Fn::Sub: "${ReportsBucket.Arn}/*"
# Explicitly managed log group referenced by ListPdfsFunction's LoggingConfig;
# log events are retained for 7 days.
ListPdfsLogGroup:
  Type: AWS::Logs::LogGroup
  Properties:
    RetentionInDays: 7
    LogGroupName: "/aws/lambda/eth-demo-list-pdfs"
# Lambda function packaged from functions/list_pdfs/. It receives the reports
# bucket name and the Prefix value through environment variables and is
# allowed to list the bucket.
ListPdfsFunction:
  Type: AWS::Serverless::Function
  Properties:
    FunctionName: eth-demo-list-pdfs
    Handler: handler.handler
    CodeUri: functions/list_pdfs/
    Environment:
      Variables:
        PREFIX:
          Ref: Prefix
        BUCKET_NAME:
          Ref: ReportsBucket
    # JSON-formatted logs are sent to the log group declared in this template.
    LoggingConfig:
      LogGroup:
        Ref: ListPdfsLogGroup
      LogFormat: JSON
    Policies:
      - Statement:
          - Sid: ListReportsBucket
            Effect: Allow
            Action: "s3:ListBucket"
            # ListBucket is a bucket-level action, so the bucket ARN itself
            # (without an object suffix) is the resource.
            Resource:
              Fn::GetAtt: [ReportsBucket, Arn]
# Log group used as the logging destination of PdfIndexStateMachine; log
# events are retained for 7 days.
PdfIndexStateMachineLogGroup:
  Type: AWS::Logs::LogGroup
  Properties:
    RetentionInDays: 7
    LogGroupName:
      Fn::Sub: "/aws/vendedlogs/states/${AWS::StackName}-pdf-index"
# Step Functions state machine for the PDF index pipeline. The ASL definition
# lives in a separate file; the two Lambda ARNs and the table name are injected
# into it through DefinitionSubstitutions.
PdfIndexStateMachine:
  Type: AWS::Serverless::StateMachine
  Properties:
    Name:
      Fn::Sub: "${AWS::StackName}-pdf-index"
    DefinitionUri: statemachines/pdf-index/definition.asl.json
    DefinitionSubstitutions:
      PdfIndexTableName:
        Ref: PdfIndexTable
      ListPdfsFunctionArn:
        Fn::GetAtt: [ListPdfsFunction, Arn]
      ExtractMetadataFunctionArn:
        Fn::GetAtt: [ExtractMetadataFunction, Arn]
    # Log every event, including execution input/output data, to the
    # dedicated CloudWatch Logs group.
    Logging:
      Level: ALL
      IncludeExecutionData: true
      Destinations:
        - CloudWatchLogsLogGroup:
            LogGroupArn:
              Fn::GetAtt: [PdfIndexStateMachineLogGroup, Arn]
    Policies:
      - LambdaInvokePolicy:
          FunctionName:
            Ref: ListPdfsFunction
      - LambdaInvokePolicy:
          FunctionName:
            Ref: ExtractMetadataFunction
      - DynamoDBWritePolicy:
          TableName:
            Ref: PdfIndexTable
      # Step Functions logging needs the CloudWatch Logs log-delivery API
      # permissions granted at account scope (Resource "*"). SAM does not
      # add them automatically, even when Logging is configured.
      - Statement:
          - Effect: Allow
            Resource: "*"
            Action:
              - "logs:CreateLogDelivery"
              - "logs:GetLogDelivery"
              - "logs:UpdateLogDelivery"
              - "logs:DeleteLogDelivery"
              - "logs:ListLogDeliveries"
              - "logs:PutResourcePolicy"
              - "logs:DescribeResourcePolicies"
              - "logs:DescribeLogGroups"
SignPdfsFunction:
Type: AWS::Serverless::Function
Properties: