chunker and ui

This commit is contained in:
2026-03-13 14:29:38 -03:00
parent 3eeedebb15
commit ccc478fbaa
69 changed files with 6481 additions and 282 deletions

78
core/api/chunker_sse.py Normal file
View File

@@ -0,0 +1,78 @@
"""
SSE endpoint for chunker pipeline events.
Bridges gRPC StreamProgress to browser-native EventSource.
GET /api/chunker/stream/{job_id} → text/event-stream
"""
import asyncio
import json
import logging
import time
from typing import AsyncGenerator
from fastapi import APIRouter
from starlette.responses import StreamingResponse
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router for the chunker endpoints; every route declared on it is served
# under the /api/chunker prefix and tagged "chunker".
router = APIRouter(prefix="/api/chunker", tags=["chunker"])
async def _event_generator(job_id: str) -> AsyncGenerator[str, None]:
    """
    Generate SSE events by polling gRPC job state.

    Yields server-sent events in the format:
        event: <event_type>
        data: <json_payload>

    Event sequence:
        * ``waiting``  - repeated every 0.5 s while ``job_id`` is not present
          in ``_active_jobs``.
        * ``<status>`` - emitted whenever the job's state dict differs from the
          last one sent; the event name is the dict's ``"status"`` value
          (``"update"`` when the key is missing) and the payload is the state
          plus ``job_id``.
        * ``done``     - emitted right after a ``completed`` / ``failed`` /
          ``cancelled`` state, then the stream ends.
        * ``timeout``  - emitted only if 10 minutes pass without the job
          reaching a terminal status.

    Args:
        job_id: Key used to look the job up in ``core.rpc.server._active_jobs``.

    Yields:
        Fully formatted SSE frames (terminated by a blank line).
    """
    from core.rpc.server import _active_jobs

    def _sse(event: str, payload: dict) -> str:
        """Format one SSE frame with a JSON-encoded data line."""
        return f"event: {event}\ndata: {json.dumps(payload)}\n\n"

    terminal_statuses = ("completed", "failed", "cancelled")
    last_state = None
    deadline = time.monotonic() + 600  # 10 min max

    while time.monotonic() < deadline:
        job_state = _active_jobs.get(job_id)
        if job_state is None:
            # Job not found yet — may not have started
            yield _sse("waiting", {"job_id": job_id})
            await asyncio.sleep(0.5)
            continue

        # Take one snapshot and use it for the comparison, the event name and
        # the payload, so all three agree even if the shared dict is updated
        # while this coroutine is suspended at a yield.
        snapshot = dict(job_state)

        # Only send if state changed
        if snapshot != last_state:
            last_state = snapshot
            event_type = snapshot.get("status", "update")
            yield _sse(event_type, {**snapshot, "job_id": job_id})

            # End stream when job is terminal
            if event_type in terminal_statuses:
                yield _sse("done", {"job_id": job_id})
                # Return (not break): leaving the loop with break would fall
                # through to the timeout event below and tell the client that
                # a finished job had timed out.
                return

        await asyncio.sleep(0.2)

    # Reached only when the deadline expired without a terminal status.
    yield _sse("timeout", {"job_id": job_id})
@router.get("/stream/{job_id}")
async def stream_chunk_job(job_id: str):
    """
    Open a server-sent-events stream for one chunk pipeline job.

    The response body is produced by ``_event_generator(job_id)`` and served
    with the ``text/event-stream`` media type.

    Browser side, using the native EventSource API:
        const es = new EventSource('/api/chunker/stream/<job_id>');
        es.addEventListener('processing', (e) => { ... });
    """
    events = _event_generator(job_id)
    # NOTE(review): these headers presumably keep caches/proxies from holding
    # back the stream — confirm against the deployment's proxy configuration.
    sse_headers = {
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "X-Accel-Buffering": "no",
    }
    return StreamingResponse(
        events,
        media_type="text/event-stream",
        headers=sse_headers,
    )

View File

@@ -15,6 +15,8 @@ from strawberry.schema.config import StrawberryConfig
from strawberry.types import Info
from core.api.schema.graphql import (
ChunkJobType,
CreateChunkJobInput,
CreateJobInput,
DeleteResultType,
MediaAssetType,
@@ -172,30 +174,31 @@ class Mutation:
priority=input.priority or 0,
)
payload = {
"source_key": source.file_path,
"output_key": output_filename,
"preset": preset_snapshot or None,
"trim_start": input.trim_start,
"trim_end": input.trim_end,
"duration": source.duration,
}
executor_mode = os.environ.get("MPR_EXECUTOR", "local")
if executor_mode in ("lambda", "gcp"):
from core.task.executor import get_executor
from core.jobs.executor import get_executor
get_executor().run(
job_type="transcode",
job_id=str(job.id),
source_path=source.file_path,
output_path=output_filename,
preset=preset_snapshot or None,
trim_start=input.trim_start,
trim_end=input.trim_end,
duration=source.duration,
payload=payload,
)
else:
from core.task.tasks import run_transcode_job
from core.jobs.task import run_job
result = run_transcode_job.delay(
result = run_job.delay(
job_type="transcode",
job_id=str(job.id),
source_key=source.file_path,
output_key=output_filename,
preset=preset_snapshot or None,
trim_start=input.trim_start,
trim_end=input.trim_end,
duration=source.duration,
payload=payload,
)
job.celery_task_id = result.id
job.save(update_fields=["celery_task_id"])
@@ -261,6 +264,62 @@ class Mutation:
except Exception:
raise Exception("Asset not found")
@strawberry.mutation
def create_chunk_job(self, info: Info, input: CreateChunkJobInput) -> ChunkJobType:
    """
    Create a chunk pipeline job, dispatch it, and return its initial state.

    The source asset is looked up first; any failure there is reported as
    "Source asset not found". The job is then handed either to the executor
    returned by ``get_executor()`` (when ``MPR_EXECUTOR`` is "lambda" or
    "gcp") or to the ``run_job`` task via ``.delay``. Only the task path
    yields a ``celery_task_id``; otherwise it stays ``None``.

    The returned object echoes the input settings with status "pending" and
    progress 0.0.
    """
    import uuid

    from core.db import get_asset

    try:
        source = get_asset(input.source_asset_id)
    except Exception:
        raise Exception("Source asset not found")

    # Keep the UUID object for the response and its string form for dispatch.
    job_uuid = uuid.uuid4()
    dispatch_kwargs = {
        "job_type": "chunk",
        "job_id": str(job_uuid),
        "payload": {
            "source_key": source.file_path,
            "chunk_duration": input.chunk_duration,
            "num_workers": input.num_workers,
            "max_retries": input.max_retries,
            "processor_type": input.processor_type,
        },
    }

    celery_task_id = None
    if os.environ.get("MPR_EXECUTOR", "local") in ("lambda", "gcp"):
        from core.jobs.executor import get_executor

        get_executor().run(**dispatch_kwargs)
    else:
        from core.jobs.task import run_job

        celery_task_id = run_job.delay(**dispatch_kwargs).id

    return ChunkJobType(
        id=job_uuid,
        source_asset_id=input.source_asset_id,
        chunk_duration=input.chunk_duration,
        num_workers=input.num_workers,
        max_retries=input.max_retries,
        processor_type=input.processor_type,
        status="pending",
        progress=0.0,
        priority=input.priority,
        celery_task_id=celery_task_id,
    )
# ---------------------------------------------------------------------------
# Schema

View File

@@ -23,6 +23,7 @@ from fastapi import FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from strawberry.fastapi import GraphQLRouter
from core.api.chunker_sse import router as chunker_router
from core.api.graphql import schema as graphql_schema
# Taken from the CALLBACK_API_KEY environment variable; falls back to an
# empty string when the variable is not set.
CALLBACK_API_KEY = os.environ.get("CALLBACK_API_KEY", "")
@@ -48,6 +49,9 @@ app.add_middleware(
# GraphQL: wrap the Strawberry schema in a router (GraphiQL selected as the
# IDE) and mount it under /graphql.
graphql_router = GraphQLRouter(schema=graphql_schema, graphql_ide="graphiql")
app.include_router(graphql_router, prefix="/graphql")
# Chunker SSE: no prefix is passed here because the router imported from
# core.api.chunker_sse is created with its own "/api/chunker" prefix.
app.include_router(chunker_router)
@app.get("/")
def root():

View File

@@ -156,3 +156,52 @@ class WorkerStatusType:
active_jobs: Optional[int] = None
supported_codecs: Optional[List[str]] = None
gpu_available: Optional[bool] = None
# Status values for a chunk pipeline job, registered as a Strawberry enum.
# "completed", "failed" and "cancelled" are the values the SSE stream in
# core/api/chunker_sse.py treats as terminal.
# NOTE(review): ChunkJobType.status is declared as Optional[str], not as this
# enum — confirm whether that is intentional.
@strawberry.enum
class ChunkJobStatus(Enum):
PENDING = "pending"
CHUNKING = "chunking"
PROCESSING = "processing"
COLLECTING = "collecting"
COMPLETED = "completed"
FAILED = "failed"
CANCELLED = "cancelled"
# GraphQL output type for a chunk pipeline job. Every field is optional and
# defaults to None.
@strawberry.type
class ChunkJobType:
"""A chunk pipeline job."""
# Identity and settings; the create_chunk_job mutation fills these from the
# generated job id and the CreateChunkJobInput values.
id: Optional[UUID] = None
source_asset_id: Optional[UUID] = None
chunk_duration: Optional[float] = None
num_workers: Optional[int] = None
max_retries: Optional[int] = None
processor_type: Optional[str] = None
# create_chunk_job sets status="pending" and progress=0.0 on creation.
status: Optional[str] = None
progress: Optional[float] = None
# Runtime counters and metrics; create_chunk_job leaves these at None.
# NOTE(review): units are implied only by the field names (mbps, seconds) —
# confirm against whatever populates them.
total_chunks: Optional[int] = None
processed_chunks: Optional[int] = None
failed_chunks: Optional[int] = None
retry_count: Optional[int] = None
error_message: Optional[str] = None
throughput_mbps: Optional[float] = None
elapsed_seconds: Optional[float] = None
# Set by create_chunk_job only when the job is dispatched through
# run_job.delay; None on the executor ("lambda"/"gcp") path.
celery_task_id: Optional[str] = None
priority: Optional[int] = None
# Timestamps; create_chunk_job does not set them.
created_at: Optional[datetime] = None
started_at: Optional[datetime] = None
completed_at: Optional[datetime] = None
# GraphQL input type consumed by the create_chunk_job mutation.
@strawberry.input
class CreateChunkJobInput:
"""Request body for creating a chunk pipeline job."""
# Required: the only field without a default. Passed to get_asset() to
# resolve the source asset.
source_asset_id: UUID
# The next four values are forwarded unchanged in the dispatched job payload.
chunk_duration: float = 10.0
num_workers: int = 4
max_retries: int = 3
processor_type: str = "ffmpeg"
# Echoed back on the returned ChunkJobType; create_chunk_job does not include
# it in the dispatched payload.
priority: int = 0