chunker ui redo

2026-03-15 16:03:53 -03:00
parent d5a3372d6b
commit b40bd68411
62 changed files with 5460 additions and 1493 deletions
--- a/core/api/chunker_sse.py
+++ b/core/api/chunker_sse.py
@@ -1,8 +1,10 @@
 """
 SSE endpoint for chunker pipeline events.

-Bridges gRPC StreamProgress to browser-native EventSource.
-GET /api/chunker/stream/{job_id} → text/event-stream
+Uses Redis as the event bus between Celery workers and the SSE stream.
+Celery workers push events via core.events; the SSE endpoint polls for them.
+
+GET /chunker/stream/{job_id} → text/event-stream
 """

 import asyncio
@@ -14,46 +16,39 @@ from typing import AsyncGenerator
 from fastapi import APIRouter
 from starlette.responses import StreamingResponse

+from core.events import poll_events
+
 logger = logging.getLogger(__name__)

-router = APIRouter(prefix="/api/chunker", tags=["chunker"])
+router = APIRouter(prefix="/chunker", tags=["chunker"])


 async def _event_generator(job_id: str) -> AsyncGenerator[str, None]:
    """
-    Generate SSE events by polling gRPC job state.
-
-    Yields server-sent events in the format:
-        event: <event_type>
-        data: <json_payload>
+    Generate SSE events by polling Redis for chunk job events.
    """
-    from core.rpc.server import _active_jobs
-
-    last_state = None
+    cursor = 0
    timeout = time.monotonic() + 600  # 10 min max

    while time.monotonic() < timeout:
-        job_state = _active_jobs.get(job_id)
+        events, cursor = poll_events(job_id, cursor)

-        if job_state is None:
-            # Job not found yet — may not have started
+        if not events:
            yield f"event: waiting\ndata: {json.dumps({'job_id': job_id})}\n\n"
-            await asyncio.sleep(0.5)
+            await asyncio.sleep(0.1)
            continue

-        # Only send if state changed
-        if job_state != last_state:
-            last_state = dict(job_state)
-            event_type = job_state.get("status", "update")
+        for data in events:
+            event_type = data.pop("event", "update")
+            payload = {**data, "job_id": job_id}

-            yield f"event: {event_type}\ndata: {json.dumps({**job_state, 'job_id': job_id})}\n\n"
+            yield f"event: {event_type}\ndata: {json.dumps(payload)}\n\n"

-            # End stream when job is terminal
-            if event_type in ("completed", "failed", "cancelled"):
+            if event_type in ("pipeline_complete", "pipeline_error", "cancelled"):
                yield f"event: done\ndata: {json.dumps({'job_id': job_id})}\n\n"
-                break
+                return

-        await asyncio.sleep(0.2)
+        await asyncio.sleep(0.05)

    yield f"event: timeout\ndata: {json.dumps({'job_id': job_id})}\n\n"

--- a/core/api/graphql.py
+++ b/core/api/graphql.py
@@ -15,7 +15,9 @@ from strawberry.schema.config import StrawberryConfig
 from strawberry.types import Info

 from core.api.schema.graphql import (
+    CancelResultType,
    ChunkJobType,
+    ChunkOutputFileType,
    CreateChunkJobInput,
    CreateJobInput,
    DeleteResultType,
@@ -26,7 +28,7 @@ from core.api.schema.graphql import (
    TranscodePresetType,
    UpdateAssetInput,
 )
-from core.storage import BUCKET_IN, list_objects
+from core.storage import BUCKET_IN, list_objects, upload_file

 VIDEO_EXTS = {".mp4", ".mkv", ".avi", ".mov", ".webm", ".flv", ".wmv", ".m4v"}
 AUDIO_EXTS = {".mp3", ".wav", ".flac", ".aac", ".ogg", ".m4a"}
@@ -90,6 +92,25 @@ class Query:
    def system_status(self, info: Info) -> SystemStatusType:
        return SystemStatusType(status="ok", version="0.1.0")

+    @strawberry.field
+    def chunk_output_files(self, info: Info, job_id: str) -> List[ChunkOutputFileType]:
+        """List chunk files under MEDIA_OUT_DIR/chunks/<job_id>/ (default /app/media/out); empty if the directory is missing."""
+        from pathlib import Path
+
+        media_out = os.environ.get("MEDIA_OUT_DIR", "/app/media/out")
+        output_dir = Path(media_out) / "chunks" / job_id
+        if not output_dir.is_dir():
+            return []
+        return [
+            ChunkOutputFileType(
+                key=f.name,
+                size=f.stat().st_size,
+                url=f"/media/out/chunks/{job_id}/{f.name}",
+            )
+            for f in sorted(output_dir.iterdir())
+            if f.is_file()
+        ]
+

 # ---------------------------------------------------------------------------
 # Mutations
@@ -100,8 +121,26 @@ class Query:
 class Mutation:
    @strawberry.mutation
    def scan_media_folder(self, info: Info) -> ScanResultType:
+        import logging
+        from pathlib import Path
+
        from core.db import create_asset, get_asset_filenames

+        logger = logging.getLogger(__name__)
+
+        # Sync local media/in/ files to MinIO (handles fresh installs / pruned volumes)
+        local_media = Path("/app/media/in")
+        if local_media.is_dir():
+            existing_keys = {o["key"] for o in list_objects(BUCKET_IN)}
+            for f in local_media.iterdir():
+                if f.is_file() and f.suffix.lower() in MEDIA_EXTS:
+                    if f.name not in existing_keys:
+                        try:
+                            upload_file(str(f), BUCKET_IN, f.name)
+                            logger.info("Uploaded %s to MinIO", f.name)
+                        except Exception as e:
+                            logger.warning("Failed to upload %s: %s", f.name, e)
+
        objects = list_objects(BUCKET_IN, extensions=MEDIA_EXTS)
        existing = get_asset_filenames()

@@ -284,6 +323,8 @@ class Mutation:
            "num_workers": input.num_workers,
            "max_retries": input.max_retries,
            "processor_type": input.processor_type,
+            "start_time": input.start_time,
+            "end_time": input.end_time,
        }

        executor_mode = os.environ.get("MPR_EXECUTOR", "local")
@@ -320,6 +361,17 @@ class Mutation:
            celery_task_id=celery_task_id,
        )

+    @strawberry.mutation
+    def cancel_chunk_job(self, info: Info, celery_task_id: str) -> CancelResultType:
+        """Cancel a running chunk job by revoking its Celery task."""
+        try:
+            from admin.mpr.celery import app as celery_app
+
+            celery_app.control.revoke(celery_task_id, terminate=True, signal="SIGTERM")
+            return CancelResultType(ok=True, message="Task revoked")
+        except Exception as e:
+            return CancelResultType(ok=False, message=str(e))
+

 # ---------------------------------------------------------------------------
 # Schema
--- a/core/api/schema/graphql.py
+++ b/core/api/schema/graphql.py
@@ -37,7 +37,7 @@ class MediaAssetType:
    file_path: Optional[str] = None
    status: Optional[str] = None
    error_message: Optional[str] = None
-    file_size: Optional[int] = None
+    file_size: Optional[float] = None
    duration: Optional[float] = None
    video_codec: Optional[str] = None
    audio_codec: Optional[str] = None
@@ -205,3 +205,22 @@ class CreateChunkJobInput:
    max_retries: int = 3
    processor_type: str = "ffmpeg"
    priority: int = 0
+    start_time: Optional[float] = None
+    end_time: Optional[float] = None
+
+
+@strawberry.type
+class CancelResultType:
+    """Result of cancelling a chunk job."""
+
+    ok: bool = False
+    message: Optional[str] = None
+
+
+@strawberry.type
+class ChunkOutputFileType:
+    """A chunk output file: name (key), size in bytes, and URL path, as populated by the chunk_output_files resolver from local media/out/chunks/<job_id>/."""
+
+    key: str
+    size: int = 0
+    url: str = ""