diff --git a/app/api/dashboard.py b/app/api/dashboard.py
index 8263b7c..94bdbc8 100644
--- a/app/api/dashboard.py
+++ b/app/api/dashboard.py
@@ -35,6 +35,20 @@ class PipelineStatus(BaseModel):
     count: int
 
 
+class QueueLag(BaseModel):
+    """Per-stage pipeline processing lag, for the operations card.
+
+    pipeline_status is a 24h cumulative statistic, so it is not enough as a signal of the current backlog.
+    queue_lag reports the current pending/processing/failed counts plus the oldest pending age
+    to show whether anything is stuck right now.
+    """
+    stage: str
+    pending: int
+    processing: int
+    failed: int
+    oldest_pending_age_sec: int | None  # seconds elapsed since created_at of the oldest pending row
+
+
 class DashboardResponse(BaseModel):
     today_added: int
     today_by_domain: list[DomainCount]
@@ -48,6 +62,10 @@ class DashboardResponse(BaseModel):
     documents_count: int = 0
     memos_count: int = 0
     news_count: int = 0
+    # §4 — category-based cards + pending approvals + queue lag
+    category_counts: dict[str, int] = {}
+    library_pending_suggestions: int = 0
+    queue_lag: list[QueueLag] = []
 
 
 @router.get("/", response_model=DashboardResponse)
@@ -130,6 +148,56 @@ async def get_dashboard(
     memos_count = counts[2]
     news_count = counts[3]
 
+    # §4 — per-category counts (§1 documents.category enum)
+    cat_result = await session.execute(
+        text("""
+            SELECT category, COUNT(*)
+            FROM documents
+            WHERE deleted_at IS NULL AND category IS NOT NULL
+            GROUP BY category
+        """)
+    )
+    category_counts = {row[0]: row[1] for row in cat_result.all()}
+
+    # §4 — awaiting approval (library suggestions)
+    pending_result = await session.execute(
+        text("""
+            SELECT COUNT(*)
+            FROM documents
+            WHERE deleted_at IS NULL
+              AND ai_suggestion IS NOT NULL
+              AND ai_suggestion->>'proposed_category' = 'library'
+        """)
+    )
+    library_pending_suggestions = pending_result.scalar() or 0
+
+    # §4 — queue lag (current per-stage backlog signal)
+    # Besides extract/classify/embed, stt/thumbnail (§3) are included automatically.
+    lag_result = await session.execute(
+        text("""
+            SELECT
+                stage,
+                COUNT(*) FILTER (WHERE status='pending') AS pending,
+                COUNT(*) FILTER (WHERE status='processing') AS processing,
+                COUNT(*) FILTER (WHERE status='failed') AS failed,
+                EXTRACT(EPOCH FROM (NOW() - MIN(created_at) FILTER (WHERE status='pending')))::int
+                    AS oldest_pending_age_sec
+            FROM processing_queue
+            GROUP BY stage
+            ORDER BY stage
+        """)
+    )
+    queue_lag = [
+        QueueLag(
+            stage=row[0],
+            pending=row[1] or 0,
+            processing=row[2] or 0,
+            failed=row[3] or 0,
+            oldest_pending_age_sec=row[4],
+        )
+        for row in lag_result.all()
+    ]
+
     return DashboardResponse(
         today_added=today_added,
         today_by_domain=[
@@ -156,4 +224,7 @@ async def get_dashboard(
         documents_count=documents_count,
         memos_count=memos_count,
         news_count=news_count,
+        category_counts=category_counts,
+        library_pending_suggestions=library_pending_suggestions,
+        queue_lag=queue_lag,
     )
diff --git a/app/models/queue.py b/app/models/queue.py
index 0110f65..9fcd94d 100644
--- a/app/models/queue.py
+++ b/app/models/queue.py
@@ -16,7 +16,15 @@ class ProcessingQueue(Base):
     id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
     document_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("documents.id"), nullable=False)
     stage: Mapped[str] = mapped_column(
-        Enum("extract", "classify", "summarize", "embed", "chunk", "preview", name="process_stage"), nullable=False
+        # 'stt' (audio): migration 150 / 'thumbnail' (video): enqueued by queue_consumer.
+        # DB enum changes are handled by migrations, hence create_type=False.
+        Enum(
+            "extract", "classify", "summarize", "embed", "chunk", "preview",
+            "stt", "thumbnail",
+            name="process_stage",
+            create_type=False,
+        ),
+        nullable=False,
     )
     status: Mapped[str] = mapped_column(
         Enum("pending", "processing", "completed", "failed", name="process_status"),
diff --git a/docker-compose.yml b/docker-compose.yml
index 91904ab..d78b833 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -73,7 +73,9 @@ services:
               count: 1
               capabilities: [gpu]
     healthcheck:
-      test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3300/health')"]
+      # /ready: checks both the CUDA device and model loading. Only ready=true counts as healthy.
+      # /health is a plain liveness probe, so it reports healthy even when the model is not loaded; unsuitable as an operational signal.
+      test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
       interval: 30s
       timeout: 10s
       retries: 3
diff --git a/frontend/src/lib/stores/system.ts b/frontend/src/lib/stores/system.ts
index 3cfbb5b..d442de4 100644
--- a/frontend/src/lib/stores/system.ts
+++ b/frontend/src/lib/stores/system.ts
@@ -26,6 +26,14 @@ export interface PipelineStatus {
   count: number;
 }
 
+export interface QueueLag {
+  stage: string;
+  pending: number;
+  processing: number;
+  failed: number;
+  oldest_pending_age_sec: number | null;
+}
+
 export interface DashboardSummary {
   today_added: number;
   today_by_domain: DomainCount[];
@@ -38,6 +46,10 @@ export interface DashboardSummary {
   documents_count: number;
   memos_count: number;
   news_count: number;
+  // §4 — category cards + pending approvals + queue lag
+  category_counts: Record<string, number>;
+  library_pending_suggestions: number;
+  queue_lag: QueueLag[];
 }
 
 const POLL_INTERVAL_MS = 60_000;
diff --git a/services/stt/server.py b/services/stt/server.py
index 6258b34..decc778 100644
--- a/services/stt/server.py
+++ b/services/stt/server.py
@@ -1,16 +1,37 @@
 """STT 마이크로서비스 — faster-whisper (GPU) 기반 음성 전사.
 
-filePath → {text, segments:[{start,end,text}]}. 모델은 첫 요청 시 lazy loading.
+filePath → {text, segments:[{start,end,text}]}.
+The model is eagerly preloaded at startup (the Docker /ready healthcheck also verifies model loading).
 기본 모델 large-v3 (VRAM ~3GB, float16). 환경변수로 교체 가능.
+
+Set the environment variable `STT_PRELOAD=0` to force lazy loading (for development/testing).
 """
 
+import logging
 import os
 import unicodedata
+from contextlib import asynccontextmanager
 from pathlib import Path
 
 from fastapi import FastAPI
 
-app = FastAPI()
+logger = logging.getLogger("stt")
+
+
+@asynccontextmanager
+async def lifespan(_app: FastAPI):
+    # startup: try to eagerly preload the model. On failure the process stays alive and
+    # /ready stays false, so the healthcheck marks the container unhealthy.
+    if os.getenv("STT_PRELOAD", "1") != "0":
+        try:
+            _load_model()
+            logger.info("stt model preloaded: %s (%s, %s)", _MODEL_NAME, _DEVICE, _COMPUTE_TYPE)
+        except Exception as e:
+            logger.exception("stt model preload failed: %s", e)
+    yield
+
+
+app = FastAPI(lifespan=lifespan)
 
 _model = None
 _MODEL_NAME = os.getenv("WHISPER_MODEL", "large-v3")