fix(media): §3 ship-readiness — stt preload + healthcheck + queue enum + dashboard queue_lag

stt:
- services/stt/server.py: lazy → eager preload in FastAPI lifespan.
  STT_PRELOAD=0 으로 lazy 강제 가능 (개발/테스트). preload 실패해도
  프로세스는 살아 있고 /ready false 로 남아 healthcheck 가 unhealthy 처리.
- docker-compose.yml: healthcheck /health → /ready. /health 는 단순
  liveness 라 모델 미적재 상태도 healthy 로 잡혀 운영 신호 부적합.

queue ORM:
- app/models/queue.py: process_stage enum 에 'stt'/'thumbnail' 추가 +
  create_type=False (migration 150/151 가 DB enum 확장 담당). 이게
  없으면 stt_worker INSERT 시 SQLAlchemy 가 enum value 를 거부.

dashboard 강화 (§4 선제, §3 신규 stage 까지 자동 커버):
- app/api/dashboard.py: category_counts + library_pending_suggestions +
  queue_lag (stage 별 pending/processing/failed + oldest_pending_age_sec).
- frontend/src/lib/stores/system.ts: QueueLag 타입 + DashboardSummary 확장.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-24 07:04:52 +09:00
parent 8f25d396df
commit cec464ae2d
5 changed files with 118 additions and 4 deletions
+71
View File
@@ -35,6 +35,20 @@ class PipelineStatus(BaseModel):
count: int
class QueueLag(BaseModel):
    """Per-stage processing lag of the pipeline, intended for the operations card.

    pipeline_status is a 24h cumulative statistic, so it is not sufficient as
    a signal of the current backlog. queue_lag reports the current
    pending/processing/failed counts plus the age of the oldest pending item
    to show "is anything stuck right now".
    """

    # Name of the pipeline stage this row describes.
    stage: str
    # Current number of queue items in each status for this stage.
    pending: int
    processing: int
    failed: int
    # Seconds elapsed since created_at of the oldest pending item; nullable.
    oldest_pending_age_sec: int | None
class DashboardResponse(BaseModel):
today_added: int
today_by_domain: list[DomainCount]
@@ -48,6 +62,10 @@ class DashboardResponse(BaseModel):
documents_count: int = 0
memos_count: int = 0
news_count: int = 0
# §4 — category 기반 카드 + 승인 pending + queue lag
category_counts: dict[str, int] = {}
library_pending_suggestions: int = 0
queue_lag: list[QueueLag] = []
@router.get("/", response_model=DashboardResponse)
@@ -130,6 +148,56 @@ async def get_dashboard(
memos_count = counts[2]
news_count = counts[3]
# §4 — 카테고리별 count (§1 documents.category enum)
cat_result = await session.execute(
text("""
SELECT category, COUNT(*)
FROM documents
WHERE deleted_at IS NULL AND category IS NOT NULL
GROUP BY category
""")
)
category_counts = {row[0]: row[1] for row in cat_result.all()}
# §4 — 승인 대기 (library 제안)
pending_result = await session.execute(
text("""
SELECT COUNT(*)
FROM documents
WHERE deleted_at IS NULL
AND ai_suggestion IS NOT NULL
AND ai_suggestion->>'proposed_category' = 'library'
""")
)
library_pending_suggestions = pending_result.scalar() or 0
# §4 — queue lag (현재 시점 stage 별 적체 신호)
# extract/classify/embed 외에 stt/thumbnail (§3) 도 자동 포함.
lag_result = await session.execute(
text("""
SELECT
stage,
COUNT(*) FILTER (WHERE status='pending') AS pending,
COUNT(*) FILTER (WHERE status='processing') AS processing,
COUNT(*) FILTER (WHERE status='failed') AS failed,
EXTRACT(EPOCH FROM (NOW() - MIN(created_at) FILTER (WHERE status='pending')))::int
AS oldest_pending_age_sec
FROM processing_queue
GROUP BY stage
ORDER BY stage
""")
)
queue_lag = [
QueueLag(
stage=row[0],
pending=row[1] or 0,
processing=row[2] or 0,
failed=row[3] or 0,
oldest_pending_age_sec=row[4],
)
for row in lag_result.all()
]
return DashboardResponse(
today_added=today_added,
today_by_domain=[
@@ -156,4 +224,7 @@ async def get_dashboard(
documents_count=documents_count,
memos_count=memos_count,
news_count=news_count,
category_counts=category_counts,
library_pending_suggestions=library_pending_suggestions,
queue_lag=queue_lag,
)
+9 -1
View File
@@ -16,7 +16,15 @@ class ProcessingQueue(Base):
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
document_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("documents.id"), nullable=False)
stage: Mapped[str] = mapped_column(
Enum("extract", "classify", "summarize", "embed", "chunk", "preview", name="process_stage"), nullable=False
# 'stt' (audio): migration 150 / 'thumbnail' (video): queue_consumer 가 enqueue.
# DB enum 변경은 마이그레이션이 처리하므로 create_type=False.
Enum(
"extract", "classify", "summarize", "embed", "chunk", "preview",
"stt", "thumbnail",
name="process_stage",
create_type=False,
),
nullable=False,
)
status: Mapped[str] = mapped_column(
Enum("pending", "processing", "completed", "failed", name="process_status"),
+3 -1
View File
@@ -73,7 +73,9 @@ services:
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "python3", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3300/health')"]
# /ready: CUDA 디바이스 + 모델 적재 둘 다 확인. ready=true 만 healthy 처리.
# /health 는 단순 liveness 라 모델 미적재 상태도 healthy 로 잡혀 운영 신호로 부적합.
test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('ready') else 1)"]
interval: 30s
timeout: 10s
retries: 3
+12
View File
@@ -26,6 +26,14 @@ export interface PipelineStatus {
count: number;
}
/**
 * Backlog figures for a single pipeline stage, as delivered in the
 * `queue_lag` list of the dashboard summary.
 */
export interface QueueLag {
  // Pipeline stage name.
  stage: string;
  // Number of queue items currently in each status for this stage.
  pending: number;
  processing: number;
  failed: number;
  // Age in seconds of the oldest pending item; may be null.
  oldest_pending_age_sec: number | null;
}
export interface DashboardSummary {
today_added: number;
today_by_domain: DomainCount[];
@@ -38,6 +46,10 @@ export interface DashboardSummary {
documents_count: number;
memos_count: number;
news_count: number;
// §4 — category 카드 + 승인 pending + queue lag
category_counts: Record<string, number>;
library_pending_suggestions: number;
queue_lag: QueueLag[];
}
const POLL_INTERVAL_MS = 60_000;
+23 -2
View File
@@ -1,16 +1,37 @@
"""STT 마이크로서비스 — faster-whisper (GPU) 기반 음성 전사.
filePath → {text, segments:[{start,end,text}]}. 모델은 첫 요청 시 lazy loading.
filePath → {text, segments:[{start,end,text}]}.
모델은 startup 에서 eager preload (Docker /ready healthcheck 가 모델 적재까지 검증).
기본 모델 large-v3 (VRAM ~3GB, float16). 환경변수로 교체 가능.
환경변수 `STT_PRELOAD=0` 으로 lazy 로 강제 가능 (개발/테스트용).
"""
import logging
import os
import unicodedata
from contextlib import asynccontextmanager
from pathlib import Path
from fastapi import FastAPI
app = FastAPI()
logger = logging.getLogger("stt")
@asynccontextmanager
async def lifespan(_app: FastAPI):
    """Application lifespan hook: optionally preload the STT model at startup.

    Preloading is attempted unless the ``STT_PRELOAD`` environment variable
    is exactly ``"0"`` (an unset variable counts as enabled). A failed
    preload is logged with its traceback and swallowed, so the process keeps
    running. No shutdown work is performed after the yield.
    """
    preload_enabled = os.getenv("STT_PRELOAD", "1") != "0"

    if preload_enabled:
        try:
            _load_model()
            logger.info("stt model preloaded: %s (%s, %s)", _MODEL_NAME, _DEVICE, _COMPUTE_TYPE)
        except Exception as exc:
            # Deliberate best-effort: keep the process alive on failure.
            # NOTE(review): the original comment says /ready then stays false
            # so the container healthcheck reports unhealthy — confirm against
            # the /ready handler, which is not visible here.
            logger.exception("stt model preload failed: %s", exc)

    # Hand control to the running application.
    yield
app = FastAPI(lifespan=lifespan)
_model = None
_MODEL_NAME = os.getenv("WHISPER_MODEL", "large-v3")