diff --git a/app/api/news.py b/app/api/news.py index 199e249..f975062 100644 --- a/app/api/news.py +++ b/app/api/news.py @@ -65,7 +65,8 @@ async def create_source( ): from core.url_validator import validate_feed_url try: - validate_feed_url(body.feed_url) + # getaddrinfo(DNS) 는 blocking — 이벤트 루프 점유 방지 위해 off-thread (R5) + await asyncio.to_thread(validate_feed_url, body.feed_url) except ValueError as e: raise HTTPException(status_code=422, detail=f"feed_url 검증 실패: {e}") source = NewsSource(**body.model_dump()) diff --git a/app/services/storage/local.py b/app/services/storage/local.py index 1dcaec5..f3f3e94 100644 --- a/app/services/storage/local.py +++ b/app/services/storage/local.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import os from collections.abc import AsyncIterator from pathlib import Path @@ -42,7 +43,7 @@ class LocalBackend(StorageBackend): to_read = _STREAM_CHUNK if remaining is None else min(_STREAM_CHUNK, remaining) if to_read <= 0: break - data = f.read(to_read) + data = await asyncio.to_thread(f.read, to_read) if not data: break yield data diff --git a/app/workers/file_watcher.py b/app/workers/file_watcher.py index f1987cc..e94c0fb 100644 --- a/app/workers/file_watcher.py +++ b/app/workers/file_watcher.py @@ -17,6 +17,7 @@ Web/Blog ingest (devonagent 트랙, plan db-snuggly-petal.md): - sidecar (.json) 누락 시: skip 안 하고 ingest, web_meta.sidecar_missing=true """ +import asyncio import hashlib import json from pathlib import Path @@ -246,7 +247,8 @@ async def watch_inbox(): async with async_session() as session: # ─── Web/ 트랙 (devonagent) — DEVONthink Smart Rule 이 떨군 .html 만 진입 ─── if web_root.exists(): - for file_path in web_root.rglob("*.html"): + # rglob NFS 디렉토리 walk(blocking stat 다발)를 off-thread 로 수집 (R5). + for file_path in await asyncio.to_thread(lambda: list(web_root.rglob("*.html"))): if not file_path.is_file() or should_skip(file_path): continue rel_path = str(file_path.relative_to(nas_root)) @@ -264,7 +266,8 @@ async def watch_inbox(): Path(sub).name, (None, None, None) ) - for file_path in scan_root.rglob("*"): + # NFS 디렉토리 walk(blocking) off-thread 수집 (R5). + for file_path in await asyncio.to_thread(lambda: list(scan_root.rglob("*"))): if not file_path.is_file() or should_skip(file_path): continue @@ -278,7 +281,11 @@ async def watch_inbox(): continue rel_path = str(file_path.relative_to(nas_root)) - fhash = file_hash(file_path) + # GB 파일 SHA-256 은 이벤트 루프를 점유 → 같은 루프의 모든 1분 주기 consumer + # + FastAPI 요청이 수십초~분 동시 정지. to_thread 오프로드. 스캔 루프가 이미 + # 순차라 file_hash 는 한 번에 하나만 실행(직렬화) — 병렬 해싱 X = NFS 2.5GbE + # 대역폭·버퍼 메모리 blowup 방지 (R5). + fhash = await asyncio.to_thread(file_hash, file_path) result = await session.execute( select(Document).where(Document.file_path == rel_path)