From a08b6208949969a77a9654fc69c320908050e8de Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Sun, 17 May 2026 08:51:57 +0900 Subject: [PATCH] refactor(search): swap 10 call sites to acquire_mlx_gate(Priority.*) (B-1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DS-Mac-mini-26B-Priority-Gate-1 — API/search 모듈 7 + worker 3 = 10 site (우선순위 기준: FOREGROUND 6 + BACKGROUND 4) 의 `async with get_mlx_gate():` → `async with acquire_mlx_gate(Priority.*):` 교체. Foreground 6 (user-facing path): - app/services/search/evidence_service.py:315 (/ask evidence stage) - app/services/search/classifier_service.py:103 (/ask classifier stage) - app/services/search/synthesis_service.py:299 (/ask synthesis stage) - app/api/documents.py:1306 (수동 analyze API) - app/api/study_topics.py:1183 (subject note 동기 생성) - app/api/study_questions.py:1560 (study explanation 동기 API) Background 4 (worker queue / fire-and-forget): - app/services/search/query_analyzer.py:240 (V0 grep 확인: fire-and-forget only, search_pipeline.py:179 trigger_background_analysis 만, docstring rule "analyze() 동기 호출 금지" 부합 → BACKGROUND 확정) - app/workers/deep_summary_worker.py:110 (classify-escalate worker) - app/workers/study_explanation_worker.py:149 - app/workers/study_session_analysis_worker.py:237 Cleanup: - query_analyzer._get_llm_semaphore() 제거 — self-only, unused, signature 거짓말 (이제 get_mlx_gate 가 Semaphore 아닌 context manager 반환) 기존 get_mlx_gate() legacy wrapper 는 보존 (BACKGROUND 매핑). user-facing path 잔재 0 — closure gate grep 검증 통과 (별 commit 에서). 
--- app/api/documents.py | 4 ++-- app/api/study_questions.py | 4 ++-- app/api/study_topics.py | 4 ++-- app/services/search/classifier_service.py | 4 ++-- app/services/search/evidence_service.py | 4 ++-- app/services/search/query_analyzer.py | 20 ++++++-------------- app/services/search/synthesis_service.py | 4 ++-- app/workers/deep_summary_worker.py | 4 ++-- app/workers/study_explanation_worker.py | 4 ++-- app/workers/study_session_analysis_worker.py | 4 ++-- 10 files changed, 24 insertions(+), 32 deletions(-) diff --git a/app/api/documents.py b/app/api/documents.py index 261781e..441769b 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -38,7 +38,7 @@ from models.queue import ProcessingQueue, enqueue_stage from models.user import User from services.document_telemetry import record_analyze_event, sanitize_source from services.prompt_versions import ANALYZE_PROMPT_VERSION, resolve_primary_model -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate router = APIRouter() logger = logging.getLogger(__name__) @@ -1303,7 +1303,7 @@ async def analyze_document( ai_client = AIClient() raw: str | None = None try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(ANALYZE_TIMEOUT_S): raw = await ai_client._call_chat(ai_client.ai.primary, prompt) except asyncio.TimeoutError: diff --git a/app/api/study_questions.py b/app/api/study_questions.py index 8a83dec..ab5a4c6 100644 --- a/app/api/study_questions.py +++ b/app/api/study_questions.py @@ -30,7 +30,7 @@ from models.study_question_image import StudyQuestionImage from models.study_quiz_session import StudyQuizSession from models.study_topic import StudyTopic from models.user import User -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate from services.study.explanation_rag import ( EvidenceItem, gather_explanation_context, @@ -1557,7 
+1557,7 @@ async def generate_ai_explanation( raw_text: str | None = None error_message: str | None = None try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(LLM_TIMEOUT_S): raw_text = await ai_client.call_primary(prompt) except asyncio.TimeoutError: diff --git a/app/api/study_topics.py b/app/api/study_topics.py index b528329..89b8aef 100644 --- a/app/api/study_topics.py +++ b/app/api/study_topics.py @@ -41,7 +41,7 @@ from models.study_question_image import StudyQuestionImage from models.study_quiz_session import StudyQuizSession from models.study_topic_subject_note import StudyTopicSubjectNote from models.user import User -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate from services.study.subject_note_rag import ( SubjectNoteContext, gather_subject_note_context, @@ -1180,7 +1180,7 @@ async def generate_subject_note( ai_client = AIClient() raw_text: str | None = None try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(SUBJECT_NOTE_TIMEOUT_S): raw_text = await ai_client.call_primary(prompt) except asyncio.TimeoutError: diff --git a/app/services/search/classifier_service.py b/app/services/search/classifier_service.py index 23c55ee..626b5c9 100644 --- a/app/services/search/classifier_service.py +++ b/app/services/search/classifier_service.py @@ -20,7 +20,7 @@ from ai.client import AIClient, _load_prompt, parse_json_response from core.config import settings from core.utils import setup_logger -from .llm_gate import get_mlx_gate +from .llm_gate import Priority, acquire_mlx_gate logger = setup_logger("classifier") @@ -100,7 +100,7 @@ async def classify( # Gate 미사용 시 classifier + evidence + synthesis 가 동시에 single-inference # MLX 에 race → 거의 모두 timeout (실측: 8/10 fixture query). docstring 영구 룰: # "MLX primary 호출 경로는 예외 없이 gate 획득 필수". 
- async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(LLM_TIMEOUT_MS / 1000): raw = await client._request(settings.ai.classifier, prompt) _failure_count = 0 diff --git a/app/services/search/evidence_service.py b/app/services/search/evidence_service.py index 453b29c..71b50bd 100644 --- a/app/services/search/evidence_service.py +++ b/app/services/search/evidence_service.py @@ -57,7 +57,7 @@ from typing import TYPE_CHECKING from ai.client import AIClient, _load_prompt, parse_json_response from core.utils import setup_logger -from .llm_gate import get_mlx_gate +from .llm_gate import Priority, acquire_mlx_gate from .rerank_service import _extract_window @@ -312,7 +312,7 @@ async def extract_evidence( # 2026-05-17: PR #20 이후 triage/primary 동일 Mac mini 26B endpoint. gate 외부 실행이 docstring # 영구 룰 ("MLX primary 호출 경로는 예외 없이 gate 획득 필수") 위반 — race condition 으로 동시 # 호출 timeout 빈번. gate 안쪽으로 이동. - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(LLM_TIMEOUT_MS / 1000): raw = await ai_client.call_triage(prompt) except asyncio.TimeoutError: diff --git a/app/services/search/query_analyzer.py b/app/services/search/query_analyzer.py index 105383c..78a6f90 100644 --- a/app/services/search/query_analyzer.py +++ b/app/services/search/query_analyzer.py @@ -36,7 +36,7 @@ from ai.client import AIClient, _load_prompt, parse_json_response from core.config import settings from core.utils import setup_logger -from .llm_gate import get_mlx_gate +from .llm_gate import Priority, acquire_mlx_gate logger = setup_logger("query_analyzer") @@ -71,16 +71,6 @@ _PENDING: set[asyncio.Task[Any]] = set() _INFLIGHT: set[str] = set() -def _get_llm_semaphore() -> asyncio.Semaphore: - """MLX single-inference gate를 반환. Phase 3.1부터 llm_gate.get_mlx_gate() - 로 위임 — analyzer / evidence / synthesis 가 동일 semaphore 공유. - - `LLM_CONCURRENCY` 상수는 하위 호환/문서용으로 유지하되, 실제 bound는 - `llm_gate.MLX_CONCURRENCY` 가 담당한다. 
- """ - return get_mlx_gate() - - def _cache_key(query: str) -> str: raw = f"{query}|{PROMPT_VERSION}|{_model_version()}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() @@ -237,11 +227,13 @@ async def analyze(query: str, ai_client: AIClient | None = None) -> dict: client_owned = True t_start = time.perf_counter() - semaphore = _get_llm_semaphore() - # ⚠️ 중요: semaphore 대기는 timeout 포함되면 안됨 (대기만 해도 timeout 발동) + # 2026-05-17 B-1: query_analyzer 의 analyze() 는 fire-and-forget background only + # (search_pipeline.py:179 trigger_background_analysis 만 호출, docstring rule + # "analyze() 동기 호출 금지"). 따라서 Priority.BACKGROUND. + # ⚠️ 중요: gate 대기는 timeout 포함되면 안됨 (대기만 해도 timeout 발동) # timeout은 실제 LLM 호출 구간에만 적용. try: - async with semaphore: + async with acquire_mlx_gate(Priority.BACKGROUND): async with asyncio.timeout(LLM_TIMEOUT_MS / 1000): raw = await ai_client._call_chat( ai_client.ai.primary, diff --git a/app/services/search/synthesis_service.py b/app/services/search/synthesis_service.py index f0e5452..abb573e 100644 --- a/app/services/search/synthesis_service.py +++ b/app/services/search/synthesis_service.py @@ -31,7 +31,7 @@ from ai.client import AIClient, _load_prompt, parse_json_response from core.config import settings from core.utils import setup_logger -from .llm_gate import get_mlx_gate +from .llm_gate import Priority, acquire_mlx_gate if TYPE_CHECKING: from .evidence_service import EvidenceItem @@ -296,7 +296,7 @@ async def synthesize( llm_error: str | None = None try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.FOREGROUND): async with asyncio.timeout(LLM_TIMEOUT_MS / 1000): raw = await ai_client._call_chat(ai_client.ai.primary, prompt) except asyncio.TimeoutError: diff --git a/app/workers/deep_summary_worker.py b/app/workers/deep_summary_worker.py index 08ff909..d955e39 100644 --- a/app/workers/deep_summary_worker.py +++ b/app/workers/deep_summary_worker.py @@ -28,7 +28,7 @@ from models.document import Document from models.queue 
import ProcessingQueue from policy.prompt_render import render_26b, policy_version as compute_policy_version from services.document_telemetry import record_analyze_event -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate logger = setup_logger("deep_summary_worker") @@ -107,7 +107,7 @@ async def process(document_id: int, session: AsyncSession) -> None: try: start = time.perf_counter() - async with get_mlx_gate(): # primary(26B) 보호 Semaphore(1) + async with acquire_mlx_gate(Priority.BACKGROUND): # 2026-05-17 B-1: classify-escalate worker raw = await client.call_primary(prompt) latency_ms = int((time.perf_counter() - start) * 1000) except Exception as exc: diff --git a/app/workers/study_explanation_worker.py b/app/workers/study_explanation_worker.py index 2570f71..838494c 100644 --- a/app/workers/study_explanation_worker.py +++ b/app/workers/study_explanation_worker.py @@ -27,7 +27,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from ai.client import AIClient, parse_json_response from models.study_question import StudyQuestion from models.study_question_job import StudyQuestionJob -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate from services.study.explanation_rag import ( gather_explanation_context, render_evidence_block, @@ -146,7 +146,7 @@ async def run_explanation_job(session: AsyncSession, job: StudyQuestionJob) -> N ai_client = AIClient() try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.BACKGROUND): # 2026-05-17 B-1 async with asyncio.timeout(LLM_TIMEOUT_S): raw_text = await ai_client.call_primary(prompt) primary_name = ( diff --git a/app/workers/study_session_analysis_worker.py b/app/workers/study_session_analysis_worker.py index c073812..fc14937 100644 --- a/app/workers/study_session_analysis_worker.py +++ b/app/workers/study_session_analysis_worker.py @@ -32,7 +32,7 @@ from models.study_question 
import StudyQuestion, StudyQuestionAttempt from models.study_quiz_session import StudyQuizSession from models.study_quiz_session_analysis import StudyQuizSessionAnalysis from models.study_quiz_session_job import StudyQuizSessionJob -from services.search.llm_gate import get_mlx_gate +from services.search.llm_gate import Priority, acquire_mlx_gate from services.study.session_summary_guard import ( GUARD_PATTERN, calibrate_confidence, @@ -234,7 +234,7 @@ async def run_session_analysis_job(session: AsyncSession, job: StudyQuizSessionJ prompt = _render_session_summary_prompt(qs, prompt_attempts, ctx_docs) ai_client = AIClient() try: - async with get_mlx_gate(): + async with acquire_mlx_gate(Priority.BACKGROUND): # 2026-05-17 B-1 async with asyncio.timeout(LLM_TIMEOUT_S): raw_text = await ai_client.call_primary(prompt) primary_name = (