From 403b05d9717583a746b7e40d05c1c3042b56f274 Mon Sep 17 00:00:00 2001 From: hyungi Date: Sat, 20 Jun 2026 04:51:06 +0000 Subject: [PATCH] =?UTF-8?q?fix(study):=20study/analyze=20LLM=20=ED=83=80?= =?UTF-8?q?=EC=9E=84=EC=95=84=EC=9B=83=EC=9D=84=20config=20=EB=8B=A8?= =?UTF-8?q?=EC=9D=BC=EC=86=8C=EC=8A=A4(llm=5Fcall=5Ftimeout=5Fs)=EB=A1=9C?= =?UTF-8?q?=20=E2=80=94=20=EC=8A=A4=ED=85=8C=EC=9D=BC=20=ED=95=98=EB=93=9C?= =?UTF-8?q?=EC=BD=94=EB=94=A9=20=EC=9D=BC=EC=86=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit study explanation/session-analysis/memo-card 워커 + study_questions/study_topics(subject-note·diagnosis) + documents.analyze 의 하드코딩 30~60s asyncio.timeout 7곳 제거. 빠른 Gemma 기준 리터럴이 Qwen 27B 교체(2026-06-11) sweep 누락 → 느린 콜을 잘라 사용자 대면 504 + 워커가 매 재시도마다 느린 콜 재실행해 문서가 큐에서 영영 못 빠지는 liveness halt. digest_llm_timeout_s 와 동형으로 config.pipeline.llm_call_timeout_s(300) 단일소스화. 다음 모델 교체 때 재발 차단. Co-Authored-By: Claude Opus 4.8 (1M context) --- app/api/documents.py | 2 +- app/api/study_questions.py | 4 ++-- app/api/study_topics.py | 5 +++-- app/core/config.py | 9 +++++++++ app/workers/study_explanation_worker.py | 5 +++-- app/workers/study_memo_card_worker.py | 5 +++-- app/workers/study_session_analysis_worker.py | 5 +++-- config.yaml | 3 +++ 8 files changed, 27 insertions(+), 11 deletions(-) diff --git a/app/api/documents.py b/app/api/documents.py index 78f206e..9732489 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -1557,7 +1557,7 @@ ANALYZE_PROMPT = ( ) ANALYZE_TEXT_LIMIT = 12000 # chars (15000 → 12000, 실측 timeout 빈발) -ANALYZE_TIMEOUT_S = 60 # 15,000자 입력 + 4층 출력. 실측 7~45초, safety margin 포함 +ANALYZE_TIMEOUT_S = settings.llm_call_timeout_s # 2026-06-20 config 단일소스 (구 60s=빠른 Gemma) ANALYZE_CACHE_TTL_S = 1800 # 30분 ANALYZE_CACHE_MAXSIZE = 100 ANALYZE_LAYER_MIN_CHARS = 50 # 이 미만이면 억지 채움으로 보고 제거 diff --git a/app/api/study_questions.py b/app/api/study_questions.py index fcef421..84135c8 100644 --- a/app/api/study_questions.py +++ b/app/api/study_questions.py @@ -1543,8 +1543,8 @@ async def delete_question_image( # ─── PR-3: AI 풀이 생성 엔드포인트 ─── -# MLX 호출 timeout (초). MLX gate + 26B 추론 평균 ~10s, 안전 마진. -LLM_TIMEOUT_S = 30.0 +# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준). +LLM_TIMEOUT_S = settings.llm_call_timeout_s # 프롬프트 템플릿 lazy load _PROMPT_PATH = "study_question_explanation.txt" _prompt_cache: str | None = None diff --git a/app/api/study_topics.py b/app/api/study_topics.py index cde2b28..345a227 100644 --- a/app/api/study_topics.py +++ b/app/api/study_topics.py @@ -33,6 +33,7 @@ from ai.client import AIClient, strip_thinking from eid.ai import EidAIClient from eid.compose import compose from core.auth import get_current_user +from core.config import settings from core.database import get_session from core.library import LIBRARY_PREFIX, normalize_library_path from models.document import Document @@ -1015,7 +1016,7 @@ async def detach_session_from_topic( # ─── PR-9: 분야 설명 (study_topic_subject_notes) ─── -SUBJECT_NOTE_TIMEOUT_S = 30.0 +SUBJECT_NOTE_TIMEOUT_S = settings.llm_call_timeout_s _SUBJECT_NOTE_PROMPT_PATH = "study_subject_note.txt" _subject_note_prompt_cache: str | None = None @@ -1242,7 +1243,7 @@ async def generate_subject_note( # 워커(study_weakness)가 산출한 최신 eid_study_weakness 스냅샷을 '학습 진단 코치'(study overlay) # 로 번역. 약점/태도 '판정'은 코드 derived(스냅샷) — LLM 은 스냅샷 블록 값만 인용(환각 약점 차단). # compose("study_diagnosis") = persona+rules+study overlay(+{placeholder}) → 표면이 블록 substitute. -DIAGNOSIS_TIMEOUT_S = 40.0 +DIAGNOSIS_TIMEOUT_S = settings.llm_call_timeout_s class StudyDiagnosisResponse(BaseModel): diff --git a/app/core/config.py b/app/core/config.py index fdb1521..c72dc4a 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -176,6 +176,9 @@ class Settings(BaseModel): digest_llm_timeout_s: int = 200 digest_llm_attempts: int = 2 digest_pipeline_hard_cap_s: int = 1800 + # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준, + # Qwen 27B 교체 sweep 누락 → 사용자 대면 504 + 워커 영구 stuck). digest 와 동형 단일소스. + llm_call_timeout_s: int = 200 # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거. @@ -268,6 +271,7 @@ def load_settings() -> Settings: digest_llm_timeout_s = 200 digest_llm_attempts = 2 digest_pipeline_hard_cap_s = 1800 + llm_call_timeout_s = 200 if config_path.exists() and raw and "pipeline" in raw: held_raw = (raw.get("pipeline") or {}).get("held_stages") or [] # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용. @@ -293,6 +297,10 @@ def load_settings() -> Settings: digest_pipeline_hard_cap_s = max(60, int(_pl.get("digest_pipeline_hard_cap_s", 1800))) except (TypeError, ValueError): digest_pipeline_hard_cap_s = 1800 + try: + llm_call_timeout_s = max(1, int(_pl.get("llm_call_timeout_s", 200))) + except (TypeError, ValueError): + llm_call_timeout_s = 200 taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {} document_types = raw.get("document_types", []) if config_path.exists() and raw else [] @@ -327,6 +335,7 @@ def load_settings() -> Settings: digest_llm_timeout_s=digest_llm_timeout_s, digest_llm_attempts=digest_llm_attempts, digest_pipeline_hard_cap_s=digest_pipeline_hard_cap_s, + llm_call_timeout_s=llm_call_timeout_s, ) diff --git a/app/workers/study_explanation_worker.py b/app/workers/study_explanation_worker.py index 838494c..d3bfd50 100644 --- a/app/workers/study_explanation_worker.py +++ b/app/workers/study_explanation_worker.py @@ -25,6 +25,7 @@ import httpx from sqlalchemy.ext.asyncio import AsyncSession from ai.client import AIClient, parse_json_response +from core.config import settings from models.study_question import StudyQuestion from models.study_question_job import StudyQuestionJob from services.search.llm_gate import Priority, acquire_mlx_gate @@ -35,8 +36,8 @@ from services.study.explanation_rag import ( logger = logging.getLogger(__name__) -# PR-3 LLM_TIMEOUT_S 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려) -LLM_TIMEOUT_S = 30.0 +# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준, Qwen 27B 교체 sweep 누락). +LLM_TIMEOUT_S = settings.llm_call_timeout_s # explanation_md hard cap — 운영 데이터 793/838/866자 사례에서 1200 으로 시작 # (800 은 공식·오답·핵심개념 묶이는 기사시험 풀이에 빡빡함). 1차 운영 후 조정. diff --git a/app/workers/study_memo_card_worker.py b/app/workers/study_memo_card_worker.py index 791458c..502ff69 100644 --- a/app/workers/study_memo_card_worker.py +++ b/app/workers/study_memo_card_worker.py @@ -24,6 +24,7 @@ import httpx from sqlalchemy.ext.asyncio import AsyncSession from ai.client import AIClient, parse_json_response +from core.config import settings from models.study_memo_card import ( append_card, append_card_evidence, @@ -41,8 +42,8 @@ from services.study.study_memo_card_guards import guard_cards logger = logging.getLogger("study_memo_card_worker") -# 다카드 출력이라 explanation(30s)보다 여유. config primary.timeout(180, soft-lock)은 미변경. -CARD_LLM_TIMEOUT_S = 45.0 +# 2026-06-20: config 단일소스 (구 하드코딩 45s = 빠른 Gemma 기준). +CARD_LLM_TIMEOUT_S = settings.llm_call_timeout_s SOURCE_KIND_QUESTION = "question" _ENVELOPE_PROMPT_FILE = "study_card_envelope.txt" diff --git a/app/workers/study_session_analysis_worker.py b/app/workers/study_session_analysis_worker.py index fc14937..133326f 100644 --- a/app/workers/study_session_analysis_worker.py +++ b/app/workers/study_session_analysis_worker.py @@ -28,6 +28,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlalchemy.ext.asyncio import AsyncSession from ai.client import AIClient, parse_json_response +from core.config import settings from models.study_question import StudyQuestion, StudyQuestionAttempt from models.study_quiz_session import StudyQuizSession from models.study_quiz_session_analysis import StudyQuizSessionAnalysis @@ -42,8 +43,8 @@ from services.study.session_summary_rag import gather_session_summary_context logger = logging.getLogger(__name__) -# 4-A 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려) -LLM_TIMEOUT_S = 30.0 +# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준). +LLM_TIMEOUT_S = settings.llm_call_timeout_s # wrong/unsure 5 미만은 분석 의미 X — insufficient_attempts skip MIN_ATTEMPTS_FOR_ANALYSIS = 5 # 큰 세션 (84건 등) 에서 prompt 과대 + LLM timeout 방어. 가장 최근 attempt 기준 cap. diff --git a/config.yaml b/config.yaml index a6f8b3f..4956e1f 100644 --- a/config.yaml +++ b/config.yaml @@ -208,3 +208,6 @@ pipeline: digest_llm_timeout_s: 300 digest_llm_attempts: 2 digest_pipeline_hard_cap_s: 5400 + # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준). + # Qwen 27B(콜당 ~40~150s)에 맞춰 단일소스화 — 구 30s 즉사 = 사용자 504 + 워커 영구 재시도. + llm_call_timeout_s: 300