From 403b05d9717583a746b7e40d05c1c3042b56f274 Mon Sep 17 00:00:00 2001
From: hyungi <hyun49196@gmail.com>
Date: Sat, 20 Jun 2026 04:51:06 +0000
Subject: [PATCH] =?UTF-8?q?fix(study):=20study/analyze=20LLM=20=ED=83=80?=
 =?UTF-8?q?=EC=9E=84=EC=95=84=EC=9B=83=EC=9D=84=20config=20=EB=8B=A8?=
 =?UTF-8?q?=EC=9D=BC=EC=86=8C=EC=8A=A4(llm=5Fcall=5Ftimeout=5Fs)=EB=A1=9C?=
 =?UTF-8?q?=20=E2=80=94=20=EC=8A=A4=ED=85=8C=EC=9D=BC=20=ED=95=98=EB=93=9C?=
 =?UTF-8?q?=EC=BD=94=EB=94=A9=20=EC=9D=BC=EC=86=8C?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

study explanation/session-analysis/memo-card 워커 + study_questions/study_topics(subject-note·diagnosis)
+ documents.analyze 의 하드코딩 30~60s asyncio.timeout 7곳 제거. 빠른 Gemma 기준 리터럴이 Qwen 27B
교체(2026-06-11) sweep 누락 → 느린 콜을 잘라 사용자 대면 504 + 워커가 매 재시도마다 느린 콜 재실행해
문서가 큐에서 영영 못 빠지는 liveness halt. digest_llm_timeout_s 와 동형으로 config.pipeline.llm_call_timeout_s
(config.yaml 값 300s; 키 누락·파싱 실패 시 코드 기본값 200s, 하한 1s)로 단일소스화. 다음 모델 교체 때 재발 차단.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 app/api/documents.py                         | 2 +-
 app/api/study_questions.py                   | 4 ++--
 app/api/study_topics.py                      | 5 +++--
 app/core/config.py                           | 9 +++++++++
 app/workers/study_explanation_worker.py      | 5 +++--
 app/workers/study_memo_card_worker.py        | 5 +++--
 app/workers/study_session_analysis_worker.py | 5 +++--
 config.yaml                                  | 3 +++
 8 files changed, 27 insertions(+), 11 deletions(-)

diff --git a/app/api/documents.py b/app/api/documents.py
index 78f206e..9732489 100644
--- a/app/api/documents.py
+++ b/app/api/documents.py
@@ -1557,7 +1557,7 @@ ANALYZE_PROMPT = (
 )
 
 ANALYZE_TEXT_LIMIT = 12000  # chars (15000 → 12000, 실측 timeout 빈발)
-ANALYZE_TIMEOUT_S = 60  # 15,000자 입력 + 4층 출력. 실측 7~45초, safety margin 포함
+ANALYZE_TIMEOUT_S = settings.llm_call_timeout_s  # 2026-06-20 config 단일소스 (구 60s=빠른 Gemma)
 ANALYZE_CACHE_TTL_S = 1800  # 30분
 ANALYZE_CACHE_MAXSIZE = 100
 ANALYZE_LAYER_MIN_CHARS = 50  # 이 미만이면 억지 채움으로 보고 제거
diff --git a/app/api/study_questions.py b/app/api/study_questions.py
index fcef421..84135c8 100644
--- a/app/api/study_questions.py
+++ b/app/api/study_questions.py
@@ -1543,8 +1543,8 @@ async def delete_question_image(
 
 # ─── PR-3: AI 풀이 생성 엔드포인트 ───
 
-# MLX 호출 timeout (초). MLX gate + 26B 추론 평균 ~10s, 안전 마진.
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s
 # 프롬프트 템플릿 lazy load
 _PROMPT_PATH = "study_question_explanation.txt"
 _prompt_cache: str | None = None
diff --git a/app/api/study_topics.py b/app/api/study_topics.py
index cde2b28..345a227 100644
--- a/app/api/study_topics.py
+++ b/app/api/study_topics.py
@@ -33,6 +33,7 @@ from ai.client import AIClient, strip_thinking
 from eid.ai import EidAIClient
 from eid.compose import compose
 from core.auth import get_current_user
+from core.config import settings
 from core.database import get_session
 from core.library import LIBRARY_PREFIX, normalize_library_path
 from models.document import Document
@@ -1015,7 +1016,7 @@ async def detach_session_from_topic(
 
 # ─── PR-9: 분야 설명 (study_topic_subject_notes) ───
 
-SUBJECT_NOTE_TIMEOUT_S = 30.0
+SUBJECT_NOTE_TIMEOUT_S = settings.llm_call_timeout_s
 _SUBJECT_NOTE_PROMPT_PATH = "study_subject_note.txt"
 _subject_note_prompt_cache: str | None = None
 
@@ -1242,7 +1243,7 @@ async def generate_subject_note(
 # 워커(study_weakness)가 산출한 최신 eid_study_weakness 스냅샷을 '학습 진단 코치'(study overlay)
 # 로 번역. 약점/태도 '판정'은 코드 derived(스냅샷) — LLM 은 스냅샷 블록 값만 인용(환각 약점 차단).
 # compose("study_diagnosis") = persona+rules+study overlay(+{placeholder}) → 표면이 블록 substitute.
-DIAGNOSIS_TIMEOUT_S = 40.0
+DIAGNOSIS_TIMEOUT_S = settings.llm_call_timeout_s
 
 
 class StudyDiagnosisResponse(BaseModel):
diff --git a/app/core/config.py b/app/core/config.py
index fdb1521..c72dc4a 100644
--- a/app/core/config.py
+++ b/app/core/config.py
@@ -176,6 +176,9 @@ class Settings(BaseModel):
     digest_llm_timeout_s: int = 200
     digest_llm_attempts: int = 2
     digest_pipeline_hard_cap_s: int = 1800
+    # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준,
+    # Qwen 27B 교체 sweep 누락 → 사용자 대면 504 + 워커 영구 stuck). digest 와 동형 단일소스.
+    llm_call_timeout_s: int = 200
 
     # PR-MacMini-Derived-Worker-1: study explanation owner = Mac mini
     # GPU 측은 false 로 설정 (.env), explanation 분기 skip guard 트리거.
@@ -268,6 +271,7 @@ def load_settings() -> Settings:
     digest_llm_timeout_s = 200
     digest_llm_attempts = 2
     digest_pipeline_hard_cap_s = 1800
+    llm_call_timeout_s = 200
     if config_path.exists() and raw and "pipeline" in raw:
         held_raw = (raw.get("pipeline") or {}).get("held_stages") or []
         # 스칼라(문자열) 오기입 시 char-split 방지 — 단일 항목 리스트로 수용.
@@ -293,6 +297,10 @@ def load_settings() -> Settings:
             digest_pipeline_hard_cap_s = max(60, int(_pl.get("digest_pipeline_hard_cap_s", 1800)))
         except (TypeError, ValueError):
             digest_pipeline_hard_cap_s = 1800
+        try:
+            llm_call_timeout_s = max(1, int(_pl.get("llm_call_timeout_s", 200)))
+        except (TypeError, ValueError):
+            llm_call_timeout_s = 200
 
     taxonomy = raw.get("taxonomy", {}) if config_path.exists() and raw else {}
     document_types = raw.get("document_types", []) if config_path.exists() and raw else []
@@ -327,6 +335,7 @@ def load_settings() -> Settings:
         digest_llm_timeout_s=digest_llm_timeout_s,
         digest_llm_attempts=digest_llm_attempts,
         digest_pipeline_hard_cap_s=digest_pipeline_hard_cap_s,
+        llm_call_timeout_s=llm_call_timeout_s,
     )
 
 
diff --git a/app/workers/study_explanation_worker.py b/app/workers/study_explanation_worker.py
index 838494c..d3bfd50 100644
--- a/app/workers/study_explanation_worker.py
+++ b/app/workers/study_explanation_worker.py
@@ -25,6 +25,7 @@ import httpx
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_question import StudyQuestion
 from models.study_question_job import StudyQuestionJob
 from services.search.llm_gate import Priority, acquire_mlx_gate
@@ -35,8 +36,8 @@ from services.study.explanation_rag import (
 
 logger = logging.getLogger(__name__)
 
-# PR-3 LLM_TIMEOUT_S 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려)
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준, Qwen 27B 교체 sweep 누락).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s
 
 # explanation_md hard cap — 운영 데이터 793/838/866자 사례에서 1200 으로 시작
 # (800 은 공식·오답·핵심개념 묶이는 기사시험 풀이에 빡빡함). 1차 운영 후 조정.
diff --git a/app/workers/study_memo_card_worker.py b/app/workers/study_memo_card_worker.py
index 791458c..502ff69 100644
--- a/app/workers/study_memo_card_worker.py
+++ b/app/workers/study_memo_card_worker.py
@@ -24,6 +24,7 @@ import httpx
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_memo_card import (
     append_card,
     append_card_evidence,
@@ -41,8 +42,8 @@ from services.study.study_memo_card_guards import guard_cards
 
 logger = logging.getLogger("study_memo_card_worker")
 
-# 다카드 출력이라 explanation(30s)보다 여유. config primary.timeout(180, soft-lock)은 미변경.
-CARD_LLM_TIMEOUT_S = 45.0
+# 2026-06-20: config 단일소스 (구 하드코딩 45s = 빠른 Gemma 기준).
+CARD_LLM_TIMEOUT_S = settings.llm_call_timeout_s
 SOURCE_KIND_QUESTION = "question"
 
 _ENVELOPE_PROMPT_FILE = "study_card_envelope.txt"
diff --git a/app/workers/study_session_analysis_worker.py b/app/workers/study_session_analysis_worker.py
index fc14937..133326f 100644
--- a/app/workers/study_session_analysis_worker.py
+++ b/app/workers/study_session_analysis_worker.py
@@ -28,6 +28,7 @@ from sqlalchemy.dialects.postgresql import insert as pg_insert
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from ai.client import AIClient, parse_json_response
+from core.config import settings
 from models.study_question import StudyQuestion, StudyQuestionAttempt
 from models.study_quiz_session import StudyQuizSession
 from models.study_quiz_session_analysis import StudyQuizSessionAnalysis
@@ -42,8 +43,8 @@ from services.study.session_summary_rag import gather_session_summary_context
 
 logger = logging.getLogger(__name__)
 
-# 4-A 와 동일 안전 마진 (26B 평균 ~10s, gate 직렬화 고려)
-LLM_TIMEOUT_S = 30.0
+# 2026-06-20: config 단일소스 (구 하드코딩 30s = 빠른 Gemma 기준).
+LLM_TIMEOUT_S = settings.llm_call_timeout_s
 # wrong/unsure 5 미만은 분석 의미 X — insufficient_attempts skip
 MIN_ATTEMPTS_FOR_ANALYSIS = 5
 # 큰 세션 (84건 등) 에서 prompt 과대 + LLM timeout 방어. 가장 최근 attempt 기준 cap.
diff --git a/config.yaml b/config.yaml
index a6f8b3f..4956e1f 100644
--- a/config.yaml
+++ b/config.yaml
@@ -208,3 +208,6 @@ pipeline:
   digest_llm_timeout_s: 300
   digest_llm_attempts: 2
   digest_pipeline_hard_cap_s: 5400
+  # 2026-06-20: study/analyze 단일 primary-call 타임아웃 (구 하드코딩 30~60s = 빠른 Gemma 기준).
+  # Qwen 27B(콜당 ~40~150s)에 맞춰 단일소스화 — 구 30s 즉사 = 사용자 504 + 워커 영구 재시도.
+  llm_call_timeout_s: 300