From b73a5cc601c6160c23ec0f14a9b362e101248c90 Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 2 Jul 2026 13:11:06 +0900 Subject: [PATCH 1/3] =?UTF-8?q?feat(infra):=202=EB=85=B8=EB=93=9C=20?= =?UTF-8?q?=EC=9D=B4=EA=B4=80=20P1-4=20=E2=80=94=20rerank=20=ED=94=84?= =?UTF-8?q?=EB=A1=9C=ED=86=A0=EC=BD=9C=20=EC=8A=A4=EC=9C=84=EC=B9=98(tei|l?= =?UTF-8?q?lamacpp)=C2=B7OCR/STT=20=EB=AA=85=EC=8B=9C=20=EA=B2=8C=EC=9D=B4?= =?UTF-8?q?=ED=8A=B8=C2=B7413=20=EC=9E=AC=ED=99=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - AIModelConfig.protocol 판별자 신설(기본 tei = 무회귀), llamacpp = /v1/rerank 요청·응답 스키마 정규화(ai/rerank_protocol.py 순수함수 + 단위테스트 4) - OCR_ENABLED/STT_ENABLED 명시 게이트 — GPU CUDA 서비스(Surya/faster-whisper) 폐기 대응, silent 아님(경고 로그 + extract_meta 터미널 기록) - DS Caddyfile request_body 100MB — 413 정책을 edge(home-caddy)에서 내부로 재홈 (DSM 리버스 프록시 전환 대비, upload.max_bytes 정합) - SSE X-Accel-Buffering는 기점검 결과 기구현(eid_chat)이라 무변경 Co-Authored-By: Claude Fable 5 --- Caddyfile | 8 ++++++ app/ai/client.py | 38 ++++++++++++++++++------ app/ai/rerank_protocol.py | 24 ++++++++++++++++ app/core/config.py | 16 +++++++++++ app/workers/extract_worker.py | 5 ++++ app/workers/stt_worker.py | 8 ++++++ config.yaml | 3 ++ tests/test_rerank_protocol.py | 54 +++++++++++++++++++++++++++++++++++ 8 files changed, 147 insertions(+), 9 deletions(-) create mode 100644 app/ai/rerank_protocol.py create mode 100644 tests/test_rerank_protocol.py diff --git a/Caddyfile b/Caddyfile index 45cc567..d664f40 100644 --- a/Caddyfile +++ b/Caddyfile @@ -19,6 +19,14 @@ http://document.hyungi.net { Referrer-Policy strict-origin-when-cross-origin -Server } + + # 2노드 이관(2026-07-02): 업로드 100MB 한도 집행을 edge(home-caddy)에서 DS 내부로 재홈. + # 인그레스가 DSM 리버스 프록시(한도 GUI 미노출)로 바뀌어도 413 단일 소스 유지. + # config.yaml upload.max_bytes(100000000)와 정합. 
+ request_body { + max_size 100MB + } + encode { gzip match { diff --git a/app/ai/client.py b/app/ai/client.py index 60d1b82..018ad36 100644 --- a/app/ai/client.py +++ b/app/ai/client.py @@ -290,23 +290,43 @@ class AIClient: return response.json()["embedding"] async def rerank(self, query: str, texts: list[str]) -> list[dict]: - """TEI bge-reranker-v2-m3 호출 (Phase 1.3). + """리랭커 호출 — ai.models.rerank.protocol 로 백엔드 분기 (2노드 이관 2026-07-02). - TEI POST /rerank API: + 공통 반환 계약: [{"index": int, "score": float}, ...] (score 내림차순) + + "tei" (기본, 무회귀) — TEI POST /rerank: request: {"query": str, "texts": [str, ...]} response: [{"index": int, "score": float}, ...] (정렬됨) + "llamacpp" — llama.cpp POST /v1/rerank (bge-reranker GGUF, 맥미니 :8807): + request: {"model": str, "query": str, "documents": [str, ...]} + response: {"results": [{"index": int, "relevance_score": float}, ...]} + → normalize_llamacpp_rerank 로 TEI 형태 정규화. + 미지원 protocol = ValueError (명시 실패 — silent fallback 금지). timeout은 self.ai.rerank.timeout (config.yaml). 호출자(rerank_service)가 asyncio.Semaphore + try/except로 감쌈. 
""" + protocol = getattr(self.ai.rerank, "protocol", "tei") or "tei" timeout = float(self.ai.rerank.timeout) if self.ai.rerank.timeout else 5.0 - response = await self._http.post( - self.ai.rerank.endpoint, - json={"query": query, "texts": texts}, - timeout=timeout, - ) - response.raise_for_status() - return response.json() + if protocol == "tei": + response = await self._http.post( + self.ai.rerank.endpoint, + json={"query": query, "texts": texts}, + timeout=timeout, + ) + response.raise_for_status() + return response.json() + if protocol == "llamacpp": + from ai.rerank_protocol import normalize_llamacpp_rerank + + response = await self._http.post( + self.ai.rerank.endpoint, + json={"model": self.ai.rerank.model, "query": query, "documents": texts}, + timeout=timeout, + ) + response.raise_for_status() + return normalize_llamacpp_rerank(response.json()) + raise ValueError(f"unknown rerank protocol: {protocol}") async def _call_chat(self, model_config, prompt: str) -> str: """OpenAI 호환 API 호출 (R6: 무동의 클라우드 폴백 제거). diff --git a/app/ai/rerank_protocol.py b/app/ai/rerank_protocol.py new file mode 100644 index 0000000..8d40cf8 --- /dev/null +++ b/app/ai/rerank_protocol.py @@ -0,0 +1,24 @@ +"""rerank 백엔드 응답 정규화 — 2노드 이관 (2026-07-02, main-server-retirement-1 P1-4). + +TEI(/rerank)와 llama.cpp(/v1/rerank)는 요청/응답 스키마가 다르다. +소비자(rerank_service)는 TEI 형태 [{"index": int, "score": float}]를 기대하므로 +llama.cpp 응답을 여기서 정규화한다. 순수 함수(stdlib only) — 단위 테스트 대상. +""" + + +def normalize_llamacpp_rerank(payload: dict) -> list[dict]: + """llama.cpp /v1/rerank 응답을 TEI 형태로 정규화. + + 입력: {"results": [{"index": int, "relevance_score": float}, ...], ...} + 반환: [{"index": int, "score": float}, ...] (score 내림차순 — TEI '정렬됨' 계약 유지) + + index/relevance_score 가 없는 항목은 버린다 (소비자 측 idx/sc None 가드와 동일 방어). 
+ """ + results = payload.get("results") or [] + normalized = [ + {"index": r["index"], "score": float(r["relevance_score"])} + for r in results + if r.get("index") is not None and r.get("relevance_score") is not None + ] + normalized.sort(key=lambda r: -r["score"]) + return normalized diff --git a/app/core/config.py b/app/core/config.py index c37e1c4..3006ae2 100644 --- a/app/core/config.py +++ b/app/core/config.py @@ -35,6 +35,12 @@ class AIModelConfig(BaseModel): # OpenAI 호환 분기(mlx)만 적용 — Anthropic 분기는 미적용(별 범위). repetition_penalty: float | None = None top_k: int | None = None + # 2노드 이관 (2026-07-02): rerank 백엔드 프로토콜 판별자. + # "tei" = TEI POST /rerank {"query","texts"} → [{"index","score"}] (기본, 무회귀) + # "llamacpp" = llama.cpp POST /v1/rerank {"model","query","documents"} + # → {"results":[{"index","relevance_score"}]} (맥미니 :8807) + # 미지원 값 = client.rerank 가 ValueError (silent fallback 금지). rerank 블록 외 무시. + protocol: str = "tei" class DeepSummaryBacklogConfig(BaseModel): @@ -145,6 +151,12 @@ class Settings(BaseModel): # STT (faster-whisper, §3) stt_endpoint: str = "http://stt-service:3300" + # 2노드 이관 (2026-07-02): GPU CUDA 서비스(Surya OCR / faster-whisper) 폐기 대응 명시 게이트. + # false = 해당 경로 명시 비활성 — OCR 은 _call_ocr 이 경고 로그 후 None(기존 soft-fail 의미론), + # STT 는 터미널 skip + extract_meta 기록. silent 저품질 fallback 아님 (로그/메타로 가시). + ocr_enabled: bool = True + stt_enabled: bool = True + # §3 file_watcher: Roon 음원 경로 (prefix match 로 skip). # 빈 문자열이면 skip 없음. 예: "/documents/PKM/../Music/roon-library" 또는 # NFS 경유 별도 마운트된 Roon 라이브러리. 
@@ -224,6 +236,8 @@ def load_settings() -> Settings: kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100") ocr_endpoint = os.getenv("OCR_ENDPOINT", "http://ocr-service:3200") stt_endpoint = os.getenv("STT_ENDPOINT", "http://stt-service:3300") + ocr_enabled = os.getenv("OCR_ENABLED", "true").lower() in ("1", "true", "yes") + stt_enabled = os.getenv("STT_ENABLED", "true").lower() in ("1", "true", "yes") roon_library_path = os.getenv("ROON_LIBRARY_PATH", "") # ADDITIONAL_WATCH_TARGETS — 쉼표 구분 (공백 제거) @@ -343,6 +357,8 @@ def load_settings() -> Settings: kordoc_endpoint=kordoc_endpoint, ocr_endpoint=ocr_endpoint, stt_endpoint=stt_endpoint, + ocr_enabled=ocr_enabled, + stt_enabled=stt_enabled, roon_library_path=roon_library_path, additional_watch_targets=additional_watch_targets, taxonomy=taxonomy, diff --git a/app/workers/extract_worker.py b/app/workers/extract_worker.py index 3a5339b..3fcc959 100644 --- a/app/workers/extract_worker.py +++ b/app/workers/extract_worker.py @@ -110,6 +110,11 @@ def _get_pdf_page_count( async def _call_ocr(file_path: Path, is_image: bool, max_pages: int = 200) -> str | None: """OCR 서비스 호출 — 타임아웃 페이지 수 비례""" + if not settings.ocr_enabled: + # 2노드 이관(2026-07-02): GPU Surya 폐기 — 명시 비활성. None 반환 = 기존 soft-fail + # 의미론(호출자가 ocr_attempted/skip_reason 메타 기록). 스캔 문서는 비전 배치 경로 별도. + logger.warning("[ocr] OCR_ENABLED=false — skip (스캔·이미지 추출은 비전 배치 경로)") + return None container_path = f"/documents/{file_path.relative_to(Path(settings.nas_mount_path))}" timeout = 60 if is_image else min(600, max(120, max_pages * 3)) try: diff --git a/app/workers/stt_worker.py b/app/workers/stt_worker.py index 21834f9..7f89bc6 100644 --- a/app/workers/stt_worker.py +++ b/app/workers/stt_worker.py @@ -42,6 +42,14 @@ async def process(document_id: int, session: AsyncSession) -> None: logger.warning(f"[stt] id={document_id} file_path 없음 — skip") return + if not settings.stt_enabled: + # 2노드 이관(2026-07-02): GPU stt-service 폐기 — 명시 비활성. 
silent 금지: + # 경고 로그 + extract_meta 터미널 기록 (재시도 안 함, 상태 가시). + doc.extract_meta = {**(doc.extract_meta or {}), "stt_skip_reason": "disabled", "stt_terminal": True} + await session.commit() + logger.warning(f"[stt] id={document_id} STT_ENABLED=false — 터미널 skip (전사 없음)") + return + # NAS 마운트 경로로 절대화 (services/stt 컨테이너도 동일 경로에 bind mount) container_path = str(Path(settings.nas_mount_path) / doc.file_path) diff --git a/config.yaml b/config.yaml index bbcbadb..d5b8c69 100644 --- a/config.yaml +++ b/config.yaml @@ -60,6 +60,9 @@ ai: rerank: endpoint: "http://reranker:80/rerank" model: "bge-reranker-v2-m3" + # 2노드 이관: "tei"(GPU TEI /rerank, 기본) | "llamacpp"(맥미니 llama.cpp, + # 예: endpoint http://100.76.254.116:8807/v1/rerank). 미지원 값 = rerank 호출 시 ValueError (기동 시 검증 아님). + protocol: "tei" # Phase 3.5a answerability classifier. 2026-05-14 GPU LLM 제거 후 Mac mini 26B 로 swap. # classifier_service 가 hasattr 체크로 optional 이므로 이 섹션 제거 시 classifier gate 는 자동 skip (score-only). diff --git a/tests/test_rerank_protocol.py b/tests/test_rerank_protocol.py new file mode 100644 index 0000000..330b971 --- /dev/null +++ b/tests/test_rerank_protocol.py @@ -0,0 +1,54 @@ +"""rerank 프로토콜 정규화 단위 테스트 — 2노드 이관 P1-4 (llama.cpp /v1/rerank). + +순수 함수(ai/rerank_protocol.py)만 대상 — HTTP/DB 의존 없음. 
+실행: PYTHONPATH=app pytest tests/test_rerank_protocol.py +""" + +import json +from pathlib import Path + +from ai.rerank_protocol import normalize_llamacpp_rerank + +FIXTURES = Path(__file__).parent / "fixtures" + + +def test_normalize_llamacpp_shape_and_desc_sort(): + payload = { + "model": "bge-reranker-v2-m3", + "results": [ + {"index": 0, "relevance_score": 0.12}, + {"index": 1, "relevance_score": 2.21}, + {"index": 2, "relevance_score": -1.5}, + ], + } + out = normalize_llamacpp_rerank(payload) + # TEI 계약: [{"index","score"}] score 내림차순 + assert [r["index"] for r in out] == [1, 0, 2] + assert all(set(r) == {"index", "score"} for r in out) + assert out[0]["score"] == 2.21 + + +def test_normalize_llamacpp_missing_fields_skipped(): + payload = { + "results": [ + {"index": 0}, # relevance_score 없음 → 버림 + {"relevance_score": 1.0}, # index 없음 → 버림 + {"index": 3, "relevance_score": 0.5}, + ] + } + assert normalize_llamacpp_rerank(payload) == [{"index": 3, "score": 0.5}] + + +def test_normalize_llamacpp_empty_and_absent_results(): + assert normalize_llamacpp_rerank({}) == [] + assert normalize_llamacpp_rerank({"results": []}) == [] + + +def test_tei_fixture_shape_is_already_contract(): + """TEI 캡처 fixture(Phase 2B G0-1 spec 박제)의 실응답이 정규화 없이 계약 형태임을 확인.""" + doc = json.loads((FIXTURES / "tei_rerank_response.json").read_text()) + captured = doc["captured_responses"]["baseline_bge_v2_m3"] + assert isinstance(captured, list) and captured + assert {"index", "score"} <= set(captured[0]) + # spec 문자열도 계약과 일치 (score desc 정렬 포함) + assert "index" in doc["response_shape"] and "score" in doc["response_shape"] -- 2.52.0 From 43594620b163179bf30fc3a0dbc41cc7d7ad0bc0 Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 2 Jul 2026 13:11:33 +0900 Subject: [PATCH 2/3] =?UTF-8?q?fix(tests):=20rerank=20fixture=20=EA=B2=BD?= =?UTF-8?q?=EB=A1=9C=20=EC=A0=95=EC=A0=95=20=E2=80=94=20captured=5Frespons?= =?UTF-8?q?es.*.raw=20=EA=B0=80=20=EC=8B=A4=EC=9D=91=EB=8B=B5=20=EB=A6=AC?= 
=?UTF-8?q?=EC=8A=A4=ED=8A=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Fable 5 --- tests/test_rerank_protocol.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_rerank_protocol.py b/tests/test_rerank_protocol.py index 330b971..277214d 100644 --- a/tests/test_rerank_protocol.py +++ b/tests/test_rerank_protocol.py @@ -47,7 +47,7 @@ def test_normalize_llamacpp_empty_and_absent_results(): def test_tei_fixture_shape_is_already_contract(): """TEI 캡처 fixture(Phase 2B G0-1 spec 박제)의 실응답이 정규화 없이 계약 형태임을 확인.""" doc = json.loads((FIXTURES / "tei_rerank_response.json").read_text()) - captured = doc["captured_responses"]["baseline_bge_v2_m3"] + captured = doc["captured_responses"]["baseline_bge_v2_m3"]["raw"] assert isinstance(captured, list) and captured assert {"index", "score"} <= set(captured[0]) # spec 문자열도 계약과 일치 (score desc 정렬 포함) -- 2.52.0 From d53fcc2b36ac76a04fb6027b32e19a4e49b671d0 Mon Sep 17 00:00:00 2001 From: hyungi Date: Thu, 2 Jul 2026 13:30:04 +0900 Subject: [PATCH 3/3] =?UTF-8?q?feat(search):=20MAX=5FRERANK=5FINPUT=20env?= =?UTF-8?q?=20=EC=A1=B0=EC=A0=95=20=EA=B0=80=EB=8A=A5=ED=99=94=20=E2=80=94?= =?UTF-8?q?=202=EB=85=B8=EB=93=9C=20=EB=A6=AC=EB=9E=AD=ED=81=AC=20?= =?UTF-8?q?=EC=A7=80=EC=97=B0=20=EB=8C=80=EC=9D=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 맥미니 llama.cpp 리랭크는 후보 수 선형(실측 50=0.60s/200=1.89s) — NAS 배포에서 MAX_RERANK_INPUT=50 으로 tail 지연 축소. 기본 200 = 현행 무회귀. 
Co-Authored-By: Claude Fable 5 --- app/services/search/rerank_service.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/app/services/search/rerank_service.py b/app/services/search/rerank_service.py index 877926a..706fe79 100644 --- a/app/services/search/rerank_service.py +++ b/app/services/search/rerank_service.py @@ -17,6 +17,7 @@ snippet 생성: from __future__ import annotations import asyncio +import os import re from typing import TYPE_CHECKING @@ -33,8 +34,11 @@ logger = setup_logger("rerank") # 동시 rerank 호출 제한 (GPU saturation 방지) RERANK_SEMAPHORE = asyncio.Semaphore(2) -# rerank input 크기 제한 (latency / VRAM hard cap) -MAX_RERANK_INPUT = 200 +# rerank input 크기 제한 (latency / VRAM hard cap). +# 2노드 이관(2026-07-02): env MAX_RERANK_INPUT 로 조정 가능 — 맥미니 llama.cpp 리랭크는 +# 후보 수에 선형(NAS발 실측 50=0.60s / 100=0.95s / 200=1.89s)이라 NAS 배포는 50 권장. +# 기본 200 = 현행(GPU TEI) 무회귀. +MAX_RERANK_INPUT = int(os.getenv("MAX_RERANK_INPUT", "200")) MAX_CHUNKS_PER_DOC = 2 # Soft timeout (초) -- 2.52.0