diff --git a/app/workers/chunk_worker.py b/app/workers/chunk_worker.py index a9f1baf..f6ff953 100644 --- a/app/workers/chunk_worker.py +++ b/app/workers/chunk_worker.py @@ -313,8 +313,16 @@ async def process(document_id: int, session: AsyncSession) -> None: client = AIClient() try: for idx, c in enumerate(chunk_dicts): + # Phase 1.2-G: embedding 입력 강화 (자연어 query ↔ 법령 조항 의미 매칭 개선) + # 짧은 본문이나 segment-only chunk는 임베딩 signal이 약함 → title/section 포함. + section = c.get("section_title") or "" + embed_input = ( + f"[제목] {doc.title or ''}\n" + f"[섹션] {section}\n" + f"[본문] {c['text']}" + ) try: - embedding = await client.embed(c["text"]) + embedding = await client.embed(embed_input) except Exception as e: logger.warning(f"[chunk] document_id={document_id} chunk {idx} 임베딩 실패: {e}") embedding = None