From 06da098eabcb4ef902e0cfa6aed04d73040ef2c9 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn <hyungiahn@Hyungiui-MacBookPro.local>
Date: Fri, 3 Apr 2026 15:05:48 +0900
Subject: [PATCH] =?UTF-8?q?fix:=20=EB=B2=95=EB=A0=B9=20=EB=B6=84=ED=95=A0?=
 =?UTF-8?q?=20=E2=80=94=20=EC=A1=B0=EB=AC=B8=ED=82=A4=20000=20=EA=B8=B0?=
 =?UTF-8?q?=EB=B0=98=20=EC=9E=A5(=E7=AB=A0)=20=EB=8B=A8=EC=9C=84=20?=
 =?UTF-8?q?=EB=B6=84=ED=95=A0=EB=A1=9C=20=EB=B3=80=EA=B2=BD?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

국가법령 XML은 <편>/<장> 태그가 아니라, 조문키가 "000"으로 끝나는
<조문단위>(예: 조문키="xxxx000")의 조문내용에 "제X장 ..." 형태로 장 구분자가 포함됨.
이 구분자를 파싱하여 조문을 장 단위로 묶어 분할 저장하고,
장 구분자를 찾지 못하면 전체를 1파일로 저장.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 app/workers/law_monitor.py | 63 +++++++++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 21 deletions(-)

diff --git a/app/workers/law_monitor.py b/app/workers/law_monitor.py
index b9ec8f1..545697e 100644
--- a/app/workers/law_monitor.py
+++ b/app/workers/law_monitor.py
@@ -191,29 +191,50 @@ async def _save_law_split(
     session, xml_text: str, law_name: str, proclamation_date: str,
     revision_type: str, prev_date: str,
 ) -> int:
-    """법령 XML → 편/장 단위 Markdown 분할 저장"""
+    """법령 XML → 장(章) 단위 Markdown 분할 저장"""
     root = ET.fromstring(xml_text)
+
+    # 조문단위에서 장 구분자 찾기 (조문키가 000으로 끝나는 조문)
+    units = root.findall(".//조문단위")
+    chapters = []  # [(장제목, [조문들])]
+    current_chapter = None
+    current_articles = []
+
+    for unit in units:
+        key = unit.attrib.get("조문키", "")
+        content = (unit.findtext("조문내용", "") or "").strip()
+
+        # 장 구분자: 키가 000으로 끝나고 내용에 "제X장" 포함
+        if key.endswith("000") and re.search(r"제\d+장", content):
+            # 이전 장 저장
+            if current_chapter and current_articles:
+                chapters.append((current_chapter, current_articles))
+            chapter_match = re.search(r"(제\d+장\s*.+)", content)
+            current_chapter = chapter_match.group(1).strip() if chapter_match else content.strip()
+            current_articles = []
+        else:
+            current_articles.append(unit)
+
+    # 마지막 장 저장
+    if current_chapter and current_articles:
+        chapters.append((current_chapter, current_articles))
+
+    # 장 분할 성공
     sections = []
-
-    # 편(編) 단위 분할 시도
-    for part in root.findall(".//*편"):
-        title = part.attrib.get("제목", part.findtext("편제목", ""))
-        number = part.attrib.get("번호", "")
-        content = _xml_section_to_markdown(part)
-        if content.strip():
-            sections.append((f"제{number}편_{_safe_name(title)}", content))
-
-    # 편이 없으면 장(章) 단위 시도
-    if not sections:
-        for chapter in root.findall(".//*장"):
-            title = chapter.attrib.get("제목", chapter.findtext("장제목", ""))
-            number = chapter.attrib.get("번호", "")
-            content = _xml_section_to_markdown(chapter)
-            if content.strip():
-                sections.append((f"제{number}장_{_safe_name(title)}", content))
-
-    # 편/장 둘 다 없으면 전체 1파일
-    if not sections:
+    if chapters:
+        for chapter_title, articles in chapters:
+            md_lines = [f"# {law_name}\n", f"## {chapter_title}\n"]
+            for article in articles:
+                title = article.findtext("조문제목", "")
+                content = article.findtext("조문내용", "")
+                if title:
+                    md_lines.append(f"\n### {title}\n")
+                if content:
+                    md_lines.append(content.strip())
+            section_name = _safe_name(chapter_title)
+            sections.append((section_name, "\n".join(md_lines)))
+    else:
+        # 장 분할 실패 → 전체 1파일
         full_md = _law_xml_to_markdown(xml_text, law_name)
         sections.append(("전문", full_md))