"""Phase 1B.5 ImgAuth — marker_worker 의 순수 헬퍼 단위 테스트. DB / NAS / marker-service 실접속이 필요한 통합 테스트는 별 파일 (배포 후 실행). 본 파일은 image-bytes mocking 만으로 검증 가능한 부분 (rewrite 로직 + persist 매핑). plan: ~/.claude/plans/piped-humming-crystal.md """ from __future__ import annotations import base64 import os import sys import pytest # tests/ → 프로젝트 루트 → app/ sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app")) from workers.marker_worker import ( _persist_images_to_nas, _rewrite_image_refs, ) # ─── _rewrite_image_refs ─── def test_rewrite_exact_slug_match(): md = "본문\n\n![도식 1](_page_0_Picture_3.jpeg)\n\n뒤" out = _rewrite_image_refs(md, {"_page_0_Picture_3.jpeg": "img_001"}) assert "![도식 1](docimg:img_001)" in out assert "_page_0_Picture_3.jpeg" not in out def test_rewrite_basename_match_with_subdir_href(): md = "![](sub/_page_2_Figure_1.png)" out = _rewrite_image_refs(md, {"_page_2_Figure_1.png": "img_007"}) assert out == "![](docimg:img_007)" def test_rewrite_preserves_external_urls(): md = "외부 ![logo](https://example.com/x.png) 와 내부 ![](slug.png)" out = _rewrite_image_refs(md, {"slug.png": "img_002"}) # 외부 URL 는 그대로, 내부 slug 만 docimg 로 치환. assert "https://example.com/x.png" in out assert "(docimg:img_002)" in out def test_rewrite_preserves_alt_text(): md = "![긴 한국어 alt 설명 with $math$](slug.jpeg)" out = _rewrite_image_refs(md, {"slug.jpeg": "img_001"}) assert out == "![긴 한국어 alt 설명 with $math$](docimg:img_001)" def test_rewrite_no_slug_map_is_noop(): md = "![](slug.png)" assert _rewrite_image_refs(md, {}) == md def test_rewrite_unknown_slug_kept(): md = "![](unknown_slug.png)" out = _rewrite_image_refs(md, {"other.png": "img_001"}) assert out == md def test_rewrite_idempotent_on_already_normalized(): """이미 docimg:img_NNN 인 ref 는 slug 매칭 실패 → 변경 없음 (재변환 idempotent).""" md = "![alt](docimg:img_001)" out = _rewrite_image_refs(md, {"_page_0.jpeg": "img_001"}) assert out == md def test_rewrite_multiple_images(): md = "![a](s1.png) text ![b](s2.png) ![c](s3.jpg)" out = _rewrite_image_refs(md, { "s1.png": "img_001", "s2.png": "img_002", "s3.jpg": "img_003", }) assert "(docimg:img_001)" in out assert "(docimg:img_002)" in out assert "(docimg:img_003)" in out # ─── _persist_images_to_nas ─── def _make_png_bytes() -> bytes: """1x1 transparent PNG (signature + IHDR + IDAT + IEND).""" return bytes.fromhex( "89504e470d0a1a0a" # signature "0000000d49484452" # IHDR len + type "00000001000000010806000000" # 1x1 RGBA "1f15c4890000000d4944415478" "9c626001000000ffff03000006" "00057ce4ec5d0000000049454e44ae426082" ) def test_persist_sequential_image_keys(tmp_path, monkeypatch): # NAS root 를 tmp_path 로 redirect monkeypatch.setattr( "workers.marker_worker.EXTRACTED_IMAGES_ROOT", tmp_path / "extracted_images", ) payload = [ {"slug": "_page_0.png", "format": "png", "bytes_b64": base64.b64encode(_make_png_bytes()).decode("ascii")}, {"slug": "_page_1.png", "format": "png", "bytes_b64": base64.b64encode(_make_png_bytes()).decode("ascii")}, {"slug": "_page_2.png", "format": "png", "bytes_b64": base64.b64encode(_make_png_bytes()).decode("ascii")}, ] saved = _persist_images_to_nas(document_id=999, images_resp=payload) assert [s["image_key"] for s in saved] == ["img_001", "img_002", "img_003"] assert all(s["mime_type"] == "image/png" for s in saved) assert all(s["file_size"] > 0 for s in saved) assert all(s["source_slug"].startswith("_page_") for s in saved) # NAS 파일 실재 확인 for s in saved: from pathlib import Path assert Path(s["file_path"]).is_file() def test_persist_idempotent_on_rerun(tmp_path, monkeypatch): """같은 doc_id 두번 persist → 같은 image_key 같은 path 에 overwrite.""" monkeypatch.setattr( "workers.marker_worker.EXTRACTED_IMAGES_ROOT", tmp_path / "extracted_images", ) raw = _make_png_bytes() payload = [{"slug": "_page_0.png", "format": "png", "bytes_b64": base64.b64encode(raw).decode("ascii")}] s1 = _persist_images_to_nas(document_id=42, images_resp=payload) s2 = _persist_images_to_nas(document_id=42, images_resp=payload) assert s1[0]["image_key"] == s2[0]["image_key"] == "img_001" assert s1[0]["file_path"] == s2[0]["file_path"] assert s1[0]["content_hash"] == s2[0]["content_hash"] def test_persist_skips_invalid_base64(tmp_path, monkeypatch): """깨진 base64 는 skip — 다른 이미지 처리는 계속.""" monkeypatch.setattr( "workers.marker_worker.EXTRACTED_IMAGES_ROOT", tmp_path / "extracted_images", ) raw = _make_png_bytes() payload = [ {"slug": "_page_0.png", "format": "png", "bytes_b64": "@@@invalid@@@"}, {"slug": "_page_1.png", "format": "png", "bytes_b64": base64.b64encode(raw).decode("ascii")}, ] saved = _persist_images_to_nas(document_id=7, images_resp=payload) # 첫 번째 invalid skip, 두 번째만 저장. seq 는 그대로 진행 → img_002 가 됨. assert len(saved) == 1 assert saved[0]["image_key"] == "img_002" assert saved[0]["source_slug"] == "_page_1.png" def test_persist_empty_images_returns_empty(tmp_path, monkeypatch): monkeypatch.setattr( "workers.marker_worker.EXTRACTED_IMAGES_ROOT", tmp_path / "extracted_images", ) assert _persist_images_to_nas(document_id=1, images_resp=[]) == []