wip/gpu-main-snapshot-2026-05-11 #7
@@ -88,6 +88,12 @@ class Settings(BaseModel):
|
||||
# NFS 경유 별도 마운트된 Roon 라이브러리.
|
||||
roon_library_path: str = ""
|
||||
|
||||
# KGS Code 등 외부 작성 마크다운 자료 추가 스캔 경로 (PKM 상대 경로, 쉼표 구분).
|
||||
# env: ADDITIONAL_WATCH_TARGETS=Knowledge/Industrial_Safety/가스기사/KGS_Code,...
|
||||
# 모두 expected_category="library" 로 처리 (md/pdf/docx 등 문서 확장자만 수락).
|
||||
# Inbox/Recordings/Videos 기본 스캔 외에 추가만 허용.
|
||||
additional_watch_targets: list[str] = []
|
||||
|
||||
# 분류 체계
|
||||
taxonomy: dict = {}
|
||||
document_types: list[str] = []
|
||||
@@ -108,6 +114,10 @@ def load_settings() -> Settings:
|
||||
stt_endpoint = os.getenv("STT_ENDPOINT", "http://stt-service:3300")
|
||||
roon_library_path = os.getenv("ROON_LIBRARY_PATH", "")
|
||||
|
||||
# ADDITIONAL_WATCH_TARGETS — 쉼표 구분 (공백 제거)
|
||||
awt_raw = os.getenv("ADDITIONAL_WATCH_TARGETS", "")
|
||||
additional_watch_targets = [p.strip() for p in awt_raw.split(",") if p.strip()]
|
||||
|
||||
# config.yaml — Docker 컨테이너 내부(/app/config.yaml) 또는 프로젝트 루트
|
||||
config_path = Path("/app/config.yaml")
|
||||
if not config_path.exists():
|
||||
@@ -172,6 +182,7 @@ def load_settings() -> Settings:
|
||||
ocr_endpoint=ocr_endpoint,
|
||||
stt_endpoint=stt_endpoint,
|
||||
roon_library_path=roon_library_path,
|
||||
additional_watch_targets=additional_watch_targets,
|
||||
taxonomy=taxonomy,
|
||||
document_types=document_types,
|
||||
upload=upload_cfg,
|
||||
|
||||
@@ -21,9 +21,12 @@ PR-B B-1 tier triage (신규, 4B gemma Ollama):
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime, timezone
|
||||
|
||||
import yaml
|
||||
|
||||
from pydantic import BaseModel, Field, ValidationError
|
||||
from sqlalchemy import text as sql_text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
@@ -301,8 +304,42 @@ def _distill(triage_out: TriageOutput, limit: int = 2000) -> str:
|
||||
return "\n".join(parts)[:limit]
|
||||
|
||||
|
||||
|
||||
# ───────────────────── frontmatter 파싱 (옵션 C) ──────────────────────
|
||||
|
||||
# Split a leading YAML frontmatter block (--- ... ---) from the document body.
# A fence only counts when `---` stands alone on its line (trailing spaces or
# tabs allowed); both LF and CRLF line endings are accepted. Anything that does
# not fit this shape falls back to "no frontmatter".
_FM_PATTERN = re.compile(
    r"^---[ \t]*\r?\n(.*?)\r?\n---[ \t]*(?:\r?\n\s*(.*))?\Z",
    re.DOTALL,
)


def _parse_frontmatter(extracted_text: str) -> tuple[dict, str]:
    """Split ``extracted_text`` into ``(frontmatter_dict, body)``.

    Args:
        extracted_text: Full document text; may be empty or ``None``.

    Returns:
        ``(frontmatter, body)`` when the text starts with a well-formed YAML
        frontmatter block whose content parses to a mapping. ``body`` is the
        text after the closing fence with leading whitespace removed (``""``
        when nothing follows the fence).
        ``({}, extracted_text)`` in every other case: no opening fence, no
        closing fence, a YAML syntax error, or YAML that is not a mapping.
        The function never raises on malformed frontmatter.
    """
    if not extracted_text or not extracted_text.startswith("---"):
        return {}, extracted_text

    m = _FM_PATTERN.match(extracted_text)
    if m is None:
        # Opening fence without a matching closing fence line.
        return {}, extracted_text

    try:
        # safe_load only: the text comes from user-supplied files.
        fm = yaml.safe_load(m.group(1))
    except yaml.YAMLError:
        return {}, extracted_text

    # A scalar, a list or an empty block is not usable metadata.
    if not isinstance(fm, dict):
        return {}, extracted_text

    # NOTE(review): safe_load converts ISO dates/timestamps to date/datetime
    # objects; callers that persist this dict as JSON should confirm that
    # their serializer accepts those values.
    return fm, m.group(2) or ""
|
||||
|
||||
|
||||
# Frontmatter keys that carry source-tracing metadata (code / section /
# source_pdf / source_pages / source_basis / verified_level /
# verification_pending, ...). The intent is that the LLM must never overwrite
# these values.
# NOTE(review): nothing in the visible part of this file reads this set;
# confirm where the protection is actually enforced.
_FRONTMATTER_PRESERVED_KEYS = {
    "code",
    "kgs_code",
    "section",
    "source_basis",
    "source_pages",
    "source_pdf",
    "source_type",
    "verification_pending",
    "verified_level",
}
|
||||
|
||||
|
||||
# ───────────────────────── main process ────────────────────────────────
|
||||
|
||||
|
||||
async def process(document_id: int, session: AsyncSession) -> None:
|
||||
"""문서 분류 + 요약 + tier triage.
|
||||
|
||||
@@ -334,6 +371,59 @@ async def process(document_id: int, session: AsyncSession) -> None:
|
||||
if not doc.extracted_text:
|
||||
raise ValueError(f"문서 ID {document_id}: extracted_text가 비어있음")
|
||||
|
||||
# ─── 옵션 C: markdown frontmatter 우선 인식 ───────────────────────────
|
||||
# KGS Code 등 외부 작성 마크다운은 frontmatter 에 정확한 메타가 있다.
|
||||
# title / tags / ai_summary / ai_domain 은 frontmatter 에 있으면 그대로 사용,
|
||||
# 없는 필드만 LLM 호출. code/section/source_pages/verified_level 등 원문
|
||||
# 추적 메타는 documents.md_frontmatter JSONB 에 보존하고 LLM 이 덮어쓰지 못하게 한다.
|
||||
fm, body = _parse_frontmatter(doc.extracted_text)
|
||||
if fm:
|
||||
# frontmatter 전체를 md_frontmatter JSONB 에 저장 (원문 추적용 single source)
|
||||
doc.md_frontmatter = fm
|
||||
|
||||
# 우선 반영 (LLM 보다 신뢰도 높음, frontmatter 가 authoritative)
|
||||
if fm.get("title"):
|
||||
doc.title = str(fm["title"])
|
||||
|
||||
fm_tags = fm.get("tags")
|
||||
if isinstance(fm_tags, list) and fm_tags:
|
||||
# ai_tags 에 frontmatter 태그 우선 적재 (LLM 이 추가만 가능)
|
||||
doc.ai_tags = [str(t) for t in fm_tags]
|
||||
|
||||
if fm.get("ai_domain"):
|
||||
doc.ai_domain = str(fm["ai_domain"])
|
||||
parts = doc.ai_domain.split("/")
|
||||
if len(parts) > 1 and not doc.ai_sub_group:
|
||||
doc.ai_sub_group = parts[1]
|
||||
|
||||
if fm.get("ai_sub_group"):
|
||||
doc.ai_sub_group = str(fm["ai_sub_group"])
|
||||
|
||||
if fm.get("document_type"):
|
||||
doc.document_type = str(fm["document_type"])
|
||||
|
||||
if fm.get("ai_summary"):
|
||||
doc.ai_summary = str(fm["ai_summary"])
|
||||
|
||||
if fm.get("importance") in ("high", "medium", "low"):
|
||||
doc.importance = fm["importance"]
|
||||
|
||||
# 핵심 메타 (title + ai_domain + ai_summary) 가 모두 frontmatter 로 채워졌으면
|
||||
# LLM classify/summarize 스킵. tier triage 도 스킵 (frontmatter 가 더 정확).
|
||||
# frontmatter 미커버 필드는 그대로 두어 향후 필요 시 manual UI 채움.
|
||||
if doc.title and doc.ai_domain and doc.ai_summary:
|
||||
if not doc.ai_confidence:
|
||||
doc.ai_confidence = 1.0 # frontmatter 는 사람이 작성한 단정값
|
||||
doc.ai_processed_at = datetime.now(timezone.utc)
|
||||
doc.ai_model_version = "frontmatter@manual"
|
||||
await session.commit()
|
||||
logger.info(f"doc {document_id}: frontmatter 옵션 C → classify/summarize/triage 전부 skip")
|
||||
return
|
||||
|
||||
# 일부만 frontmatter 에 있을 때는 LLM 으로 미설정 필드 보완. 단 _FRONTMATTER_PRESERVED_KEYS
|
||||
# 는 이미 md_frontmatter 에 있으므로 LLM 이 ai_domain/document_type 등에 영향 못 준다.
|
||||
logger.info(f"doc {document_id}: frontmatter 부분 인식 → LLM 으로 미설정 필드 보완")
|
||||
|
||||
client = AIClient()
|
||||
try:
|
||||
# ─── 1. Legacy classify (primary 26B) ───
|
||||
@@ -344,17 +434,19 @@ async def process(document_id: int, session: AsyncSession) -> None:
|
||||
if not parsed:
|
||||
raise ValueError(f"AI 응답에서 JSON 추출 실패: {raw_response[:200]}")
|
||||
|
||||
# domain 검증
|
||||
# domain 검증 (frontmatter 가 이미 채웠으면 LLM 결과 무시)
|
||||
domain = _validate_domain(parsed.get("domain", ""))
|
||||
doc.ai_domain = domain
|
||||
if not doc.ai_domain:
|
||||
doc.ai_domain = domain
|
||||
|
||||
# sub_group은 domain 경로에서 추출 (호환성)
|
||||
parts = domain.split("/")
|
||||
doc.ai_sub_group = parts[1] if len(parts) > 1 else ""
|
||||
|
||||
# document_type 검증
|
||||
# document_type 검증 (frontmatter 가 이미 채웠으면 LLM 결과 무시)
|
||||
doc_type = parsed.get("document_type", "")
|
||||
doc.document_type = doc_type if doc_type in DOCUMENT_TYPES else "Note"
|
||||
if not doc.document_type:
|
||||
doc.document_type = doc_type if doc_type in DOCUMENT_TYPES else "Note"
|
||||
|
||||
# confidence
|
||||
confidence = parsed.get("confidence", 0.5)
|
||||
|
||||
@@ -31,6 +31,9 @@ AUDIO_EXTS = {".mp3", ".m4a", ".opus", ".wav", ".flac", ".ogg"}
|
||||
VIDEO_DIRECT_EXTS = {".mp4", ".webm"} # 브라우저 direct play
|
||||
VIDEO_QUARANTINE_EXTS = {".mov", ".mkv", ".avi"} # 변환 필요, 보관만
|
||||
|
||||
# library (외부 작성 학습 자료) 폴더 — md/pdf/docx 등 문서 확장자만 수락
|
||||
LIBRARY_DOC_EXTS = {".md", ".pdf", ".docx", ".doc", ".txt", ".rtf", ".html", ".odt"}
|
||||
|
||||
# 스캔 대상: (하위경로, 예상 category) — None 은 문서함(카테고리 미지정)
|
||||
SCAN_TARGETS: list[tuple[str, str | None]] = [
|
||||
("Inbox", None),
|
||||
@@ -77,6 +80,15 @@ def _route_media(path: Path, expected_category: str | None) -> tuple[str | None,
|
||||
return ("video", True, None)
|
||||
return (None, False, None) # 기타 → skip
|
||||
|
||||
if expected_category == "library":
|
||||
# 외부 작성 학습 자료 (KGS Code, 시행규칙 등). 문서 확장자만 수락.
|
||||
# frontmatter 해석은 classify_worker (옵션 C) 가 담당. file_watcher 는 라우팅만.
|
||||
if ext in LIBRARY_DOC_EXTS:
|
||||
return ("library", False, "extract")
|
||||
if ext in AUDIO_EXTS or ext in VIDEO_DIRECT_EXTS or ext in VIDEO_QUARANTINE_EXTS:
|
||||
return (None, False, None) # audio/video 잘못 들어오면 skip
|
||||
return (None, False, None) # 기타 알 수 없는 확장자 skip
|
||||
|
||||
# Inbox: 문서 파이프 (기존). audio/video 확장자가 실수로 여기 들어오면 skip.
|
||||
if ext in AUDIO_EXTS or ext in VIDEO_DIRECT_EXTS or ext in VIDEO_QUARANTINE_EXTS:
|
||||
return (None, False, None)
|
||||
@@ -92,8 +104,14 @@ async def watch_inbox():
|
||||
new_count = 0
|
||||
changed_count = 0
|
||||
|
||||
# 동적 스캔 대상 합성: 기본 (Inbox/Recordings/Videos) + env 로 확장된 library 경로
|
||||
# settings.additional_watch_targets 는 PKM 상대 경로 리스트 (예: "Knowledge/Industrial_Safety/가스기사/KGS_Code")
|
||||
targets = list(SCAN_TARGETS)
|
||||
for extra_path in settings.additional_watch_targets:
|
||||
targets.append((extra_path, "library"))
|
||||
|
||||
async with async_session() as session:
|
||||
for sub, expected_category in SCAN_TARGETS:
|
||||
for sub, expected_category in targets:
|
||||
scan_root = pkm_root / sub
|
||||
if not scan_root.exists():
|
||||
continue
|
||||
|
||||
+8
-8
@@ -83,11 +83,10 @@ services:
|
||||
restart: unless-stopped
|
||||
|
||||
stt-service:
|
||||
# 2026-04-24: STT 가 Mac mini (faster-whisper, 192.168.1.122:8804 / 100.76.254.116:8804)
|
||||
# 로 이전됨. GPU 에서 컨테이너는 더 이상 기동하지 않는다. 복원이 필요하면
|
||||
# `docker compose --profile legacy up -d stt-service` 로 legacy 프로파일 활성화.
|
||||
# fastapi 의 STT_ENDPOINT 도 Mac mini 주소를 가리킴 (아래 environment 참고).
|
||||
profiles: [legacy]
|
||||
# 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
|
||||
# 정책: Mac mini = Gemma 26B 전용 우선이므로 STT/Whisper 는 호출량 무관 GPU 서버 소유.
|
||||
# 이전 "Mac mini 이전본" 주석은 trace 오인 기반이었고 본 revised 결정으로 폐기.
|
||||
# fastapi 의 STT_ENDPOINT 는 `http://stt-service:3300` (compose 내부 DNS) 사용.
|
||||
build: ./services/stt
|
||||
expose:
|
||||
- "3300"
|
||||
@@ -191,9 +190,10 @@ services:
|
||||
- OCR_ENDPOINT=http://ocr-service:3200
|
||||
- MARKER_ENDPOINT=http://marker-service:3300
|
||||
- MARKER_CONTAINER_PATH_PREFIX=/documents
|
||||
# 2026-04-24 STT Mac mini 이전: 기본값 100.76.254.116:8804 (Tailscale), 필요 시
|
||||
# MAC_MINI_HOST env 로 192.168.1.122 등 LAN IP 주입.
|
||||
- STT_ENDPOINT=http://${MAC_MINI_HOST:-100.76.254.116}:8804
|
||||
# 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
|
||||
- STT_ENDPOINT=http://stt-service:3300
|
||||
# KGS Code 등 외부 학습 자료 추가 스캔 경로 (host .env 에서 주입). 빈 값이면 비활성.
|
||||
- ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
|
||||
restart: unless-stopped
|
||||
|
||||
frontend:
|
||||
|
||||
@@ -117,3 +117,143 @@ docker compose exec fastapi python /app/scripts/phase1d_pilot.py select \
|
||||
```
|
||||
|
||||
**enqueue 의 `--yes` 또는 `--no-dry-run` 류 실행은 별도 사용자 승인 + 야간 단발 sweep 윈도우 (23:00~03:00 KST) 안에서만**. 30건 backfill = marker-service BATCH_SIZE=1 × 평균 5분/건 ≈ 2.5h.
|
||||
|
||||
---
|
||||
|
||||
# Phase 2 — Full Backfill (legacy pending PDFs)
|
||||
|
||||
> Plan: `~/.claude/plans/iridescent-gathering-clover.md`
|
||||
> Script: `scripts/phase2_backfill.py` (subcommands: inventory / select-canary / enqueue / nightly-enqueue / post-report)
|
||||
|
||||
## 목적
|
||||
|
||||
1D pilot 결과 = engineering go signal. legacy pending PDF (1D 후 잔여 ~237건) 을 marker_worker 로 변환해 `md_status='success'` 누적. **신규 업로드 우선권 보존, 야간 저부하 sweep, DB state 기반 idempotent checkpoint**.
|
||||
|
||||
진행 로드맵: **2-A dry-run inventory → 2-B canary 40건 → 2-C nightly sweep ~4-5 nights → 2-D post-report**.
|
||||
|
||||
## 파일
|
||||
|
||||
| 파일 | 역할 | 갱신 시점 |
|
||||
|---|---|---|
|
||||
| `phase2_inventory.csv` | pending PDFs dry-run inventory + skip forecast | 2-A 종료 (commit, 1회) |
|
||||
| `phase2_canary_sample.csv` | stratified 40건 canary sample (시드 `20260503`) | 2-B(a) (commit) |
|
||||
| `phase2_canary_result.md` | canary 결과 요약 + 1D 비교 + GO/HALT 결정 근거 | 2-B 종료 (commit) |
|
||||
| `phase2_nightly_log.tsv` | 야간 sweep 한 줄/일 (date / enqueued / active_queue_at_start / active_queue_oldest_age_min / pending_pool_remaining / abort_reason / marker_ready) | append 매 sweep, 주 1회 commit |
|
||||
| `phase2_post_report.csv` | Phase 2 sweep 처리된 doc 별 final state + quality | 2-D (commit) |
|
||||
| `phase2_post_report.md` | 처리 분포 + 1D baseline 비교 + skip/failed/outlier 목록 | 2-D (commit) |
|
||||
|
||||
## Subcommand 사용법
|
||||
|
||||
### inventory (read-only, dry-run)
|
||||
```bash
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py inventory \
|
||||
--output /app/evals/markdown/phase2_inventory.csv
|
||||
```
|
||||
- pending PDFs 전체에 대해 doc_id / file_size / text_density / doc_type / forecast_skip_reason 적재.
|
||||
- forecast_skip_reason ∈ {unsupported_extension / doctype_skip / handwritten_hint / over_max_pages_estimated / none}. 'none' = 변환 시도 후보.
|
||||
- handwritten_hint = title/path 에 `필기|손글씨|handwritten|handwriting` 매칭 (marker_worker 의 7d0fca2 룰 미러).
|
||||
- over_max_pages_estimated = file_size > 25MB proxy. 실 page_count 는 marker_worker 가 PyMuPDF 로 결정.
|
||||
|
||||
### select-canary (재현성 시드)
|
||||
```bash
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py select-canary \
|
||||
--inventory /app/evals/markdown/phase2_inventory.csv \
|
||||
--output /app/evals/markdown/phase2_canary_sample.csv \
|
||||
--seed 20260503
|
||||
```
|
||||
- 40건 buckets: large 6 / scan_likely 2 / study_note 10 / Academic_Paper 8 / Reference 6 / {Standard,Manual,Specification} 4 / {Note,Report,Memo,NULL} 4
|
||||
- inventory 의 `forecast_skip_reason='none'` 만 선정 후보.
|
||||
- 시드 고정 → 재실행 시 동일 sample.
|
||||
|
||||
### 경로 정책 (2-B canary vs 2-C nightly)
|
||||
|
||||
`/app/scripts/` 와 `/app/evals/` 는 parent repo (`~/Documents/code/hyungi_Document_Server`, branch=main) 의 read-only bind-mount. `docker cp ... :/app/scripts/...` 는 read-only 위반으로 실패. `docker compose up --build` 는 검색 실험 soft lock 위반. → 단계별로 다른 경로 사용:
|
||||
|
||||
| 단계 | script 경로 | sample CSV 경로 | 메커니즘 |
|
||||
|---|---|---|---|
|
||||
| **2-B canary (pre-merge)** | `/app/logs/phase2_backfill.py` | `/app/logs/phase2_canary_sample.csv` | `docker cp` worktree → /app/logs (rw bind-mount) |
|
||||
| **2-C nightly (post-merge canonical)** | `/app/scripts/phase2_backfill.py` | `/app/evals/markdown/phase2_*` | feat/phase2-backfill main 머지 + parent `git pull` 후 bind-mount 자동 활성 |
|
||||
|
||||
**2-B 임시 sync** (canary 실행 직전 1회):
|
||||
```bash
|
||||
docker cp ~/Documents/code/hyungi_Document_Server_phase2/scripts/phase2_backfill.py \
|
||||
hyungi_document_server-fastapi-1:/app/logs/phase2_backfill.py
|
||||
docker cp ~/Documents/code/hyungi_Document_Server_phase2/evals/markdown/phase2_canary_sample.csv \
|
||||
hyungi_document_server-fastapi-1:/app/logs/phase2_canary_sample.csv
|
||||
```
|
||||
|
||||
**2-C 진입 시점** (canary GO 결정 후):
|
||||
```bash
|
||||
cd ~/Documents/code/hyungi_Document_Server_phase2 && git push origin feat/phase2-backfill
|
||||
cd ~/Documents/code/hyungi_Document_Server && git fetch origin \
|
||||
&& git merge --ff-only origin/feat/phase2-backfill && git push origin main
|
||||
```
|
||||
|
||||
이유: 미검증 코드를 main 에 미리 박지 않음 / canary 결과 따라 worktree 에서 hot-fix 가능 / nightly cron 은 canonical path 사용 (script 자체).
|
||||
|
||||
**추가 (2026-05-03)**: nightly cron 의 `--log-tsv` 와 post-report 출력은 `/app/logs/` 사용 (위 표의 canonical path 가 아님). `/app/evals/markdown/` 는 fastapi 컨테이너에 **bind-mount 되어 있지 않아** 컨테이너 writable layer 에 쓰면 컨테이너 재기동 시 유실. `/app/logs/` 는 rw bind-mount → host `~/Documents/code/hyungi_Document_Server/logs/` 에 영구 저장. 주 1회 commit 시 `cp ~/Documents/code/hyungi_Document_Server/logs/phase2_nightly_log.tsv evals/markdown/` 로 복사 후 git add.
|
||||
|
||||
|
||||
### enqueue (one-shot, 사용자 승인 게이트)
|
||||
```bash
|
||||
# dry-run (default) — 2-B 단계 = /app/logs 임시 경로 (위 §"경로 정책" 참조)
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/logs/phase2_backfill.py enqueue \
|
||||
--csv /app/logs/phase2_canary_sample.csv
|
||||
|
||||
# actual (사용자 승인 후)
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/logs/phase2_backfill.py enqueue \
|
||||
--csv /app/logs/phase2_canary_sample.csv --no-dry-run
|
||||
```
|
||||
- marker-service `/ready` 사전 검증.
|
||||
- `enqueue_stage` idempotent — 중복 호출 안전.
|
||||
|
||||
### nightly-enqueue (cron / manual)
|
||||
```bash
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py nightly-enqueue \
|
||||
--limit 50 --max-active-queue 5 \
|
||||
--log-tsv /app/logs/phase2_nightly_log.tsv # /app/evals/ 미 bind-mount, /app/logs/ rw 사용
|
||||
```
|
||||
- 가드 순서: disable flag (`/tmp/phase2_disable`) → marker /ready → active_queue ≤ threshold → DB pool 비어있지 않음 → enqueue.
|
||||
- 매 sweep log_tsv 한 줄. abort_reason ∈ {disable_flag / marker_unhealthy / active_queue_threshold / pool_empty / empty}.
|
||||
- pool_empty = Phase 2 자연 완료 신호 (cron 제거 hard gate trigger).
|
||||
|
||||
### post-report
|
||||
```bash
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py post-report \
|
||||
--output-csv /app/logs/phase2_post_report.csv \
|
||||
--output-md /app/logs/phase2_post_report.md \
|
||||
--phase2-start 2026-05-03T00:00:00Z
|
||||
```
|
||||
- `--phase2-start` ISO timestamp 이후 `md_generated_at` 만 집계 (Phase 2 코드 push 시점 권장).
|
||||
- 1D baseline (success 92% / elapsed_p50 34s / text_length_ratio_p50 1.15) 와 비교.
|
||||
- outlier 후보: elapsed_ms > 300s, text_length_ratio < 0.5 또는 > 10, 신규 warning 종류.
|
||||
|
||||
## 의사결정 게이트
|
||||
|
||||
### 2-B canary GO/HALT
|
||||
- success ≥ 36/40 (90%) AND failed ≤ 2 AND skipped ≤ 6 → **2-C 진입 GO**
|
||||
- 위 미충족 → HALT, 사용자 보고 후 재검토
|
||||
|
||||
### 2-C nightly abort
|
||||
- 1 night 안에 failed > 5 → script 가 disable flag 자동 생성
|
||||
- marker-service `/ready` 실패 → 그 sweep 건너뜀, 다음 sweep 재시도
|
||||
- active_queue_oldest_age_min > 60 (stuck 임계) → log 에 [warn], 사용자 morning check 으로 판단
|
||||
|
||||
### 2-D 종료 hard gate (Phase 2 closed 선언 직전)
|
||||
- (cron 모드) `crontab -l | grep phase2_backfill` 결과 비어 있어야 함
|
||||
- `~/.phase2_disable` 파일 정리 됨
|
||||
- pending PDF (`md_status='pending'`, `file_format='pdf'`) ≤ 5
|
||||
- processing_queue markdown active = 0
|
||||
|
||||
## 1D 와의 차이
|
||||
|
||||
| 항목 | 1D | Phase 2 |
|
||||
|---|---|---|
|
||||
| 목적 | failure mode 진단 | 풀 변환 |
|
||||
| 대상 | 30건 stratified | 237건 잔여 |
|
||||
| sample_source | existing_success + controlled_backfill | controlled_backfill only |
|
||||
| 처리 모드 | one-shot cron (1회) | nightly cron (~4-5 nights) |
|
||||
| 평가 | 사용자 5축 rubric | marker 자가 metrics + 1D baseline 비교 |
|
||||
| anchor 보존 | doc 4809 forced_include | (재처리 안 함) |
|
||||
| handwritten | over-sample (3건) | marker_worker 자동 skip 신뢰 |
|
||||
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
# Phase 2 Canary Result — 40건 stratified backfill
|
||||
|
||||
> 실행: 2026-05-02 23:50 UTC enqueue → ~02:30 UTC 완료 (wall ~2h 40m, marker BATCH_SIZE=1)
|
||||
> sample CSV: `evals/markdown/phase2_canary_sample.csv` (seed 20260503)
|
||||
> plan 결정 게이트: §"2-B canary GO/HALT"
|
||||
|
||||
## 결과 분포
|
||||
|
||||
| md_status | count | rate |
|
||||
|---|---|---|
|
||||
| success | 35 | 87.5% |
|
||||
| failed | 3 | 7.5% |
|
||||
| skipped | 1 | 2.5% |
|
||||
| processing (stuck) | 1 | 2.5% |
|
||||
|
||||
총 40건. 이 중 1건은 corner case (doc 은 `processing` 으로 stuck, queue 측은 max_attempts 도달 후 failed) 이므로 failed 3건과 합산하면 **실효 doc-level 실패 = 4 / 40 = 10%**.
|
||||
|
||||
## Plan 게이트 판정
|
||||
|
||||
| 기준 | 임계 | 실측 | 판정 |
|
||||
|---|---|---|---|
|
||||
| success rate | ≥ 36/40 (90%) | 35/40 (87.5%) | **FAIL** (-1) |
|
||||
| failed | ≤ 2 | 4 (실효) | **FAIL** (+2) |
|
||||
| skipped | ≤ 6 | 1 | PASS |
|
||||
|
||||
**→ HALT (사용자 보고 후 재검토)**.
|
||||
|
||||
## 실패 분석 — 4건 모두 root cause 확인
|
||||
|
||||
| doc_id | 분류 | 표면 에러 | root cause | 재시도 가능? |
|
||||
|---|---|---|---|---|
|
||||
| 3817 | failed | `OutOfMemoryError: CUDA out of memory. Tried to allocate 74 MiB` | **GPU contention** (canary 진행 중 다른 프로세스 5.93+5.35 GiB 사용 → free 55 MiB) | ✅ Yes (GPU free 시) |
|
||||
| 4059 | failed | `OutOfMemoryError: CUDA out of memory. Tried to allocate 176 MiB` | **GPU contention** 동일 | ✅ Yes |
|
||||
| 3810 | failed | `PdfiumError: Failed to load page` | 진짜 PDF 파싱 오류 (corrupt/protected page 가능성) | ❌ No (Marker 책임 아님) |
|
||||
| 5201 | processing (corner) | `ReadTimeout('')` 3회 (queue) | scan-likely 추정 (705 KB / text_len=15) — OCR-bound 처리 시간 > 300s timeout. queue 측 attempts=3 도달 후 failed, 그러나 doc.md_status='processing' 으로 stuck (Phase 1B 알려진 corner case) | ⚠ (timeout 늘리거나 SKIP rule 확장 필요) |
|
||||
|
||||
**핵심 인사이트**: **Marker quality 자체는 0건 fail**. 실패 4건의 분류:
|
||||
- 50% (2/4) = **GPU 일시 contention** — infra 이슈, 사용자 활동 따라 재발 가능
|
||||
- 25% (1/4) = **PDF 자체 corrupt** — 정상 비율, non-retryable
|
||||
- 25% (1/4) = **scan-likely + 작은 file_size + 빈 텍스트** — 새 SKIP 후보 패턴
|
||||
|
||||
## 처리 시간 (success 35건, 1D baseline 비교)
|
||||
|
||||
| 메트릭 | 1D | 2-B canary | delta |
|
||||
|---|---|---|---|
|
||||
| avg | - | 44.3s | - |
|
||||
| p50 | 34s | 33.2s | -2% (안정) |
|
||||
| p90 | 112s | 92.6s | -17% (개선) |
|
||||
| max | 219s | 297.1s | +36% (large bias 영향) |
|
||||
|
||||
전반적으로 1D 와 동등 또는 약간 빠름. **마커 자체 throughput 안정**.
|
||||
|
||||
## Quality 메트릭 (success 35건, 1D baseline 비교)
|
||||
|
||||
| 메트릭 | 1D | 2-B canary | delta |
|
||||
|---|---|---|---|
|
||||
| text_length_ratio p50 | 1.15 | 1.00 | -13% (정상 범위) |
|
||||
| warnings: heading_hierarchy_jump | 86% (24/28) | 94% (33/35) | +8pp |
|
||||
| warnings: low_image_alt_text_ratio | 89% (25/28) | 86% (30/35) | -3pp |
|
||||
|
||||
신규 warning 종류 없음. heading_hierarchy_jump 가 거의 전건이라는 패턴 재확인 (1D 와 동일) — Marker output 의 heading 구조가 원본과 정확히 일치하지 않는 일반적 현상, score 기준 정의 필요.
|
||||
|
||||
## GPU contention 상세
|
||||
|
||||
canary 처리 시점 (~00:00-02:00 UTC) GPU 점유 process (canary 진행 중 nvidia-smi 추정):
|
||||
- text-embeddings-router: 1.35 GiB
|
||||
- python (process 1200154): 1.85 GiB
|
||||
- python (process 1562575): 5.93 GiB ← **largest, suspect Ollama or marker self**
|
||||
- python (process 1570943): 5.35 GiB
|
||||
- python (process 1612684): 1.01 GiB
|
||||
- **합계 ~15.5 GiB / 16.4 GiB capacity → free ~50-100 MiB**
|
||||
|
||||
canary 종료 후 (02:31 UTC) 측정:
|
||||
- text-embeddings-router: 1.38 GiB
|
||||
- python (1200154): 1.89 GiB
|
||||
- python (1562575): 3.83 GiB
|
||||
- **합계 7.1 GiB → free 8.8 GiB**
|
||||
|
||||
→ canary 중 5.93 GiB + 5.35 GiB 를 점유하던 두 대형 프로세스가 종료 후 측정에서는 3.83 GiB (process 1562575) 하나만 남음 (메모리를 반환했거나 둘 중 하나가 종료). 즉 canary 시점에만 일시적으로 부하가 누적된 상태였음.
|
||||
|
||||
## 권장 조정안 (HALT 후)
|
||||
|
||||
게이트는 통과하지 못했지만 **failure 4건 모두 설명 가능**. Marker 본질 결함 0건. 다음 중 사용자 결정:
|
||||
|
||||
### A. 그대로 2-C 진입 (수용)
|
||||
- 근거: Marker quality 0 fail, GPU OOM 은 야간 sweep (23-03 KST) 윈도우 안에 contention 적을 가능성 높음 (사용자 활동 종료, Ollama 호출 적음)
|
||||
- 보완: 2-C 가드에 GPU free memory 사전 체크 추가 검토 (script 1 줄 — 단, 본 plan scope 외)
|
||||
- 실패 4건 (3817/4059/3810/5201) 은 별도 retry 또는 backlog
|
||||
|
||||
### B. Marker 코드 / 가드 보강 후 재시도
|
||||
- B1. **scan-likely + tiny text 자동 skip** (5201 패턴): marker_worker 에 `file_size < 1MB AND text_len < 100` 또는 `text_density < 1` 시 `md_status='skipped'`
|
||||
- B2. **OOM 을 transient 로 분류**: server.py 가 OutOfMemoryError 만 503 으로 raise → marker_worker 가 5xx → queue retry. (현재는 422 = non-retryable)
|
||||
- B3. **queue 영구 실패 시 doc.md_status='failed' 동기화**: Phase 1B 알려진 corner case 정리 (5201 패턴 제거)
|
||||
- 작업: marker_worker.py + server.py 변경 → 별도 PR/커밋 → canary 재실행
|
||||
|
||||
### C. canary 일부 재시도 → 결과 갱신
|
||||
- 3817, 4059 만 재 enqueue (GPU 지금 free) → 성공 확인 → **실효 success 37/40 = 92.5%, failed 2 (3810+5201) = 5%, skipped 1 = 2.5% → GO 게이트 통과**
|
||||
- 가장 빠른 unblock 경로
|
||||
|
||||
### D. HALT 유지 + 백로그 정리
|
||||
- canary 결과 그대로 인정. 4 failed/stuck doc 은 Phase 2 마지막 후속 정리.
|
||||
- 2-C 진입 보류.
|
||||
|
||||
## 다음 단계 — 사용자 결정 필요
|
||||
|
||||
위 A/B/C/D 중 선택. 추천 = **C (3817+4059 즉시 재 enqueue)** 또는 **A (그대로 진입, 야간 GPU contention 낮음 가정)**. B 는 plan scope 확장이라 별도 라운드 권장.
|
||||
|
||||
5201 의 stuck 'processing' 은 어느 옵션 가도 **수동 정리 필요** (`UPDATE documents SET md_status='failed', md_extraction_error='ReadTimeout after 3 attempts (Phase 1B corner case)' WHERE id=5201`). production DB write 라 사용자 승인 후.
|
||||
|
||||
---
|
||||
|
||||
## Retry 결과 (2026-05-03 02:36-02:39 UTC) — GO 게이트 통과
|
||||
|
||||
옵션 C 실행 결과:
|
||||
|
||||
| 작업 | 결과 |
|
||||
|---|---|
|
||||
| 5201 documents 정리 (`md_status='processing' → 'failed'`, conditional UPDATE) | 1 row updated, `md_extraction_error` 새로 기록 |
|
||||
| 5201 queue 정리 | 불필요 (이미 status='failed' terminal) |
|
||||
| 3817 재 enqueue | success (35.8s, GPU free 8820 MiB) |
|
||||
| 4059 재 enqueue | success (100.7s, large bias 영향) |
|
||||
|
||||
### 최종 분포
|
||||
|
||||
| md_status | count | rate |
|
||||
|---|---|---|
|
||||
| success | **37** | 92.5% |
|
||||
| failed | **2** | 5.0% |
|
||||
| skipped | **1** | 2.5% |
|
||||
|
||||
failed 2건 분류:
|
||||
- doc 3810: corrupt PDF (PdfiumError) — Marker 책임 아님, non-retryable
|
||||
- doc 5201: scan-likely + 빈 텍스트 + ReadTimeout — Phase 1B+ corner case backlog
|
||||
|
||||
### Plan 게이트 재판정
|
||||
|
||||
| 기준 | 임계 | 실측 | 판정 |
|
||||
|---|---|---|---|
|
||||
| success rate | ≥ 36/40 (90%) | 37/40 (92.5%) | ✅ PASS |
|
||||
| failed | ≤ 2 | 2 | ✅ PASS |
|
||||
| skipped | ≤ 6 | 1 | ✅ PASS |
|
||||
|
||||
**→ 2-C nightly sweep 진입 GO.**
|
||||
|
||||
### 다음 = 2-C 진입 게이트 (사용자 승인 필요)
|
||||
|
||||
옵션:
|
||||
- **모드 A (cron)** — 추천. 23:00 KST nightly, 50건/limit, max-active-queue 5
|
||||
- 모드 B (manual nightly)
|
||||
- 모드 C (systemd timer)
|
||||
|
||||
cron 추가 전 필수 작업:
|
||||
1. main 머지: feat/phase2-backfill (HEAD `79dc31b` 또는 retry 반영 후 새 commit) → main FF
|
||||
2. parent repo `git pull --ff-only` → `/app/scripts/phase2_backfill.py` canonical path 활성
|
||||
3. `~/logs/phase2/` 디렉토리 생성
|
||||
4. crontab entry 추가
|
||||
|
||||
사용자 승인 + 모드 선택 시 위 4 단계 진행.
|
||||
|
||||
### Phase 1B+ backlog (Phase 2 외 별도)
|
||||
|
||||
retry 결과는 다음 후속 작업의 우선순위 데이터:
|
||||
|
||||
- **B2 후보**: server.py 가 OutOfMemoryError 만 503 으로 분류 (현재 422 = non-retryable). 야간 GPU contention 발생 시 자동 retry 가능. → 1B+ small PR
|
||||
- **B1 후보**: marker_worker scan-likely + tiny text 자동 skip (5201 패턴). text_density < 1 OR text_len < 100 시 skipped. → 1B+ small PR
|
||||
- **B3 후보**: queue 영구 실패 시 doc.md_status 동기화. processing 상태 stuck 방지. → 1B+ small PR
|
||||
@@ -0,0 +1,41 @@
|
||||
doc_id,title,file_size,file_size_band,text_density,text_density_band,doc_type,bucket_label
|
||||
3817,"Safety and Health for Engineers_05_37 Safety and Health Data, Analysis and Management Information",11313981,L,11.264,mixed,study_note,large
|
||||
4059,공업역학 동역학(제13판)_Chapter 12 질점의 운동학,11140007,L,8.78,mixed,Academic_Paper,large
|
||||
5264,황현필의 진보를 위한 역사_2장 식민지 수탈과 학살의 진실을 말하라,14154100,L,2.14,scan-likely,Academic_Paper,large
|
||||
5271,황현필의 진보를 위한 역사_9장 박정희 신격화를 중단하라,16266788,L,1.878,scan-likely,Academic_Paper,large
|
||||
5267,황현필의 진보를 위한 역사_5장 해방정국을 감추지 말라,12320415,L,1.617,scan-likely,Note,large
|
||||
5090,일반기계기사_실전 모의고사,24976506,L,15.75,mixed,Academic_Paper,large
|
||||
8857,"TKP-26-0132_신양철강_74,290원 - 복사본",450026,S,4.407,scan-likely,Specification,scan_likely
|
||||
5116,표준기계설계(KS)_04_나사,21167224,L,2.759,scan-likely,Standard,scan_likely
|
||||
5238,근로자 노출평가제도 내 위험성평가 방법론의 적용,393908,S,89.488,born-digital,study_note,study_note
|
||||
5240,사업장 위험성평가에 관한 법제의 비교법적 고찰,437377,S,83.786,born-digital,study_note,study_note
|
||||
3771,Industrial Safety and Health Management(7-ED)_16 Welding,1239353,M,62.251,born-digital,study_note,study_note
|
||||
3762,Industrial Safety and Health Management(7-ED)_7 Buildings and Facilities,1123847,M,60.053,born-digital,study_note,study_note
|
||||
3768,Industrial Safety and Health Management(7-ED)_13 Fire Protection,725779,S,62.01,born-digital,study_note,study_note
|
||||
3764,Industrial Safety and Health Management(7-ED)_9 Health and Toxic Substances,1409470,M,66.395,born-digital,study_note,study_note
|
||||
3766,Industrial Safety and Health Management(7-ED)_11 Flammable and Explosive Materials,1185190,M,54.637,born-digital,study_note,study_note
|
||||
3756,Industrial Safety and Health Management(7-ED)_1 The Safety and Health Manager,550267,S,76.971,born-digital,study_note,study_note
|
||||
3761,Industrial Safety and Health Management(7-ED)_6 Process safety and Disaster Preparedness,773058,S,65.656,born-digital,study_note,study_note
|
||||
3767,Industrial Safety and Health Management(7-ED)_12 Personal Protection and First Aid,1553116,M,65.161,born-digital,study_note,study_note
|
||||
5156,스마트팩토리 구축 제조기업 조직원이 지각한 상사의 변혁적리더십이 셀프리더,449251,S,94.531,born-digital,Academic_Paper,Academic_Paper
|
||||
5257,Modifying Large Language Model Post-Training for Diverse Creative Writing,885908,S,106.149,born-digital,Academic_Paper,Academic_Paper
|
||||
5142,표준기계설계(KS)_10_키 및 스플라인,17185683,L,2.06,scan-likely,Standard,tech_doc
|
||||
4546,機械設計_7 不等速運動機構,1205358,M,16.38,mixed,Manual,tech_doc
|
||||
5205,2_ASME VIII_Impact Test,364827,S,8.791,mixed,Standard,tech_doc
|
||||
5112,표준기계설계(KS)_03_기하 공차의 도시방법,7742981,M,1.377,scan-likely,Standard,tech_doc
|
||||
5085,일반기계기사_실전 모의고사 2회,2357404,M,12.318,mixed,Note,minor_doc
|
||||
5201,A Endorsement Exam,705076,S,0.022,scan-likely,Memo,minor_doc
|
||||
5086,일반기계기사_실전 모의고사 3회,1643798,M,17.111,mixed,Note,minor_doc
|
||||
5199,2012_NB_A_March_homewrk_quiz,1937838,M,0.025,scan-likely,Memo,minor_doc
|
||||
3780,"Safety and Health for Engineers_02_0 Legal, Aspects of Safety and Health",5089329,M,0.415,scan-likely,study_note,filler
|
||||
5160,"스마트팩토리, 제조기업 분야에 새로운 경쟁우위 확보를 위한 견인차 역할을",1261747,M,29.268,mixed,Academic_Paper,filler
|
||||
5174,#8. 검사 및 시험 계획_테크니컬 코리아 R.A_현대로템 통합,156781,S,22.043,mixed,Note,filler
|
||||
3819,Safety and Health for Engineers_06_Alphabetical List of Organizations Cited in Text,9678536,M,13.504,mixed,study_note,filler
|
||||
3784,Safety and Health for Engineers_02_7 Products Liability,6618025,M,10.892,mixed,study_note,filler
|
||||
5056,기계진동 이론과 응용(제5판)_목차,835993,S,18.099,mixed,Reference,filler
|
||||
3810,Safety and Health for Engineers_04_31 Human Behavior and Performance in safety and Health,8384987,M,10.362,mixed,study_note,filler
|
||||
4547,機械設計_8 アクチュエータ,1242829,M,19.333,mixed,Reference,filler
|
||||
5093,재료역학_부록,3178000,M,28.455,mixed,Academic_Paper,filler
|
||||
5265,황현필의 진보를 위한 역사_3장 독립운동을 상처 내지 말라,10398784,M,2.302,scan-likely,Note,filler
|
||||
5161,스마트팩토리를 위한 운영빅데이터 분석 플랫폼,920193,S,22.36,mixed,Academic_Paper,filler
|
||||
5130,07_Mechanical Behavior of Materials_Yielding and Fracture under Combined Stresses,3953088,M,32.835,mixed,Academic_Paper,filler
|
||||
|
@@ -0,0 +1,238 @@
|
||||
doc_id,title,file_path,file_size,file_size_band,text_len,text_density,text_density_band,doc_type,forecast_skip_reason,created_at
|
||||
3755,Industrial Safety and Health Management(7-ED)_0 Contents,PKM/Inbox/Industrial Safety and Health Management(7-ED)_0 Contents.pdf,476785,S,20537,44.108,mixed,study_note,none,2026-04-03T04:34:24.389026+00:00
|
||||
3756,Industrial Safety and Health Management(7-ED)_1 The Safety and Health Manager,PKM/Inbox/Industrial Safety and Health Management(7-ED)_1 The Safety and Health Manager.pdf,550267,S,41362,76.971,born-digital,study_note,none,2026-04-03T04:34:24.572086+00:00
|
||||
3761,Industrial Safety and Health Management(7-ED)_6 Process safety and Disaster Preparedness,PKM/Inbox/Industrial Safety and Health Management(7-ED)_6 Process safety and Disaster Preparedness.pdf,773058,S,49566,65.656,born-digital,study_note,none,2026-04-03T04:49:04.845585+00:00
|
||||
3762,Industrial Safety and Health Management(7-ED)_7 Buildings and Facilities,PKM/Inbox/Industrial Safety and Health Management(7-ED)_7 Buildings and Facilities.pdf,1123847,M,65909,60.053,born-digital,study_note,none,2026-04-03T04:49:05.295538+00:00
|
||||
3764,Industrial Safety and Health Management(7-ED)_9 Health and Toxic Substances,PKM/Inbox/Industrial Safety and Health Management(7-ED)_9 Health and Toxic Substances.pdf,1409470,M,91388,66.395,born-digital,study_note,none,2026-04-03T04:49:06.411247+00:00
|
||||
3766,Industrial Safety and Health Management(7-ED)_11 Flammable and Explosive Materials,PKM/Inbox/Industrial Safety and Health Management(7-ED)_11 Flammable and Explosive Materials.pdf,1185190,M,63237,54.637,born-digital,study_note,none,2026-04-03T04:49:07.380118+00:00
|
||||
3767,Industrial Safety and Health Management(7-ED)_12 Personal Protection and First Aid,PKM/Inbox/Industrial Safety and Health Management(7-ED)_12 Personal Protection and First Aid.pdf,1553116,M,98831,65.161,born-digital,study_note,none,2026-04-03T04:49:07.939877+00:00
|
||||
3768,Industrial Safety and Health Management(7-ED)_13 Fire Protection,PKM/Inbox/Industrial Safety and Health Management(7-ED)_13 Fire Protection.pdf,725779,S,43951,62.01,born-digital,study_note,none,2026-04-03T04:49:08.149678+00:00
|
||||
3770,Industrial Safety and Health Management(7-ED)_15 Machine Guarding,PKM/Inbox/Industrial Safety and Health Management(7-ED)_15 Machine Guarding.pdf,3315859,M,142588,44.034,mixed,study_note,none,2026-04-03T04:49:10.013144+00:00
|
||||
3771,Industrial Safety and Health Management(7-ED)_16 Welding,PKM/Inbox/Industrial Safety and Health Management(7-ED)_16 Welding.pdf,1239353,M,75343,62.251,born-digital,study_note,none,2026-04-03T04:49:10.470978+00:00
|
||||
3772,Industrial Safety and Health Management(7-ED)_17 Electrical Hazards,PKM/Inbox/Industrial Safety and Health Management(7-ED)_17 Electrical Hazards.pdf,2039508,M,72357,36.329,mixed,study_note,none,2026-04-03T04:49:11.157819+00:00
|
||||
3773,Industrial Safety and Health Management(7-ED)_18 Construction,PKM/Inbox/Industrial Safety and Health Management(7-ED)_18 Construction.pdf,1908991,M,102240,54.842,born-digital,study_note,none,2026-04-03T04:49:11.800261+00:00
|
||||
3775,Safety and Health for Engineers_00,PKM/Inbox/Safety and Health for Engineers_00.pdf,1427288,M,13113,9.408,mixed,study_note,none,2026-04-03T05:06:56.386163+00:00
|
||||
3776,Safety and Health for Engineers_01_0 Introduction,PKM/Inbox/Safety and Health for Engineers_01_0 Introduction.pdf,8934279,M,743,0.085,scan-likely,study_note,none,2026-04-03T05:07:00.165514+00:00
|
||||
3777,Safety and Health for Engineers_01_1 The Importance of Safety and Health,PKM/Inbox/Safety and Health for Engineers_01_1 The Importance of Safety and Health.pdf,8934119,M,91642,10.504,mixed,study_note,none,2026-04-03T05:07:03.292702+00:00
|
||||
3778,Safety and Health for Engineers_01_2 Safety and Health Professions,PKM/Inbox/Safety and Health for Engineers_01_2 Safety and Health Professions.pdf,5807403,M,59814,10.547,mixed,study_note,none,2026-04-03T05:07:07.898542+00:00
|
||||
3779,Safety and Health for Engineers_01_3 Fundamental Concepts and Terms,PKM/Inbox/Safety and Health for Engineers_01_3 Fundamental Concepts and Terms.pdf,5989787,M,60618,10.363,mixed,study_note,none,2026-04-03T05:07:12.664716+00:00
|
||||
3780,"Safety and Health for Engineers_02_0 Legal, Aspects of Safety and Health","PKM/Inbox/Safety and Health for Engineers_02_0 Legal, Aspects of Safety and Health.pdf",5089329,M,2065,0.415,scan-likely,study_note,none,2026-04-03T05:07:17.328796+00:00
|
||||
3781,Safety and Health for Engineers_02_4 Legal Aspects of Safety and Health,PKM/Inbox/Safety and Health for Engineers_02_4 Legal Aspects of Safety and Health.pdf,5089299,M,50272,10.115,mixed,study_note,none,2026-04-03T05:07:21.571656+00:00
|
||||
3783,Safety and Health for Engineers_02_6 Worker's Compensation,PKM/Inbox/Safety and Health for Engineers_02_6 Worker's Compensation.pdf,6079717,M,65499,11.032,mixed,study_note,none,2026-04-03T05:07:30.644769+00:00
|
||||
3784,Safety and Health for Engineers_02_7 Products Liability,PKM/Inbox/Safety and Health for Engineers_02_7 Products Liability.pdf,6618025,M,70394,10.892,mixed,study_note,none,2026-04-03T05:07:35.452588+00:00
|
||||
3785,Safety and Health for Engineers_02_8 Record Keeping and Reporting,PKM/Inbox/Safety and Health for Engineers_02_8 Record Keeping and Reporting.pdf,7100288,M,81011,11.683,mixed,study_note,none,2026-04-03T05:07:41.197634+00:00
|
||||
3786,Safety and Health for Engineers_03_0 Hazards and Their control,PKM/Inbox/Safety and Health for Engineers_03_0 Hazards and Their control.pdf,8562283,M,2214,0.265,scan-likely,study_note,none,2026-04-03T05:07:47.779152+00:00
|
||||
3787,Safety and Health for Engineers_03_9 General Principles of Hazard Control,PKM/Inbox/Safety and Health for Engineers_03_9 General Principles of Hazard Control.pdf,8562254,M,90283,10.797,mixed,study_note,none,2026-04-03T05:07:53.502349+00:00
|
||||
3788,Safety and Health for Engineers_03_10 Mechanics and Structures,PKM/Inbox/Safety and Health for Engineers_03_10 Mechanics and Structures.pdf,9868378,M,104028,10.795,mixed,study_note,none,2026-04-03T05:07:59.923610+00:00
|
||||
3789,Safety and Health for Engineers_03_11 Walking and Working Surfaces,PKM/Inbox/Safety and Health for Engineers_03_11 Walking and Working Surfaces.pdf,9542843,M,102275,10.975,mixed,study_note,none,2026-04-03T05:08:07.160154+00:00
|
||||
3790,Safety and Health for Engineers_03_12 Electrical Safety,PKM/Inbox/Safety and Health for Engineers_03_12 Electrical Safety.pdf,7811797,M,82384,10.799,mixed,study_note,none,2026-04-03T05:08:12.616433+00:00
|
||||
3791,Safety and Health for Engineers_03_13 Tools and Machines,PKM/Inbox/Safety and Health for Engineers_03_13 Tools and Machines.pdf,10631629,L,105491,10.161,mixed,study_note,none,2026-04-03T05:08:19.963028+00:00
|
||||
3792,Safety and Health for Engineers_03_14 Transportation,PKM/Inbox/Safety and Health for Engineers_03_14 Transportation.pdf,9457716,M,103492,11.205,mixed,study_note,none,2026-04-03T05:08:27.286422+00:00
|
||||
3793,Safety and Health for Engineers_03_15 Materials Handling,PKM/Inbox/Safety and Health for Engineers_03_15 Materials Handling.pdf,12096868,L,132105,11.183,mixed,study_note,none,2026-04-03T05:08:36.205673+00:00
|
||||
3794,Safety and Health for Engineers_03_16 Fire Protection and Prevention,PKM/Inbox/Safety and Health for Engineers_03_16 Fire Protection and Prevention.pdf,27524814,L,165727,6.166,mixed,study_note,over_max_pages_estimated,2026-04-03T05:08:55.100356+00:00
|
||||
3795,Safety and Health for Engineers_03_17 Explosions and Explosives,PKM/Inbox/Safety and Health for Engineers_03_17 Explosions and Explosives.pdf,4876363,M,53213,11.174,mixed,study_note,none,2026-04-03T05:08:59.606826+00:00
|
||||
3796,Safety and Health for Engineers_03_18 Heat and Cold,PKM/Inbox/Safety and Health for Engineers_03_18 Heat and Cold.pdf,8215266,M,87758,10.939,mixed,study_note,none,2026-04-03T05:09:05.490524+00:00
|
||||
3797,Safety and Health for Engineers_03_19 Pressure,PKM/Inbox/Safety and Health for Engineers_03_19 Pressure.pdf,5611782,M,59442,10.847,mixed,study_note,none,2026-04-03T05:09:10.679634+00:00
|
||||
3798,Safety and Health for Engineers_03_20 Visual Environment,PKM/Inbox/Safety and Health for Engineers_03_20 Visual Environment.pdf,5414637,M,58281,11.022,mixed,study_note,none,2026-04-03T05:09:14.984786+00:00
|
||||
3799,Safety and Health for Engineers_03_21 Nonionizing Radiation,PKM/Inbox/Safety and Health for Engineers_03_21 Nonionizing Radiation.pdf,5514954,M,58454,10.854,mixed,study_note,none,2026-04-03T05:09:18.999306+00:00
|
||||
3800,Safety and Health for Engineers_03_22 Ionizing Radiation,PKM/Inbox/Safety and Health for Engineers_03_22 Ionizing Radiation.pdf,5035759,M,52456,10.667,mixed,study_note,none,2026-04-03T05:09:22.940102+00:00
|
||||
3801,Safety and Health for Engineers_03_23 Noise and Vibration,PKM/Inbox/Safety and Health for Engineers_03_23 Noise and Vibration.pdf,8765522,M,87691,10.244,mixed,study_note,none,2026-04-03T05:09:29.219392+00:00
|
||||
3802,Safety and Health for Engineers_03_24 Chemicals,PKM/Inbox/Safety and Health for Engineers_03_24 Chemicals.pdf,11162176,L,120941,11.095,mixed,study_note,none,2026-04-03T05:09:37.851928+00:00
|
||||
3803,Safety and Health for Engineers_03_25 Ventilation,PKM/Inbox/Safety and Health for Engineers_03_25 Ventilation.pdf,6894217,M,69324,10.297,mixed,study_note,none,2026-04-03T05:09:44.185835+00:00
|
||||
3804,Safety and Health for Engineers_03_26 Biohazards,PKM/Inbox/Safety and Health for Engineers_03_26 Biohazards.pdf,6554931,M,76458,11.944,mixed,study_note,none,2026-04-03T05:09:49.536301+00:00
|
||||
3805,Safety and Health for Engineers_03_27 Hazardous Waste,PKM/Inbox/Safety and Health for Engineers_03_27 Hazardous Waste.pdf,6671999,M,68689,10.542,mixed,study_note,none,2026-04-03T05:09:54.300613+00:00
|
||||
3806,Safety and Health for Engineers_03_28 Personal Protective Equipment,PKM/Inbox/Safety and Health for Engineers_03_28 Personal Protective Equipment.pdf,7122464,M,73829,10.614,mixed,study_note,none,2026-04-03T05:10:00.083359+00:00
|
||||
3807,Safety and Health for Engineers_03_29 Emergencies and Security,PKM/Inbox/Safety and Health for Engineers_03_29 Emergencies and Security.pdf,7077502,M,73716,10.666,mixed,study_note,none,2026-04-03T05:10:05.256855+00:00
|
||||
3808,"Safety and Health for Engineers_03_30 Facility Planning, Design, and Maintenance","PKM/Inbox/Safety and Health for Engineers_03_30 Facility Planning, Design, and Maintenance.pdf",4677058,M,50098,10.969,mixed,study_note,none,2026-04-03T05:10:09.854386+00:00
|
||||
3809,Safety and Health for Engineers_04_0 The Human Element,PKM/Inbox/Safety and Health for Engineers_04_0 The Human Element.pdf,8384999,M,3429,0.419,scan-likely,study_note,none,2026-04-03T05:10:15.932112+00:00
|
||||
3810,Safety and Health for Engineers_04_31 Human Behavior and Performance in safety and Health,PKM/Inbox/Safety and Health for Engineers_04_31 Human Behavior and Performance in safety and Health.pdf,8384987,M,84847,10.362,mixed,study_note,none,2026-04-03T05:10:22.591531+00:00
|
||||
3811,"Safety and Health for Engineers_04_32 Procedures, Rules, and Training","PKM/Inbox/Safety and Health for Engineers_04_32 Procedures, Rules, and Training.pdf",7175749,M,78070,11.141,mixed,study_note,none,2026-04-03T05:10:27.685186+00:00
|
||||
3813,Safety and Health for Engineers_05_0 Managing Safety and Health,PKM/Inbox/Safety and Health for Engineers_05_0 Managing Safety and Health.pdf,9454413,M,1899,0.206,scan-likely,study_note,none,2026-04-03T05:10:43.948700+00:00
|
||||
3814,"Safety and Health for Engineers_05_34 Risk, Risk Assessment, and Risk Management","PKM/Inbox/Safety and Health for Engineers_05_34 Risk, Risk Assessment, and Risk Management.pdf",9454350,M,104290,11.296,mixed,study_note,none,2026-04-03T05:10:51.419167+00:00
|
||||
3815,Safety and Health for Engineers_05_35 Safety and Health Management,PKM/Inbox/Safety and Health for Engineers_05_35 Safety and Health Management.pdf,8340393,M,91707,11.259,mixed,study_note,none,2026-04-03T05:10:57.600214+00:00
|
||||
3816,Safety and Health for Engineers_05_36 System Safety,PKM/Inbox/Safety and Health for Engineers_05_36 System Safety.pdf,7165903,M,70595,10.088,mixed,study_note,none,2026-04-03T05:11:02.595452+00:00
|
||||
3817,"Safety and Health for Engineers_05_37 Safety and Health Data, Analysis and Management Information","PKM/Inbox/Safety and Health for Engineers_05_37 Safety and Health Data, Analysis and Management Information.pdf",11313981,L,124455,11.264,mixed,study_note,none,2026-04-03T05:11:10.229570+00:00
|
||||
3818,Safety and Health for Engineers_05_38 Safety and Health Plans and Programs,PKM/Inbox/Safety and Health for Engineers_05_38 Safety and Health Plans and Programs.pdf,5832841,M,62752,11.017,mixed,study_note,none,2026-04-03T05:11:14.894009+00:00
|
||||
3819,Safety and Health for Engineers_06_Alphabetical List of Organizations Cited in Text,PKM/Inbox/Safety and Health for Engineers_06_Alphabetical List of Organizations Cited in Text.pdf,9678536,M,127636,13.504,mixed,study_note,none,2026-04-03T05:11:21.915005+00:00
|
||||
4057,공업역학 동력학(제13판)_APPENDIX,PKM/Inbox/공업역학 동력학(제13판)_APPENDIX.pdf,8966645,M,152829,17.453,mixed,Note,none,2026-04-03T07:06:39.166342+00:00
|
||||
4058,공업역학 동역학(제13판)_00 목차,PKM/Inbox/공업역학 동역학(제13판)_00 목차.pdf,2013932,M,20665,10.507,mixed,Reference,none,2026-04-03T07:06:41.434400+00:00
|
||||
4059,공업역학 동역학(제13판)_Chapter 12 질점의 운동학,PKM/Inbox/공업역학 동역학(제13판)_Chapter 12 질점의 운동학.pdf,11140007,L,95518,8.78,mixed,Academic_Paper,none,2026-04-03T07:06:50.001782+00:00
|
||||
4060,공업역학 동역학(제13판)_Chapter 13 질점의 운동역학_힘과 가속도,PKM/Inbox/공업역학 동역학(제13판)_Chapter 13 질점의 운동역학_힘과 가속도.pdf,7011688,M,56816,8.298,mixed,Manual,none,2026-04-03T07:06:56.438865+00:00
|
||||
4062,공업역학 동역학(제13판)_Chapter 15 질점의 운동역학_역적과 운동량,PKM/Inbox/공업역학 동역학(제13판)_Chapter 15 질점의 운동역학_역적과 운동량.pdf,8153718,M,76160,9.565,mixed,Academic_Paper,none,2026-04-03T07:07:08.670475+00:00
|
||||
4063,공업역학 동역학(제13판)_Chapter 16 강체 평면 운동학,PKM/Inbox/공업역학 동역학(제13판)_Chapter 16 강체 평면 운동학.pdf,10138853,M,70522,7.123,mixed,Academic_Paper,none,2026-04-03T07:07:17.049458+00:00
|
||||
4064,공업역학 동역학(제13판)_Chapter 17 강체의 평면 운동역학_힘과 가속도,PKM/Inbox/공업역학 동역학(제13판)_Chapter 17 강체의 평면 운동역학_힘과 가속도.pdf,7411889,M,52850,7.302,mixed,Academic_Paper,none,2026-04-03T07:07:23.207207+00:00
|
||||
4065,공업역학 동역학(제13판)_Chapter 18 강체의 평면 운동학,PKM/Inbox/공업역학 동역학(제13판)_Chapter 18 강체의 평면 운동학.pdf,5122916,M,32029,6.402,mixed,Academic_Paper,none,2026-04-03T07:07:27.893070+00:00
|
||||
4066,공업역학 동역학(제13판)_Chapter 19 강체의 평면 운동역학_역적과 운동량,PKM/Inbox/공업역학 동역학(제13판)_Chapter 19 강체의 평면 운동역학_역적과 운동량.pdf,4240748,M,33459,8.079,mixed,Academic_Paper,none,2026-04-03T07:07:31.640948+00:00
|
||||
4067,공업역학 동역학(제13판)_Chapter 20 3차원 강체 운동학,PKM/Inbox/공업역학 동역학(제13판)_Chapter 20 3차원 강체 운동학.pdf,3837746,M,28698,7.657,mixed,Academic_Paper,none,2026-04-03T07:07:35.050541+00:00
|
||||
4069,공업역학 동역학(제13판)_Chapter 22 진동,PKM/Inbox/공업역학 동역학(제13판)_Chapter 22 진동.pdf,3478353,M,31656,9.319,mixed,Academic_Paper,none,2026-04-03T07:07:42.935401+00:00
|
||||
4070,공업역학 동역학(제13판)_Review 1 질점의 운동학과 운동역학,PKM/Inbox/공업역학 동역학(제13판)_Review 1 질점의 운동학과 운동역학.pdf,1808025,M,12218,6.92,mixed,Note,none,2026-04-03T07:07:45.006654+00:00
|
||||
4071,공업역학 동역학(제13판)_Review 2 강체의 평면 운동학 및,PKM/Inbox/공업역학 동역학(제13판)_Review 2 강체의 평면 운동학 및.pdf,2070528,M,11892,5.881,mixed,Note,none,2026-04-03T07:07:47.379011+00:00
|
||||
4540,機械設計_1 機械設計の基礎,PKM/Inbox/機械設計_1 機械設計の基礎.pdf,1675175,M,27631,16.89,mixed,Reference,none,2026-04-07T02:59:21.853900+00:00
|
||||
4541,機械設計_2 機械材料と強度および安全率,PKM/Inbox/機械設計_2 機械材料と強度および安全率.pdf,1461925,M,29295,20.52,mixed,Reference,none,2026-04-07T02:59:22.999108+00:00
|
||||
4542,機械設計_3 締結•接合要素,PKM/Inbox/機械設計_3 締結•接合要素.pdf,2343158,M,50016,21.858,mixed,Standard,none,2026-04-07T02:59:26.325850+00:00
|
||||
4543,機械設計_4 軸系要素,PKM/Inbox/機械設計_4 軸系要素.pdf,2218086,M,40769,18.821,mixed,Reference,none,2026-04-07T02:59:27.312924+00:00
|
||||
4544,機械設計_5 軸受•案内要素,PKM/Inbox/機械設計_5 軸受•案内要素.pdf,3293674,M,70593,21.947,mixed,Manual,none,2026-04-07T02:59:28.849332+00:00
|
||||
4545,機械設計_6 動力伝達要素,PKM/Inbox/機械設計_6 動力伝達要素.pdf,3168370,M,62220,20.109,mixed,Manual,none,2026-04-07T02:59:30.485110+00:00
|
||||
4546,機械設計_7 不等速運動機構,PKM/Inbox/機械設計_7 不等速運動機構.pdf,1205358,M,19281,16.38,mixed,Manual,none,2026-04-07T02:59:32.619449+00:00
|
||||
4547,機械設計_8 アクチュエータ,PKM/Inbox/機械設計_8 アクチュエータ.pdf,1242829,M,23464,19.333,mixed,Reference,none,2026-04-07T02:59:33.985785+00:00
|
||||
4548,機械設計_9 機械システムの設計,PKM/Inbox/機械設計_9 機械システムの設計.pdf,1441721,M,30786,21.866,mixed,Manual,none,2026-04-07T02:59:35.258788+00:00
|
||||
4549,機械設計_10 練習問題解答,PKM/Inbox/機械設計_10 練習問題解答.pdf,1699097,M,22927,13.817,mixed,Manual,none,2026-04-07T02:59:36.108172+00:00
|
||||
5056,기계진동 이론과 응용(제5판)_목차,PKM/Inbox/기계진동 이론과 응용(제5판)_목차.pdf,835993,S,14776,18.099,mixed,Reference,none,2026-04-08T23:15:27.766489+00:00
|
||||
5057,기계진동 이론과 응용(제5판)_Appendix,PKM/Inbox/기계진동 이론과 응용(제5판)_Appendix.pdf,4017320,M,57998,14.783,mixed,Reference,none,2026-04-08T23:15:29.285679+00:00
|
||||
5058,기계진동 이론과 응용(제5판)_Chapter 1 진동운동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 1 진동운동.pdf,1518132,M,10701,7.218,mixed,Note,none,2026-04-08T23:15:30.317920+00:00
|
||||
5059,기계진동 이론과 응용(제5판)_Chapter 2 자유진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 2 자유진동.pdf,2217720,M,23768,10.975,mixed,Academic_Paper,none,2026-04-08T23:15:31.704320+00:00
|
||||
5060,기계진동 이론과 응용(제5판)_Chapter 3 조화 가진진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 3 조화 가진진동.pdf,2992028,M,33140,11.342,mixed,Academic_Paper,none,2026-04-08T23:15:32.983116+00:00
|
||||
5061,기계진동 이론과 응용(제5판)_Chapter 4 과도 진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 4 과도 진동.pdf,2509875,M,30004,12.241,mixed,Academic_Paper,none,2026-04-08T23:15:34.301838+00:00
|
||||
5062,기계진동 이론과 응용(제5판)_Chapter 5 2자유도계와 다자유도계,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 5 2자유도계와 다자유도계.pdf,2775517,M,26202,9.667,mixed,Academic_Paper,none,2026-04-08T23:15:35.605446+00:00
|
||||
5063,기계진동 이론과 응용(제5판)_Chapter 6 진동계의 특성,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 6 진동계의 특성.pdf,2476898,M,27466,11.355,mixed,Reference,none,2026-04-08T23:15:36.927471+00:00
|
||||
5064,기계진동 이론과 응용(제5판)_Chapter 7 라그랑즈 방정식,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 7 라그랑즈 방정식.pdf,2662848,M,20434,7.858,mixed,Academic_Paper,none,2026-04-08T23:15:38.281162+00:00
|
||||
5066,기계진동 이론과 응용(제5판)_Chapter 8 수치해석법,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 8 수치해석법.pdf,1881462,M,31273,17.021,mixed,Academic_Paper,none,2026-04-08T23:15:39.385956+00:00
|
||||
5067,流体力学_01 完全流体の力学_0 Intro,PKM/Inbox/流体力学_01 完全流体の力学_0 Intro.pdf,319348,S,1049,3.364,scan-likely,Reference,none,2026-04-08T23:15:39.779551+00:00
|
||||
5068,기계진동 이론과 응용(제5판)_Chapter 9 연속계의 진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 9 연속계의 진동.pdf,1904149,M,22813,12.268,mixed,Academic_Paper,none,2026-04-08T23:15:40.244821+00:00
|
||||
5069,流体力学_01 完全流体の力学_1 基礎方程式と基礎原理,PKM/Inbox/流体力学_01 完全流体の力学_1 基礎方程式と基礎原理.pdf,1590589,M,32132,20.686,mixed,Academic_Paper,none,2026-04-08T23:15:40.795292+00:00
|
||||
5070,기계진동 이론과 응용(제5판)_Chapter 10 유한 요소법 입문,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 10 유한 요소법 입문.pdf,2918260,M,37484,13.153,mixed,Academic_Paper,none,2026-04-08T23:15:41.347817+00:00
|
||||
5071,流体力学_01 完全流体の力学_2 ベルヌーイの定理および運動量保存則とその応用,PKM/Inbox/流体力学_01 完全流体の力学_2 ベルヌーイの定理および運動量保存則とその応用.pdf,1327546,M,26457,20.408,mixed,Academic_Paper,none,2026-04-08T23:15:41.793483+00:00
|
||||
5072,기계진동 이론과 응용(제5판)_Chapter 11 연속계의 모드-합성 방법,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 11 연속계의 모드-합성 방법.pdf,1719318,M,18079,10.768,mixed,Academic_Paper,none,2026-04-08T23:15:42.200052+00:00
|
||||
5073,流体力学_01 完全流体の力学_3 流関数と速度ポテンシャル,PKM/Inbox/流体力学_01 完全流体の力学_3 流関数と速度ポテンシャル.pdf,1053322,M,17580,17.091,mixed,Academic_Paper,none,2026-04-08T23:15:42.776633+00:00
|
||||
5074,기계진동 이론과 응용(제5판)_Chapter 12 고전적인 해석방법,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 12 고전적인 해석방법.pdf,3361219,M,37053,11.288,mixed,Academic_Paper,none,2026-04-08T23:15:43.492238+00:00
|
||||
5075,流体力学_01 完全流体の力学_4 二次元ポテンシャル流れ,PKM/Inbox/流体力学_01 完全流体の力学_4 二次元ポテンシャル流れ.pdf,2564583,M,50432,20.137,mixed,Academic_Paper,none,2026-04-08T23:15:44.050776+00:00
|
||||
5076,기계진동 이론과 응용(제5판)_Chapter 13 불규칙 진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 13 불규칙 진동.pdf,2470352,M,34766,14.411,mixed,Reference,none,2026-04-08T23:15:44.575733+00:00
|
||||
5077,流体力学_01 完全流体の力学_5 渦(うず),PKM/Inbox/流体力学_01 完全流体の力学_5 渦(うず).pdf,1394271,M,23084,16.954,mixed,Academic_Paper,none,2026-04-08T23:15:45.026236+00:00
|
||||
5078,기계진동 이론과 응용(제5판)_Chapter 14 비선형진동,PKM/Inbox/기계진동 이론과 응용(제5판)_Chapter 14 비선형진동.pdf,2265927,M,23347,10.551,mixed,Academic_Paper,none,2026-04-08T23:15:45.599500+00:00
|
||||
5079,流体力学_01 完全流体の力学_6 波,PKM/Inbox/流体力学_01 完全流体の力学_6 波.pdf,1467570,M,25309,17.659,mixed,Academic_Paper,none,2026-04-08T23:15:46.156458+00:00
|
||||
5080,流体力学_02 粘性流体の力学_0 Intro,PKM/Inbox/流体力学_02 粘性流体の力学_0 Intro.pdf,226546,S,3008,13.596,mixed,Reference,none,2026-04-08T23:15:46.584826+00:00
|
||||
5081,流体力学_02 粘性流体の力学_7 粘性流体の基礎方程式,PKM/Inbox/流体力学_02 粘性流体の力学_7 粘性流体の基礎方程式.pdf,890677,S,14019,16.117,mixed,Academic_Paper,none,2026-04-08T23:15:47.550174+00:00
|
||||
5082,流体力学_02 粘性流体の力学_8 ナビエーストークス方程式の厳密解,PKM/Inbox/流体力学_02 粘性流体の力学_8 ナビエーストークス方程式の厳密解.pdf,1019341,S,19638,19.728,mixed,Academic_Paper,none,2026-04-08T23:15:48.643897+00:00
|
||||
5083,00_일반기계기사_목차,PKM/Inbox/00_일반기계기사_목차.pdf,8471940,M,70301,8.497,mixed,Manual,none,2026-04-08T23:15:50.578723+00:00
|
||||
5084,일반기계기사_실전 모의고사 1회,PKM/Inbox/일반기계기사_실전 모의고사 1회.pdf,1809053,M,28814,16.31,mixed,Reference,none,2026-04-08T23:15:55.821375+00:00
|
||||
5085,일반기계기사_실전 모의고사 2회,PKM/Inbox/일반기계기사_실전 모의고사 2회.pdf,2357404,M,28359,12.318,mixed,Note,none,2026-04-08T23:15:57.064358+00:00
|
||||
5086,일반기계기사_실전 모의고사 3회,PKM/Inbox/일반기계기사_실전 모의고사 3회.pdf,1643798,M,27468,17.111,mixed,Note,none,2026-04-08T23:15:58.106360+00:00
|
||||
5087,일반기계기사_실전 모의고사 4회,PKM/Inbox/일반기계기사_실전 모의고사 4회.pdf,1732142,M,28874,17.07,mixed,Reference,none,2026-04-08T23:15:59.221928+00:00
|
||||
5088,일반기계기사_실전 모의고사 5회,PKM/Inbox/일반기계기사_실전 모의고사 5회.pdf,1793083,M,29438,16.812,mixed,Reference,none,2026-04-08T23:16:00.341841+00:00
|
||||
5089,재료과학_00_서문,PKM/Inbox/재료과학_00_서문.pdf,1529124,M,28276,18.935,mixed,Reference,none,2026-04-08T23:16:03.847623+00:00
|
||||
5090,일반기계기사_실전 모의고사,PKM/Inbox/일반기계기사_실전 모의고사.pdf,24976506,L,384157,15.75,mixed,Academic_Paper,none,2026-04-08T23:16:05.341794+00:00
|
||||
5091,재료과학_01_재료과학 소개,PKM/Inbox/재료과학_01_재료과학 소개.pdf,3005951,M,26499,9.027,mixed,Academic_Paper,none,2026-04-08T23:16:06.184639+00:00
|
||||
5092,재료과학_02_원자의 구조와 원자간 결합,PKM/Inbox/재료과학_02_원자의 구조와 원자간 결합.pdf,4064573,M,47817,12.047,mixed,Academic_Paper,none,2026-04-08T23:16:08.028960+00:00
|
||||
5093,재료역학_부록,PKM/Inbox/재료역학_부록.pdf,3178000,M,88312,28.455,mixed,Academic_Paper,none,2026-04-08T23:16:09.428509+00:00
|
||||
5095,정역학_01_정역학 서론,PKM/Inbox/정역학_01_정역학 서론.pdf,1935591,M,32230,17.051,mixed,Reference,none,2026-04-08T23:16:15.217698+00:00
|
||||
5096,정역학_02_힘계의 기본 연산,PKM/Inbox/정역학_02_힘계의 기본 연산.pdf,3493287,M,39573,11.6,mixed,Academic_Paper,none,2026-04-08T23:16:16.562088+00:00
|
||||
5097,정역학_03_힘계의 합성,PKM/Inbox/정역학_03_힘계의 합성.pdf,2571786,M,28667,11.414,mixed,Academic_Paper,none,2026-04-08T23:16:17.833253+00:00
|
||||
5098,정역학_04_공명 평형 해석,PKM/Inbox/정역학_04_공명 평형 해석.pdf,5655029,M,71376,12.925,mixed,Academic_Paper,none,2026-04-08T23:16:19.571857+00:00
|
||||
5099,정역학_05_3차원 평형,PKM/Inbox/정역학_05_3차원 평형.pdf,2614037,M,32234,12.627,mixed,Academic_Paper,none,2026-04-08T23:16:20.693129+00:00
|
||||
5100,정역학_06_보와 케이블,PKM/Inbox/정역학_06_보와 케이블.pdf,3066734,M,42430,14.168,mixed,Academic_Paper,none,2026-04-08T23:16:21.984222+00:00
|
||||
5101,정역학_07_건마찰,PKM/Inbox/정역학_07_건마찰.pdf,3232307,M,44035,13.95,mixed,Reference,none,2026-04-08T23:16:23.491935+00:00
|
||||
5102,정역학_08_도심과 분포 하중,PKM/Inbox/정역학_08_도심과 분포 하중.pdf,4144284,M,42923,10.606,mixed,Academic_Paper,none,2026-04-08T23:16:24.746403+00:00
|
||||
5103,표준기계설계(KS)_01_치수공차와 끼워맞춤,PKM/Inbox/표준기계설계(KS)_01_치수공차와 끼워맞춤.pdf,20210623,L,44633,2.261,scan-likely,Standard,none,2026-04-08T23:16:26.944309+00:00
|
||||
5104,정역학_09_면적 관성 모멘트와 과성 곱,PKM/Inbox/정역학_09_면적 관성 모멘트와 과성 곱.pdf,2632774,M,32837,12.772,mixed,Reference,none,2026-04-08T23:16:26.964918+00:00
|
||||
5105,정역학_10_가상 일과 퍼텐셜 에너지,PKM/Inbox/정역학_10_가상 일과 퍼텐셜 에너지.pdf,2594344,M,28313,11.175,mixed,Academic_Paper,none,2026-04-08T23:16:28.289238+00:00
|
||||
5106,정역학_부록,PKM/Inbox/정역학_부록.pdf,1962700,M,39631,20.677,mixed,Academic_Paper,none,2026-04-08T23:16:29.586996+00:00
|
||||
5107,JlSにもとづく機械設計製囲便覧_第13版_0 Intro,PKM/Inbox/JlSにもとづく機械設計製囲便覧_第13版_0 Intro.pdf,1198849,M,43577,37.221,mixed,Reference,none,2026-04-08T23:16:30.246865+00:00
|
||||
5108,정영학_00_목차,PKM/Inbox/정영학_00_목차.pdf,850729,S,13589,16.357,mixed,Reference,none,2026-04-08T23:16:30.707965+00:00
|
||||
5109,표준기계설계(KS)_02_일반공차 및 보통공차,PKM/Inbox/표준기계설계(KS)_02_일반공차 및 보통공차.pdf,9486493,M,20303,2.192,scan-likely,Standard,none,2026-04-08T23:16:30.963027+00:00
|
||||
5110,JlSにもとづく機械設計製囲便覧_第13版_1章 諸単位,PKM/Inbox/JlSにもとづく機械設計製囲便覧_第13版_1章 諸単位.pdf,727694,S,22655,31.88,mixed,Standard,none,2026-04-08T23:16:31.668820+00:00
|
||||
5112,표준기계설계(KS)_03_기하 공차의 도시방법,PKM/Inbox/표준기계설계(KS)_03_기하 공차의 도시방법.pdf,7742981,M,10413,1.377,scan-likely,Standard,none,2026-04-08T23:16:32.995488+00:00
|
||||
5113,JlSにもとづく機械設計製囲便覧_第13版_3章 カ学,PKM/Inbox/JlSにもとづく機械設計製囲便覧_第13版_3章 カ学.pdf,833772,S,16544,20.319,mixed,Reference,none,2026-04-08T23:16:33.005005+00:00
|
||||
5114,JlSにもとづく機械設計製囲便覧_第13版_4章 材料カ学,PKM/Inbox/JlSにもとづく機械設計製囲便覧_第13版_4章 材料カ学.pdf,1172147,M,20208,17.654,mixed,Reference,none,2026-04-08T23:16:34.146033+00:00
|
||||
5116,표준기계설계(KS)_04_나사,PKM/Inbox/표준기계설계(KS)_04_나사.pdf,21167224,L,57023,2.759,scan-likely,Standard,none,2026-04-08T23:16:37.766254+00:00
|
||||
5117,01_Mechanical Behavior of Materials_Introduction,PKM/Inbox/01_Mechanical Behavior of Materials_Introduction.pdf,2893265,M,52850,18.705,mixed,Reference,none,2026-04-08T23:16:38.832286+00:00
|
||||
5118,"02_Mechanical Behavior of Materials_Structure, Defects, and Deformation in Materials","PKM/Inbox/02_Mechanical Behavior of Materials_Structure, Defects, and Deformation in Materials.pdf",4378386,M,56091,13.118,mixed,Note,none,2026-04-08T23:16:40.575592+00:00
|
||||
5119,"표준기계설계(KS)_05_볼트, 자리따기","PKM/Inbox/표준기계설계(KS)_05_볼트, 자리따기.pdf",14523342,L,31237,2.202,scan-likely,Standard,none,2026-04-08T23:16:42.196325+00:00
|
||||
5120,03_Mechanical Behavior of Materials_Mechanical Testing-Tension Test and Stress-Strain Mechanisms,PKM/Inbox/03_Mechanical Behavior of Materials_Mechanical Testing-Tension Test and Stress-Strain Mechanisms.pdf,3783032,M,138532,37.498,mixed,Reference,none,2026-04-08T23:16:42.870499+00:00
|
||||
5121,표준기계설계(KS)_06_너트,PKM/Inbox/표준기계설계(KS)_06_너트.pdf,4933016,M,13303,2.761,scan-likely,Standard,none,2026-04-08T23:16:44.448549+00:00
|
||||
5122,04_Mechanical Behavior of Materials_Mechanical Testing- Additional Basic Tests,PKM/Inbox/04_Mechanical Behavior of Materials_Mechanical Testing- Additional Basic Tests.pdf,2947668,M,62179,21.601,mixed,Reference,none,2026-04-08T23:16:44.738221+00:00
|
||||
5123,표준기계설계(KS)_07_와셔,PKM/Inbox/표준기계설계(KS)_07_와셔.pdf,7058048,M,15925,2.31,scan-likely,Standard,none,2026-04-08T23:16:47.338036+00:00
|
||||
5124,THERMODYNAMICS_An Engineering Approach 9-E_APPENDIX,PKM/Inbox/THERMODYNAMICS_An Engineering Approach 9-E_APPENDIX.pdf,7740918,M,159241,21.065,mixed,Reference,none,2026-04-08T23:16:48.026166+00:00
|
||||
5125,05_Mechanical Behavior of Materials_Stress-Strain Relationships and Behavior,PKM/Inbox/05_Mechanical Behavior of Materials_Stress-Strain Relationships and Behavior.pdf,2884730,M,99098,35.177,mixed,Academic_Paper,none,2026-04-08T23:16:49.247029+00:00
|
||||
5126,THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 01,PKM/Inbox/THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 01.pdf,5212258,M,166000,32.612,mixed,Academic_Paper,none,2026-04-08T23:16:50.848415+00:00
|
||||
5128,06_Mechanical Behavior of Materials_Review of Complex and Principal States of Stress and Strain,PKM/Inbox/06_Mechanical Behavior of Materials_Review of Complex and Principal States of Stress and Strain.pdf,3173000,M,81191,26.202,mixed,Reference,none,2026-04-08T23:16:51.964505+00:00
|
||||
5129,THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 02,PKM/Inbox/THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 02.pdf,4778125,M,213287,45.71,mixed,Reference,none,2026-04-08T23:16:53.289800+00:00
|
||||
5130,07_Mechanical Behavior of Materials_Yielding and Fracture under Combined Stresses,PKM/Inbox/07_Mechanical Behavior of Materials_Yielding and Fracture under Combined Stresses.pdf,3953088,M,126759,32.835,mixed,Academic_Paper,none,2026-04-08T23:16:53.797654+00:00
|
||||
5131,압력용기 핸드북_강 구조의 설계,PKM/Inbox/압력용기 핸드북_강 구조의 설계.pdf,1363261,M,14266,10.716,mixed,Reference,none,2026-04-08T23:16:54.838637+00:00
|
||||
5134,THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 04,PKM/Inbox/THERMODYNAMICS_An Engineering Approach 9-E_CHAPTER 04.pdf,5932814,M,159158,27.471,mixed,Reference,none,2026-04-08T23:16:58.605198+00:00
|
||||
5136,Pressure Vessel Design Manual_00,PKM/Inbox/Pressure Vessel Design Manual_00.pdf,372049,S,19436,53.494,born-digital,Manual,none,2026-04-08T23:17:00.194347+00:00
|
||||
5138,표준기계설계(KS)_09_축 관계,PKM/Inbox/표준기계설계(KS)_09_축 관계.pdf,26465006,L,52684,2.038,scan-likely,Standard,over_max_pages_estimated,2026-04-08T23:17:02.195454+00:00
|
||||
5139,압력용기 핸드북_압력용기 설계와 제작,PKM/Inbox/압력용기 핸드북_압력용기 설계와 제작.pdf,14093886,L,254595,18.498,mixed,Manual,none,2026-04-08T23:17:05.346798+00:00
|
||||
5140,Pressure Vessel Design Manual_02 General Design,PKM/Inbox/Pressure Vessel Design Manual_02 General Design.pdf,8285419,M,223799,27.659,mixed,Manual,none,2026-04-08T23:17:07.080394+00:00
|
||||
5142,표준기계설계(KS)_10_키 및 스플라인,PKM/Inbox/표준기계설계(KS)_10_키 및 스플라인.pdf,17185683,L,34581,2.06,scan-likely,Standard,none,2026-04-08T23:17:08.309320+00:00
|
||||
5143,Structural Analysiss and Design of Process Equipment_01_History and Organization of Codes,PKM/Inbox/Structural Analysiss and Design of Process Equipment_01_History and Organization of Codes.pdf,914921,S,16537,18.509,mixed,Reference,none,2026-04-08T23:17:08.926568+00:00
|
||||
5144,Pressure Vessel Design Manual_03 Flange Design,PKM/Inbox/Pressure Vessel Design Manual_03 Flange Design.pdf,3768131,M,112421,30.551,mixed,Manual,none,2026-04-08T23:17:09.602217+00:00
|
||||
5145,"Structural Analysiss and Design of Process Equipment_02_Selection of Vessel, Specifications, Reports and Allowable Stresses","PKM/Inbox/Structural Analysiss and Design of Process Equipment_02_Selection of Vessel, Specifications, Reports and Allowable Stresses.pdf",691082,S,33272,49.3,mixed,Reference,none,2026-04-08T23:17:09.766146+00:00
|
||||
5146,표준기계설계(KS)_11_멈춤링,PKM/Inbox/표준기계설계(KS)_11_멈춤링.pdf,5851521,M,15566,2.724,scan-likely,Standard,none,2026-04-08T23:17:10.529895+00:00
|
||||
5147,"Structural Analysiss and Design of Process Equipment_03_Strength Theories, Design Criteria and Design Equations","PKM/Inbox/Structural Analysiss and Design of Process Equipment_03_Strength Theories, Design Criteria and Design Equations.pdf",695405,S,20757,30.565,mixed,Reference,none,2026-04-08T23:17:10.657722+00:00
|
||||
5148,Structural Analysiss and Design of Process Equipment_04_Materials of Construction,PKM/Inbox/Structural Analysiss and Design of Process Equipment_04_Materials of Construction.pdf,3306699,M,70627,21.871,mixed,Reference,none,2026-04-08T23:17:12.650803+00:00
|
||||
5149,Pressure Vessel Design Manual_04 Design of Vessel Supports,PKM/Inbox/Pressure Vessel Design Manual_04 Design of Vessel Supports.pdf,8693390,M,234154,27.581,mixed,Manual,none,2026-04-08T23:17:13.743831+00:00
|
||||
5150,표준기계설계(KS)_12_벨트와 풀리,PKM/Inbox/표준기계설계(KS)_12_벨트와 풀리.pdf,18363580,L,38654,2.155,scan-likely,Standard,none,2026-04-08T23:17:17.375038+00:00
|
||||
5152,THE PIPE FITTERS BLUE BOOK,PKM/Inbox/THE PIPE FITTERS BLUE BOOK.pdf,42381135,L,238868,5.771,mixed,Manual,over_max_pages_estimated,2026-04-08T23:17:52.867389+00:00
|
||||
5153,로지스틱스 연구 조립제조산에서의 자재분류 요인분석 관한 사례연구,PKM/Inbox/로지스틱스 연구 조립제조산에서의 자재분류 요인분석 관한 사례연구.pdf,1137506,M,31149,28.041,mixed,Academic_Paper,none,2026-04-08T23:17:59.109278+00:00
|
||||
5156,스마트팩토리 구축 제조기업 조직원이 지각한 상사의 변혁적리더십이 셀프리더,PKM/Inbox/스마트팩토리 구축 제조기업 조직원이 지각한 상사의 변혁적리더십이 셀프리더.pdf,449251,S,41473,94.531,born-digital,Academic_Paper,none,2026-04-08T23:18:08.381775+00:00
|
||||
5157,스마트팩토리 운영전략이 경영성과와 지속사용의도 간의 구조적 관계분석,PKM/Inbox/스마트팩토리 운영전략이 경영성과와 지속사용의도 간의 구조적 관계분석.pdf,866978,S,35476,41.901,mixed,Academic_Paper,none,2026-04-08T23:18:09.256468+00:00
|
||||
5158,스마트팩토리 제조경쟁력 확보가 한국의 해외진출 제조기업 리쇼어링에 미치는,PKM/Inbox/스마트팩토리 제조경쟁력 확보가 한국의 해외진출 제조기업 리쇼어링에 미치는.pdf,1015118,S,37670,38.0,mixed,Academic_Paper,none,2026-04-08T23:18:10.193550+00:00
|
||||
5159,"스마트팩토리, 인공지능으로 날개를 달다","PKM/Inbox/스마트팩토리, 인공지능으로 날개를 달다.pdf",604946,S,9037,15.297,mixed,Report,none,2026-04-08T23:18:11.105141+00:00
|
||||
5160,"스마트팩토리, 제조기업 분야에 새로운 경쟁우위 확보를 위한 견인차 역할을","PKM/Inbox/스마트팩토리, 제조기업 분야에 새로운 경쟁우위 확보를 위한 견인차 역할을.pdf",1261747,M,36063,29.268,mixed,Academic_Paper,none,2026-04-08T23:18:12.225466+00:00
|
||||
5161,스마트팩토리를 위한 운영빅데이터 분석 플랫폼,PKM/Inbox/스마트팩토리를 위한 운영빅데이터 분석 플랫폼.pdf,920193,S,20093,22.36,mixed,Academic_Paper,none,2026-04-08T23:18:13.263588+00:00
|
||||
5162,철강 산업 기업의 스마트팩토리 도입 운영형태가 품질 안정화에 미치는 관계,PKM/Inbox/철강 산업 기업의 스마트팩토리 도입 운영형태가 품질 안정화에 미치는 관계.pdf,1139690,M,31606,28.398,mixed,Academic_Paper,none,2026-04-08T23:18:14.315451+00:00
|
||||
5165,금융 관련 공공기관의 ESG경영 현황,PKM/Inbox/금융 관련 공공기관의 ESG경영 현황.pdf,555648,S,33344,61.449,born-digital,Report,none,2026-04-08T23:18:37.418558+00:00
|
||||
5166,기업시민 포스코 ESG경영 사례,PKM/Inbox/기업시민 포스코 ESG경영 사례.pdf,662512,S,5868,9.07,mixed,Report,none,2026-04-08T23:18:38.237680+00:00
|
||||
5169,수소인프라 이슈 및 과제,PKM/Inbox/수소인프라 이슈 및 과제.pdf,1495185,M,26104,17.878,mixed,Academic_Paper,none,2026-04-08T23:19:16.572890+00:00
|
||||
5170,전주 완주 수소시범도시 주민수용성 분석,PKM/Inbox/전주 완주 수소시범도시 주민수용성 분석.pdf,976519,S,22013,23.083,mixed,study_note,none,2026-04-08T23:19:17.548849+00:00
|
||||
5172,탄소중립은 지속가능한 경제성장과 양립하는가,PKM/Inbox/탄소중립은 지속가능한 경제성장과 양립하는가.pdf,664629,S,24049,37.053,mixed,Academic_Paper,none,2026-04-08T23:19:19.371453+00:00
|
||||
5173,HyRAM 3.0을 이용한 수소충전소 정량적 위험성평가 분석에 관한 연구,PKM/Inbox/HyRAM 3.0을 이용한 수소충전소 정량적 위험성평가 분석에 관한 연구.pdf,1279610,M,26966,21.579,mixed,study_note,none,2026-04-08T23:19:20.401783+00:00
|
||||
5174,#8. 검사 및 시험 계획_테크니컬 코리아 R.A_현대로템 통합,PKM/Inbox/#8. 검사 및 시험 계획_테크니컬 코리아 R.A_현대로템 통합.pdf,156781,S,3375,22.043,mixed,Note,none,2026-04-08T23:19:22.531563+00:00
|
||||
5176,수소취성(최병학),PKM/Inbox/수소취성(최병학).pdf,7110851,M,11069,1.594,scan-likely,study_note,none,2026-04-08T23:19:30.535784+00:00
|
||||
5181,PTC46,PKM/Inbox/PTC46.PDF,8154549,M,0,0.0,scan-likely,,none,2026-04-08T23:19:47.730804+00:00
|
||||
5182,VIII-1_01,PKM/Inbox/VIII-1_01.pdf,1934884,M,85906,45.464,mixed,Standard,none,2026-04-08T23:19:48.887787+00:00
|
||||
5186,VIII-1_05-UG,PKM/Inbox/VIII-1_05-UG.pdf,6098647,M,531589,89.257,born-digital,Standard,none,2026-04-08T23:19:52.267197+00:00
|
||||
5187,VIII-1_06-UW,PKM/Inbox/VIII-1_06-UW.pdf,1520880,M,174445,117.453,born-digital,Standard,none,2026-04-08T23:19:53.320860+00:00
|
||||
5190,VIII-1_09-UCS,PKM/Inbox/VIII-1_09-UCS.pdf,737749,S,108861,151.1,born-digital,Standard,none,2026-04-08T23:19:55.892937+00:00
|
||||
5191,VIII-1_10-UNF,PKM/Inbox/VIII-1_10-UNF.pdf,359963,S,47261,134.445,born-digital,Standard,none,2026-04-08T23:19:56.741277+00:00
|
||||
5192,VIII-1_11-UHA,PKM/Inbox/VIII-1_11-UHA.pdf,379489,S,60782,164.012,born-digital,Standard,none,2026-04-08T23:19:57.550987+00:00
|
||||
5199,2012_NB_A_March_homewrk_quiz,PKM/Inbox/2012_NB_A_March_homewrk_quiz.pdf,1937838,M,47,0.025,scan-likely,Memo,none,2026-04-08T23:20:02.786018+00:00
|
||||
5201,A Endorsement Exam,PKM/Inbox/A Endorsement Exam.pdf,705076,S,15,0.022,scan-likely,Memo,none,2026-04-08T23:20:07.556782+00:00
|
||||
5204,1_ASME VIII_Introduction,PKM/Inbox/1_ASME VIII_Introduction.pdf,1206580,M,29525,25.057,mixed,Manual,none,2026-04-08T23:20:20.560415+00:00
|
||||
5205,2_ASME VIII_Impact Test,PKM/Inbox/2_ASME VIII_Impact Test.pdf,364827,S,3132,8.791,mixed,Standard,none,2026-04-08T23:20:21.422093+00:00
|
||||
5206,3_ASME VIII_Design_Basic_R1,PKM/Inbox/3_ASME VIII_Design_Basic_R1.pdf,3750689,M,40387,11.026,mixed,Manual,none,2026-04-08T23:20:22.835881+00:00
|
||||
5207,4_ASME VIII_Design_Practical_R1,PKM/Inbox/4_ASME VIII_Design_Practical_R1.pdf,1882509,M,24365,13.253,mixed,Manual,none,2026-04-08T23:20:23.956728+00:00
|
||||
5208,5_ASME VIII_Fabrication-Inspection_R1,PKM/Inbox/5_ASME VIII_Fabrication-Inspection_R1.pdf,2306232,M,26273,11.666,mixed,Manual,none,2026-04-08T23:20:26.704242+00:00
|
||||
5209,6_ASME IX_Welding,PKM/Inbox/6_ASME IX_Welding.pdf,1707491,M,38523,23.103,mixed,Manual,none,2026-04-08T23:20:27.826904+00:00
|
||||
5210,ASME B-PV CODE SECTION Ⅷ Div. 1압력용기 코드(2007),PKM/Inbox/ASME B-PV CODE SECTION Ⅷ Div. 1압력용기 코드(2007).pdf,35453616,L,1199923,34.657,mixed,Standard,over_max_pages_estimated,2026-04-08T23:20:36.746155+00:00
|
||||
5212,PTB-4-2013_01_GENERAL REQUIREMENTS,PKM/Inbox/PTB-4-2013_01_GENERAL REQUIREMENTS.pdf,256269,S,11370,45.432,mixed,Standard,none,2026-04-08T23:20:38.519481+00:00
|
||||
5213,PTB-4-2013_02_EXAMPLE PROBLEM DESCRIPTIONS,PKM/Inbox/PTB-4-2013_02_EXAMPLE PROBLEM DESCRIPTIONS.pdf,123771,S,1961,16.224,mixed,Standard,none,2026-04-08T23:20:39.242724+00:00
|
||||
5214,PTB-4-2013_03_MATERIALS REQUIREMENTS,PKM/Inbox/PTB-4-2013_03_MATERIALS REQUIREMENTS.pdf,808215,S,21380,27.088,mixed,Standard,none,2026-04-08T23:20:40.136240+00:00
|
||||
5225,code에_따른_wps_pqr_이해와_적용,PKM/Inbox/code에_따른_wps_pqr_이해와_적용.pdf,1471806,M,25363,17.646,mixed,Manual,none,2026-04-08T23:20:50.316232+00:00
|
||||
5226,ISO 45001(안전보건경영시스템)_요구사항 해설,PKM/Inbox/ISO 45001(안전보건경영시스템)_요구사항 해설.pdf,41321427,L,35770,0.886,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:21:07.053788+00:00
|
||||
5228,미국의 여름철 고온에 따른 산업안전 문제와 그 대응,PKM/Inbox/미국의 여름철 고온에 따른 산업안전 문제와 그 대응.pdf,980707,S,13012,13.586,mixed,study_note,none,2026-04-08T23:21:20.806314+00:00
|
||||
5229,사업체들의 산업안전 활동 최근 동향과 과제,PKM/Inbox/사업체들의 산업안전 활동 최근 동향과 과제.pdf,818746,S,30674,38.364,mixed,study_note,none,2026-04-08T23:21:21.773986+00:00
|
||||
5230,산업공학(産業工學)과 산업안전(産業安全),PKM/Inbox/산업공학(産業工學)과 산업안전(産業安全).pdf,1746354,M,13701,8.034,mixed,study_note,none,2026-04-08T23:21:22.870060+00:00
|
||||
5231,산업안전 수준 제고를 위한 노사관계의 과제,PKM/Inbox/산업안전 수준 제고를 위한 노사관계의 과제.pdf,1176219,M,19283,16.788,mixed,study_note,none,2026-04-08T23:21:23.918345+00:00
|
||||
5235,산업안전기사 2003년 3월,PKM/Inbox/산업안전기사 2003년 3월.pdf,1922633,M,38600,20.558,mixed,study_note,none,2026-04-08T23:21:30.549895+00:00
|
||||
5236,산업안전기사_건설공사 안전관리,PKM/Inbox/산업안전기사_건설공사 안전관리.pdf,35080540,L,59699,1.743,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:21:41.107632+00:00
|
||||
5237,4M 위험성평가 기법을 이용한 아차사고의 효과적인 발굴기법,PKM/Inbox/4M 위험성평가 기법을 이용한 아차사고의 효과적인 발굴기법.pdf,1428352,M,17312,12.411,mixed,study_note,none,2026-04-08T23:21:41.748592+00:00
|
||||
5238,근로자 노출평가제도 내 위험성평가 방법론의 적용,PKM/Inbox/근로자 노출평가제도 내 위험성평가 방법론의 적용.pdf,393908,S,34424,89.488,born-digital,study_note,none,2026-04-08T23:21:42.648168+00:00
|
||||
5239,기계안전의 위험성평가와 안전대책,PKM/Inbox/기계안전의 위험성평가와 안전대책.pdf,271735,S,6373,24.016,mixed,study_note,none,2026-04-08T23:21:43.516587+00:00
|
||||
5240,사업장 위험성평가에 관한 법제의 비교법적 고찰,PKM/Inbox/사업장 위험성평가에 관한 법제의 비교법적 고찰.pdf,437377,S,35787,83.786,born-digital,study_note,none,2026-04-08T23:21:44.434222+00:00
|
||||
5241,수소충전소 튜브트레일러 누출에 따른 위험성평가,PKM/Inbox/수소충전소 튜브트레일러 누출에 따른 위험성평가.pdf,1091248,M,12461,11.693,mixed,study_note,none,2026-04-08T23:21:45.145330+00:00
|
||||
5242,위험성평가 방법 A Literature Review,PKM/Inbox/위험성평가 방법 A Literature Review.pdf,210188,S,2602,12.676,mixed,study_note,none,2026-04-08T23:21:46.011697+00:00
|
||||
5243,위험성평가 사업장 구축 및 실행방안,PKM/Inbox/위험성평가 사업장 구축 및 실행방안.pdf,1029859,S,11524,11.458,mixed,study_note,none,2026-04-08T23:21:47.747234+00:00
|
||||
5244,산업안전기사_기계 기구 및 설비 안전관리,PKM/Inbox/산업안전기사_기계 기구 및 설비 안전관리.pdf,38948352,L,67629,1.778,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:21:48.974016+00:00
|
||||
5245,위험성평가 제도의 만족도 및 인식도 조사에 관한 연구,PKM/Inbox/위험성평가 제도의 만족도 및 인식도 조사에 관한 연구.pdf,208897,S,1840,9.02,mixed,study_note,none,2026-04-08T23:21:48.978130+00:00
|
||||
5246,위험성평가,PKM/Inbox/위험성평가.pdf,4051722,M,8267,2.089,scan-likely,study_note,none,2026-04-08T23:21:50.820505+00:00
|
||||
5249,산업안전기사_산업재해 예방 및 안전보건교육,PKM/Inbox/산업안전기사_산업재해 예방 및 안전보건교육.pdf,34442396,L,66969,1.991,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:21:56.802323+00:00
|
||||
5253,산업안전기사_인간공학 및 위험성평가,PKM/Inbox/산업안전기사_인간공학 및 위험성평가.pdf,29835234,L,59514,2.043,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:22:04.156790+00:00
|
||||
5257,Modifying Large Language Model Post-Training for Diverse Creative Writing,PKM/Inbox/Modifying Large Language Model Post-Training for Diverse Creative Writing.pdf,885908,S,91834,106.149,born-digital,Academic_Paper,none,2026-04-08T23:22:06.950818+00:00
|
||||
5260,산업안전기사_전기설비 안전관리,PKM/Inbox/산업안전기사_전기설비 안전관리.pdf,39221894,L,80499,2.102,scan-likely,study_note,over_max_pages_estimated,2026-04-08T23:22:12.862475+00:00
|
||||
5262,コード X AI_00_はじめに,PKM/Inbox/コード X AI_00_はじめに.pdf,4551317,M,18591,4.183,scan-likely,Reference,none,2026-04-08T23:22:17.418401+00:00
|
||||
5263,황현필의 진보를 위한 역사_1장 식민지근대화론 거짓말을 그만하라,PKM/Inbox/황현필의 진보를 위한 역사_1장 식민지근대화론 거짓말을 그만하라.pdf,16365941,L,26144,1.636,scan-likely,Academic_Paper,none,2026-04-08T23:22:41.986015+00:00
|
||||
5264,황현필의 진보를 위한 역사_2장 식민지 수탈과 학살의 진실을 말하라,PKM/Inbox/황현필의 진보를 위한 역사_2장 식민지 수탈과 학살의 진실을 말하라.pdf,14154100,L,29575,2.14,scan-likely,Academic_Paper,none,2026-04-08T23:22:43.326498+00:00
|
||||
5265,황현필의 진보를 위한 역사_3장 독립운동을 상처 내지 말라,PKM/Inbox/황현필의 진보를 위한 역사_3장 독립운동을 상처 내지 말라.pdf,10398784,M,23381,2.302,scan-likely,Note,none,2026-04-08T23:22:47.235541+00:00
|
||||
5266,황현필의 진보를 위한 역사_4장 김구를 모욕하지 말라,PKM/Inbox/황현필의 진보를 위한 역사_4장 김구를 모욕하지 말라.pdf,7617487,M,15170,2.039,scan-likely,Academic_Paper,none,2026-04-08T23:22:47.988096+00:00
|
||||
5267,황현필의 진보를 위한 역사_5장 해방정국을 감추지 말라,PKM/Inbox/황현필의 진보를 위한 역사_5장 해방정국을 감추지 말라.pdf,12320415,L,19457,1.617,scan-likely,Note,none,2026-04-08T23:22:49.154378+00:00
|
||||
5269,황현필의 진보를 위한 역사_7장 이승만 국부 만들기를 그만두라,PKM/Inbox/황현필의 진보를 위한 역사_7장 이승만 국부 만들기를 그만두라.pdf,14647972,L,34310,2.399,scan-likely,Note,none,2026-04-08T23:22:53.228481+00:00
|
||||
5270,황현필의 진보를 위한 역사_8장 6-25전쟁의 사실을은폐하지 말라,PKM/Inbox/황현필의 진보를 위한 역사_8장 6-25전쟁의 사실을은폐하지 말라.pdf,17737798,L,41579,2.4,scan-likely,Note,none,2026-04-08T23:22:59.687925+00:00
|
||||
5271,황현필의 진보를 위한 역사_9장 박정희 신격화를 중단하라,PKM/Inbox/황현필의 진보를 위한 역사_9장 박정희 신격화를 중단하라.pdf,16266788,L,29837,1.878,scan-likely,Academic_Paper,none,2026-04-08T23:23:04.499857+00:00
|
||||
5272,황현필의 진보를 위한 역사_10장 5-18 광주민주화운동을 폄훼하지 말라,PKM/Inbox/황현필의 진보를 위한 역사_10장 5-18 광주민주화운동을 폄훼하지 말라.pdf,13749286,L,24990,1.861,scan-likely,Note,none,2026-04-08T23:23:09.164908+00:00
|
||||
5273,황현필의 진보를 위한 역사_11장 통일을 반대하지 말라,PKM/Inbox/황현필의 진보를 위한 역사_11장 통일을 반대하지 말라.pdf,10701885,L,24856,2.378,scan-likely,Academic_Paper,none,2026-04-08T23:23:13.489086+00:00
|
||||
6675,TK-SUP-01_2026년 안전보건 경영목표 달성을 위한 세부 실시 계획서,PKM/Inbox/TK-SUP-01_2026년 안전보건 경영목표 달성을 위한 세부 실시 계획서.pdf,3852353,M,3988,1.06,scan-likely,study_note,none,2026-04-14T06:34:13.166804+00:00
|
||||
8852,"TKP-26-0112_신양철강_650,000원","PKM/Inbox/발주/2026-03/TKP-26-0112_신양철강/TKP-26-0112_신양철강_650,000원.pdf",448972,S,1845,4.208,scan-likely,Specification,none,2026-04-20T02:23:47.126481+00:00
|
||||
8854,"TKP-26-0114_대연기업_845,000원","PKM/Inbox/발주/2026-03/TKP-26-0114_대연기업/TKP-26-0114_대연기업_845,000원.pdf",447907,S,2006,4.586,scan-likely,Report,none,2026-04-20T02:23:47.132334+00:00
|
||||
8857,"TKP-26-0132_신양철강_74,290원 - 복사본","PKM/Inbox/발주/2026-03/TKP-26-0132_신양철강/TKP-26-0132_신양철강_74,290원 - 복사본.pdf",450026,S,1937,4.407,scan-likely,Specification,none,2026-04-20T02:23:47.140119+00:00
|
||||
8944,TrusBill print,PKM/Inbox/발주/2026-03/TKP-26-0112_신양철강/TrusBill print.pdf,209087,S,1800,8.815,mixed,Report,none,2026-04-20T05:51:45.513019+00:00
|
||||
|
@@ -0,0 +1,790 @@
|
||||
"""Phase 2 — markdown canonical layer full backfill.
|
||||
|
||||
* legacy pending PDF (Phase 1B/1C/1D 후 잔여) 을 야간 sweep + canary 단계로 변환.
|
||||
* 1D pilot (25건 controlled_backfill, 23 success / 2 skipped / 0 failed) 가 engineering go signal.
|
||||
* handwritten 자동 skip (commit 7d0fca2) 와 marker_worker SKIP_DOC_TYPES / MAX_PAGES guard 모두 운영 시점 가드. 본 스크립트는 inventory · sample · enqueue 만 담당.
|
||||
|
||||
Subcommands:
|
||||
inventory pending PDFs 의 dry-run inventory CSV 작성 (skip forecast 포함)
|
||||
select-canary inventory 에서 stratified 40건 canary sample CSV 작성 (재현성 시드)
|
||||
enqueue sample CSV 의 doc_id 들을 markdown 큐에 enqueue (one-shot, --no-dry-run 필요)
|
||||
nightly-enqueue 야간 sweep — disable flag / marker ready / active-queue threshold / DB pool 가드 후 limit 만큼 enqueue + log_tsv
|
||||
post-report 최종 결과 CSV + markdown 요약 (1D baseline 비교 포함)
|
||||
|
||||
실행 (GPU 서버):
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py inventory \
|
||||
--output /app/evals/markdown/phase2_inventory.csv
|
||||
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py select-canary \
|
||||
--inventory /app/evals/markdown/phase2_inventory.csv \
|
||||
--output /app/evals/markdown/phase2_canary_sample.csv \
|
||||
--seed 20260503
|
||||
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py enqueue \
|
||||
--csv /app/evals/markdown/phase2_canary_sample.csv --no-dry-run
|
||||
|
||||
# cron (nightly):
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py nightly-enqueue \
|
||||
--limit 50 --max-active-queue 5 \
|
||||
--log-tsv /app/evals/markdown/phase2_nightly_log.tsv
|
||||
|
||||
docker exec hyungi_document_server-fastapi-1 python /app/scripts/phase2_backfill.py post-report \
|
||||
--output-csv /app/evals/markdown/phase2_post_report.csv \
|
||||
--output-md /app/evals/markdown/phase2_post_report.md
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import csv
|
||||
import json
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
import sys
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
from collections import Counter, defaultdict
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
# fastapi 컨테이너 WORKDIR=/app — `from models...` import 가능하게 path 추가.
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
|
||||
|
||||
from sqlalchemy import func, select, text
|
||||
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
|
||||
|
||||
# Mirror of the marker_worker rules (same approach as the 1D pilot); kept in a
# single place in this script to limit drift.
SKIP_DOC_TYPES = {
    "발주서",
    "세금계산서",
    "명세표",
    "Invoice",
    "Purchase_Order",
    "Estimate",
    "Statement",
}

# Keywords for the automatic handwritten skip (stated to be identical to the
# marker_worker definition introduced in commit 7d0fca2).
HANDWRITTEN_REGEX = re.compile(r"필기|손글씨|handwritten|handwriting", re.IGNORECASE)

# page_count proxy used by the inventory: a file larger than 25 MiB is assumed
# likely to exceed MAX_PAGES=200. The real decision is made by marker_worker
# after it checks page_count with PyMuPDF; this forecast only feeds the plan
# budget estimate.
MAX_PAGES_FILESIZE_PROXY = 25 * 1024**2

# "Stuck" threshold in minutes. Average conversion time is ~48s (1D
# measurement); 30 minutes is suspicious, 60 minutes is likely stuck.
STUCK_THRESHOLD_MIN = 60.0

# Disable flag, created by the user or by an automatic abort guard. Checked at
# the start of every cron cycle. Path is inside the container (host mount is
# configured separately).
DISABLE_FLAG_PATH = Path("/tmp/phase2_disable")

# marker-service readiness endpoint (Phase 1B spec).
MARKER_READY_URL = "http://marker-service:3300/ready"
|
||||
|
||||
|
||||
# ─── helpers ────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def _build_engine():
    """Create the async SQLAlchemy engine used by every subcommand.

    The connection URL is read from the ``DATABASE_URL`` environment variable
    (a ``KeyError`` is raised when it is unset) and ``pool_pre_ping`` is
    enabled on the engine.
    """
    return create_async_engine(os.environ["DATABASE_URL"], pool_pre_ping=True)
|
||||
|
||||
|
||||
def _file_size_band(file_size: int | None) -> str:
|
||||
if file_size is None:
|
||||
return "unknown"
|
||||
if file_size < 1 * 1024 * 1024:
|
||||
return "S"
|
||||
if file_size < 10 * 1024 * 1024:
|
||||
return "M"
|
||||
return "L"
|
||||
|
||||
|
||||
def _text_density(text_len: int, file_size: int | None) -> float | None:
|
||||
if not file_size or file_size <= 0:
|
||||
return None
|
||||
return text_len / (file_size / 1024.0)
|
||||
|
||||
|
||||
def _text_density_band(density: float | None) -> str:
|
||||
if density is None:
|
||||
return "unknown"
|
||||
if density < 5.0:
|
||||
return "scan-likely"
|
||||
if density < 50.0:
|
||||
return "mixed"
|
||||
return "born-digital"
|
||||
|
||||
|
||||
def _forecast_skip_reason(file_format: str | None, doc_type: str | None,
                          title: str | None, file_path: str | None,
                          file_size: int | None) -> str:
    """Forecast why a document would be skipped, or ``"none"`` if it would not.

    Checks run in a fixed order and the first match wins:

    1. ``"unsupported_extension"`` - a file format is given and is not PDF
       (case-insensitive).
    2. ``"doctype_skip"`` - the document type is listed in ``SKIP_DOC_TYPES``.
    3. ``"handwritten_hint"`` - ``HANDWRITTEN_REGEX`` matches the title or path.
    4. ``"over_max_pages_estimated"`` - the file is larger than
       ``MAX_PAGES_FILESIZE_PROXY`` bytes.
    """
    if file_format and file_format.lower() != "pdf":
        return "unsupported_extension"

    if doc_type in SKIP_DOC_TYPES:
        return "doctype_skip"

    # Search title and path together; missing/empty parts are left out.
    haystack = " ".join(part for part in (title, file_path) if part)
    if HANDWRITTEN_REGEX.search(haystack) is not None:
        return "handwritten_hint"

    if file_size is not None and file_size > MAX_PAGES_FILESIZE_PROXY:
        return "over_max_pages_estimated"

    return "none"
|
||||
|
||||
|
||||
def _percentile(values: list[float], p: float) -> float | None:
|
||||
if not values:
|
||||
return None
|
||||
s = sorted(values)
|
||||
idx = max(0, min(len(s) - 1, int(len(s) * p)))
|
||||
return s[idx]
|
||||
|
||||
|
||||
def _check_marker_ready() -> bool:
    """Return True only if marker-service answers ``{"status": "ready"}``.

    Issues a GET to ``MARKER_READY_URL`` with a 5 second timeout and parses the
    body as JSON. Any failure to reach the service, read the response, or
    decode it is treated as "not ready" so that the enqueue guards abort
    cleanly instead of crashing.
    """
    try:
        with urllib.request.urlopen(MARKER_READY_URL, timeout=5) as resp:
            payload = json.load(resp)
    except (OSError, ValueError):
        # OSError covers URLError/HTTPError and socket timeouts; ValueError
        # covers json.JSONDecodeError and UnicodeDecodeError on a bad body.
        return False
    # A syntactically valid but non-object body (e.g. `[]` or `"ready"`) has
    # no `.get`; treat it as not ready rather than raising AttributeError.
    return isinstance(payload, dict) and payload.get("status") == "ready"
|
||||
|
||||
|
||||
# ─── inventory ──────────────────────────────────────────────────────────
|
||||
|
||||
# Column order of the inventory CSV written by the `inventory` subcommand.
INVENTORY_COLUMNS = [
    "doc_id",
    "title",
    "file_path",
    "file_size",
    "file_size_band",
    "text_len",
    "text_density",
    "text_density_band",
    "doc_type",
    "forecast_skip_reason",
    "created_at",
]
|
||||
|
||||
|
||||
async def cmd_inventory(output: Path) -> None:
    """Write a dry-run inventory CSV of pending PDFs and print a summary.

    Selects non-deleted documents with ``file_format == "pdf"`` and
    ``md_status == "pending"`` (oldest first), derives the size band, text
    density/band and forecast skip reason for each, writes them to ``output``
    using ``INVENTORY_COLUMNS``, and prints distribution counts.

    Args:
        output: Destination CSV path; parent directories are created.
    """
    engine = _build_engine()
    try:
        session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        from models.document import Document  # type: ignore

        async with session_factory() as session:
            rows = (
                await session.execute(
                    select(
                        Document.id, Document.title, Document.file_path, Document.file_size,
                        Document.file_format, Document.document_type,
                        Document.extracted_text, Document.created_at,
                    ).where(
                        Document.deleted_at.is_(None),
                        Document.file_format == "pdf",
                        Document.md_status == "pending",
                    )
                    .order_by(Document.created_at.asc())
                )
            ).all()
    finally:
        # Release the pool even when the query fails, and before the file and
        # console work below, which needs no database connection.
        await engine.dispose()

    enriched = []
    for r in rows:
        text_len = len(r.extracted_text or "")
        density = _text_density(text_len, r.file_size)
        enriched.append({
            "doc_id": r.id,
            "title": (r.title or ""),
            "file_path": (r.file_path or ""),
            "file_size": r.file_size or 0,
            # Band is computed from the raw value so a NULL size stays "unknown".
            "file_size_band": _file_size_band(r.file_size),
            "text_len": text_len,
            "text_density": round(density, 3) if density is not None else "",
            "text_density_band": _text_density_band(density),
            "doc_type": r.document_type or "",
            "forecast_skip_reason": _forecast_skip_reason(
                r.file_format, r.document_type, r.title, r.file_path, r.file_size
            ),
            "created_at": r.created_at.isoformat() if r.created_at else "",
        })

    output.parent.mkdir(parents=True, exist_ok=True)
    with output.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=INVENTORY_COLUMNS, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(enriched)

    skip_dist = Counter(r["forecast_skip_reason"] for r in enriched)
    band_dist = Counter(r["file_size_band"] for r in enriched)
    density_dist = Counter(r["text_density_band"] for r in enriched)
    type_dist = Counter(r["doc_type"] or "(NULL)" for r in enriched)

    print("\n## Phase 2 inventory")
    print(f" total pending PDFs: {len(enriched)}")
    print(f" output: {output}")
    print("\n forecast_skip_reason:")
    for k, v in skip_dist.most_common():
        print(f" {k:<30} {v:>4}")
    print("\n file_size_band: ", dict(band_dist))
    print(" text_density_band: ", dict(density_dist))
    print("\n doc_type top 10:")
    for k, v in type_dist.most_common(10):
        print(f" {k:<25} {v:>4}")

    convert_target = skip_dist["none"]
    print(f"\n 변환 시도 후보 (forecast_skip_reason='none'): {convert_target}건")
    print(f" 즉시 skip 예상: {len(enriched) - convert_target}건")
|
||||
|
||||
|
||||
# ─── select-canary ──────────────────────────────────────────────────────
|
||||
|
||||
# Column order of the canary sample CSV written by `select-canary`.
CANARY_COLUMNS = [
    "doc_id",
    "title",
    "file_size",
    "file_size_band",
    "text_density",
    "text_density_band",
    "doc_type",
    "bucket_label",
]
|
||||
|
||||
|
||||
def _select_canary_buckets(candidates: list[dict], rng: random.Random) -> list[dict]:
|
||||
"""40건 stratified — plan §"Sample budget" 표 참조.
|
||||
|
||||
large 6 / scan_likely 2 / study_note 10 / Academic_Paper 8 / Reference 6 /
|
||||
{Standard,Manual,Specification} 4 / {Note,Report,Memo,NULL} 4 = 40
|
||||
"""
|
||||
selected: list[dict] = []
|
||||
used: set[int] = set()
|
||||
|
||||
def take(pool_filter, n: int, label: str) -> None:
|
||||
avail = [c for c in candidates if c["doc_id"] not in used and pool_filter(c)]
|
||||
rng.shuffle(avail)
|
||||
for c in avail[:n]:
|
||||
picked = dict(c)
|
||||
picked["bucket_label"] = label
|
||||
selected.append(picked)
|
||||
used.add(c["doc_id"])
|
||||
|
||||
take(lambda c: c["file_size_band"] == "L", 6, "large")
|
||||
take(lambda c: c["text_density_band"] == "scan-likely", 2, "scan_likely")
|
||||
take(lambda c: c["doc_type"] == "study_note" and c["text_density_band"] == "born-digital", 10, "study_note")
|
||||
take(lambda c: c["doc_type"] == "Academic_Paper" and c["text_density_band"] == "born-digital", 8, "Academic_Paper")
|
||||
take(lambda c: c["doc_type"] == "Reference" and c["text_density_band"] == "born-digital", 6, "Reference")
|
||||
take(lambda c: c["doc_type"] in {"Standard", "Manual", "Specification"}, 4, "tech_doc")
|
||||
take(lambda c: (c["doc_type"] in {"Note", "Report", "Memo"} or not c["doc_type"]), 4, "minor_doc")
|
||||
|
||||
if len(selected) < 40:
|
||||
leftover = [c for c in candidates if c["doc_id"] not in used]
|
||||
rng.shuffle(leftover)
|
||||
for c in leftover[: 40 - len(selected)]:
|
||||
picked = dict(c)
|
||||
picked["bucket_label"] = "filler"
|
||||
selected.append(picked)
|
||||
used.add(c["doc_id"])
|
||||
|
||||
return selected[:40]
|
||||
|
||||
|
||||
def cmd_select_canary(inventory: Path, output: Path, seed: int) -> None:
    """Build the canary sample CSV from an inventory CSV.

    Only inventory rows whose ``forecast_skip_reason`` is ``"none"`` are
    eligible. Selection is delegated to ``_select_canary_buckets`` with a
    ``random.Random(seed)`` instance, so the same inventory and seed give the
    same sample. Exits with status 1 when the inventory file is missing.

    Args:
        inventory: Path of the inventory CSV to read.
        output: Path of the sample CSV to write; parent directories are created.
        seed: Seed for the random generator.
    """
    if not inventory.is_file():
        print(f"[error] inventory CSV 없음: {inventory}", file=sys.stderr)
        sys.exit(1)

    with inventory.open(encoding="utf-8") as src:
        candidates: list[dict] = [
            {
                "doc_id": int(rec["doc_id"]),
                "title": rec["title"],
                # An empty file_size cell is read back as 0.
                "file_size": int(rec["file_size"]) if rec["file_size"] else 0,
                "file_size_band": rec["file_size_band"],
                "text_density": rec["text_density"],
                "text_density_band": rec["text_density_band"],
                "doc_type": rec["doc_type"],
            }
            for rec in csv.DictReader(src)
            if rec["forecast_skip_reason"] == "none"
        ]

    selected = _select_canary_buckets(candidates, random.Random(seed))

    output.parent.mkdir(parents=True, exist_ok=True)
    with output.open("w", newline="", encoding="utf-8") as dst:
        writer = csv.DictWriter(dst, fieldnames=CANARY_COLUMNS, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(
            {col: item.get(col, "") for col in CANARY_COLUMNS} for item in selected
        )

    print(f"\n## Canary sample (seed={seed})")
    print(f" selected: {len(selected)} / 40")
    print(f" output: {output}")

    print(f"\n bucket_label:")
    label_counts = Counter(item["bucket_label"] for item in selected)
    for label, count in label_counts.most_common():
        print(f" {label:<20} {count:>3}")

    print(f"\n doc_id list:")
    ordered = sorted(selected, key=lambda x: (x["bucket_label"], x["doc_id"]))
    for item in ordered:
        print(f" {item['doc_id']:>5} [{item['bucket_label']:<14}] {(item['title'] or '-')[:60]}")
|
||||
|
||||
|
||||
# ─── enqueue (one-shot from CSV) ─────────────────────────────────────────
|
||||
|
||||
|
||||
async def cmd_enqueue(csv_path: Path, dry_run: bool) -> None:
    """Enqueue every ``doc_id`` listed in a sample CSV onto the markdown stage.

    One-shot command. Exits with status 1 when the CSV is missing and with
    status 2 when marker-service is not ready. In dry-run mode only the target
    ids are printed; nothing is enqueued and the database is not contacted.

    Args:
        csv_path: CSV with a ``doc_id`` column.
        dry_run: When true, print the targets and return without enqueueing.
    """
    if not csv_path.is_file():
        print(f"[error] sample CSV 없음: {csv_path}", file=sys.stderr)
        sys.exit(1)

    with csv_path.open(encoding="utf-8") as f:
        ids: list[int] = [int(row["doc_id"]) for row in csv.DictReader(f)]

    if not ids:
        print("[abort] enqueue 대상 없음.")
        return

    print(f"[targets] {len(ids)} doc_ids: {ids[:10]}{' …' if len(ids) > 10 else ''}")

    if dry_run:
        print("[dry-run] --no-dry-run 으로 다시 실행하면 실제 enqueue.")
        return

    if not _check_marker_ready():
        print("[abort] marker-service /ready 가 ready 가 아님. enqueue 중단.")
        sys.exit(2)

    enqueued: list[int] = []
    skipped: list[int] = []
    engine = _build_engine()
    try:
        session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        from models.queue import enqueue_stage  # type: ignore

        async with session_factory() as session:
            for doc_id in ids:
                # A falsy result is counted as skipped; the summary below
                # labels those as "already active".
                ok = await enqueue_stage(session, doc_id, "markdown")
                (enqueued if ok else skipped).append(doc_id)
            # Single commit for the whole batch.
            await session.commit()
    finally:
        # Dispose the pool even when enqueue_stage or the commit raises.
        await engine.dispose()

    print(f"\nenqueued: {len(enqueued)}, skipped (이미 active): {len(skipped)}")
    if skipped:
        print(f" skipped ids: {skipped[:20]}{' …' if len(skipped) > 20 else ''}")
|
||||
|
||||
|
||||
# ─── nightly-enqueue (cron-friendly) ─────────────────────────────────────
|
||||
|
||||
# Column order of the nightly sweep log (one TSV row per logged cron run).
NIGHTLY_TSV_COLUMNS = [
    "date",
    "enqueued",
    "active_queue_at_start",
    "active_queue_oldest_age_min",
    "pending_pool_remaining",
    "abort_reason",
    "marker_ready",
]
|
||||
|
||||
|
||||
def _append_tsv(log_tsv: Path, row: dict) -> None:
    """Append one row to the nightly TSV log, writing the header when needed.

    The header line is emitted first when the file does not exist yet or is
    empty. Cells follow ``NIGHTLY_TSV_COLUMNS`` order; keys missing from
    ``row`` become empty cells and every value is passed through ``str()``.

    Args:
        log_tsv: Log file path; parent directories are created.
        row: Mapping of column name to value.
    """
    log_tsv.parent.mkdir(parents=True, exist_ok=True)
    needs_header = (not log_tsv.exists()) or log_tsv.stat().st_size == 0

    lines = []
    if needs_header:
        lines.append("\t".join(NIGHTLY_TSV_COLUMNS) + "\n")
    cells = [str(row.get(col, "")) for col in NIGHTLY_TSV_COLUMNS]
    lines.append("\t".join(cells) + "\n")

    with log_tsv.open("a", encoding="utf-8") as fh:
        fh.writelines(lines)
|
||||
|
||||
|
||||
async def cmd_nightly_enqueue(limit: int, max_active_queue: int, log_tsv: Path,
                              dry_run: bool) -> None:
    """Nightly sweep: run the guards, then enqueue up to ``limit`` pending PDFs.

    Steps, in order:

    1. Abort if ``DISABLE_FLAG_PATH`` exists.
    2. Abort if marker-service is not ready.
    3. Skip this cycle if more than ``max_active_queue`` markdown queue rows
       are pending/processing.
    4. Select the oldest pending PDFs that have no active markdown queue row.
    5. Enqueue them on the ``markdown`` stage and commit once.
    6. Count the pending pool that remains.

    A row is appended to ``log_tsv`` on every abort/skip path and after a real
    enqueue. A dry run prints the ids it would enqueue and writes no log row
    at that point.

    Args:
        limit: Maximum number of documents to enqueue in this run.
        max_active_queue: Active-queue size above which the run is skipped.
        log_tsv: Path of the TSV log to append to.
        dry_run: When true, stop after selecting the candidate ids.
    """
    # NOTE(review): local (naive) date, matching the existing log rows.
    today = datetime.now().strftime("%Y-%m-%d")
    base_row = {
        "date": today,
        "enqueued": 0,
        "active_queue_at_start": "",
        "active_queue_oldest_age_min": "",
        "pending_pool_remaining": "",
        "abort_reason": "",
        "marker_ready": "",
    }

    # 1. disable flag
    if DISABLE_FLAG_PATH.exists():
        base_row["abort_reason"] = "disable_flag"
        _append_tsv(log_tsv, base_row)
        print(f"[abort] disable flag 존재: {DISABLE_FLAG_PATH}")
        return

    # 2. marker ready
    marker_ready = _check_marker_ready()
    base_row["marker_ready"] = 1 if marker_ready else 0
    if not marker_ready:
        base_row["abort_reason"] = "marker_unhealthy"
        _append_tsv(log_tsv, base_row)
        print("[abort] marker-service /ready 가 ready 아님")
        return

    engine = _build_engine()
    try:
        session_factory = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)

        # Document and ProcessingQueue are not referenced below. They are kept
        # because importing them may have side effects this function relies on
        # (e.g. model registration) - NOTE(review): confirm before removing.
        from models.document import Document  # type: ignore  # noqa: F401
        from models.queue import ProcessingQueue, enqueue_stage  # type: ignore  # noqa: F401

        # 3. active queue check + oldest age
        async with session_factory() as session:
            active_stats = (await session.execute(text("""
                SELECT COUNT(*) AS active_count,
                       COALESCE(EXTRACT(EPOCH FROM (NOW() - MIN(COALESCE(started_at, created_at))))/60, 0) AS oldest_age_min
                FROM processing_queue
                WHERE stage='markdown' AND status IN ('pending','processing')
            """))).first()
        active_count = int(active_stats[0])
        oldest_age = float(active_stats[1])

        base_row["active_queue_at_start"] = active_count
        base_row["active_queue_oldest_age_min"] = round(oldest_age, 1)

        if active_count > max_active_queue:
            base_row["abort_reason"] = "active_queue_threshold"
            _append_tsv(log_tsv, base_row)
            print(f"[skip] active queue {active_count} > threshold {max_active_queue} (oldest {oldest_age:.1f}min)")
            # The stuck warning is only emitted on this over-threshold path.
            if oldest_age > STUCK_THRESHOLD_MIN:
                print(f"[warn] oldest active row age {oldest_age:.1f}min > {STUCK_THRESHOLD_MIN}min — possible stuck")
            return

        # 4. pending pool query (LIMIT is a bound parameter, not interpolated)
        async with session_factory() as session:
            pool_rows = (
                await session.execute(
                    text("""
                        SELECT id FROM documents
                        WHERE deleted_at IS NULL
                          AND file_format='pdf'
                          AND md_status='pending'
                          AND id NOT IN (
                              SELECT document_id FROM processing_queue
                              WHERE stage='markdown' AND status IN ('pending','processing')
                          )
                        ORDER BY created_at ASC
                        LIMIT :limit
                    """),
                    {"limit": int(limit)},
                )
            ).all()
        pool_ids = [r[0] for r in pool_rows]

        if not pool_ids:
            base_row["abort_reason"] = "pool_empty"
            base_row["pending_pool_remaining"] = 0
            _append_tsv(log_tsv, base_row)
            print("[done] pending pool empty — Phase 2 backfill 자연 완료 신호.")
            return

        if dry_run:
            print(f"[dry-run] would enqueue {len(pool_ids)} ids: {pool_ids[:10]}{' …' if len(pool_ids) > 10 else ''}")
            return

        # 5. enqueue
        enqueued: list[int] = []
        skipped: list[int] = []
        async with session_factory() as session:
            for doc_id in pool_ids:
                ok = await enqueue_stage(session, doc_id, "markdown")
                (enqueued if ok else skipped).append(doc_id)
            await session.commit()

        # 6. pending pool remaining (post-enqueue)
        async with session_factory() as session:
            remaining_count = (
                await session.execute(text("""
                    SELECT COUNT(*) FROM documents d
                    WHERE d.deleted_at IS NULL
                      AND d.file_format='pdf'
                      AND d.md_status='pending'
                      AND d.id NOT IN (
                          SELECT document_id FROM processing_queue
                          WHERE stage='markdown' AND status IN ('pending','processing')
                      )
                """))
            ).scalar()

        base_row["enqueued"] = len(enqueued)
        base_row["pending_pool_remaining"] = int(remaining_count or 0)
        _append_tsv(log_tsv, base_row)

        print(f"\n[ok] enqueued={len(enqueued)} skipped={len(skipped)} remaining_pool={remaining_count}")
    finally:
        # One disposal point for every exit path above, including exceptions.
        await engine.dispose()
|
||||
|
||||
|
||||
# ─── post-report ────────────────────────────────────────────────────────
|
||||
|
||||
# Column order of the post-report CSV.
POST_REPORT_COLUMNS = [
    "doc_id",
    "title",
    "final_md_status",
    "md_extraction_engine",
    "md_extraction_engine_version",
    "elapsed_ms_estimate",
    "text_length_ratio",
    "markdown_heading_count",
    "markdown_table_row_count",
    "markdown_image_count",
    "warnings",
    "phase2_processed_at",
]

# 1D pilot baseline (project_markdown_canonical_layer.md, "Phase 1D results"
# section), used as the comparison point for the Phase 2 report.
BASELINE_1D = {
    "success_rate": 0.92,  # 23/25
    "skipped_rate": 0.08,
    "failed_rate": 0.0,
    "elapsed_p50_ms": 34000,
    "elapsed_p90_ms": 112000,
    "text_length_ratio_p50": 1.15,
    "warnings_heading_jump_pct": 0.86,  # 24/28
    "warnings_low_image_alt_pct": 0.89,  # 25/28
}
|
||||
|
||||
|
||||
def _post_report_parse_start(phase2_start: str | None) -> datetime | str | None:
    """Turn the ``--phase2-start`` string into a ``datetime`` when possible.

    Binding a real ``datetime`` keeps the timestamp comparison working on
    drivers that refuse to coerce strings. A value that cannot be parsed is
    returned unchanged so that any format the database accepted before is
    still passed through as-is.
    """
    if not phase2_start:
        return None
    raw = phase2_start.strip()
    # datetime.fromisoformat() only understands a trailing "Z" from Python 3.11 on.
    if raw.endswith(("Z", "z")):
        raw = raw[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(raw)
    except ValueError:
        return phase2_start


def _post_report_quality_parts(quality) -> tuple[dict, list[str]]:
    """Split an ``md_extraction_quality`` value into ``(metrics, warnings)``.

    Anything that does not have the expected shape (non-dict payload, non-dict
    ``metrics``, non-list ``warnings``) degrades to an empty dict / list, and
    warning entries are coerced to ``str`` so they can be joined and counted.
    """
    if not isinstance(quality, dict):
        return {}, []
    metrics = quality.get("metrics")
    warnings = quality.get("warnings")
    return (
        metrics if isinstance(metrics, dict) else {},
        [str(w) for w in warnings] if isinstance(warnings, list) else [],
    )


async def _post_report_fetch(phase2_start: str | None) -> tuple[list, dict[int, float]]:
    """Load the in-scope document rows and their markdown-stage elapsed times.

    Returns ``(rows, elapsed)`` where ``elapsed`` maps document id to elapsed
    milliseconds (``completed_at - started_at``) of its markdown queue row.
    The engine is always disposed, including when a query fails.
    """
    from models.document import Document  # type: ignore
    from models.queue import ProcessingQueue  # type: ignore

    id_batch_size = 1000
    start_bound = _post_report_parse_start(phase2_start)

    engine = _build_engine()
    try:
        Session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
        async with Session() as session:
            cond = [Document.deleted_at.is_(None), Document.file_format == "pdf"]
            if phase2_start:
                cond.append(Document.md_generated_at >= start_bound)
            else:
                cond.append(Document.md_generated_at.is_not(None))

            rows = (
                await session.execute(
                    select(
                        Document.id, Document.title, Document.md_status,
                        Document.md_extraction_engine, Document.md_extraction_engine_version,
                        Document.md_extraction_quality, Document.md_extraction_error,
                        Document.md_generated_at,
                    ).where(*cond)
                )
            ).all()

            ids = [r.id for r in rows]
            queue_rows = []
            # Query in batches so a large id list cannot exceed the driver's
            # bind-parameter limit; an empty id list issues no query at all.
            for offset in range(0, len(ids), id_batch_size):
                batch = ids[offset:offset + id_batch_size]
                queue_rows.extend(
                    (
                        await session.execute(
                            select(
                                ProcessingQueue.document_id,
                                ProcessingQueue.started_at,
                                ProcessingQueue.completed_at,
                            ).where(
                                ProcessingQueue.document_id.in_(batch),
                                ProcessingQueue.stage == "markdown",
                            )
                        )
                    ).all()
                )
    finally:
        await engine.dispose()

    elapsed: dict[int, float] = {}
    latest_done: dict = {}
    for q in queue_rows:
        if not (q.started_at and q.completed_at):
            continue
        # If a document has several markdown queue rows, use the most recently
        # completed one so the result does not depend on row order.
        previous = latest_done.get(q.document_id)
        if previous is None or q.completed_at > previous:
            latest_done[q.document_id] = q.completed_at
            elapsed[q.document_id] = (q.completed_at - q.started_at).total_seconds() * 1000
    return rows, elapsed


def _post_report_csv_rows(rows, elapsed: dict[int, float]) -> list[dict]:
    """Build one CSV record (keyed by ``POST_REPORT_COLUMNS``) per document row."""
    csv_rows = []
    for r in rows:
        metrics, warnings = _post_report_quality_parts(r.md_extraction_quality)
        csv_rows.append({
            "doc_id": r.id,
            "title": (r.title or "")[:120],
            "final_md_status": r.md_status,
            "md_extraction_engine": r.md_extraction_engine or "",
            "md_extraction_engine_version": r.md_extraction_engine_version or "",
            "elapsed_ms_estimate": int(elapsed[r.id]) if r.id in elapsed else "",
            "text_length_ratio": metrics.get("text_length_ratio", ""),
            "markdown_heading_count": metrics.get("markdown_heading_count", ""),
            "markdown_table_row_count": metrics.get("markdown_table_row_count", ""),
            "markdown_image_count": metrics.get("markdown_image_count", ""),
            "warnings": ",".join(warnings),
            "phase2_processed_at": r.md_generated_at.isoformat() if r.md_generated_at else "",
        })
    return csv_rows


def _post_report_markdown(rows, elapsed: dict[int, float], by_status: Counter,
                          phase2_start: str | None) -> list[str]:
    """Render the markdown summary (status split, 1D comparison, skips, failures, outliers)."""
    total = len(rows)
    success_n = by_status.get("success", 0)
    skipped_n = by_status.get("skipped", 0)
    failed_n = by_status.get("failed", 0)

    success_rows = [r for r in rows if r.md_status == "success"]
    failed_rows = [r for r in rows if r.md_status == "failed"]

    known_warnings = {"heading_hierarchy_jump", "low_image_alt_text_ratio"}
    text_ratios = []
    heading_jump_count = 0
    low_image_alt_count = 0
    new_warnings: Counter = Counter()
    outliers = []
    for r in success_rows:
        metrics, warnings = _post_report_quality_parts(r.md_extraction_quality)
        ratio = metrics.get("text_length_ratio")
        if isinstance(ratio, (int, float)):
            text_ratios.append(float(ratio))
        if "heading_hierarchy_jump" in warnings:
            heading_jump_count += 1
        if "low_image_alt_text_ratio" in warnings:
            low_image_alt_count += 1
        # Warning kinds outside the two tracked against the 1D baseline.
        new_warnings.update(w for w in warnings if w not in known_warnings)

        # Outliers: more than 5 minutes elapsed, or a text ratio outside [0.5, 10].
        e = elapsed.get(r.id)
        if e and e > 300_000:
            outliers.append((r.id, f"elapsed_ms={int(e)}"))
        if isinstance(ratio, (int, float)) and (ratio < 0.5 or ratio > 10):
            outliers.append((r.id, f"text_length_ratio={ratio:.2f}"))

    elapsed_succ = [elapsed[r.id] for r in success_rows if r.id in elapsed]
    elapsed_p50 = _percentile(elapsed_succ, 0.5)
    elapsed_p90 = _percentile(elapsed_succ, 0.9)
    text_ratio_p50 = _percentile(text_ratios, 0.5)

    # The skip reason is the part of the error text before the first ":".
    skip_reasons: Counter = Counter(
        (r.md_extraction_error or "unknown").split(":", 1)[0].strip()
        for r in rows
        if r.md_status == "skipped"
    )

    def pct(n: int, d: int) -> str:
        return f"{n/d*100:.0f}%" if d else "-"

    def delta_pp(actual_n: int, actual_d: int, baseline: float) -> str:
        if not actual_d:
            return "-"
        return f"{(actual_n/actual_d - baseline)*100:+.1f}pp"

    def fmt_num(v) -> str:
        if v is None:
            return "-"
        if isinstance(v, float):
            return f"{v:.2f}"
        return str(v)

    scope_line = "Scope: file_format='pdf' AND deleted_at IS NULL"
    if phase2_start:
        scope_line += f" AND md_generated_at >= {phase2_start}"
    else:
        scope_line += " AND md_generated_at IS NOT NULL"

    md_lines = [
        "# Phase 2 Post-report",
        "",
        f"Generated: {datetime.now(timezone.utc).isoformat()}",
        scope_line,
        "",
        f"## 처리 분포 (총 {total}건)",
        "| status | count | rate |",
        "|---|---|---|",
        f"| success | {success_n} | {pct(success_n, total)} |",
        f"| skipped | {skipped_n} | {pct(skipped_n, total)} |",
        f"| failed | {failed_n} | {pct(failed_n, total)} |",
        "",
        "## vs 1D baseline 비교",
        "| 메트릭 | 1D | Phase 2 | delta |",
        "|---|---|---|---|",
        f"| success rate | 92% | {pct(success_n, total)} | {delta_pp(success_n, total, BASELINE_1D['success_rate'])} |",
        f"| skipped rate | 8% | {pct(skipped_n, total)} | {delta_pp(skipped_n, total, BASELINE_1D['skipped_rate'])} |",
        f"| failed rate | 0% | {pct(failed_n, total)} | {delta_pp(failed_n, total, BASELINE_1D['failed_rate'])} |",
        f"| elapsed_ms p50 | 34000 | {fmt_num(int(elapsed_p50)) if elapsed_p50 else '-'} | - |",
        f"| elapsed_ms p90 | 112000 | {fmt_num(int(elapsed_p90)) if elapsed_p90 else '-'} | - |",
        f"| text_length_ratio p50 | 1.15 | {fmt_num(text_ratio_p50)} | - |",
        f"| warnings: heading_hierarchy_jump | 86% | {pct(heading_jump_count, success_n)} | - |",
        f"| warnings: low_image_alt_text_ratio | 89% | {pct(low_image_alt_count, success_n)} | - |",
        "",
        "## skip reason 분포",
    ]
    for reason, c in skip_reasons.most_common():
        md_lines.append(f"- `{reason}`: {c}건")
    if not skip_reasons:
        md_lines.append("- (no skipped)")

    md_lines += ["", f"## failed 케이스 ({len(failed_rows)}건)"]
    for r in failed_rows:
        md_lines.append(f"- doc {r.id} `{(r.title or '-')[:60]}`: {r.md_extraction_error or '-'}")
    if not failed_rows:
        md_lines.append("- (no failed)")

    # The heading shows the full outlier count; only the first 30 are listed.
    md_lines += ["", f"## outlier ({len(outliers)}건)"]
    for doc_id, note in outliers[:30]:
        md_lines.append(f"- doc {doc_id}: {note}")
    if not outliers:
        md_lines.append("- (no outliers)")

    if new_warnings:
        md_lines += ["", "## 신규 warning 종류 (1D 미관측)"]
        for w, c in new_warnings.most_common():
            md_lines.append(f"- `{w}`: {c}건")
    return md_lines


async def cmd_post_report(output_csv: Path, output_md: Path,
                          phase2_start: str | None) -> None:
    """Aggregate Phase 2 sweep results and compare them with the 1D baseline.

    Scope is every non-deleted PDF document. When ``phase2_start`` is given
    (ISO timestamp, e.g. '2026-05-03T00:00:00Z') only documents with
    ``md_generated_at >= phase2_start`` are counted; otherwise every document
    that has a ``md_generated_at`` value is counted (the whole history).
    Passing ``phase2_start`` (the time the Phase 2 code was pushed) is the
    recommended usage.

    Args:
        output_csv: Destination of the per-document CSV; parent directories
            are created if missing.
        output_md: Destination of the markdown summary; parent directories
            are created if missing.
        phase2_start: Optional ISO timestamp lower bound, see above.
    """
    rows, elapsed = await _post_report_fetch(phase2_start)

    # CSV
    output_csv.parent.mkdir(parents=True, exist_ok=True)
    with output_csv.open("w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=POST_REPORT_COLUMNS, extrasaction="ignore")
        writer.writeheader()
        writer.writerows(_post_report_csv_rows(rows, elapsed))

    # Markdown summary
    total = len(rows)
    by_status = Counter(r.md_status for r in rows)
    success_n = by_status.get("success", 0)
    skipped_n = by_status.get("skipped", 0)
    failed_n = by_status.get("failed", 0)

    md_lines = _post_report_markdown(rows, elapsed, by_status, phase2_start)
    output_md.parent.mkdir(parents=True, exist_ok=True)
    output_md.write_text("\n".join(md_lines) + "\n", encoding="utf-8")

    print("\n## Phase 2 post-report")
    print(f" CSV: {output_csv}")
    print(f" MD:  {output_md}")
    print(f" scope: {total} rows (success {success_n} / skipped {skipped_n} / failed {failed_n})")
|
||||
|
||||
|
||||
# ─── main ───────────────────────────────────────────────────────────────
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line and run the selected subcommand.

    Subcommands: ``inventory``, ``select-canary``, ``enqueue``,
    ``nightly-enqueue`` and ``post-report``. ``select-canary`` is called
    directly; the others are coroutines driven with ``asyncio.run``.
    """
    cli = argparse.ArgumentParser(
        description="Phase 2 markdown canonical layer full backfill",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = cli.add_subparsers(dest="cmd", required=True)

    inventory = commands.add_parser("inventory", help="pending PDFs dry-run inventory CSV")
    inventory.add_argument("--output", type=Path, required=True)

    canary = commands.add_parser("select-canary", help="stratified 40 canary sample CSV")
    canary.add_argument("--inventory", type=Path, required=True)
    canary.add_argument("--output", type=Path, required=True)
    canary.add_argument("--seed", type=int, default=20260503)

    enqueue = commands.add_parser("enqueue", help="enqueue from sample CSV (one-shot)")
    enqueue.add_argument("--csv", type=Path, required=True)
    # Dry-run is the default here; --no-dry-run must be given to really enqueue.
    dry_run_switch = enqueue.add_mutually_exclusive_group()
    dry_run_switch.add_argument("--dry-run", dest="dry_run", action="store_true", default=True)
    dry_run_switch.add_argument("--no-dry-run", dest="dry_run", action="store_false")

    nightly = commands.add_parser("nightly-enqueue", help="nightly cron sweep")
    nightly.add_argument("--limit", type=int, default=50)
    nightly.add_argument("--max-active-queue", type=int, default=5)
    nightly.add_argument("--log-tsv", type=Path, required=True)
    nightly.add_argument("--dry-run", action="store_true", default=False)

    report = commands.add_parser("post-report", help="final results CSV + markdown summary")
    report.add_argument("--output-csv", type=Path, required=True)
    report.add_argument("--output-md", type=Path, required=True)
    report.add_argument("--phase2-start", type=str, default=None,
                        help="ISO timestamp; only docs with md_generated_at >= this are counted")

    opts = cli.parse_args()

    # The only synchronous subcommand.
    if opts.cmd == "select-canary":
        cmd_select_canary(opts.inventory, opts.output, opts.seed)
        return

    # Lazily built coroutines: each lambda only touches the attributes that
    # exist for its own subcommand.
    async_commands = {
        "inventory": lambda: cmd_inventory(opts.output),
        "enqueue": lambda: cmd_enqueue(opts.csv, opts.dry_run),
        "nightly-enqueue": lambda: cmd_nightly_enqueue(
            opts.limit, opts.max_active_queue, opts.log_tsv, opts.dry_run
        ),
        "post-report": lambda: cmd_post_report(
            opts.output_csv, opts.output_md, opts.phase2_start
        ),
    }
    make_coroutine = async_commands.get(opts.cmd)
    if make_coroutine is not None:
        asyncio.run(make_coroutine())
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user