Files
hyungi_document_server/docker-compose.yml
hyungi a77ac38e92 feat(extraction): 컷오버 Phase 2 — marker-service 제거 (MinerU 단독)
읽기뷰 회귀 0 확인(doc 39464 재처리 → engine=mineru success, 71 imgs, docimg ref/NAS persist
정상) 후 marker 제거. compose 에서 marker-service 블록 + fastapi depends_on + marker_models
볼륨 + services/marker/ 소스 삭제. 롤백 = git history + ~/.local/share/marker-decommission-backups.
마크다운 엔진 = mineru-service 단독.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-18 16:27:26 +09:00

279 lines
11 KiB
YAML
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
services:
postgres:
image: pgvector/pgvector:pg16
volumes:
- pgdata:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d
environment:
POSTGRES_DB: pkm
POSTGRES_USER: pkm
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "100.110.63.63:15432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U pkm"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
kordoc-service:
build: ./services/kordoc
ports:
- "127.0.0.1:3100:3100"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
mem_limit: 4g
memswap_limit: 4g
healthcheck:
test: ["CMD", "node", "-e", "fetch('http://localhost:3100/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))"]
interval: 10s
timeout: 5s
retries: 3
restart: unless-stopped
ocr-service:
build: ./services/ocr
expose:
- "3200"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- ocr_models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3200/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 180s
restart: unless-stopped
# MinerU 2.5 VLM PDF→markdown 추출 — ★ marker-service 대체(컷오버 2026-06-18, A/B 8/8 PASS).
# 단일카드 markdown VRAM ~10GB(marker)→~5.9GB 고정. fastapi 가 MARKER_ENDPOINT 로 호출.
# 동기 do_parse 버그 회피 위해 server.py 는 async aio_do_parse 사용. 포트 3301.
mineru-service:
build: ./services/mineru
ports:
- "127.0.0.1:3301:3301"
expose:
- "3301"
environment:
# vlm-engine = 순수 VLM 단일모델. 기본 hybrid-engine 은 다중모델 로드 = OOM(반드시 명시).
- MINERU_BACKEND=vlm-engine
- MINERU_LANG=${MINERU_LANG:-korean}
# 공유 16GB 카드 공존: 절대 VRAM 캡(GB, 공유카드 robust) + vLLM 분율 캡 병용.
- MINERU_VIRTUAL_VRAM_SIZE=${MINERU_VIRTUAL_VRAM_SIZE:-6}
- MINERU_GPU_MEMORY_UTILIZATION=${MINERU_GPU_MEMORY_UTILIZATION:-0.40}
- MINERU_PRELOAD=${MINERU_PRELOAD:-1}
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- mineru_models:/root/.cache
ipc: host # vLLM 공유메모리 — 공식 run 의 --ipc=host 대응.
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3301/ready"]
interval: 30s
timeout: 10s
retries: 3
start_period: 900s # VLM 모델 lazy 다운로드(~2.4GB)+엔진 로드 여유.
restart: unless-stopped
stt-service:
# 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
# 정책: Mac mini = Gemma 26B 전용 우선이므로 STT/Whisper 는 호출량 무관 GPU 서버 소유.
# 이전 "Mac mini 이전본" 주석은 trace 오인 기반이었고 본 revised 결정으로 폐기.
# fastapi 의 STT_ENDPOINT 는 `http://stt-service:3300` (compose 내부 DNS) 사용.
build: ./services/stt
expose:
- "3300"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- stt_models:/root/.cache
environment:
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
- WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
# D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~4GB) 해제가 90% 봉투의 전제.
# 콜드로드 수초~수십 초는 배치 작업이라 무방 (stt_worker read=1800s 가 흡수).
# 롤백 = STT_PRELOAD=1 + STT_IDLE_UNLOAD_MINUTES=0.
- STT_PRELOAD=0
- STT_IDLE_UNLOAD_MINUTES=${STT_IDLE_UNLOAD_MINUTES:-30}
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
# D-1: idle-unload 도입으로 '모델 적재' 는 더 이상 상시 상태가 아님 — cuda 가용성만
# healthy 기준. 모델 적재 여부는 /ready 의 models_loaded 필드로 관측(정보성).
test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('cuda') else 1)"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
# ── ollama 서비스 제거 (2026-06-08) ──
# 정본 ollama = standalone `~/ollama/docker-compose.yml`(container_name: ollama).
# 그 컨테이너가 hyungi_document_server_default 망(external) + 동일 볼륨
# hyungi_document_server_ollama_data(external, bge-m3) 부착으로 fastapi 의 `ollama:11434`
# 임베딩을 이미 서빙(재부팅에도 durable). 본 중복 서비스는 같은 host 127.0.0.1:11434 를
# 점유 다퉈, 재부팅 후 `docker compose up` 을 'port already allocated' 로 abort →
# 뒤 의존서비스(caddy·frontend) 미기동 = 웹 outage 유발 → 제거. (ollama_data 볼륨 def 는
# standalone 이 external 로 참조하므로 아래 volumes: 에 보존.)
# Phase 1.3: bge-reranker-v2-m3 (TEI) — internal only, fastapi에서 reranker:80으로 호출
# fastapi가 depends_on 안 함 → 단독 시작 가능, 없어도 fastapi 동작 (rerank=false fallback)
reranker:
image: ghcr.io/huggingface/text-embeddings-inference:1.7
container_name: hyungi_document_server-reranker-1
expose:
- "80"
environment:
- MODEL_ID=BAAI/bge-reranker-v2-m3
# PR-2Q-Rerank-Payload-Fix (2026-05-24): 2 env 변경 — 413 root cause 분리.
# (a) MAX_BATCH_TOKENS 8192 → 16384: 각 batch 의 token sum 한도
# (b) MAX_CLIENT_BATCH_SIZE 32 → 64: 1 request 안 entries 수 한도 (default 32).
# multi-query cap 60 chunks (×2 chunks_per_doc dedup) 가 batch entries 54~60
# → 32 한도 초과 → 413. 64 로 늘림.
# GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0.
- MAX_BATCH_TOKENS=16384
- MAX_CLIENT_BATCH_SIZE=256 # 2026-06-18 fix: 64→256, MAX_RERANK_INPUT=200 커버 (batch>64 ERROR=RRF silent fallback 해소; MAX_BATCH_TOKENS가 VRAM 상한이라 entries 증가는 VRAM 무관)
- MAX_CONCURRENT_REQUESTS=4
volumes:
- reranker_cache:/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-fsS", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 120s
restart: unless-stopped
fastapi:
build: ./app
ports:
- "100.110.63.63:8000:8000"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents
- ./config.yaml:/app/config.yaml:ro
- ./domain_policy.yaml:/app/domain_policy.yaml:ro
- ./scripts:/app/scripts:ro
- ./logs:/app/logs
- ./migrations:/app/migrations:ro
depends_on:
postgres:
condition: service_healthy
kordoc-service:
condition: service_healthy
# 마크다운 엔진 = mineru-service (marker-service 제거 2026-06-18, 롤백=git history).
mineru-service:
condition: service_healthy
env_file:
- credentials.env
environment:
- DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
- KORDOC_ENDPOINT=http://kordoc-service:3100
- OCR_ENDPOINT=http://ocr-service:3200
# ★ 컷오버 2026-06-18: marker-service:3300 → mineru-service:3301 (동일 /convert 계약).
- MARKER_ENDPOINT=http://mineru-service:3301
- MARKER_CONTAINER_PATH_PREFIX=/documents
# 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
- STT_ENDPOINT=http://stt-service:3300
# KGS Code 등 외부 학습 자료 추가 스캔 경로 (host .env 에서 주입). 빈 값이면 비활성.
- ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
# PR-MacMini-Derived-Worker-1
- STUDY_EXPLANATION_ENABLED=${STUDY_EXPLANATION_ENABLED:-true}
- INTERNAL_WORKER_TOKEN=${INTERNAL_WORKER_TOKEN}
# Voice Memo PoC v1 — bot 계정 한정 long-expiry access token. default false → 일반 운영 영향 0.
# 활성화: host .env 에 VOICE_MEMO_BOT_TOKEN_ENABLED=true. plan: rosy-launching-otter.md
- VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
- VOICE_MEMO_BOT_USERNAME=${VOICE_MEMO_BOT_USERNAME:-voice-memo-bot}
- VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS=${VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-Notebook-Client-1 — notebook-client-bot long-expiry token (laptop-worker-bot wrapper 재활용,
# 1B Pilot dormant 후 username rename). default false → 운영 영향 0.
- LAPTOP_WORKER_BOT_TOKEN_ENABLED=${LAPTOP_WORKER_BOT_TOKEN_ENABLED:-false}
- LAPTOP_WORKER_BOT_USERNAME=${LAPTOP_WORKER_BOT_USERNAME:-laptop-worker-bot}
- LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS=${LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-2 of DS AI routing policy (2026-05-23) — backends dispatcher via llm-router.
# router_url default points at Mac mini Tailscale interface :8890 (PR-1).
# DS_BACKENDS_VIA_ROUTER=false 로 legacy 직접 호출 path 즉시 복귀.
- LLM_ROUTER_URL=${LLM_ROUTER_URL:-http://100.76.254.116:8890}
- DS_BACKENDS_VIA_ROUTER=${DS_BACKENDS_VIA_ROUTER:-true}
restart: unless-stopped
frontend:
build: ./frontend
ports:
- "127.0.0.1:3000:3000"
depends_on:
- fastapi
restart: unless-stopped
# crawl-24x7 A-8 1차: 전 소스 헬스 패널 — 내부 전용 (읽기 전용 SELECT 만).
# '내부 전용' 성립 구현 = 별도 바인딩뿐 (r4 결정): Tailscale 인터페이스에만 publish.
# 기존 SvelteKit 라우트(vhost=Host 헤더 검사=앱 가드 환원)나 프록시 경로 차단(경로 가드
# 회귀)으로 옮기지 말 것. caddy/home-caddy 라우트 추가 금지. fastapi/postgres 바인딩 선례.
crawl-health:
build: ./services/crawl-health
ports:
- "100.110.63.63:8765:8765"
environment:
- CRAWL_HEALTH_DSN=postgresql://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
depends_on:
postgres:
condition: service_healthy
restart: unless-stopped
# crawl-24x7 B-3: 구독 세션 Playwright fetch 격리 — internal-only (host 포트·caddy 라우트 금지).
# 브라우저 hang/크래시가 fastapi APScheduler 를 잠식하지 않게 별도 컨테이너 + mem cap.
# 세션 파일(쿠키=credential 등가물)은 repo 밖 호스트 경로 ro mount (600, gitignore 무관 영역).
playwright-fetcher:
build: ./services/playwright-fetcher
volumes:
- /home/hyungi/.local/share/crawl-auth:/auth:ro
mem_limit: 2g
restart: unless-stopped
caddy:
image: caddy:2
ports:
- "8080:80"
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
depends_on:
- fastapi
- frontend
restart: unless-stopped
volumes:
pgdata:
caddy_data:
ollama_data:
reranker_cache:
ocr_models:
stt_models:
mineru_models: