Files
hyungi_document_server/docker-compose.yml
T
hyungi 631e4cd8ef feat(publish): P1-1 digest 발행 read API scaffold(503)
기존 /published 라우터에 GET /published/digest 추가 — _verify_token(Bearer)
+ FeedResponse 엔벨로프 재사용(신규 라우터 X). DIGEST_PUBLISH_ENABLED 플래그
(기본 false=inert): off=503 "not enabled", on+projection 미구현=503. 실데이터·시크릿 0.
검증: 무토큰 401·잘못된 토큰 403·유효+off 503·기존 /feed 200 무회귀.
docsrv-viewer-publish 트랙 (plan viewer-daily-report P1-1).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-25 21:43:44 +00:00

289 lines
12 KiB
YAML
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
services:
postgres:
image: pgvector/pgvector:pg16
volumes:
- pgdata:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d
environment:
POSTGRES_DB: pkm
POSTGRES_USER: pkm
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "100.110.63.63:15432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U pkm"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
# 2026-06-20 tier-0 무장: 글로벌 OOM 시 커널이 postgres(prod DB)를 reap 하지 않도록.
oom_score_adj: -900
kordoc-service:
build: ./services/kordoc
ports:
- "127.0.0.1:3100:3100"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
mem_limit: 4g
memswap_limit: 4g
healthcheck:
test: ["CMD", "node", "-e", "fetch('http://localhost:3100/health').then(r=>{process.exit(r.ok?0:1)}).catch(()=>process.exit(1))"]
interval: 10s
timeout: 5s
retries: 3
restart: unless-stopped
ocr-service:
build: ./services/ocr
expose:
- "3200"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- ocr_models:/root/.cache
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:3200/health')"]
interval: 30s
timeout: 10s
retries: 3
start_period: 180s
restart: unless-stopped
# MinerU 2.5 VLM PDF→markdown 추출 — ★ marker-service 대체(컷오버 2026-06-18, A/B 8/8 PASS).
# 단일카드 markdown VRAM ~10GB(marker)→~5.9GB 고정. fastapi 가 MARKER_ENDPOINT 로 호출.
# 동기 do_parse 버그 회피 위해 server.py 는 async aio_do_parse 사용. 포트 3301.
mineru-service:
build: ./services/mineru
mem_limit: 16g # 2026-06-20: VLM 스파이크 봉쇄 (steady ~12GB) — 호스트 30GB 글로벌 OOM 차단
ports:
- "127.0.0.1:3301:3301"
expose:
- "3301"
environment:
# vlm-engine = 순수 VLM 단일모델. 기본 hybrid-engine 은 다중모델 로드 = OOM(반드시 명시).
- MINERU_BACKEND=vlm-engine
- MINERU_LANG=${MINERU_LANG:-korean}
# 공유 16GB 카드 공존: 절대 VRAM 캡(GB, 공유카드 robust) + vLLM 분율 캡 병용.
- MINERU_VIRTUAL_VRAM_SIZE=${MINERU_VIRTUAL_VRAM_SIZE:-6}
- MINERU_GPU_MEMORY_UTILIZATION=${MINERU_GPU_MEMORY_UTILIZATION:-0.40}
- MINERU_PRELOAD=${MINERU_PRELOAD:-1}
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- mineru_models:/root/.cache
ipc: host # vLLM 공유메모리 — 공식 run 의 --ipc=host 대응.
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:3301/ready"]
interval: 30s
timeout: 10s
retries: 3
start_period: 900s # VLM 모델 lazy 다운로드(~2.4GB)+엔진 로드 여유.
restart: unless-stopped
stt-service:
# 2026-05-08 (D9 Track B revised): GPU is canonical STT owner.
# 정책: Mac mini = Gemma 26B 전용 우선이므로 STT/Whisper 는 호출량 무관 GPU 서버 소유.
# 이전 "Mac mini 이전본" 주석은 trace 오인 기반이었고 본 revised 결정으로 폐기.
# fastapi 의 STT_ENDPOINT 는 `http://stt-service:3300` (compose 내부 DNS) 사용.
build: ./services/stt
expose:
- "3300"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents:ro
- stt_models:/root/.cache
environment:
- WHISPER_MODEL=${WHISPER_MODEL:-large-v3}
- WHISPER_DEVICE=${WHISPER_DEVICE:-cuda}
- WHISPER_COMPUTE_TYPE=${WHISPER_COMPUTE_TYPE:-float16}
# D-1 (crawl-24x7): idle-unload 전환 — 영구 점유(~4GB) 해제가 90% 봉투의 전제.
# 콜드로드 수초~수십 초는 배치 작업이라 무방 (stt_worker read=1800s 가 흡수).
# 롤백 = STT_PRELOAD=1 + STT_IDLE_UNLOAD_MINUTES=0.
- STT_PRELOAD=0
- STT_IDLE_UNLOAD_MINUTES=${STT_IDLE_UNLOAD_MINUTES:-30}
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
# D-1: idle-unload 도입으로 '모델 적재' 는 더 이상 상시 상태가 아님 — cuda 가용성만
# healthy 기준. 모델 적재 여부는 /ready 의 models_loaded 필드로 관측(정보성).
test: ["CMD", "python3", "-c", "import json,urllib.request,sys; r=urllib.request.urlopen('http://localhost:3300/ready'); sys.exit(0 if json.load(r).get('cuda') else 1)"]
interval: 30s
timeout: 10s
retries: 3
start_period: 300s
restart: unless-stopped
# ── ollama 서비스 제거 (2026-06-08) ──
# 정본 ollama = standalone `~/ollama/docker-compose.yml`(container_name: ollama).
# 그 컨테이너가 hyungi_document_server_default 망(external) + 동일 볼륨
# hyungi_document_server_ollama_data(external, bge-m3) 부착으로 fastapi 의 `ollama:11434`
# 임베딩을 이미 서빙(재부팅에도 durable). 본 중복 서비스는 같은 host 127.0.0.1:11434 를
# 점유 다퉈, 재부팅 후 `docker compose up` 을 'port already allocated' 로 abort →
# 뒤 의존서비스(caddy·frontend) 미기동 = 웹 outage 유발 → 제거. (ollama_data 볼륨 def 는
# standalone 이 external 로 참조하므로 아래 volumes: 에 보존.)
# Phase 1.3: bge-reranker-v2-m3 (TEI) — internal only, fastapi에서 reranker:80으로 호출
# fastapi가 depends_on 안 함 → 단독 시작 가능, 없어도 fastapi 동작 (rerank=false fallback)
reranker:
image: ghcr.io/huggingface/text-embeddings-inference:1.7
container_name: hyungi_document_server-reranker-1
expose:
- "80"
environment:
- MODEL_ID=BAAI/bge-reranker-v2-m3
# PR-2Q-Rerank-Payload-Fix (2026-05-24): 2 env 변경 — 413 root cause 분리.
# (a) MAX_BATCH_TOKENS 8192 → 16384: 각 batch 의 token sum 한도
# (b) MAX_CLIENT_BATCH_SIZE 32 → 64: 1 request 안 entries 수 한도 (default 32).
# multi-query cap 60 chunks (×2 chunks_per_doc dedup) 가 batch entries 54~60
# → 32 한도 초과 → 413. 64 로 늘림.
# GPU VRAM free 6199MiB 충분. baseline path (MAX_RERANK_INPUT=200) 영향 0.
- MAX_BATCH_TOKENS=16384
- MAX_CLIENT_BATCH_SIZE=256 # 2026-06-18 fix: 64→256, MAX_RERANK_INPUT=200 커버 (batch>64 ERROR=RRF silent fallback 해소; MAX_BATCH_TOKENS가 VRAM 상한이라 entries 증가는 VRAM 무관)
- MAX_CONCURRENT_REQUESTS=4
volumes:
- reranker_cache:/data
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
healthcheck:
test: ["CMD", "curl", "-fsS", "http://localhost/health"]
interval: 30s
timeout: 5s
retries: 3
start_period: 120s
restart: unless-stopped
fastapi:
build: ./app
oom_score_adj: -900 # 2026-06-20 tier-0 무장 (앱+스케줄러 SPOF 보호)
ports:
- "100.110.63.63:8000:8000"
volumes:
- ${NAS_NFS_PATH:-/mnt/nas/Document_Server}:/documents
- ./config.yaml:/app/config.yaml:ro
- ./domain_policy.yaml:/app/domain_policy.yaml:ro
- ./scripts:/app/scripts:ro
- ./logs:/app/logs
- ./migrations:/app/migrations:ro
depends_on:
postgres:
condition: service_healthy
kordoc-service:
condition: service_healthy
# 마크다운 엔진 = mineru-service (marker-service 제거 2026-06-18, 롤백=git history).
mineru-service:
condition: service_healthy
env_file:
- credentials.env
environment:
- DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
- KORDOC_ENDPOINT=http://kordoc-service:3100
- OCR_ENDPOINT=http://ocr-service:3200
# ★ 컷오버 2026-06-18: marker-service:3300 → mineru-service:3301 (동일 /convert 계약).
- MARKER_ENDPOINT=http://mineru-service:3301
- MARKER_CONTAINER_PATH_PREFIX=/documents
# 2026-05-08 (D9 Track B revised): GPU stt-service 정식 승격, 내부 DNS 사용.
- STT_ENDPOINT=http://stt-service:3300
# KGS Code 등 외부 학습 자료 추가 스캔 경로 (host .env 에서 주입). 빈 값이면 비활성.
- ADDITIONAL_WATCH_TARGETS=${ADDITIONAL_WATCH_TARGETS:-}
# PR-MacMini-Derived-Worker-1
- STUDY_EXPLANATION_ENABLED=${STUDY_EXPLANATION_ENABLED:-true}
- INTERNAL_WORKER_TOKEN=${INTERNAL_WORKER_TOKEN}
# docsrv-viewer-publish: 발행 워커/저작 enqueue 게이트(기본 false=inert) + 뷰어↔DS feed Bearer.
- STUDY_PUBLISH_ENABLED=${STUDY_PUBLISH_ENABLED:-false}
- DIGEST_PUBLISH_ENABLED=${DIGEST_PUBLISH_ENABLED:-false}
- VIEWER_SYNC_TOKEN=${VIEWER_SYNC_TOKEN:-}
# study-to-viewer P2: 뷰어 write-back ingest 게이트(기본 false=inert, 검증 후 점등).
- STUDY_INGEST_ENABLED=${STUDY_INGEST_ENABLED:-false}
# Voice Memo PoC v1 — bot 계정 한정 long-expiry access token. default false → 일반 운영 영향 0.
# 활성화: host .env 에 VOICE_MEMO_BOT_TOKEN_ENABLED=true. plan: rosy-launching-otter.md
- VOICE_MEMO_BOT_TOKEN_ENABLED=${VOICE_MEMO_BOT_TOKEN_ENABLED:-false}
- VOICE_MEMO_BOT_USERNAME=${VOICE_MEMO_BOT_USERNAME:-voice-memo-bot}
- VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS=${VOICE_MEMO_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-Notebook-Client-1 — notebook-client-bot long-expiry token (laptop-worker-bot wrapper 재활용,
# 1B Pilot dormant 후 username rename). default false → 운영 영향 0.
- LAPTOP_WORKER_BOT_TOKEN_ENABLED=${LAPTOP_WORKER_BOT_TOKEN_ENABLED:-false}
- LAPTOP_WORKER_BOT_USERNAME=${LAPTOP_WORKER_BOT_USERNAME:-laptop-worker-bot}
- LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS=${LAPTOP_WORKER_BOT_TOKEN_EXPIRE_DAYS:-365}
# PR-2 of DS AI routing policy (2026-05-23) — backends dispatcher via llm-router.
# router_url default points at Mac mini Tailscale interface :8890 (PR-1).
# DS_BACKENDS_VIA_ROUTER=false 로 legacy 직접 호출 path 즉시 복귀.
- LLM_ROUTER_URL=${LLM_ROUTER_URL:-http://100.76.254.116:8890}
- DS_BACKENDS_VIA_ROUTER=${DS_BACKENDS_VIA_ROUTER:-true}
restart: unless-stopped
frontend:
build: ./frontend
ports:
- "127.0.0.1:3000:3000"
depends_on:
- fastapi
restart: unless-stopped
# crawl-24x7 A-8 1차: 전 소스 헬스 패널 — 내부 전용 (읽기 전용 SELECT 만).
# '내부 전용' 성립 구현 = 별도 바인딩뿐 (r4 결정): Tailscale 인터페이스에만 publish.
# 기존 SvelteKit 라우트(vhost=Host 헤더 검사=앱 가드 환원)나 프록시 경로 차단(경로 가드
# 회귀)으로 옮기지 말 것. caddy/home-caddy 라우트 추가 금지. fastapi/postgres 바인딩 선례.
crawl-health:
build: ./services/crawl-health
ports:
- "100.110.63.63:8765:8765"
environment:
- CRAWL_HEALTH_DSN=postgresql://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
depends_on:
postgres:
condition: service_healthy
restart: unless-stopped
# crawl-24x7 B-3: 구독 세션 Playwright fetch 격리 — internal-only (host 포트·caddy 라우트 금지).
# 브라우저 hang/크래시가 fastapi APScheduler 를 잠식하지 않게 별도 컨테이너 + mem cap.
# 세션 파일(쿠키=credential 등가물)은 repo 밖 호스트 경로 ro mount (600, gitignore 무관 영역).
playwright-fetcher:
build: ./services/playwright-fetcher
volumes:
- /home/hyungi/.local/share/crawl-auth:/auth:ro
mem_limit: 2g
restart: unless-stopped
caddy:
image: caddy:2
ports:
- "127.0.0.1:8080:80" # 2026-06-20: LAN 우회 차단 (실 ingress=home-caddy→caddy:80 도커망)
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
depends_on:
- fastapi
- frontend
restart: unless-stopped
volumes:
pgdata:
caddy_data:
ollama_data:
reranker_cache:
ocr_models:
stt_models:
mineru_models: