feat: scaffold v2 project structure with Docker, FastAPI, and config

동작하는 최소 코드 수준의 v2 스캐폴딩:

- docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy
- app/: FastAPI 백엔드 (main, core, models, ai, prompts)
- services/kordoc/: Node.js 문서 파싱 마이크로서비스
- gpu-server/: AI Gateway + GPU docker-compose
- frontend/: SvelteKit 기본 구조
- migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue)
- tests/: pytest conftest 기본 설정
- config.yaml, Caddyfile, credentials.env.example 갱신

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-02 10:20:15 +09:00
parent b338e6e424
commit 131dbd7b7c
37 changed files with 1122 additions and 19 deletions

8
.gitignore vendored
View File

@@ -23,3 +23,11 @@ data/
# IDE
.vscode/
.idea/
# Node.js (frontend, kordoc)
node_modules/
.svelte-kit/
# Docker volumes
pgdata/
caddy_data/

13
Caddyfile Normal file
View File

@@ -0,0 +1,13 @@
pkm.hyungi.net {
reverse_proxy fastapi:8000
}
# Synology Office 프록시
office.hyungi.net {
reverse_proxy https://ds1525.hyungi.net:5001 {
header_up Host {upstream_hostport}
transport http {
tls_insecure_skip_verify
}
}
}

10
app/Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

0
app/ai/__init__.py Normal file
View File

79
app/ai/client.py Normal file
View File

@@ -0,0 +1,79 @@
"""AI 추상화 레이어 — 통합 클라이언트. 기본값은 항상 Qwen3.5."""
from pathlib import Path
import httpx
from core.config import settings
# 프롬프트 로딩
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
def _load_prompt(name: str) -> str:
return (PROMPTS_DIR / name).read_text(encoding="utf-8")
CLASSIFY_PROMPT = _load_prompt("classify.txt") if (PROMPTS_DIR / "classify.txt").exists() else ""
class AIClient:
"""AI Gateway를 통한 통합 클라이언트. 기본값은 항상 Qwen3.5."""
def __init__(self):
self.ai = settings.ai
self._http = httpx.AsyncClient(timeout=120)
async def classify(self, text: str) -> dict:
"""문서 분류 — 항상 primary(Qwen3.5) 사용"""
prompt = CLASSIFY_PROMPT.replace("{document_text}", text)
response = await self._call_chat(self.ai.primary, prompt)
return response
async def summarize(self, text: str, force_premium: bool = False) -> str:
"""문서 요약 — 기본 Qwen3.5, 장문이거나 명시적 요청 시만 Claude"""
model = self.ai.primary
if force_premium or len(text) > 15000:
model = self.ai.premium
return await self._call_chat(model, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")
async def embed(self, text: str) -> list[float]:
"""벡터 임베딩 — GPU 서버 전용"""
response = await self._http.post(
self.ai.embedding.endpoint,
json={"model": self.ai.embedding.model, "prompt": text},
)
response.raise_for_status()
return response.json()["embedding"]
async def ocr(self, image_bytes: bytes) -> str:
"""이미지 OCR — GPU 서버 전용"""
# TODO: Qwen2.5-VL-7B 비전 모델 호출 구현
raise NotImplementedError("OCR는 Phase 1에서 구현")
async def _call_chat(self, model_config, prompt: str) -> str:
"""OpenAI 호환 API 호출 + 자동 폴백"""
try:
return await self._request(model_config, prompt)
except (httpx.TimeoutException, httpx.ConnectError):
if model_config == self.ai.primary:
return await self._request(self.ai.fallback, prompt)
raise
async def _request(self, model_config, prompt: str) -> str:
"""단일 모델 API 호출"""
response = await self._http.post(
model_config.endpoint,
json={
"model": model_config.model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": model_config.max_tokens,
},
timeout=model_config.timeout,
)
response.raise_for_status()
data = response.json()
return data["choices"][0]["message"]["content"]
async def close(self):
await self._http.aclose()

0
app/api/__init__.py Normal file
View File

0
app/core/__init__.py Normal file
View File

51
app/core/auth.py Normal file
View File

@@ -0,0 +1,51 @@
"""JWT + TOTP 2FA 인증"""
from datetime import datetime, timedelta, timezone
import pyotp
from jose import JWTError, jwt
from passlib.context import CryptContext
from core.config import settings
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
# JWT 설정
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 15
REFRESH_TOKEN_EXPIRE_DAYS = 7
def verify_password(plain: str, hashed: str) -> bool:
return pwd_context.verify(plain, hashed)
def hash_password(password: str) -> str:
return pwd_context.hash(password)
def create_access_token(subject: str) -> str:
expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
payload = {"sub": subject, "exp": expire, "type": "access"}
return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM)
def create_refresh_token(subject: str) -> str:
expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
payload = {"sub": subject, "exp": expire, "type": "refresh"}
return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM)
def decode_token(token: str) -> dict | None:
try:
return jwt.decode(token, settings.jwt_secret, algorithms=[ALGORITHM])
except JWTError:
return None
def verify_totp(code: str) -> bool:
"""TOTP 코드 검증"""
if not settings.totp_secret:
return True # TOTP 미설정 시 스킵
totp = pyotp.TOTP(settings.totp_secret)
return totp.verify(code)

93
app/core/config.py Normal file
View File

@@ -0,0 +1,93 @@
"""설정 로딩 — config.yaml + credentials.env"""
import os
from pathlib import Path
import yaml
from pydantic import BaseModel
class AIModelConfig(BaseModel):
endpoint: str
model: str
max_tokens: int = 4096
timeout: int = 60
daily_budget_usd: float | None = None
require_explicit_trigger: bool = False
class AIConfig(BaseModel):
gateway_endpoint: str
primary: AIModelConfig
fallback: AIModelConfig
premium: AIModelConfig
embedding: AIModelConfig
vision: AIModelConfig
rerank: AIModelConfig
class Settings(BaseModel):
# DB
database_url: str = ""
# AI
ai: AIConfig | None = None
# NAS
nas_mount_path: str = "/documents"
nas_pkm_root: str = "/documents/PKM"
# 인증
jwt_secret: str = ""
totp_secret: str = ""
# kordoc
kordoc_endpoint: str = "http://kordoc-service:3100"
def load_settings() -> Settings:
"""config.yaml + 환경변수에서 설정 로딩"""
# 환경변수 (docker-compose에서 주입)
database_url = os.getenv("DATABASE_URL", "")
jwt_secret = os.getenv("JWT_SECRET", "")
totp_secret = os.getenv("TOTP_SECRET", "")
kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100")
# config.yaml
config_path = Path(__file__).parent.parent.parent / "config.yaml"
ai_config = None
nas_mount = "/documents"
nas_pkm = "/documents/PKM"
if config_path.exists():
with open(config_path) as f:
raw = yaml.safe_load(f)
if "ai" in raw:
ai_raw = raw["ai"]
ai_config = AIConfig(
gateway_endpoint=ai_raw.get("gateway", {}).get("endpoint", ""),
primary=AIModelConfig(**ai_raw["models"]["primary"]),
fallback=AIModelConfig(**ai_raw["models"]["fallback"]),
premium=AIModelConfig(**ai_raw["models"]["premium"]),
embedding=AIModelConfig(**ai_raw["models"]["embedding"]),
vision=AIModelConfig(**ai_raw["models"]["vision"]),
rerank=AIModelConfig(**ai_raw["models"]["rerank"]),
)
if "nas" in raw:
nas_mount = raw["nas"].get("mount_path", nas_mount)
nas_pkm = raw["nas"].get("pkm_root", nas_pkm)
return Settings(
database_url=database_url,
ai=ai_config,
nas_mount_path=nas_mount,
nas_pkm_root=nas_pkm,
jwt_secret=jwt_secret,
totp_secret=totp_secret,
kordoc_endpoint=kordoc_endpoint,
)
settings = load_settings()

34
app/core/database.py Normal file
View File

@@ -0,0 +1,34 @@
"""PostgreSQL 연결 — SQLAlchemy async engine + session factory"""
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase
from core.config import settings
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
)
async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
class Base(DeclarativeBase):
pass
async def init_db():
"""DB 연결 확인 (스키마는 migrations/로 관리)"""
async with engine.begin() as conn:
# 연결 테스트
await conn.execute(
__import__("sqlalchemy").text("SELECT 1")
)
async def get_session() -> AsyncSession:
"""FastAPI Depends용 세션 제공"""
async with async_session() as session:
yield session

46
app/core/utils.py Normal file
View File

@@ -0,0 +1,46 @@
"""공통 유틸리티 — v1 pkm_utils.py에서 AppleScript 제거, 나머지 포팅"""
import hashlib
import logging
from pathlib import Path
def setup_logger(name: str, log_dir: str = "logs") -> logging.Logger:
"""로거 설정"""
Path(log_dir).mkdir(exist_ok=True)
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
if not logger.handlers:
# 파일 핸들러
fh = logging.FileHandler(f"{log_dir}/{name}.log", encoding="utf-8")
fh.setFormatter(logging.Formatter(
"%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
))
logger.addHandler(fh)
# 콘솔 핸들러
ch = logging.StreamHandler()
ch.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
logger.addHandler(ch)
return logger
def file_hash(path: str | Path) -> str:
"""파일 SHA-256 해시 계산"""
sha256 = hashlib.sha256()
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
sha256.update(chunk)
return sha256.hexdigest()
def count_log_errors(log_path: str) -> int:
"""로그 파일에서 ERROR 건수 카운트"""
try:
with open(log_path, encoding="utf-8") as f:
return sum(1 for line in f if "[ERROR]" in line)
except FileNotFoundError:
return 0

41
app/main.py Normal file
View File

@@ -0,0 +1,41 @@
"""hyungi_Document_Server — FastAPI 엔트리포인트"""
from contextlib import asynccontextmanager
from fastapi import FastAPI
from core.config import settings
from core.database import init_db
@asynccontextmanager
async def lifespan(app: FastAPI):
"""앱 시작/종료 시 실행되는 lifespan 핸들러"""
# 시작: DB 연결, 스케줄러 등록
await init_db()
# TODO: APScheduler 시작 (Phase 3)
yield
# 종료: 리소스 정리
# TODO: 스케줄러 종료, DB 연결 해제
app = FastAPI(
title="hyungi_Document_Server",
description="Self-hosted PKM 웹 애플리케이션 API",
version="2.0.0",
lifespan=lifespan,
)
@app.get("/health")
async def health_check():
return {"status": "ok", "version": "2.0.0"}
# TODO: 라우터 등록 (Phase 0~2)
# from api import documents, search, tasks, dashboard, export
# app.include_router(documents.router, prefix="/api/documents", tags=["documents"])
# app.include_router(search.router, prefix="/api/search", tags=["search"])
# app.include_router(tasks.router, prefix="/api/tasks", tags=["tasks"])
# app.include_router(dashboard.router, prefix="/api/dashboard", tags=["dashboard"])
# app.include_router(export.router, prefix="/api/export", tags=["export"])

0
app/models/__init__.py Normal file
View File

64
app/models/document.py Normal file
View File

@@ -0,0 +1,64 @@
"""documents 테이블 ORM"""
from datetime import datetime
from pgvector.sqlalchemy import Vector
from sqlalchemy import BigInteger, DateTime, Enum, String, Text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class Document(Base):
__tablename__ = "documents"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
# 1계층: 원본 파일
file_path: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
file_hash: Mapped[str] = mapped_column(String(64), nullable=False)
file_format: Mapped[str] = mapped_column(String(20), nullable=False)
file_size: Mapped[int | None] = mapped_column(BigInteger)
file_type: Mapped[str] = mapped_column(
Enum("immutable", "editable", "note", name="doc_type"),
default="immutable"
)
import_source: Mapped[str | None] = mapped_column(Text)
# 2계층: 텍스트 추출
extracted_text: Mapped[str | None] = mapped_column(Text)
extracted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
extractor_version: Mapped[str | None] = mapped_column(String(50))
# 2계층: AI 가공
ai_summary: Mapped[str | None] = mapped_column(Text)
ai_tags: Mapped[dict | None] = mapped_column(JSONB, default=[])
ai_domain: Mapped[str | None] = mapped_column(String(100))
ai_sub_group: Mapped[str | None] = mapped_column(String(100))
ai_model_version: Mapped[str | None] = mapped_column(String(50))
ai_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
# 3계층: 벡터 임베딩
embedding = mapped_column(Vector(768), nullable=True)
embed_model_version: Mapped[str | None] = mapped_column(String(50))
embedded_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
# 메타데이터
source_channel: Mapped[str | None] = mapped_column(
Enum("law_monitor", "devonagent", "email", "web_clip",
"tksafety", "inbox_route", "manual", "drive_sync",
name="source_channel")
)
data_origin: Mapped[str | None] = mapped_column(
Enum("work", "external", name="data_origin")
)
title: Mapped[str | None] = mapped_column(Text)
# 타임스탬프
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now, onupdate=datetime.now
)

34
app/models/queue.py Normal file
View File

@@ -0,0 +1,34 @@
"""processing_queue 테이블 ORM (비동기 가공 큐)"""
from datetime import datetime
from sqlalchemy import BigInteger, DateTime, Enum, ForeignKey, SmallInteger, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class ProcessingQueue(Base):
__tablename__ = "processing_queue"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
document_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("documents.id"), nullable=False)
stage: Mapped[str] = mapped_column(
Enum("extract", "classify", "embed", name="process_stage"), nullable=False
)
status: Mapped[str] = mapped_column(
Enum("pending", "processing", "completed", "failed", name="process_status"),
default="pending"
)
attempts: Mapped[int] = mapped_column(SmallInteger, default=0)
max_attempts: Mapped[int] = mapped_column(SmallInteger, default=3)
error_message: Mapped[str | None] = mapped_column(Text)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
__table_args__ = (
UniqueConstraint("document_id", "stage", "status"),
)

29
app/models/task.py Normal file
View File

@@ -0,0 +1,29 @@
"""tasks 테이블 ORM (CalDAV 캐시)"""
from datetime import datetime
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, SmallInteger, String, Text
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class Task(Base):
__tablename__ = "tasks"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
caldav_uid: Mapped[str | None] = mapped_column(Text, unique=True)
title: Mapped[str] = mapped_column(Text, nullable=False)
description: Mapped[str | None] = mapped_column(Text)
due_date: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
priority: Mapped[int] = mapped_column(SmallInteger, default=0)
completed: Mapped[bool] = mapped_column(Boolean, default=False)
completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
document_id: Mapped[int | None] = mapped_column(BigInteger, ForeignKey("documents.id"))
source: Mapped[str | None] = mapped_column(String(50))
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now, onupdate=datetime.now
)

51
app/prompts/classify.txt Normal file
View File

@@ -0,0 +1,51 @@
당신은 문서 분류 AI입니다. 아래 문서를 분석하고 반드시 JSON 형식으로만 응답하세요. 다른 텍스트는 출력하지 마세요.
## 응답 형식
{
"tags": ["태그1", "태그2", "태그3"],
"domain": "도메인경로",
"sub_group": "하위그룹",
"sourceChannel": "유입경로",
"dataOrigin": "work 또는 external"
}
## 도메인 선택지 (NAS 폴더 경로)
- Knowledge/Philosophy — 철학, 사상, 인문학
- Knowledge/Language — 어학, 번역, 언어학
- Knowledge/Engineering — 공학 전반 기술 문서
- Knowledge/Industrial_Safety — 산업안전, 규정, 인증
- Knowledge/Programming — 개발, 코드, IT 기술
- Knowledge/General — 일반 도서, 독서 노트, 메모
- Reference — 도면, 참고자료, 규격표
## 하위 그룹 예시 (도메인별)
- Knowledge/Industrial_Safety: Legislation, Standards, Cases
- Knowledge/Programming: Language, Framework, DevOps, AI_ML
- Knowledge/Engineering: Mechanical, Electrical, Network
- 잘 모르겠으면: (비워둠)
## 태그 체계
태그는 최대 5개, 한글 사용. 아래 계층 구조 중에서 선택:
- @상태/: 처리중, 검토필요, 완료, 아카이브
- #주제/기술/: 서버관리, 네트워크, AI-ML
- #주제/산업안전/: 법령, 위험성평가, 순회점검, 안전교육, 사고사례, 신고보고, 안전관리자, 보건관리자
- #주제/업무/: 프로젝트, 회의, 보고서
- $유형/: 논문, 법령, 기사, 메모, 이메일, 채팅로그, 도면, 체크리스트
- !우선순위/: 긴급, 중요, 참고
## sourceChannel 값
- tksafety: TKSafety API 업무 실적
- devonagent: 자동 수집 뉴스
- law_monitor: 법령 API 법령 변경
- inbox_route: Inbox AI 분류 (이 프롬프트에 의한 분류)
- email: MailPlus 이메일
- web_clip: Web Clipper 스크랩
- manual: 직접 추가
- drive_sync: Synology Drive 동기화
## dataOrigin 값
- work: 자사 업무 관련 (TK, 테크니컬코리아, 공장, 생산, 사내)
- external: 외부 참고 자료 (뉴스, 논문, 법령, 일반 정보)
## 분류 대상 문서
{document_text}

16
app/requirements.txt Normal file
View File

@@ -0,0 +1,16 @@
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
sqlalchemy[asyncio]>=2.0.0
asyncpg>=0.29.0
pgvector>=0.3.0
python-dotenv>=1.0.0
pyyaml>=6.0
httpx>=0.27.0
python-jose[cryptography]>=3.3.0
passlib[bcrypt]>=1.7.4
pyotp>=2.9.0
caldav>=1.3.0
apscheduler>=3.10.0
anthropic>=0.40.0
markdown>=3.5.0
python-multipart>=0.0.9

0
app/workers/__init__.py Normal file
View File

48
config.yaml Normal file
View File

@@ -0,0 +1,48 @@
# hyungi_Document_Server 설정
ai:
gateway:
endpoint: "http://gpu-server:8080"
models:
primary:
endpoint: "http://host.docker.internal:8800/v1/chat/completions"
model: "mlx-community/Qwen3.5-35B-A3B-4bit"
max_tokens: 4096
timeout: 60
fallback:
endpoint: "http://gpu-server:11434/v1/chat/completions"
model: "qwen3.5:35b-a3b"
max_tokens: 4096
timeout: 120
premium:
endpoint: "https://api.anthropic.com/v1/messages"
model: "claude-sonnet-4-20250514"
max_tokens: 8192
daily_budget_usd: 5.00
require_explicit_trigger: true
embedding:
endpoint: "http://gpu-server:11434/api/embeddings"
model: "nomic-embed-text"
vision:
endpoint: "http://gpu-server:11434/api/generate"
model: "Qwen2.5-VL-7B"
rerank:
endpoint: "http://gpu-server:11434/api/rerank"
model: "bge-reranker-v2-m3"
nas:
mount_path: "/documents"
pkm_root: "/documents/PKM"
schedule:
law_monitor: "07:00"
mailplus_archive: ["07:00", "18:00"]
daily_digest: "20:00"
file_watcher_interval_minutes: 5
queue_consumer_interval_minutes: 10

View File

@@ -1,29 +1,57 @@
# ═══════════════════════════════════════════════════
# PKM 시스템 인증 정보
# 이 파일은 템플릿입니다. 실제 값은 Mac mini의
# ~/.config/pkm/credentials.env 에 별도 관리합니다.
# hyungi_Document_Server — 인증 정보 템플릿
# 실제 값을 채워서 credentials.env로 저장
# ═══════════════════════════════════════════════════
# ─── Claude API (AI 고급 처리용) ───
# ─── PostgreSQL ───
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=pkm
POSTGRES_USER=pkm
POSTGRES_PASSWORD=
# ─── AI: Mac mini MLX (Qwen3.5, 기본 모델) ───
MLX_ENDPOINT=http://localhost:8800/v1/chat/completions
MLX_MODEL=mlx-community/Qwen3.5-35B-A3B-4bit
# ─── AI: GPU 서버 ───
GPU_SERVER_IP=
GPU_EMBED_PORT=11434
# ─── AI: Claude API (종량제, 복잡한 분석 전용) ───
CLAUDE_API_KEY=
# ─── AI Gateway (GPU 서버) ───
AI_GATEWAY_ENDPOINT=http://gpu-server:8080
# ─── Synology NAS ───
NAS_SMB_PATH=/Volumes/Document_Server
NAS_DOMAIN=ds1525.hyungi.net
NAS_TAILSCALE_IP=100.101.79.37
NAS_PORT=15001
# ─── Synology MailPlus (이메일 수집 + SMTP 알림) ───
MAILPLUS_HOST=mailplus.hyungi.net
MAILPLUS_PORT=993
MAILPLUS_SMTP_PORT=465
MAILPLUS_USER=hyungi
MAILPLUS_PASS=
# ─── Synology Calendar (CalDAV, 태스크 관리) ───
CALDAV_URL=https://ds1525.hyungi.net/caldav/
CALDAV_USER=hyungi
CALDAV_PASS=
# ─── kordoc 마이크로서비스 ───
KORDOC_ENDPOINT=http://kordoc-service:3100
# ─── 인증 (JWT + TOTP) ───
JWT_SECRET=
TOTP_SECRET=
# ─── 국가법령정보센터 (법령 모니터링) ───
LAW_OC=
# ─── Synology NAS 접속 ───
NAS_DOMAIN=
NAS_TAILSCALE_IP=
NAS_PORT=15001
# ─── MailPlus IMAP (이메일 수집용) ───
MAILPLUS_HOST=
MAILPLUS_PORT=993
MAILPLUS_USER=
MAILPLUS_PASS=
# ─── Synology Chat 웹훅 (나중에 추가) ───
#CHAT_WEBHOOK_URL=
# ─── TKSafety API (나중에 활성화) ───
#TKSAFETY_HOST=
#TKSAFETY_HOST=tksafety.technicalkorea.net
#TKSAFETY_PORT=

76
docker-compose.yml Normal file
View File

@@ -0,0 +1,76 @@
version: '3.8'
services:
postgres:
image: pgvector/pgvector:pg16
volumes:
- pgdata:/var/lib/postgresql/data
- ./migrations:/docker-entrypoint-initdb.d
environment:
POSTGRES_DB: pkm
POSTGRES_USER: pkm
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
ports:
- "5432:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U pkm"]
interval: 5s
timeout: 5s
retries: 5
restart: unless-stopped
kordoc-service:
build: ./services/kordoc
ports:
- "3100:3100"
volumes:
- ${NAS_SMB_PATH:-/Volumes/Document_Server}:/documents:ro
healthcheck:
test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/health"]
interval: 10s
timeout: 5s
retries: 3
restart: unless-stopped
fastapi:
build: ./app
ports:
- "8000:8000"
volumes:
- ${NAS_SMB_PATH:-/Volumes/Document_Server}:/documents
depends_on:
postgres:
condition: service_healthy
kordoc-service:
condition: service_healthy
env_file:
- credentials.env
environment:
- DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm
- KORDOC_ENDPOINT=http://kordoc-service:3100
restart: unless-stopped
frontend:
build: ./frontend
ports:
- "3000:3000"
depends_on:
- fastapi
restart: unless-stopped
caddy:
image: caddy:2
ports:
- "80:80"
- "443:443"
volumes:
- ./Caddyfile:/etc/caddy/Caddyfile
- caddy_data:/data
depends_on:
- fastapi
- frontend
restart: unless-stopped
volumes:
pgdata:
caddy_data:

16
frontend/Dockerfile Normal file
View File

@@ -0,0 +1,16 @@
FROM node:20-slim AS build
WORKDIR /app
COPY package.json .
RUN npm install
COPY . .
RUN npm run build
FROM node:20-slim
WORKDIR /app
COPY --from=build /app/build build/
COPY --from=build /app/node_modules node_modules/
COPY package.json .
EXPOSE 3000
CMD ["node", "build"]

16
frontend/package.json Normal file
View File

@@ -0,0 +1,16 @@
{
"name": "hyungi-document-server-frontend",
"version": "0.0.1",
"private": true,
"scripts": {
"dev": "vite dev",
"build": "vite build",
"preview": "vite preview"
},
"devDependencies": {
"@sveltejs/adapter-node": "^2.0.0",
"@sveltejs/kit": "^2.0.0",
"svelte": "^4.0.0",
"vite": "^5.0.0"
}
}

12
frontend/src/app.html Normal file
View File

@@ -0,0 +1,12 @@
<!doctype html>
<html lang="ko">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>hyungi Document Server</title>
%sveltekit.head%
</head>
<body>
<div>%sveltekit.body%</div>
</body>
</html>

View File

@@ -0,0 +1,14 @@
<script>
// TODO: Phase 4에서 대시보드 위젯 구현
</script>
<h1>hyungi Document Server</h1>
<p>PKM 대시보드 — Phase 4에서 구현 예정</p>
<section>
<h2>시스템 상태</h2>
<ul>
<li>FastAPI: <a href="/api/health">헬스체크</a></li>
<li>API 문서: <a href="/docs">OpenAPI</a></li>
</ul>
</section>

10
frontend/svelte.config.js Normal file
View File

@@ -0,0 +1,10 @@
import adapter from '@sveltejs/adapter-node';
/** @type {import('@sveltejs/kit').Config} */
const config = {
kit: {
adapter: adapter()
}
};
export default config;

View File

@@ -0,0 +1,33 @@
version: '3.8'
services:
ollama:
image: ollama/ollama
volumes:
- ollama_data:/root/.ollama
deploy:
resources:
reservations:
devices:
- driver: nvidia
count: 1
capabilities: [gpu]
ports:
- "11434:11434"
restart: unless-stopped
ai-gateway:
build: ./services/ai-gateway
ports:
- "8080:8080"
environment:
- PRIMARY_ENDPOINT=${PRIMARY_ENDPOINT:-http://mac-mini:8800/v1/chat/completions}
- FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions
- CLAUDE_API_KEY=${CLAUDE_API_KEY:-}
- DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00}
depends_on:
- ollama
restart: unless-stopped
volumes:
ollama_data:

View File

@@ -0,0 +1,10 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY server.py .
CMD ["uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8080"]

View File

@@ -0,0 +1,3 @@
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
httpx>=0.27.0

View File

@@ -0,0 +1,58 @@
"""AI Gateway — 모델 라우팅, 폴백, 비용 제어, 요청 로깅"""
import os
from datetime import date
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
import httpx
app = FastAPI(title="AI Gateway", version="1.0.0")
PRIMARY = os.getenv("PRIMARY_ENDPOINT", "http://localhost:8800/v1/chat/completions")
FALLBACK = os.getenv("FALLBACK_ENDPOINT", "http://localhost:11434/v1/chat/completions")
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "")
DAILY_BUDGET = float(os.getenv("DAILY_BUDGET_USD", "5.00"))
# 일일 비용 추적 (메모리, 재시작 시 리셋)
_daily_cost: dict[str, float] = {}
_http = httpx.AsyncClient(timeout=120)
@app.get("/health")
async def health():
return {"status": "ok", "service": "ai-gateway"}
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
"""OpenAI 호환 채팅 엔드포인트 — 자동 폴백"""
body = await request.json()
tier = request.headers.get("x-model-tier", "primary")
if tier == "premium":
return await _call_premium(body)
# Primary → Fallback 폴백
try:
resp = await _http.post(PRIMARY, json=body, timeout=60)
resp.raise_for_status()
return JSONResponse(content=resp.json())
except (httpx.TimeoutException, httpx.ConnectError, httpx.HTTPStatusError):
# 폴백
resp = await _http.post(FALLBACK, json=body, timeout=120)
resp.raise_for_status()
return JSONResponse(content=resp.json())
async def _call_premium(body: dict):
"""Claude API 호출 — 비용 제어"""
today = date.today().isoformat()
if _daily_cost.get(today, 0) >= DAILY_BUDGET:
raise HTTPException(429, f"일일 예산 초과: ${DAILY_BUDGET}")
if not CLAUDE_API_KEY:
raise HTTPException(503, "CLAUDE_API_KEY 미설정")
# TODO: Anthropic API 호출 + 비용 계산 (Phase 3에서 구현)
raise HTTPException(501, "Premium 모델 호출은 Phase 3에서 구현")

View File

@@ -0,0 +1,106 @@
-- hyungi_Document_Server 초기 스키마
-- PostgreSQL 16 + pgvector + pg_trgm
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- ENUM 타입
CREATE TYPE doc_type AS ENUM ('immutable', 'editable', 'note');
CREATE TYPE source_channel AS ENUM (
'law_monitor', 'devonagent', 'email', 'web_clip',
'tksafety', 'inbox_route', 'manual', 'drive_sync'
);
CREATE TYPE data_origin AS ENUM ('work', 'external');
CREATE TYPE process_stage AS ENUM ('extract', 'classify', 'embed');
CREATE TYPE process_status AS ENUM ('pending', 'processing', 'completed', 'failed');
-- documents 테이블
CREATE TABLE documents (
id BIGSERIAL PRIMARY KEY,
-- 1계층: 원본 파일 참조
file_path TEXT NOT NULL UNIQUE,
file_hash CHAR(64) NOT NULL,
file_format VARCHAR(20) NOT NULL,
file_size BIGINT,
file_type doc_type NOT NULL DEFAULT 'immutable',
import_source TEXT,
-- 2계층: 텍스트 추출
extracted_text TEXT,
extracted_at TIMESTAMPTZ,
extractor_version VARCHAR(50),
-- 2계층: AI 가공
ai_summary TEXT,
ai_tags JSONB DEFAULT '[]',
ai_domain VARCHAR(100),
ai_sub_group VARCHAR(100),
ai_model_version VARCHAR(50),
ai_processed_at TIMESTAMPTZ,
-- 3계층: 벡터 임베딩
embedding vector(768),
embed_model_version VARCHAR(50),
embedded_at TIMESTAMPTZ,
-- 메타데이터
source_channel source_channel,
data_origin data_origin,
title TEXT,
-- 타임스탬프
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- 전문검색 인덱스
CREATE INDEX idx_documents_fts ON documents
USING GIN (to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')));
-- 트리그램 인덱스 (한국어 부분 매칭)
CREATE INDEX idx_documents_trgm ON documents
USING GIN ((coalesce(title, '') || ' ' || coalesce(extracted_text, '')) gin_trgm_ops);
-- 해시 기반 중복 검색
CREATE INDEX idx_documents_hash ON documents (file_hash);
-- 재가공 대상 필터링
CREATE INDEX idx_documents_ai_version ON documents (ai_model_version);
CREATE INDEX idx_documents_extractor_version ON documents (extractor_version);
CREATE INDEX idx_documents_embed_version ON documents (embed_model_version);
-- tasks 테이블 (CalDAV 캐시)
CREATE TABLE tasks (
id BIGSERIAL PRIMARY KEY,
caldav_uid TEXT UNIQUE,
title TEXT NOT NULL,
description TEXT,
due_date TIMESTAMPTZ,
priority SMALLINT DEFAULT 0,
completed BOOLEAN DEFAULT FALSE,
completed_at TIMESTAMPTZ,
document_id BIGINT REFERENCES documents(id),
source VARCHAR(50),
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
-- processing_queue 테이블 (비동기 가공 큐)
CREATE TABLE processing_queue (
id BIGSERIAL PRIMARY KEY,
document_id BIGINT REFERENCES documents(id) NOT NULL,
stage process_stage NOT NULL,
status process_status DEFAULT 'pending',
attempts SMALLINT DEFAULT 0,
max_attempts SMALLINT DEFAULT 3,
error_message TEXT,
created_at TIMESTAMPTZ DEFAULT NOW(),
started_at TIMESTAMPTZ,
completed_at TIMESTAMPTZ,
UNIQUE (document_id, stage, status)
);
CREATE INDEX idx_queue_pending ON processing_queue (stage, status)
WHERE status = 'pending';

View File

@@ -0,0 +1,12 @@
FROM node:20-slim
WORKDIR /app
COPY package.json .
RUN npm install --production
COPY server.js .
EXPOSE 3100
CMD ["node", "server.js"]

View File

@@ -0,0 +1,13 @@
{
"name": "kordoc-service",
"version": "1.0.0",
"description": "HWP/HWPX/PDF 문서 파싱 마이크로서비스",
"main": "server.js",
"scripts": {
"start": "node server.js"
},
"dependencies": {
"express": "^4.18.0",
"kordoc": "^1.7.0"
}
}

57
services/kordoc/server.js Normal file
View File

@@ -0,0 +1,57 @@
/**
* kordoc 마이크로서비스 — HWP/HWPX/PDF → Markdown 변환 API
*/
const express = require('express');
const app = express();
const PORT = 3100;
app.use(express.json({ limit: '500mb' }));
// 헬스체크
app.get('/health', (req, res) => {
res.json({ status: 'ok', service: 'kordoc' });
});
// 문서 파싱
app.post('/parse', async (req, res) => {
try {
const { filePath } = req.body;
if (!filePath) {
return res.status(400).json({ error: 'filePath is required' });
}
// TODO: kordoc 라이브러리 연동 (Phase 1에서 구현)
// const kordoc = require('kordoc');
// const result = await kordoc.parse(filePath);
// return res.json(result);
return res.json({
markdown: '',
metadata: {},
format: 'unknown',
message: 'kordoc 파싱은 Phase 1에서 구현 예정'
});
} catch (err) {
res.status(500).json({ error: err.message });
}
});
// 문서 비교
app.post('/compare', async (req, res) => {
try {
const { filePathA, filePathB } = req.body;
if (!filePathA || !filePathB) {
return res.status(400).json({ error: 'filePathA and filePathB are required' });
}
// TODO: kordoc compare 구현 (Phase 2)
return res.json({ diffs: [], message: 'compare는 Phase 2에서 구현 예정' });
} catch (err) {
res.status(500).json({ error: err.message });
}
});
app.listen(PORT, () => {
console.log(`kordoc-service listening on port ${PORT}`);
});

0
tests/__init__.py Normal file
View File

22
tests/conftest.py Normal file
View File

@@ -0,0 +1,22 @@
"""pytest 기본 fixture 설정"""
import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient
@pytest.fixture(scope="session")
def anyio_backend():
return "asyncio"
# TODO: Phase 0 완료 후 활성화
# @pytest_asyncio.fixture
# async def client():
# """FastAPI 테스트 클라이언트"""
# from app.main import app
# async with AsyncClient(
# transport=ASGITransport(app=app),
# base_url="http://test"
# ) as ac:
# yield ac