feat: scaffold v2 project structure with Docker, FastAPI, and config

동작하는 최소 코드 수준의 v2 스캐폴딩:

- docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy
- app/: FastAPI 백엔드 (main, core, models, ai, prompts)
- services/kordoc/: Node.js 문서 파싱 마이크로서비스
- gpu-server/: AI Gateway + GPU docker-compose
- frontend/: SvelteKit 기본 구조
- migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue)
- tests/: pytest conftest 기본 설정
- config.yaml, Caddyfile, credentials.env.example 갱신

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-02 10:20:15 +09:00
parent b338e6e424
commit 131dbd7b7c
37 changed files with 1122 additions and 19 deletions

10
app/Dockerfile Normal file
View File

@@ -0,0 +1,10 @@
FROM python:3.11-slim
WORKDIR /app
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
COPY . .
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]

0
app/ai/__init__.py Normal file
View File

79
app/ai/client.py Normal file
View File

@@ -0,0 +1,79 @@
"""AI 추상화 레이어 — 통합 클라이언트. 기본값은 항상 Qwen3.5."""
from pathlib import Path
import httpx
from core.config import settings
# 프롬프트 로딩
PROMPTS_DIR = Path(__file__).parent.parent / "prompts"
def _load_prompt(name: str) -> str:
return (PROMPTS_DIR / name).read_text(encoding="utf-8")
CLASSIFY_PROMPT = _load_prompt("classify.txt") if (PROMPTS_DIR / "classify.txt").exists() else ""
class AIClient:
"""AI Gateway를 통한 통합 클라이언트. 기본값은 항상 Qwen3.5."""
def __init__(self):
self.ai = settings.ai
self._http = httpx.AsyncClient(timeout=120)
async def classify(self, text: str) -> dict:
"""문서 분류 — 항상 primary(Qwen3.5) 사용"""
prompt = CLASSIFY_PROMPT.replace("{document_text}", text)
response = await self._call_chat(self.ai.primary, prompt)
return response
async def summarize(self, text: str, force_premium: bool = False) -> str:
"""문서 요약 — 기본 Qwen3.5, 장문이거나 명시적 요청 시만 Claude"""
model = self.ai.primary
if force_premium or len(text) > 15000:
model = self.ai.premium
return await self._call_chat(model, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}")
async def embed(self, text: str) -> list[float]:
"""벡터 임베딩 — GPU 서버 전용"""
response = await self._http.post(
self.ai.embedding.endpoint,
json={"model": self.ai.embedding.model, "prompt": text},
)
response.raise_for_status()
return response.json()["embedding"]
async def ocr(self, image_bytes: bytes) -> str:
"""이미지 OCR — GPU 서버 전용"""
# TODO: Qwen2.5-VL-7B 비전 모델 호출 구현
raise NotImplementedError("OCR는 Phase 1에서 구현")
async def _call_chat(self, model_config, prompt: str) -> str:
"""OpenAI 호환 API 호출 + 자동 폴백"""
try:
return await self._request(model_config, prompt)
except (httpx.TimeoutException, httpx.ConnectError):
if model_config == self.ai.primary:
return await self._request(self.ai.fallback, prompt)
raise
async def _request(self, model_config, prompt: str) -> str:
"""단일 모델 API 호출"""
response = await self._http.post(
model_config.endpoint,
json={
"model": model_config.model,
"messages": [{"role": "user", "content": prompt}],
"max_tokens": model_config.max_tokens,
},
timeout=model_config.timeout,
)
response.raise_for_status()
data = response.json()
return data["choices"][0]["message"]["content"]
async def close(self):
await self._http.aclose()

0
app/api/__init__.py Normal file
View File

0
app/core/__init__.py Normal file
View File

51
app/core/auth.py Normal file
View File

@@ -0,0 +1,51 @@
"""JWT + TOTP 2FA 인증"""
from datetime import datetime, timedelta, timezone
import pyotp
from jose import JWTError, jwt
from passlib.context import CryptContext
from core.config import settings
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
# JWT 설정
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 15
REFRESH_TOKEN_EXPIRE_DAYS = 7
def verify_password(plain: str, hashed: str) -> bool:
return pwd_context.verify(plain, hashed)
def hash_password(password: str) -> str:
return pwd_context.hash(password)
def create_access_token(subject: str) -> str:
expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES)
payload = {"sub": subject, "exp": expire, "type": "access"}
return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM)
def create_refresh_token(subject: str) -> str:
expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS)
payload = {"sub": subject, "exp": expire, "type": "refresh"}
return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM)
def decode_token(token: str) -> dict | None:
try:
return jwt.decode(token, settings.jwt_secret, algorithms=[ALGORITHM])
except JWTError:
return None
def verify_totp(code: str) -> bool:
"""TOTP 코드 검증"""
if not settings.totp_secret:
return True # TOTP 미설정 시 스킵
totp = pyotp.TOTP(settings.totp_secret)
return totp.verify(code)

93
app/core/config.py Normal file
View File

@@ -0,0 +1,93 @@
"""설정 로딩 — config.yaml + credentials.env"""
import os
from pathlib import Path
import yaml
from pydantic import BaseModel
class AIModelConfig(BaseModel):
endpoint: str
model: str
max_tokens: int = 4096
timeout: int = 60
daily_budget_usd: float | None = None
require_explicit_trigger: bool = False
class AIConfig(BaseModel):
gateway_endpoint: str
primary: AIModelConfig
fallback: AIModelConfig
premium: AIModelConfig
embedding: AIModelConfig
vision: AIModelConfig
rerank: AIModelConfig
class Settings(BaseModel):
# DB
database_url: str = ""
# AI
ai: AIConfig | None = None
# NAS
nas_mount_path: str = "/documents"
nas_pkm_root: str = "/documents/PKM"
# 인증
jwt_secret: str = ""
totp_secret: str = ""
# kordoc
kordoc_endpoint: str = "http://kordoc-service:3100"
def load_settings() -> Settings:
"""config.yaml + 환경변수에서 설정 로딩"""
# 환경변수 (docker-compose에서 주입)
database_url = os.getenv("DATABASE_URL", "")
jwt_secret = os.getenv("JWT_SECRET", "")
totp_secret = os.getenv("TOTP_SECRET", "")
kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100")
# config.yaml
config_path = Path(__file__).parent.parent.parent / "config.yaml"
ai_config = None
nas_mount = "/documents"
nas_pkm = "/documents/PKM"
if config_path.exists():
with open(config_path) as f:
raw = yaml.safe_load(f)
if "ai" in raw:
ai_raw = raw["ai"]
ai_config = AIConfig(
gateway_endpoint=ai_raw.get("gateway", {}).get("endpoint", ""),
primary=AIModelConfig(**ai_raw["models"]["primary"]),
fallback=AIModelConfig(**ai_raw["models"]["fallback"]),
premium=AIModelConfig(**ai_raw["models"]["premium"]),
embedding=AIModelConfig(**ai_raw["models"]["embedding"]),
vision=AIModelConfig(**ai_raw["models"]["vision"]),
rerank=AIModelConfig(**ai_raw["models"]["rerank"]),
)
if "nas" in raw:
nas_mount = raw["nas"].get("mount_path", nas_mount)
nas_pkm = raw["nas"].get("pkm_root", nas_pkm)
return Settings(
database_url=database_url,
ai=ai_config,
nas_mount_path=nas_mount,
nas_pkm_root=nas_pkm,
jwt_secret=jwt_secret,
totp_secret=totp_secret,
kordoc_endpoint=kordoc_endpoint,
)
settings = load_settings()

34
app/core/database.py Normal file
View File

@@ -0,0 +1,34 @@
"""PostgreSQL 연결 — SQLAlchemy async engine + session factory"""
from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
from sqlalchemy.orm import DeclarativeBase
from core.config import settings
engine = create_async_engine(
settings.database_url,
echo=False,
pool_size=10,
max_overflow=20,
)
async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False)
class Base(DeclarativeBase):
pass
async def init_db():
"""DB 연결 확인 (스키마는 migrations/로 관리)"""
async with engine.begin() as conn:
# 연결 테스트
await conn.execute(
__import__("sqlalchemy").text("SELECT 1")
)
async def get_session() -> AsyncSession:
"""FastAPI Depends용 세션 제공"""
async with async_session() as session:
yield session

46
app/core/utils.py Normal file
View File

@@ -0,0 +1,46 @@
"""공통 유틸리티 — v1 pkm_utils.py에서 AppleScript 제거, 나머지 포팅"""
import hashlib
import logging
from pathlib import Path
def setup_logger(name: str, log_dir: str = "logs") -> logging.Logger:
"""로거 설정"""
Path(log_dir).mkdir(exist_ok=True)
logger = logging.getLogger(name)
logger.setLevel(logging.INFO)
if not logger.handlers:
# 파일 핸들러
fh = logging.FileHandler(f"{log_dir}/{name}.log", encoding="utf-8")
fh.setFormatter(logging.Formatter(
"%(asctime)s [%(levelname)s] %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
))
logger.addHandler(fh)
# 콘솔 핸들러
ch = logging.StreamHandler()
ch.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
logger.addHandler(ch)
return logger
def file_hash(path: str | Path) -> str:
"""파일 SHA-256 해시 계산"""
sha256 = hashlib.sha256()
with open(path, "rb") as f:
for chunk in iter(lambda: f.read(8192), b""):
sha256.update(chunk)
return sha256.hexdigest()
def count_log_errors(log_path: str) -> int:
"""로그 파일에서 ERROR 건수 카운트"""
try:
with open(log_path, encoding="utf-8") as f:
return sum(1 for line in f if "[ERROR]" in line)
except FileNotFoundError:
return 0

41
app/main.py Normal file
View File

@@ -0,0 +1,41 @@
"""hyungi_Document_Server — FastAPI 엔트리포인트"""
from contextlib import asynccontextmanager
from fastapi import FastAPI
from core.config import settings
from core.database import init_db
@asynccontextmanager
async def lifespan(app: FastAPI):
"""앱 시작/종료 시 실행되는 lifespan 핸들러"""
# 시작: DB 연결, 스케줄러 등록
await init_db()
# TODO: APScheduler 시작 (Phase 3)
yield
# 종료: 리소스 정리
# TODO: 스케줄러 종료, DB 연결 해제
app = FastAPI(
title="hyungi_Document_Server",
description="Self-hosted PKM 웹 애플리케이션 API",
version="2.0.0",
lifespan=lifespan,
)
@app.get("/health")
async def health_check():
return {"status": "ok", "version": "2.0.0"}
# TODO: 라우터 등록 (Phase 0~2)
# from api import documents, search, tasks, dashboard, export
# app.include_router(documents.router, prefix="/api/documents", tags=["documents"])
# app.include_router(search.router, prefix="/api/search", tags=["search"])
# app.include_router(tasks.router, prefix="/api/tasks", tags=["tasks"])
# app.include_router(dashboard.router, prefix="/api/dashboard", tags=["dashboard"])
# app.include_router(export.router, prefix="/api/export", tags=["export"])

0
app/models/__init__.py Normal file
View File

64
app/models/document.py Normal file
View File

@@ -0,0 +1,64 @@
"""documents 테이블 ORM"""
from datetime import datetime
from pgvector.sqlalchemy import Vector
from sqlalchemy import BigInteger, DateTime, Enum, String, Text
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class Document(Base):
__tablename__ = "documents"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
# 1계층: 원본 파일
file_path: Mapped[str] = mapped_column(Text, unique=True, nullable=False)
file_hash: Mapped[str] = mapped_column(String(64), nullable=False)
file_format: Mapped[str] = mapped_column(String(20), nullable=False)
file_size: Mapped[int | None] = mapped_column(BigInteger)
file_type: Mapped[str] = mapped_column(
Enum("immutable", "editable", "note", name="doc_type"),
default="immutable"
)
import_source: Mapped[str | None] = mapped_column(Text)
# 2계층: 텍스트 추출
extracted_text: Mapped[str | None] = mapped_column(Text)
extracted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
extractor_version: Mapped[str | None] = mapped_column(String(50))
# 2계층: AI 가공
ai_summary: Mapped[str | None] = mapped_column(Text)
ai_tags: Mapped[dict | None] = mapped_column(JSONB, default=[])
ai_domain: Mapped[str | None] = mapped_column(String(100))
ai_sub_group: Mapped[str | None] = mapped_column(String(100))
ai_model_version: Mapped[str | None] = mapped_column(String(50))
ai_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
# 3계층: 벡터 임베딩
embedding = mapped_column(Vector(768), nullable=True)
embed_model_version: Mapped[str | None] = mapped_column(String(50))
embedded_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
# 메타데이터
source_channel: Mapped[str | None] = mapped_column(
Enum("law_monitor", "devonagent", "email", "web_clip",
"tksafety", "inbox_route", "manual", "drive_sync",
name="source_channel")
)
data_origin: Mapped[str | None] = mapped_column(
Enum("work", "external", name="data_origin")
)
title: Mapped[str | None] = mapped_column(Text)
# 타임스탬프
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now, onupdate=datetime.now
)

34
app/models/queue.py Normal file
View File

@@ -0,0 +1,34 @@
"""processing_queue 테이블 ORM (비동기 가공 큐)"""
from datetime import datetime
from sqlalchemy import BigInteger, DateTime, Enum, ForeignKey, SmallInteger, Text, UniqueConstraint
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class ProcessingQueue(Base):
__tablename__ = "processing_queue"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
document_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("documents.id"), nullable=False)
stage: Mapped[str] = mapped_column(
Enum("extract", "classify", "embed", name="process_stage"), nullable=False
)
status: Mapped[str] = mapped_column(
Enum("pending", "processing", "completed", "failed", name="process_status"),
default="pending"
)
attempts: Mapped[int] = mapped_column(SmallInteger, default=0)
max_attempts: Mapped[int] = mapped_column(SmallInteger, default=3)
error_message: Mapped[str | None] = mapped_column(Text)
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
__table_args__ = (
UniqueConstraint("document_id", "stage", "status"),
)

29
app/models/task.py Normal file
View File

@@ -0,0 +1,29 @@
"""tasks 테이블 ORM (CalDAV 캐시)"""
from datetime import datetime
from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, SmallInteger, String, Text
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class Task(Base):
__tablename__ = "tasks"
id: Mapped[int] = mapped_column(BigInteger, primary_key=True)
caldav_uid: Mapped[str | None] = mapped_column(Text, unique=True)
title: Mapped[str] = mapped_column(Text, nullable=False)
description: Mapped[str | None] = mapped_column(Text)
due_date: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
priority: Mapped[int] = mapped_column(SmallInteger, default=0)
completed: Mapped[bool] = mapped_column(Boolean, default=False)
completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
document_id: Mapped[int | None] = mapped_column(BigInteger, ForeignKey("documents.id"))
source: Mapped[str | None] = mapped_column(String(50))
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)
updated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now, onupdate=datetime.now
)

51
app/prompts/classify.txt Normal file
View File

@@ -0,0 +1,51 @@
당신은 문서 분류 AI입니다. 아래 문서를 분석하고 반드시 JSON 형식으로만 응답하세요. 다른 텍스트는 출력하지 마세요.
## 응답 형식
{
"tags": ["태그1", "태그2", "태그3"],
"domain": "도메인경로",
"sub_group": "하위그룹",
"sourceChannel": "유입경로",
"dataOrigin": "work 또는 external"
}
## 도메인 선택지 (NAS 폴더 경로)
- Knowledge/Philosophy — 철학, 사상, 인문학
- Knowledge/Language — 어학, 번역, 언어학
- Knowledge/Engineering — 공학 전반 기술 문서
- Knowledge/Industrial_Safety — 산업안전, 규정, 인증
- Knowledge/Programming — 개발, 코드, IT 기술
- Knowledge/General — 일반 도서, 독서 노트, 메모
- Reference — 도면, 참고자료, 규격표
## 하위 그룹 예시 (도메인별)
- Knowledge/Industrial_Safety: Legislation, Standards, Cases
- Knowledge/Programming: Language, Framework, DevOps, AI_ML
- Knowledge/Engineering: Mechanical, Electrical, Network
- 잘 모르겠으면: (비워둠)
## 태그 체계
태그는 최대 5개, 한글 사용. 아래 계층 구조 중에서 선택:
- @상태/: 처리중, 검토필요, 완료, 아카이브
- #주제/기술/: 서버관리, 네트워크, AI-ML
- #주제/산업안전/: 법령, 위험성평가, 순회점검, 안전교육, 사고사례, 신고보고, 안전관리자, 보건관리자
- #주제/업무/: 프로젝트, 회의, 보고서
- $유형/: 논문, 법령, 기사, 메모, 이메일, 채팅로그, 도면, 체크리스트
- !우선순위/: 긴급, 중요, 참고
## sourceChannel 값
- tksafety: TKSafety API 업무 실적
- devonagent: 자동 수집 뉴스
- law_monitor: 법령 API 법령 변경
- inbox_route: Inbox AI 분류 (이 프롬프트에 의한 분류)
- email: MailPlus 이메일
- web_clip: Web Clipper 스크랩
- manual: 직접 추가
- drive_sync: Synology Drive 동기화
## dataOrigin 값
- work: 자사 업무 관련 (TK, 테크니컬코리아, 공장, 생산, 사내)
- external: 외부 참고 자료 (뉴스, 논문, 법령, 일반 정보)
## 분류 대상 문서
{document_text}

16
app/requirements.txt Normal file
View File

@@ -0,0 +1,16 @@
fastapi>=0.110.0
uvicorn[standard]>=0.27.0
sqlalchemy[asyncio]>=2.0.0
asyncpg>=0.29.0
pgvector>=0.3.0
python-dotenv>=1.0.0
pyyaml>=6.0
httpx>=0.27.0
python-jose[cryptography]>=3.3.0
passlib[bcrypt]>=1.7.4
pyotp>=2.9.0
caldav>=1.3.0
apscheduler>=3.10.0
anthropic>=0.40.0
markdown>=3.5.0
python-multipart>=0.0.9

0
app/workers/__init__.py Normal file
View File