From 131dbd7b7c3c102276c97e9ad7523e3a44ad57a4 Mon Sep 17 00:00:00 2001 From: Hyungi Ahn Date: Thu, 2 Apr 2026 10:20:15 +0900 Subject: [PATCH] feat: scaffold v2 project structure with Docker, FastAPI, and config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 동작하는 최소 코드 수준의 v2 스캐폴딩: - docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy - app/: FastAPI 백엔드 (main, core, models, ai, prompts) - services/kordoc/: Node.js 문서 파싱 마이크로서비스 - gpu-server/: AI Gateway + GPU docker-compose - frontend/: SvelteKit 기본 구조 - migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue) - tests/: pytest conftest 기본 설정 - config.yaml, Caddyfile, credentials.env.example 갱신 Co-Authored-By: Claude Opus 4.6 (1M context) --- .gitignore | 8 ++ Caddyfile | 13 +++ app/Dockerfile | 10 ++ app/ai/__init__.py | 0 app/ai/client.py | 79 +++++++++++++ app/api/__init__.py | 0 app/core/__init__.py | 0 app/core/auth.py | 51 +++++++++ app/core/config.py | 93 +++++++++++++++ app/core/database.py | 34 ++++++ app/core/utils.py | 46 ++++++++ app/main.py | 41 +++++++ app/models/__init__.py | 0 app/models/document.py | 64 +++++++++++ app/models/queue.py | 34 ++++++ app/models/task.py | 29 +++++ app/prompts/classify.txt | 51 +++++++++ app/requirements.txt | 16 +++ app/workers/__init__.py | 0 config.yaml | 48 ++++++++ credentials.env.example | 66 +++++++---- docker-compose.yml | 76 +++++++++++++ frontend/Dockerfile | 16 +++ frontend/package.json | 16 +++ frontend/src/app.html | 12 ++ frontend/src/routes/+page.svelte | 14 +++ frontend/svelte.config.js | 10 ++ gpu-server/docker-compose.yml | 33 ++++++ gpu-server/services/ai-gateway/Dockerfile | 10 ++ .../services/ai-gateway/requirements.txt | 3 + gpu-server/services/ai-gateway/server.py | 58 ++++++++++ migrations/001_initial_schema.sql | 106 ++++++++++++++++++ services/kordoc/Dockerfile | 12 ++ services/kordoc/package.json | 13 +++ services/kordoc/server.js | 57 ++++++++++ tests/__init__.py | 0 
tests/conftest.py | 22 ++++ 37 files changed, 1122 insertions(+), 19 deletions(-) create mode 100644 Caddyfile create mode 100644 app/Dockerfile create mode 100644 app/ai/__init__.py create mode 100644 app/ai/client.py create mode 100644 app/api/__init__.py create mode 100644 app/core/__init__.py create mode 100644 app/core/auth.py create mode 100644 app/core/config.py create mode 100644 app/core/database.py create mode 100644 app/core/utils.py create mode 100644 app/main.py create mode 100644 app/models/__init__.py create mode 100644 app/models/document.py create mode 100644 app/models/queue.py create mode 100644 app/models/task.py create mode 100644 app/prompts/classify.txt create mode 100644 app/requirements.txt create mode 100644 app/workers/__init__.py create mode 100644 config.yaml create mode 100644 docker-compose.yml create mode 100644 frontend/Dockerfile create mode 100644 frontend/package.json create mode 100644 frontend/src/app.html create mode 100644 frontend/src/routes/+page.svelte create mode 100644 frontend/svelte.config.js create mode 100644 gpu-server/docker-compose.yml create mode 100644 gpu-server/services/ai-gateway/Dockerfile create mode 100644 gpu-server/services/ai-gateway/requirements.txt create mode 100644 gpu-server/services/ai-gateway/server.py create mode 100644 migrations/001_initial_schema.sql create mode 100644 services/kordoc/Dockerfile create mode 100644 services/kordoc/package.json create mode 100644 services/kordoc/server.js create mode 100644 tests/__init__.py create mode 100644 tests/conftest.py diff --git a/.gitignore b/.gitignore index c102ae0..25282e4 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,11 @@ data/ # IDE .vscode/ .idea/ + +# Node.js (frontend, kordoc) +node_modules/ +.svelte-kit/ + +# Docker volumes +pgdata/ +caddy_data/ diff --git a/Caddyfile b/Caddyfile new file mode 100644 index 0000000..6102d7e --- /dev/null +++ b/Caddyfile @@ -0,0 +1,13 @@ +pkm.hyungi.net { + reverse_proxy fastapi:8000 +} + +# Synology 
Office 프록시 +office.hyungi.net { + reverse_proxy https://ds1525.hyungi.net:5001 { + header_up Host {upstream_hostport} + transport http { + tls_insecure_skip_verify + } + } +} diff --git a/app/Dockerfile b/app/Dockerfile new file mode 100644 index 0000000..628cc31 --- /dev/null +++ b/app/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY . . + +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/app/ai/__init__.py b/app/ai/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/ai/client.py b/app/ai/client.py new file mode 100644 index 0000000..f64b9e4 --- /dev/null +++ b/app/ai/client.py @@ -0,0 +1,79 @@ +"""AI 추상화 레이어 — 통합 클라이언트. 기본값은 항상 Qwen3.5.""" + +from pathlib import Path + +import httpx + +from core.config import settings + +# 프롬프트 로딩 +PROMPTS_DIR = Path(__file__).parent.parent / "prompts" + + +def _load_prompt(name: str) -> str: + return (PROMPTS_DIR / name).read_text(encoding="utf-8") + + +CLASSIFY_PROMPT = _load_prompt("classify.txt") if (PROMPTS_DIR / "classify.txt").exists() else "" + + +class AIClient: + """AI Gateway를 통한 통합 클라이언트. 
기본값은 항상 Qwen3.5.""" + + def __init__(self): + self.ai = settings.ai + self._http = httpx.AsyncClient(timeout=120) + + async def classify(self, text: str) -> dict: + """문서 분류 — 항상 primary(Qwen3.5) 사용""" + prompt = CLASSIFY_PROMPT.replace("{document_text}", text) + response = await self._call_chat(self.ai.primary, prompt) + return response + + async def summarize(self, text: str, force_premium: bool = False) -> str: + """문서 요약 — 기본 Qwen3.5, 장문이거나 명시적 요청 시만 Claude""" + model = self.ai.primary + if force_premium or len(text) > 15000: + model = self.ai.premium + return await self._call_chat(model, f"다음 문서를 500자 이내로 요약해주세요:\n\n{text}") + + async def embed(self, text: str) -> list[float]: + """벡터 임베딩 — GPU 서버 전용""" + response = await self._http.post( + self.ai.embedding.endpoint, + json={"model": self.ai.embedding.model, "prompt": text}, + ) + response.raise_for_status() + return response.json()["embedding"] + + async def ocr(self, image_bytes: bytes) -> str: + """이미지 OCR — GPU 서버 전용""" + # TODO: Qwen2.5-VL-7B 비전 모델 호출 구현 + raise NotImplementedError("OCR는 Phase 1에서 구현") + + async def _call_chat(self, model_config, prompt: str) -> str: + """OpenAI 호환 API 호출 + 자동 폴백""" + try: + return await self._request(model_config, prompt) + except (httpx.TimeoutException, httpx.ConnectError): + if model_config == self.ai.primary: + return await self._request(self.ai.fallback, prompt) + raise + + async def _request(self, model_config, prompt: str) -> str: + """단일 모델 API 호출""" + response = await self._http.post( + model_config.endpoint, + json={ + "model": model_config.model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": model_config.max_tokens, + }, + timeout=model_config.timeout, + ) + response.raise_for_status() + data = response.json() + return data["choices"][0]["message"]["content"] + + async def close(self): + await self._http.aclose() diff --git a/app/api/__init__.py b/app/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/app/core/__init__.py b/app/core/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/core/auth.py b/app/core/auth.py new file mode 100644 index 0000000..fd390dd --- /dev/null +++ b/app/core/auth.py @@ -0,0 +1,51 @@ +"""JWT + TOTP 2FA 인증""" + +from datetime import datetime, timedelta, timezone + +import pyotp +from jose import JWTError, jwt +from passlib.context import CryptContext + +from core.config import settings + +pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto") + +# JWT 설정 +ALGORITHM = "HS256" +ACCESS_TOKEN_EXPIRE_MINUTES = 15 +REFRESH_TOKEN_EXPIRE_DAYS = 7 + + +def verify_password(plain: str, hashed: str) -> bool: + return pwd_context.verify(plain, hashed) + + +def hash_password(password: str) -> str: + return pwd_context.hash(password) + + +def create_access_token(subject: str) -> str: + expire = datetime.now(timezone.utc) + timedelta(minutes=ACCESS_TOKEN_EXPIRE_MINUTES) + payload = {"sub": subject, "exp": expire, "type": "access"} + return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM) + + +def create_refresh_token(subject: str) -> str: + expire = datetime.now(timezone.utc) + timedelta(days=REFRESH_TOKEN_EXPIRE_DAYS) + payload = {"sub": subject, "exp": expire, "type": "refresh"} + return jwt.encode(payload, settings.jwt_secret, algorithm=ALGORITHM) + + +def decode_token(token: str) -> dict | None: + try: + return jwt.decode(token, settings.jwt_secret, algorithms=[ALGORITHM]) + except JWTError: + return None + + +def verify_totp(code: str) -> bool: + """TOTP 코드 검증""" + if not settings.totp_secret: + return True # TOTP 미설정 시 스킵 + totp = pyotp.TOTP(settings.totp_secret) + return totp.verify(code) diff --git a/app/core/config.py b/app/core/config.py new file mode 100644 index 0000000..6017693 --- /dev/null +++ b/app/core/config.py @@ -0,0 +1,93 @@ +"""설정 로딩 — config.yaml + credentials.env""" + +import os +from pathlib import Path + +import yaml +from pydantic import BaseModel + + +class 
AIModelConfig(BaseModel): + endpoint: str + model: str + max_tokens: int = 4096 + timeout: int = 60 + daily_budget_usd: float | None = None + require_explicit_trigger: bool = False + + +class AIConfig(BaseModel): + gateway_endpoint: str + primary: AIModelConfig + fallback: AIModelConfig + premium: AIModelConfig + embedding: AIModelConfig + vision: AIModelConfig + rerank: AIModelConfig + + +class Settings(BaseModel): + # DB + database_url: str = "" + + # AI + ai: AIConfig | None = None + + # NAS + nas_mount_path: str = "/documents" + nas_pkm_root: str = "/documents/PKM" + + # 인증 + jwt_secret: str = "" + totp_secret: str = "" + + # kordoc + kordoc_endpoint: str = "http://kordoc-service:3100" + + +def load_settings() -> Settings: + """config.yaml + 환경변수에서 설정 로딩""" + # 환경변수 (docker-compose에서 주입) + database_url = os.getenv("DATABASE_URL", "") + jwt_secret = os.getenv("JWT_SECRET", "") + totp_secret = os.getenv("TOTP_SECRET", "") + kordoc_endpoint = os.getenv("KORDOC_ENDPOINT", "http://kordoc-service:3100") + + # config.yaml + config_path = Path(__file__).parent.parent.parent / "config.yaml" + ai_config = None + nas_mount = "/documents" + nas_pkm = "/documents/PKM" + + if config_path.exists(): + with open(config_path) as f: + raw = yaml.safe_load(f) + + if "ai" in raw: + ai_raw = raw["ai"] + ai_config = AIConfig( + gateway_endpoint=ai_raw.get("gateway", {}).get("endpoint", ""), + primary=AIModelConfig(**ai_raw["models"]["primary"]), + fallback=AIModelConfig(**ai_raw["models"]["fallback"]), + premium=AIModelConfig(**ai_raw["models"]["premium"]), + embedding=AIModelConfig(**ai_raw["models"]["embedding"]), + vision=AIModelConfig(**ai_raw["models"]["vision"]), + rerank=AIModelConfig(**ai_raw["models"]["rerank"]), + ) + + if "nas" in raw: + nas_mount = raw["nas"].get("mount_path", nas_mount) + nas_pkm = raw["nas"].get("pkm_root", nas_pkm) + + return Settings( + database_url=database_url, + ai=ai_config, + nas_mount_path=nas_mount, + nas_pkm_root=nas_pkm, + 
jwt_secret=jwt_secret, + totp_secret=totp_secret, + kordoc_endpoint=kordoc_endpoint, + ) + + +settings = load_settings() diff --git a/app/core/database.py b/app/core/database.py new file mode 100644 index 0000000..5ef068a --- /dev/null +++ b/app/core/database.py @@ -0,0 +1,34 @@ +"""PostgreSQL 연결 — SQLAlchemy async engine + session factory""" + +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine +from sqlalchemy.orm import DeclarativeBase + +from core.config import settings + +engine = create_async_engine( + settings.database_url, + echo=False, + pool_size=10, + max_overflow=20, +) + +async_session = async_sessionmaker(engine, class_=AsyncSession, expire_on_commit=False) + + +class Base(DeclarativeBase): + pass + + +async def init_db(): + """DB 연결 확인 (스키마는 migrations/로 관리)""" + async with engine.begin() as conn: + # 연결 테스트 + await conn.execute( + __import__("sqlalchemy").text("SELECT 1") + ) + + +async def get_session() -> AsyncSession: + """FastAPI Depends용 세션 제공""" + async with async_session() as session: + yield session diff --git a/app/core/utils.py b/app/core/utils.py new file mode 100644 index 0000000..7981411 --- /dev/null +++ b/app/core/utils.py @@ -0,0 +1,46 @@ +"""공통 유틸리티 — v1 pkm_utils.py에서 AppleScript 제거, 나머지 포팅""" + +import hashlib +import logging +from pathlib import Path + + +def setup_logger(name: str, log_dir: str = "logs") -> logging.Logger: + """로거 설정""" + Path(log_dir).mkdir(exist_ok=True) + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + + if not logger.handlers: + # 파일 핸들러 + fh = logging.FileHandler(f"{log_dir}/{name}.log", encoding="utf-8") + fh.setFormatter(logging.Formatter( + "%(asctime)s [%(levelname)s] %(message)s", + datefmt="%Y-%m-%d %H:%M:%S" + )) + logger.addHandler(fh) + + # 콘솔 핸들러 + ch = logging.StreamHandler() + ch.setFormatter(logging.Formatter("[%(levelname)s] %(message)s")) + logger.addHandler(ch) + + return logger + + +def file_hash(path: str | Path) -> str: + """파일 
SHA-256 해시 계산""" + sha256 = hashlib.sha256() + with open(path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + sha256.update(chunk) + return sha256.hexdigest() + + +def count_log_errors(log_path: str) -> int: + """로그 파일에서 ERROR 건수 카운트""" + try: + with open(log_path, encoding="utf-8") as f: + return sum(1 for line in f if "[ERROR]" in line) + except FileNotFoundError: + return 0 diff --git a/app/main.py b/app/main.py new file mode 100644 index 0000000..76c778e --- /dev/null +++ b/app/main.py @@ -0,0 +1,41 @@ +"""hyungi_Document_Server — FastAPI 엔트리포인트""" + +from contextlib import asynccontextmanager + +from fastapi import FastAPI + +from core.config import settings +from core.database import init_db + + +@asynccontextmanager +async def lifespan(app: FastAPI): + """앱 시작/종료 시 실행되는 lifespan 핸들러""" + # 시작: DB 연결, 스케줄러 등록 + await init_db() + # TODO: APScheduler 시작 (Phase 3) + yield + # 종료: 리소스 정리 + # TODO: 스케줄러 종료, DB 연결 해제 + + +app = FastAPI( + title="hyungi_Document_Server", + description="Self-hosted PKM 웹 애플리케이션 API", + version="2.0.0", + lifespan=lifespan, +) + + +@app.get("/health") +async def health_check(): + return {"status": "ok", "version": "2.0.0"} + + +# TODO: 라우터 등록 (Phase 0~2) +# from api import documents, search, tasks, dashboard, export +# app.include_router(documents.router, prefix="/api/documents", tags=["documents"]) +# app.include_router(search.router, prefix="/api/search", tags=["search"]) +# app.include_router(tasks.router, prefix="/api/tasks", tags=["tasks"]) +# app.include_router(dashboard.router, prefix="/api/dashboard", tags=["dashboard"]) +# app.include_router(export.router, prefix="/api/export", tags=["export"]) diff --git a/app/models/__init__.py b/app/models/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/models/document.py b/app/models/document.py new file mode 100644 index 0000000..4352852 --- /dev/null +++ b/app/models/document.py @@ -0,0 +1,64 @@ +"""documents 테이블 ORM""" + +from datetime import 
datetime + +from pgvector.sqlalchemy import Vector +from sqlalchemy import BigInteger, DateTime, Enum, String, Text +from sqlalchemy.dialects.postgresql import JSONB +from sqlalchemy.orm import Mapped, mapped_column + +from core.database import Base + + +class Document(Base): + __tablename__ = "documents" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + + # 1계층: 원본 파일 + file_path: Mapped[str] = mapped_column(Text, unique=True, nullable=False) + file_hash: Mapped[str] = mapped_column(String(64), nullable=False) + file_format: Mapped[str] = mapped_column(String(20), nullable=False) + file_size: Mapped[int | None] = mapped_column(BigInteger) + file_type: Mapped[str] = mapped_column( + Enum("immutable", "editable", "note", name="doc_type"), + default="immutable" + ) + import_source: Mapped[str | None] = mapped_column(Text) + + # 2계층: 텍스트 추출 + extracted_text: Mapped[str | None] = mapped_column(Text) + extracted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + extractor_version: Mapped[str | None] = mapped_column(String(50)) + + # 2계층: AI 가공 + ai_summary: Mapped[str | None] = mapped_column(Text) + ai_tags: Mapped[dict | None] = mapped_column(JSONB, default=[]) + ai_domain: Mapped[str | None] = mapped_column(String(100)) + ai_sub_group: Mapped[str | None] = mapped_column(String(100)) + ai_model_version: Mapped[str | None] = mapped_column(String(50)) + ai_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + + # 3계층: 벡터 임베딩 + embedding = mapped_column(Vector(768), nullable=True) + embed_model_version: Mapped[str | None] = mapped_column(String(50)) + embedded_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + + # 메타데이터 + source_channel: Mapped[str | None] = mapped_column( + Enum("law_monitor", "devonagent", "email", "web_clip", + "tksafety", "inbox_route", "manual", "drive_sync", + name="source_channel") + ) + data_origin: Mapped[str | None] = mapped_column( + Enum("work", "external", 
name="data_origin") + ) + title: Mapped[str | None] = mapped_column(Text) + + # 타임스탬프 + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, onupdate=datetime.now + ) diff --git a/app/models/queue.py b/app/models/queue.py new file mode 100644 index 0000000..73a85b4 --- /dev/null +++ b/app/models/queue.py @@ -0,0 +1,34 @@ +"""processing_queue 테이블 ORM (비동기 가공 큐)""" + +from datetime import datetime + +from sqlalchemy import BigInteger, DateTime, Enum, ForeignKey, SmallInteger, Text, UniqueConstraint +from sqlalchemy.orm import Mapped, mapped_column + +from core.database import Base + + +class ProcessingQueue(Base): + __tablename__ = "processing_queue" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + document_id: Mapped[int] = mapped_column(BigInteger, ForeignKey("documents.id"), nullable=False) + stage: Mapped[str] = mapped_column( + Enum("extract", "classify", "embed", name="process_stage"), nullable=False + ) + status: Mapped[str] = mapped_column( + Enum("pending", "processing", "completed", "failed", name="process_status"), + default="pending" + ) + attempts: Mapped[int] = mapped_column(SmallInteger, default=0) + max_attempts: Mapped[int] = mapped_column(SmallInteger, default=3) + error_message: Mapped[str | None] = mapped_column(Text) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now + ) + started_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + + __table_args__ = ( + UniqueConstraint("document_id", "stage", "status"), + ) diff --git a/app/models/task.py b/app/models/task.py new file mode 100644 index 0000000..15f0e1b --- /dev/null +++ b/app/models/task.py @@ -0,0 +1,29 @@ +"""tasks 테이블 ORM (CalDAV 캐시)""" + +from datetime import datetime + +from sqlalchemy 
import BigInteger, Boolean, DateTime, ForeignKey, SmallInteger, String, Text +from sqlalchemy.orm import Mapped, mapped_column + +from core.database import Base + + +class Task(Base): + __tablename__ = "tasks" + + id: Mapped[int] = mapped_column(BigInteger, primary_key=True) + caldav_uid: Mapped[str | None] = mapped_column(Text, unique=True) + title: Mapped[str] = mapped_column(Text, nullable=False) + description: Mapped[str | None] = mapped_column(Text) + due_date: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + priority: Mapped[int] = mapped_column(SmallInteger, default=0) + completed: Mapped[bool] = mapped_column(Boolean, default=False) + completed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + document_id: Mapped[int | None] = mapped_column(BigInteger, ForeignKey("documents.id")) + source: Mapped[str | None] = mapped_column(String(50)) + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now + ) + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), default=datetime.now, onupdate=datetime.now + ) diff --git a/app/prompts/classify.txt b/app/prompts/classify.txt new file mode 100644 index 0000000..232cde9 --- /dev/null +++ b/app/prompts/classify.txt @@ -0,0 +1,51 @@ +당신은 문서 분류 AI입니다. 아래 문서를 분석하고 반드시 JSON 형식으로만 응답하세요. 다른 텍스트는 출력하지 마세요. 
+ +## 응답 형식 +{ + "tags": ["태그1", "태그2", "태그3"], + "domain": "도메인경로", + "sub_group": "하위그룹", + "sourceChannel": "유입경로", + "dataOrigin": "work 또는 external" +} + +## 도메인 선택지 (NAS 폴더 경로) +- Knowledge/Philosophy — 철학, 사상, 인문학 +- Knowledge/Language — 어학, 번역, 언어학 +- Knowledge/Engineering — 공학 전반 기술 문서 +- Knowledge/Industrial_Safety — 산업안전, 규정, 인증 +- Knowledge/Programming — 개발, 코드, IT 기술 +- Knowledge/General — 일반 도서, 독서 노트, 메모 +- Reference — 도면, 참고자료, 규격표 + +## 하위 그룹 예시 (도메인별) +- Knowledge/Industrial_Safety: Legislation, Standards, Cases +- Knowledge/Programming: Language, Framework, DevOps, AI_ML +- Knowledge/Engineering: Mechanical, Electrical, Network +- 잘 모르겠으면: (비워둠) + +## 태그 체계 +태그는 최대 5개, 한글 사용. 아래 계층 구조 중에서 선택: +- @상태/: 처리중, 검토필요, 완료, 아카이브 +- #주제/기술/: 서버관리, 네트워크, AI-ML +- #주제/산업안전/: 법령, 위험성평가, 순회점검, 안전교육, 사고사례, 신고보고, 안전관리자, 보건관리자 +- #주제/업무/: 프로젝트, 회의, 보고서 +- $유형/: 논문, 법령, 기사, 메모, 이메일, 채팅로그, 도면, 체크리스트 +- !우선순위/: 긴급, 중요, 참고 + +## sourceChannel 값 +- tksafety: TKSafety API 업무 실적 +- devonagent: 자동 수집 뉴스 +- law_monitor: 법령 API 법령 변경 +- inbox_route: Inbox AI 분류 (이 프롬프트에 의한 분류) +- email: MailPlus 이메일 +- web_clip: Web Clipper 스크랩 +- manual: 직접 추가 +- drive_sync: Synology Drive 동기화 + +## dataOrigin 값 +- work: 자사 업무 관련 (TK, 테크니컬코리아, 공장, 생산, 사내) +- external: 외부 참고 자료 (뉴스, 논문, 법령, 일반 정보) + +## 분류 대상 문서 +{document_text} diff --git a/app/requirements.txt b/app/requirements.txt new file mode 100644 index 0000000..2d1d06e --- /dev/null +++ b/app/requirements.txt @@ -0,0 +1,16 @@ +fastapi>=0.110.0 +uvicorn[standard]>=0.27.0 +sqlalchemy[asyncio]>=2.0.0 +asyncpg>=0.29.0 +pgvector>=0.3.0 +python-dotenv>=1.0.0 +pyyaml>=6.0 +httpx>=0.27.0 +python-jose[cryptography]>=3.3.0 +passlib[bcrypt]>=1.7.4 +pyotp>=2.9.0 +caldav>=1.3.0 +apscheduler>=3.10.0 +anthropic>=0.40.0 +markdown>=3.5.0 +python-multipart>=0.0.9 diff --git a/app/workers/__init__.py b/app/workers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/config.yaml b/config.yaml new file mode 100644 index 
0000000..206255c --- /dev/null +++ b/config.yaml @@ -0,0 +1,48 @@ +# hyungi_Document_Server 설정 + +ai: + gateway: + endpoint: "http://gpu-server:8080" + + models: + primary: + endpoint: "http://host.docker.internal:8800/v1/chat/completions" + model: "mlx-community/Qwen3.5-35B-A3B-4bit" + max_tokens: 4096 + timeout: 60 + + fallback: + endpoint: "http://gpu-server:11434/v1/chat/completions" + model: "qwen3.5:35b-a3b" + max_tokens: 4096 + timeout: 120 + + premium: + endpoint: "https://api.anthropic.com/v1/messages" + model: "claude-sonnet-4-20250514" + max_tokens: 8192 + daily_budget_usd: 5.00 + require_explicit_trigger: true + + embedding: + endpoint: "http://gpu-server:11434/api/embeddings" + model: "nomic-embed-text" + + vision: + endpoint: "http://gpu-server:11434/api/generate" + model: "Qwen2.5-VL-7B" + + rerank: + endpoint: "http://gpu-server:11434/api/rerank" + model: "bge-reranker-v2-m3" + +nas: + mount_path: "/documents" + pkm_root: "/documents/PKM" + +schedule: + law_monitor: "07:00" + mailplus_archive: ["07:00", "18:00"] + daily_digest: "20:00" + file_watcher_interval_minutes: 5 + queue_consumer_interval_minutes: 10 diff --git a/credentials.env.example b/credentials.env.example index ee7ecb0..4f31062 100644 --- a/credentials.env.example +++ b/credentials.env.example @@ -1,29 +1,57 @@ # ═══════════════════════════════════════════════════ -# PKM 시스템 인증 정보 -# 이 파일은 템플릿입니다. 실제 값은 Mac mini의 -# ~/.config/pkm/credentials.env 에 별도 관리합니다. 
+# hyungi_Document_Server — 인증 정보 템플릿 +# 실제 값을 채워서 credentials.env로 저장 # ═══════════════════════════════════════════════════ -# ─── Claude API (AI 고급 처리용) ─── +# ─── PostgreSQL ─── +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_DB=pkm +POSTGRES_USER=pkm +POSTGRES_PASSWORD= + +# ─── AI: Mac mini MLX (Qwen3.5, 기본 모델) ─── +MLX_ENDPOINT=http://localhost:8800/v1/chat/completions +MLX_MODEL=mlx-community/Qwen3.5-35B-A3B-4bit + +# ─── AI: GPU 서버 ─── +GPU_SERVER_IP= +GPU_EMBED_PORT=11434 + +# ─── AI: Claude API (종량제, 복잡한 분석 전용) ─── CLAUDE_API_KEY= +# ─── AI Gateway (GPU 서버) ─── +AI_GATEWAY_ENDPOINT=http://gpu-server:8080 + +# ─── Synology NAS ─── +NAS_SMB_PATH=/Volumes/Document_Server +NAS_DOMAIN=ds1525.hyungi.net +NAS_TAILSCALE_IP=100.101.79.37 +NAS_PORT=15001 + +# ─── Synology MailPlus (이메일 수집 + SMTP 알림) ─── +MAILPLUS_HOST=mailplus.hyungi.net +MAILPLUS_PORT=993 +MAILPLUS_SMTP_PORT=465 +MAILPLUS_USER=hyungi +MAILPLUS_PASS= + +# ─── Synology Calendar (CalDAV, 태스크 관리) ─── +CALDAV_URL=https://ds1525.hyungi.net/caldav/ +CALDAV_USER=hyungi +CALDAV_PASS= + +# ─── kordoc 마이크로서비스 ─── +KORDOC_ENDPOINT=http://kordoc-service:3100 + +# ─── 인증 (JWT + TOTP) ─── +JWT_SECRET= +TOTP_SECRET= + # ─── 국가법령정보센터 (법령 모니터링) ─── LAW_OC= -# ─── Synology NAS 접속 ─── -NAS_DOMAIN= -NAS_TAILSCALE_IP= -NAS_PORT=15001 - -# ─── MailPlus IMAP (이메일 수집용) ─── -MAILPLUS_HOST= -MAILPLUS_PORT=993 -MAILPLUS_USER= -MAILPLUS_PASS= - -# ─── Synology Chat 웹훅 (나중에 추가) ─── -#CHAT_WEBHOOK_URL= - # ─── TKSafety API (나중에 활성화) ─── -#TKSAFETY_HOST= +#TKSAFETY_HOST=tksafety.technicalkorea.net #TKSAFETY_PORT= diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..f5e3c9e --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,76 @@ +version: '3.8' + +services: + postgres: + image: pgvector/pgvector:pg16 + volumes: + - pgdata:/var/lib/postgresql/data + - ./migrations:/docker-entrypoint-initdb.d + environment: + POSTGRES_DB: pkm + POSTGRES_USER: pkm + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} + 
ports: + - "5432:5432" + healthcheck: + test: ["CMD-SHELL", "pg_isready -U pkm"] + interval: 5s + timeout: 5s + retries: 5 + restart: unless-stopped + + kordoc-service: + build: ./services/kordoc + ports: + - "3100:3100" + volumes: + - ${NAS_SMB_PATH:-/Volumes/Document_Server}:/documents:ro + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:3100/health"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped + + fastapi: + build: ./app + ports: + - "8000:8000" + volumes: + - ${NAS_SMB_PATH:-/Volumes/Document_Server}:/documents + depends_on: + postgres: + condition: service_healthy + kordoc-service: + condition: service_healthy + env_file: + - credentials.env + environment: + - DATABASE_URL=postgresql+asyncpg://pkm:${POSTGRES_PASSWORD}@postgres:5432/pkm + - KORDOC_ENDPOINT=http://kordoc-service:3100 + restart: unless-stopped + + frontend: + build: ./frontend + ports: + - "3000:3000" + depends_on: + - fastapi + restart: unless-stopped + + caddy: + image: caddy:2 + ports: + - "80:80" + - "443:443" + volumes: + - ./Caddyfile:/etc/caddy/Caddyfile + - caddy_data:/data + depends_on: + - fastapi + - frontend + restart: unless-stopped + +volumes: + pgdata: + caddy_data: diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..b40d740 --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,16 @@ +FROM node:20-slim AS build + +WORKDIR /app +COPY package.json . +RUN npm install +COPY . . +RUN npm run build + +FROM node:20-slim +WORKDIR /app +COPY --from=build /app/build build/ +COPY --from=build /app/node_modules node_modules/ +COPY package.json . 
+ +EXPOSE 3000 +CMD ["node", "build"] diff --git a/frontend/package.json b/frontend/package.json new file mode 100644 index 0000000..8c36293 --- /dev/null +++ b/frontend/package.json @@ -0,0 +1,16 @@ +{ + "name": "hyungi-document-server-frontend", + "version": "0.0.1", + "private": true, + "scripts": { + "dev": "vite dev", + "build": "vite build", + "preview": "vite preview" + }, + "devDependencies": { + "@sveltejs/adapter-node": "^2.0.0", + "@sveltejs/kit": "^2.0.0", + "svelte": "^4.0.0", + "vite": "^5.0.0" + } +} diff --git a/frontend/src/app.html b/frontend/src/app.html new file mode 100644 index 0000000..94db804 --- /dev/null +++ b/frontend/src/app.html @@ -0,0 +1,12 @@ + + + + + + hyungi Document Server + %sveltekit.head% + + +
%sveltekit.body%
+ + diff --git a/frontend/src/routes/+page.svelte b/frontend/src/routes/+page.svelte new file mode 100644 index 0000000..317a4bd --- /dev/null +++ b/frontend/src/routes/+page.svelte @@ -0,0 +1,14 @@ + + +

hyungi Document Server

+

PKM 대시보드 — Phase 4에서 구현 예정

+ +
+

시스템 상태

+ +
diff --git a/frontend/svelte.config.js b/frontend/svelte.config.js new file mode 100644 index 0000000..0e7712b --- /dev/null +++ b/frontend/svelte.config.js @@ -0,0 +1,10 @@ +import adapter from '@sveltejs/adapter-node'; + +/** @type {import('@sveltejs/kit').Config} */ +const config = { + kit: { + adapter: adapter() + } +}; + +export default config; diff --git a/gpu-server/docker-compose.yml b/gpu-server/docker-compose.yml new file mode 100644 index 0000000..7f579fd --- /dev/null +++ b/gpu-server/docker-compose.yml @@ -0,0 +1,33 @@ +version: '3.8' + +services: + ollama: + image: ollama/ollama + volumes: + - ollama_data:/root/.ollama + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] + ports: + - "11434:11434" + restart: unless-stopped + + ai-gateway: + build: ./services/ai-gateway + ports: + - "8080:8080" + environment: + - PRIMARY_ENDPOINT=${PRIMARY_ENDPOINT:-http://mac-mini:8800/v1/chat/completions} + - FALLBACK_ENDPOINT=http://ollama:11434/v1/chat/completions + - CLAUDE_API_KEY=${CLAUDE_API_KEY:-} + - DAILY_BUDGET_USD=${DAILY_BUDGET_USD:-5.00} + depends_on: + - ollama + restart: unless-stopped + +volumes: + ollama_data: diff --git a/gpu-server/services/ai-gateway/Dockerfile b/gpu-server/services/ai-gateway/Dockerfile new file mode 100644 index 0000000..d886a93 --- /dev/null +++ b/gpu-server/services/ai-gateway/Dockerfile @@ -0,0 +1,10 @@ +FROM python:3.11-slim + +WORKDIR /app + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY server.py . 
"""AI Gateway: model routing, fallback, cost control, and request logging."""

import logging
import os
from contextlib import asynccontextmanager
from datetime import date

from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse
import httpx

logger = logging.getLogger(__name__)

PRIMARY = os.getenv("PRIMARY_ENDPOINT", "http://localhost:8800/v1/chat/completions")
FALLBACK = os.getenv("FALLBACK_ENDPOINT", "http://localhost:11434/v1/chat/completions")
CLAUDE_API_KEY = os.getenv("CLAUDE_API_KEY", "")
DAILY_BUDGET = float(os.getenv("DAILY_BUDGET_USD", "5.00"))

# Daily spend keyed by ISO date. Held in memory only, so it resets on restart.
_daily_cost: dict[str, float] = {}
_http = httpx.AsyncClient(timeout=120)

# Failures meaning "this upstream produced no usable answer":
# httpx.HTTPError is the common base of transport errors (timeouts, connect,
# read and protocol errors) and of raise_for_status() failures; ValueError
# covers a response body that is not valid JSON.
_UPSTREAM_ERRORS = (httpx.HTTPError, ValueError)


@asynccontextmanager
async def _lifespan(_app: FastAPI):
    """Release the shared HTTP client's connections on application shutdown."""
    yield
    await _http.aclose()


app = FastAPI(title="AI Gateway", version="1.0.0", lifespan=_lifespan)


@app.get("/health")
async def health():
    """Liveness probe; returns a static payload identifying the service."""
    return {"status": "ok", "service": "ai-gateway"}


async def _forward(url: str, body, timeout: float):
    """POST *body* as JSON to *url* and return the decoded JSON response.

    Raises:
        httpx.HTTPError: on transport failure or a non-2xx status.
        ValueError: if the upstream response body is not valid JSON.
    """
    resp = await _http.post(url, json=body, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """OpenAI-compatible chat endpoint with automatic fallback.

    The ``x-model-tier`` request header selects the route: ``premium`` goes
    to ``_call_premium``; anything else is sent to PRIMARY and, if that
    fails, retried once against FALLBACK.

    Raises:
        HTTPException: 400 if the request body is not valid JSON;
            502 if both PRIMARY and FALLBACK fail.
    """
    try:
        body = await request.json()
    except ValueError as exc:
        # A malformed payload is the client's error, not a server fault.
        raise HTTPException(400, "Request body must be valid JSON") from exc

    tier = request.headers.get("x-model-tier", "primary")
    if tier == "premium":
        return await _call_premium(body)

    try:
        payload = await _forward(PRIMARY, body, timeout=60)
    except _UPSTREAM_ERRORS as primary_exc:
        logger.warning("Primary endpoint failed (%r); using fallback", primary_exc)
        try:
            payload = await _forward(FALLBACK, body, timeout=120)
        except _UPSTREAM_ERRORS as fallback_exc:
            # Both upstreams are unusable: report a gateway error instead of
            # letting the exception escape as an opaque 500.
            raise HTTPException(
                502, "Both primary and fallback endpoints failed"
            ) from fallback_exc
    return JSONResponse(content=payload)


async def _call_premium(body: dict):
    """Call the Claude API, subject to the daily budget (not yet implemented).

    Raises:
        HTTPException: 429 once today's recorded spend reaches DAILY_BUDGET;
            503 if CLAUDE_API_KEY is not configured; otherwise 501, because
            the actual API call is deferred to Phase 3.
    """
    today = date.today().isoformat()
    if _daily_cost.get(today, 0) >= DAILY_BUDGET:
        raise HTTPException(429, f"일일 예산 초과: ${DAILY_BUDGET}")

    if not CLAUDE_API_KEY:
        raise HTTPException(503, "CLAUDE_API_KEY 미설정")

    # TODO: call the Anthropic API and record its cost (Phase 3).
    raise HTTPException(501, "Premium 모델 호출은 Phase 3에서 구현")
-- Full-text search index over title + extracted text.
CREATE INDEX idx_documents_fts ON documents
    USING GIN (to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')));

-- Trigram index (partial matching for Korean text).
CREATE INDEX idx_documents_trgm ON documents
    USING GIN ((coalesce(title, '') || ' ' || coalesce(extracted_text, '')) gin_trgm_ops);

-- Hash lookup for duplicate detection.
CREATE INDEX idx_documents_hash ON documents (file_hash);

-- Filters for selecting documents to reprocess by tool/model version.
CREATE INDEX idx_documents_ai_version ON documents (ai_model_version);
CREATE INDEX idx_documents_extractor_version ON documents (extractor_version);
CREATE INDEX idx_documents_embed_version ON documents (embed_model_version);

-- tasks table (CalDAV cache).
CREATE TABLE tasks (
    id BIGSERIAL PRIMARY KEY,
    caldav_uid TEXT UNIQUE,
    title TEXT NOT NULL,
    description TEXT,
    due_date TIMESTAMPTZ,
    priority SMALLINT DEFAULT 0,
    completed BOOLEAN DEFAULT FALSE,
    completed_at TIMESTAMPTZ,
    document_id BIGINT REFERENCES documents(id),
    source VARCHAR(50),
    created_at TIMESTAMPTZ DEFAULT NOW(),
    updated_at TIMESTAMPTZ DEFAULT NOW()
);

-- processing_queue table (asynchronous processing queue).
CREATE TABLE processing_queue (
    id BIGSERIAL PRIMARY KEY,
    document_id BIGINT REFERENCES documents(id) NOT NULL,
    stage process_stage NOT NULL,
    status process_status DEFAULT 'pending',
    attempts SMALLINT DEFAULT 0,
    max_attempts SMALLINT DEFAULT 3,
    error_message TEXT,
    created_at TIMESTAMPTZ DEFAULT NOW(),
    started_at TIMESTAMPTZ,
    completed_at TIMESTAMPTZ
);

-- At most one *active* job per (document, stage). The uniqueness is limited
-- to non-terminal states on purpose: a table-wide
-- UNIQUE (document_id, stage, status) would also forbid a second 'completed'
-- or 'failed' row, so re-queuing a document for a stage it has already been
-- through would fail when that job's status is updated.
CREATE UNIQUE INDEX uq_queue_active ON processing_queue (document_id, stage)
    WHERE status IN ('pending', 'processing');

-- Partial index covering only rows that are still pending.
CREATE INDEX idx_queue_pending ON processing_queue (stage, status)
    WHERE status = 'pending';
/**
 * kordoc microservice: HTTP API for converting HWP/HWPX/PDF documents to
 * Markdown. Parsing and comparison are still stubs (see the TODOs below).
 */

const express = require('express');

const DEFAULT_PORT = 3100;
// The listen port can be overridden through the PORT environment variable;
// an unset or non-numeric value falls back to the default.
const PORT = Number.parseInt(process.env.PORT, 10) || DEFAULT_PORT;

const app = express();
app.use(express.json({ limit: '500mb' }));

/** True when `value` was not supplied at all (undefined, null or ''). */
function isMissing(value) {
  return value === undefined || value === null || value === '';
}

// Health check
app.get('/health', (req, res) => {
  res.json({ status: 'ok', service: 'kordoc' });
});

// Parse a document.
// Body: { filePath: string }. Responds 400 when filePath is absent or not a string.
app.post('/parse', async (req, res) => {
  try {
    const { filePath } = req.body ?? {};
    if (isMissing(filePath)) {
      return res.status(400).json({ error: 'filePath is required' });
    }
    // Reject objects/arrays/numbers here so the parser only ever sees a path string.
    if (typeof filePath !== 'string') {
      return res.status(400).json({ error: 'filePath must be a string' });
    }

    // TODO: wire up the kordoc library (Phase 1)
    // const kordoc = require('kordoc');
    // const result = await kordoc.parse(filePath);
    // return res.json(result);

    return res.json({
      markdown: '',
      metadata: {},
      format: 'unknown',
      message: 'kordoc 파싱은 Phase 1에서 구현 예정'
    });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

// Compare two documents.
// Body: { filePathA: string, filePathB: string }. Responds 400 when either is absent or not a string.
app.post('/compare', async (req, res) => {
  try {
    const { filePathA, filePathB } = req.body ?? {};
    if (isMissing(filePathA) || isMissing(filePathB)) {
      return res.status(400).json({ error: 'filePathA and filePathB are required' });
    }
    if (typeof filePathA !== 'string' || typeof filePathB !== 'string') {
      return res.status(400).json({ error: 'filePathA and filePathB must be strings' });
    }

    // TODO: implement kordoc compare (Phase 2)
    return res.json({ diffs: [], message: 'compare는 Phase 2에서 구현 예정' });
  } catch (err) {
    res.status(500).json({ error: err.message });
  }
});

app.listen(PORT, () => {
  console.log(`kordoc-service listening on port ${PORT}`);
});
"""Base pytest fixture configuration."""

import pytest
import pytest_asyncio
from httpx import ASGITransport, AsyncClient


@pytest.fixture(scope="session")
def anyio_backend() -> str:
    """Session-scoped fixture that returns the backend name ``"asyncio"``.

    NOTE(review): presumably consumed by the anyio pytest plugin to select
    the event-loop backend; confirm that plugin is actually in use here.
    """
    return "asyncio"


# TODO: enable once Phase 0 is complete
# @pytest_asyncio.fixture
# async def client():
#     """FastAPI test client"""
#     from app.main import app
#     async with AsyncClient(
#         transport=ASGITransport(app=app),
#         base_url="http://test"
#     ) as ac:
#         yield ac