"""documents 테이블 ORM""" from datetime import datetime from pgvector.sqlalchemy import Vector from sqlalchemy import BigInteger, DateTime, Enum, String, Text from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, mapped_column from core.database import Base class Document(Base): __tablename__ = "documents" id: Mapped[int] = mapped_column(BigInteger, primary_key=True) # 1계층: 원본 파일 file_path: Mapped[str] = mapped_column(Text, unique=True, nullable=False) file_hash: Mapped[str] = mapped_column(String(64), nullable=False) file_format: Mapped[str] = mapped_column(String(20), nullable=False) file_size: Mapped[int | None] = mapped_column(BigInteger) file_type: Mapped[str] = mapped_column( Enum("immutable", "editable", "note", name="doc_type"), default="immutable" ) import_source: Mapped[str | None] = mapped_column(Text) # 2계층: 텍스트 추출 extracted_text: Mapped[str | None] = mapped_column(Text) extracted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) extractor_version: Mapped[str | None] = mapped_column(String(50)) # 2계층: AI 가공 ai_summary: Mapped[str | None] = mapped_column(Text) ai_tags: Mapped[dict | None] = mapped_column(JSONB, default=[]) ai_domain: Mapped[str | None] = mapped_column(String(100)) ai_sub_group: Mapped[str | None] = mapped_column(String(100)) ai_model_version: Mapped[str | None] = mapped_column(String(50)) ai_processed_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) # 3계층: 벡터 임베딩 embedding = mapped_column(Vector(768), nullable=True) embed_model_version: Mapped[str | None] = mapped_column(String(50)) embedded_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) # 메타데이터 source_channel: Mapped[str | None] = mapped_column( Enum("law_monitor", "devonagent", "email", "web_clip", "tksafety", "inbox_route", "manual", "drive_sync", name="source_channel") ) data_origin: Mapped[str | None] = mapped_column( Enum("work", "external", name="data_origin") ) title: Mapped[str | None] = mapped_column(Text) # 타임스탬프 created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now, onupdate=datetime.now )