"""document_chunks 테이블 ORM — chunk 단위 검색 (Phase 0.1)""" from datetime import datetime from pgvector.sqlalchemy import Vector from sqlalchemy import BigInteger, DateTime, ForeignKey, Integer, String, Text, UniqueConstraint from sqlalchemy.orm import Mapped, mapped_column, relationship from core.database import Base class DocumentChunk(Base): __tablename__ = "document_chunks" id: Mapped[int] = mapped_column(BigInteger, primary_key=True) doc_id: Mapped[int] = mapped_column( BigInteger, ForeignKey("documents.id", ondelete="CASCADE"), nullable=False ) chunk_index: Mapped[int] = mapped_column(Integer, nullable=False) # chunking 전략 메타 chunk_type: Mapped[str] = mapped_column(String(30), nullable=False) section_title: Mapped[str | None] = mapped_column(Text) heading_path: Mapped[str | None] = mapped_column(Text) page: Mapped[int | None] = mapped_column(Integer) # 다국어/domain 메타 language: Mapped[str | None] = mapped_column(String(10)) country: Mapped[str | None] = mapped_column(String(10)) source: Mapped[str | None] = mapped_column(String(100)) domain_category: Mapped[str] = mapped_column(String(20), nullable=False) # 본문 + 임베딩 text: Mapped[str] = mapped_column(Text, nullable=False) embedding = mapped_column(Vector(1024), nullable=True) created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now ) updated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now, onupdate=datetime.now ) __table_args__ = ( UniqueConstraint("doc_id", "chunk_index", name="uq_chunks_doc_index"), )