-- hyungi_Document_Server 초기 스키마 -- PostgreSQL 16 + pgvector + pg_trgm CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS pg_trgm; -- ENUM 타입 CREATE TYPE doc_type AS ENUM ('immutable', 'editable', 'note'); CREATE TYPE source_channel AS ENUM ( 'law_monitor', 'devonagent', 'email', 'web_clip', 'tksafety', 'inbox_route', 'manual', 'drive_sync' ); CREATE TYPE data_origin AS ENUM ('work', 'external'); CREATE TYPE process_stage AS ENUM ('extract', 'classify', 'embed'); CREATE TYPE process_status AS ENUM ('pending', 'processing', 'completed', 'failed'); -- documents 테이블 CREATE TABLE documents ( id BIGSERIAL PRIMARY KEY, -- 1계층: 원본 파일 참조 file_path TEXT NOT NULL UNIQUE, file_hash CHAR(64) NOT NULL, file_format VARCHAR(20) NOT NULL, file_size BIGINT, file_type doc_type NOT NULL DEFAULT 'immutable', import_source TEXT, -- 2계층: 텍스트 추출 extracted_text TEXT, extracted_at TIMESTAMPTZ, extractor_version VARCHAR(50), -- 2계층: AI 가공 ai_summary TEXT, ai_tags JSONB DEFAULT '[]', ai_domain VARCHAR(100), ai_sub_group VARCHAR(100), ai_model_version VARCHAR(50), ai_processed_at TIMESTAMPTZ, -- 3계층: 벡터 임베딩 embedding vector(768), embed_model_version VARCHAR(50), embedded_at TIMESTAMPTZ, -- 메타데이터 source_channel source_channel, data_origin data_origin, title TEXT, -- 타임스탬프 created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); -- 전문검색 인덱스 CREATE INDEX idx_documents_fts ON documents USING GIN (to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, ''))); -- 트리그램 인덱스 (한국어 부분 매칭) CREATE INDEX idx_documents_trgm ON documents USING GIN ((coalesce(title, '') || ' ' || coalesce(extracted_text, '')) gin_trgm_ops); -- 해시 기반 중복 검색 CREATE INDEX idx_documents_hash ON documents (file_hash); -- 재가공 대상 필터링 CREATE INDEX idx_documents_ai_version ON documents (ai_model_version); CREATE INDEX idx_documents_extractor_version ON documents (extractor_version); CREATE INDEX idx_documents_embed_version ON documents (embed_model_version); -- tasks 테이블 (CalDAV 캐시) CREATE TABLE tasks ( id BIGSERIAL PRIMARY KEY, caldav_uid TEXT UNIQUE, title TEXT NOT NULL, description TEXT, due_date TIMESTAMPTZ, priority SMALLINT DEFAULT 0, completed BOOLEAN DEFAULT FALSE, completed_at TIMESTAMPTZ, document_id BIGINT REFERENCES documents(id), source VARCHAR(50), created_at TIMESTAMPTZ DEFAULT NOW(), updated_at TIMESTAMPTZ DEFAULT NOW() ); -- processing_queue 테이블 (비동기 가공 큐) CREATE TABLE processing_queue ( id BIGSERIAL PRIMARY KEY, document_id BIGINT REFERENCES documents(id) NOT NULL, stage process_stage NOT NULL, status process_status DEFAULT 'pending', attempts SMALLINT DEFAULT 0, max_attempts SMALLINT DEFAULT 3, error_message TEXT, created_at TIMESTAMPTZ DEFAULT NOW(), started_at TIMESTAMPTZ, completed_at TIMESTAMPTZ, UNIQUE (document_id, stage, status) ); CREATE INDEX idx_queue_pending ON processing_queue (stage, status) WHERE status = 'pending';