feat: scaffold v2 project structure with Docker, FastAPI, and config
동작하는 최소 코드 수준의 v2 스캐폴딩: - docker-compose.yml: postgres, fastapi, kordoc, frontend, caddy - app/: FastAPI 백엔드 (main, core, models, ai, prompts) - services/kordoc/: Node.js 문서 파싱 마이크로서비스 - gpu-server/: AI Gateway + GPU docker-compose - frontend/: SvelteKit 기본 구조 - migrations/: PostgreSQL 초기 스키마 (documents, tasks, processing_queue) - tests/: pytest conftest 기본 설정 - config.yaml, Caddyfile, credentials.env.example 갱신 Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
106
migrations/001_initial_schema.sql
Normal file
106
migrations/001_initial_schema.sql
Normal file
@@ -0,0 +1,106 @@
|
||||
-- hyungi_Document_Server 초기 스키마
|
||||
-- PostgreSQL 16 + pgvector + pg_trgm
|
||||
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
CREATE EXTENSION IF NOT EXISTS pg_trgm;
|
||||
|
||||
-- ENUM 타입
|
||||
CREATE TYPE doc_type AS ENUM ('immutable', 'editable', 'note');
|
||||
CREATE TYPE source_channel AS ENUM (
|
||||
'law_monitor', 'devonagent', 'email', 'web_clip',
|
||||
'tksafety', 'inbox_route', 'manual', 'drive_sync'
|
||||
);
|
||||
CREATE TYPE data_origin AS ENUM ('work', 'external');
|
||||
CREATE TYPE process_stage AS ENUM ('extract', 'classify', 'embed');
|
||||
CREATE TYPE process_status AS ENUM ('pending', 'processing', 'completed', 'failed');
|
||||
|
||||
-- documents 테이블
|
||||
CREATE TABLE documents (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
|
||||
-- 1계층: 원본 파일 참조
|
||||
file_path TEXT NOT NULL UNIQUE,
|
||||
file_hash CHAR(64) NOT NULL,
|
||||
file_format VARCHAR(20) NOT NULL,
|
||||
file_size BIGINT,
|
||||
file_type doc_type NOT NULL DEFAULT 'immutable',
|
||||
import_source TEXT,
|
||||
|
||||
-- 2계층: 텍스트 추출
|
||||
extracted_text TEXT,
|
||||
extracted_at TIMESTAMPTZ,
|
||||
extractor_version VARCHAR(50),
|
||||
|
||||
-- 2계층: AI 가공
|
||||
ai_summary TEXT,
|
||||
ai_tags JSONB DEFAULT '[]',
|
||||
ai_domain VARCHAR(100),
|
||||
ai_sub_group VARCHAR(100),
|
||||
ai_model_version VARCHAR(50),
|
||||
ai_processed_at TIMESTAMPTZ,
|
||||
|
||||
-- 3계층: 벡터 임베딩
|
||||
embedding vector(768),
|
||||
embed_model_version VARCHAR(50),
|
||||
embedded_at TIMESTAMPTZ,
|
||||
|
||||
-- 메타데이터
|
||||
source_channel source_channel,
|
||||
data_origin data_origin,
|
||||
title TEXT,
|
||||
|
||||
-- 타임스탬프
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- 전문검색 인덱스
|
||||
CREATE INDEX idx_documents_fts ON documents
|
||||
USING GIN (to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')));
|
||||
|
||||
-- 트리그램 인덱스 (한국어 부분 매칭)
|
||||
CREATE INDEX idx_documents_trgm ON documents
|
||||
USING GIN ((coalesce(title, '') || ' ' || coalesce(extracted_text, '')) gin_trgm_ops);
|
||||
|
||||
-- 해시 기반 중복 검색
|
||||
CREATE INDEX idx_documents_hash ON documents (file_hash);
|
||||
|
||||
-- 재가공 대상 필터링
|
||||
CREATE INDEX idx_documents_ai_version ON documents (ai_model_version);
|
||||
CREATE INDEX idx_documents_extractor_version ON documents (extractor_version);
|
||||
CREATE INDEX idx_documents_embed_version ON documents (embed_model_version);
|
||||
|
||||
-- tasks 테이블 (CalDAV 캐시)
|
||||
CREATE TABLE tasks (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
caldav_uid TEXT UNIQUE,
|
||||
title TEXT NOT NULL,
|
||||
description TEXT,
|
||||
due_date TIMESTAMPTZ,
|
||||
priority SMALLINT DEFAULT 0,
|
||||
completed BOOLEAN DEFAULT FALSE,
|
||||
completed_at TIMESTAMPTZ,
|
||||
document_id BIGINT REFERENCES documents(id),
|
||||
source VARCHAR(50),
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
updated_at TIMESTAMPTZ DEFAULT NOW()
|
||||
);
|
||||
|
||||
-- processing_queue 테이블 (비동기 가공 큐)
|
||||
CREATE TABLE processing_queue (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
document_id BIGINT REFERENCES documents(id) NOT NULL,
|
||||
stage process_stage NOT NULL,
|
||||
status process_status DEFAULT 'pending',
|
||||
attempts SMALLINT DEFAULT 0,
|
||||
max_attempts SMALLINT DEFAULT 3,
|
||||
error_message TEXT,
|
||||
created_at TIMESTAMPTZ DEFAULT NOW(),
|
||||
started_at TIMESTAMPTZ,
|
||||
completed_at TIMESTAMPTZ,
|
||||
|
||||
UNIQUE (document_id, stage, status)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_queue_pending ON processing_queue (stage, status)
|
||||
WHERE status = 'pending';
|
||||
Reference in New Issue
Block a user