diff --git a/app/ai/client.py b/app/ai/client.py index 6ff06b5..887c7ff 100644 --- a/app/ai/client.py +++ b/app/ai/client.py @@ -289,13 +289,16 @@ class AIClient: return response.json() async def _call_chat(self, model_config, prompt: str) -> str: - """OpenAI 호환 API 호출 + 자동 폴백""" - try: - return await self._request(model_config, prompt) - except (httpx.TimeoutException, httpx.ConnectError): - if model_config == self.ai.primary: - return await self._request(self.ai.fallback, prompt) - raise + """OpenAI 호환 API 호출 (R6: 무동의 클라우드 폴백 제거). + + 이전엔 primary(맥미니) TimeoutException/ConnectError 시 동의·과금 통제 없이 + self.ai.fallback(Claude API)로 자동 전환 → 개인 문서/쿼리/메모가 Anthropic 으로 + silent egress. on-prem 추론 프라이버시 계약 위반이라 봉쇄한다. 실패는 그대로 전파: + 배치 워커는 재시도/StageDeferred(R3·queue_consumer), interactive 호출자는 5xx 표면화 + (documents.analyze 등 이미 502/504 변환). 클라우드는 premium explicit-trigger + (summarize force_premium) 또는 call_fallback 명시 호출로만 — 자동 진입 금지. + """ + return await self._request(model_config, prompt) async def _request(self, model_config, prompt: str, system: str | None = None) -> str: """단일 모델 API 호출 (OpenAI 호환 + Anthropic Messages API). diff --git a/app/api/briefing.py b/app/api/briefing.py index f784a18..64cf2c0 100644 --- a/app/api/briefing.py +++ b/app/api/briefing.py @@ -195,8 +195,14 @@ async def regenerate( date 미지정 시 오늘 KST. 같은 날 row 존재 시 transaction 안에서 삭제 후 신규 생성. 응답 status='success' | 'partial' | 'failed' | 'empty'. """ + from core.config import settings from workers.briefing_worker import run + # held(정책상 정상 보류)를 409 로 표면화 (R8) — digest.py 정본 대칭. 이전엔 briefing_worker.run() + # 이 held/timeout/exception 셋 다 None 반환 → API 가 셋 다 500 으로 오보(silent-state-conflation). 
+ if "briefing" in settings.pipeline_held_stages: + raise HTTPException(status_code=409, detail="briefing 단계가 일시 보류(held) 상태입니다") + result = await run(target_date=date) if result is None: raise HTTPException(status_code=500, detail="briefing 워커 실행 실패 (로그 확인)") diff --git a/app/api/documents.py b/app/api/documents.py index 31b4722..2d5ddde 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -69,6 +69,19 @@ def _upload_error(status_code: int, error_code: str, message: str) -> HTTPExcept ) +async def get_live_document(session: AsyncSession, doc_id: int) -> Document: + """soft-delete(deleted_at) 가드 포함 문서 조회 — 없거나 삭제됐으면 404 (R7). + + 조회/수정 경로는 deleted_at 을 일관 가드하나 파일/콘텐츠 서빙 엔드포인트가 누락 → + 삭제 문서의 원본/preview/전문이 doc_id(+유효 토큰)만으로 노출되던 비대칭. '경로마다 + deleted_at 기억'에 의존하지 않게 헬퍼로 구조 강제(추가될 서빙 경로도 자동 보호). + """ + doc = await session.get(Document, doc_id) + if not doc or doc.deleted_at is not None: + raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + return doc + + async def _near_dup_scan_bg(doc_id: int) -> None: """B-3: post-upload near_duplicate 스캔 (BackgroundTask). 자체 세션, best-effort. 
@@ -838,9 +851,7 @@ async def get_document_file( # 일반 Bearer 헤더 인증 시도 raise HTTPException(status_code=401, detail="토큰이 필요합니다") - doc = await session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + doc = await get_live_document(session, doc_id) # note(메모)는 물리 파일이 없음 if not doc.file_path: @@ -943,10 +954,8 @@ async def get_document_image_raw( if not payload or payload.get("type") != "access": raise HTTPException(status_code=401, detail="유효하지 않은 토큰") - # 문서 존재 확인 (image_key 만 있고 doc 가 사라진 케이스 차단) - doc = await session.get(Document, doc_id) - if doc is None: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + # 문서 존재 확인 (image_key 만 있고 doc 가 사라진 케이스 차단 + soft-delete 가드) + doc = await get_live_document(session, doc_id) img = await session.scalar( select(DocumentImage).where( @@ -1357,9 +1366,8 @@ async def save_document_content( body: dict = None, ): """Markdown 원본 파일 저장 + extracted_text 갱신""" - doc = await session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + # soft-delete 문서엔 쓰기 차단 (R7 — 삭제 문서 resurrect / NAS 재기록 방지) + doc = await get_live_document(session, doc_id) if doc.file_format not in ("md", "txt"): raise HTTPException(status_code=400, detail="편집 가능한 포맷이 아닙니다 (md, txt만 가능)") @@ -1399,9 +1407,7 @@ async def get_document_preview( else: raise HTTPException(status_code=401, detail="토큰이 필요합니다") - doc = await session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + doc = await get_live_document(session, doc_id) preview_path = Path(settings.nas_mount_path) / "PKM" / ".preview" / f"{doc_id}.pdf" if not preview_path.exists(): @@ -1427,18 +1433,24 @@ async def delete_document( doc_id: int, user: Annotated[User, Depends(get_current_user)], session: Annotated[AsyncSession, Depends(get_session)], - delete_file: bool = Query(False, description="NAS 파일도 함께 삭제"), + delete_file: bool = Query(False, 
description="NAS 원본도 삭제 (grace 후 retention sweep 이 물리삭제)"), ): - """문서 삭제 (기본: DB만 삭제, 파일 유지)""" - doc = await session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + """문서 삭제. 기본: soft-delete(숨김, 파일 보존). delete_file=true: purge 예약 (R7).""" + doc = await get_live_document(session, doc_id) - # soft-delete (물리 파일은 cleanup job에서 나중에 정리) - doc.deleted_at = datetime.now(timezone.utc) + # soft-delete(숨김). delete_file=true 면 purge_requested_at 마커를 추가로 set — + # retention sweep cron(document_purge_sweep)이 grace(30일) 경과 후 NAS 원본 물리삭제 + # + audit-log. ★일반 숨김(delete_file=false)은 파일 보존 = undelete 가능. sweep 는 + # deleted_at 이 아니라 purge_requested_at 기준이라 단순 숨김이 영구삭제되지 않는다. + now = datetime.now(timezone.utc) + doc.deleted_at = now + if delete_file: + doc.purge_requested_at = now await session.commit() - return {"message": f"문서 {doc_id} soft-delete 완료"} + if delete_file: + return {"message": f"문서 {doc_id} 삭제 — NAS 원본은 30일 후 정리 예약"} + return {"message": f"문서 {doc_id} soft-delete 완료 (파일 보존)"} @router.get("/{doc_id}/content") @@ -1448,9 +1460,7 @@ async def get_document_content( session: Annotated[AsyncSession, Depends(get_session)], ): """문서 전문 텍스트 반환 (서비스 호출용).""" - doc = await session.get(Document, doc_id) - if not doc: - raise HTTPException(status_code=404, detail="문서를 찾을 수 없습니다") + doc = await get_live_document(session, doc_id) raw_text = doc.extracted_text or "" content = raw_text[:15000] diff --git a/app/api/events.py b/app/api/events.py index d6be9ff..6cc8f25 100644 --- a/app/api/events.py +++ b/app/api/events.py @@ -21,7 +21,7 @@ from zoneinfo import ZoneInfo from fastapi import APIRouter, Body, Depends, HTTPException, Query from pydantic import BaseModel, Field -from sqlalchemy import and_, or_, select +from sqlalchemy import and_, func, or_, select from sqlalchemy.ext.asyncio import AsyncSession from core.auth import get_current_user @@ -388,10 +388,10 @@ async def list_events( ) base = select(Event).where(and_(*where)) 
- total_q = await session.execute( - select(Event.id).where(and_(*where)) - ) - total = len(total_q.scalars().all()) + # R10: 전체 ID 로딩 후 len() 대신 DB COUNT 푸시다운 (행 수 선형 메모리/전송 비용 제거). + total = ( + await session.execute(select(func.count(Event.id)).where(and_(*where))) + ).scalar() or 0 rows = await session.execute( base.order_by(Event.created_at.desc()) diff --git a/app/api/internal_study.py b/app/api/internal_study.py index 2ae3cda..19c274f 100644 --- a/app/api/internal_study.py +++ b/app/api/internal_study.py @@ -6,6 +6,7 @@ Bearer token 보호 (settings.internal_worker_token). """ from __future__ import annotations +import hmac import logging from fastapi import APIRouter, Depends, Header, HTTPException, Path, Response, status @@ -28,7 +29,10 @@ def _verify_token(authorization: str | None = Header(default=None)) -> None: if not authorization or not authorization.lower().startswith("bearer "): raise HTTPException(status_code=401, detail="missing Bearer token") token = authorization[7:].strip() - if token != settings.internal_worker_token: + # 상수시간 비교 (R7) — 일반 != 는 첫 불일치에서 단락돼 prefix 길이로 바이트 추정 가능한 + # timing side-channel. 이 토큰이 RAG 정답 포함 endpoint 를 보호하므로 compare_digest 로 + # 통일(search.py 정본과 일치). 
+ if not hmac.compare_digest(token, settings.internal_worker_token): raise HTTPException(status_code=403, detail="invalid token") diff --git a/app/api/library.py b/app/api/library.py index fd201a6..a0137d7 100644 --- a/app/api/library.py +++ b/app/api/library.py @@ -473,72 +473,35 @@ async def get_facet_counts( result = FacetCountsResponse(company=[], topic=[], year=[], doctype=[]) - # company counts (다른 facet 필터 적용, 자기 자신 제외) - q_company = base_query() - if facet_topic: - q_company = q_company.where(Document.facet_topic == facet_topic) - if facet_year: - q_company = q_company.where(Document.facet_year == facet_year) - if facet_doctype: - q_company = q_company.where(Document.facet_doctype == facet_doctype) - rows = await session.execute( - select(Document.facet_company, func.count()) - .where(Document.facet_company != None) # noqa: E711 - .where(Document.id.in_(q_company.with_only_columns(Document.id).subquery().select())) - .group_by(Document.facet_company) - .order_by(func.count().desc()) - ) - result.company = [FacetCountItem(value=r[0], count=r[1]) for r in rows] - - # topic counts - q_topic = base_query() + # R10: 4 facet 블록 중복 제거 — 적용된 facet 필터(값 있는 것만)를 모아 각 축 집계 시 + # '자기 자신 축'만 제외하고 적용하는 헬퍼로. 쿼리/자기제외/order_by/value 매핑 모두 동일. 
+ applied: dict = {} if facet_company: - q_topic = q_topic.where(Document.facet_company == facet_company) - if facet_year: - q_topic = q_topic.where(Document.facet_year == facet_year) - if facet_doctype: - q_topic = q_topic.where(Document.facet_doctype == facet_doctype) - rows = await session.execute( - select(Document.facet_topic, func.count()) - .where(Document.facet_topic != None) # noqa: E711 - .where(Document.id.in_(q_topic.with_only_columns(Document.id).subquery().select())) - .group_by(Document.facet_topic) - .order_by(func.count().desc()) - ) - result.topic = [FacetCountItem(value=r[0], count=r[1]) for r in rows] - - # year counts - q_year = base_query() - if facet_company: - q_year = q_year.where(Document.facet_company == facet_company) + applied["company"] = Document.facet_company == facet_company if facet_topic: - q_year = q_year.where(Document.facet_topic == facet_topic) - if facet_doctype: - q_year = q_year.where(Document.facet_doctype == facet_doctype) - rows = await session.execute( - select(Document.facet_year, func.count()) - .where(Document.facet_year != None) # noqa: E711 - .where(Document.id.in_(q_year.with_only_columns(Document.id).subquery().select())) - .group_by(Document.facet_year) - .order_by(Document.facet_year.desc()) - ) - result.year = [FacetCountItem(value=str(r[0]), count=r[1]) for r in rows] - - # doctype counts - q_doctype = base_query() - if facet_company: - q_doctype = q_doctype.where(Document.facet_company == facet_company) - if facet_topic: - q_doctype = q_doctype.where(Document.facet_topic == facet_topic) + applied["topic"] = Document.facet_topic == facet_topic if facet_year: - q_doctype = q_doctype.where(Document.facet_year == facet_year) - rows = await session.execute( - select(Document.facet_doctype, func.count()) - .where(Document.facet_doctype != None) # noqa: E711 - .where(Document.id.in_(q_doctype.with_only_columns(Document.id).subquery().select())) - .group_by(Document.facet_doctype) - .order_by(func.count().desc()) - 
) - result.doctype = [FacetCountItem(value=r[0], count=r[1]) for r in rows] + applied["year"] = Document.facet_year == facet_year + if facet_doctype: + applied["doctype"] = Document.facet_doctype == facet_doctype + + async def _facet_count(name, facet_col, order_by, value_fn): + q = base_query() + for k, cond in applied.items(): + if k != name: # 자기 자신 facet 필터는 제외 (다른 축만 적용) + q = q.where(cond) + rows = await session.execute( + select(facet_col, func.count()) + .where(facet_col != None) # noqa: E711 + .where(Document.id.in_(q.with_only_columns(Document.id).subquery().select())) + .group_by(facet_col) + .order_by(order_by) + ) + return [FacetCountItem(value=value_fn(r[0]), count=r[1]) for r in rows] + + result.company = await _facet_count("company", Document.facet_company, func.count().desc(), lambda v: v) + result.topic = await _facet_count("topic", Document.facet_topic, func.count().desc(), lambda v: v) + result.year = await _facet_count("year", Document.facet_year, Document.facet_year.desc(), lambda v: str(v)) + result.doctype = await _facet_count("doctype", Document.facet_doctype, func.count().desc(), lambda v: v) return result diff --git a/app/api/memos.py b/app/api/memos.py index e5d22b5..e2032f9 100644 --- a/app/api/memos.py +++ b/app/api/memos.py @@ -300,9 +300,13 @@ async def list_memos( base = base.where(Document.pinned == pinned) if tag: + # 파라미터 바인딩 (R7) — f-string 으로 사용자 tag 를 JSON 배열 리터럴에 직접 삽입하면 + # tag 안 " 나 ] 가 JSON 을 깨 500 + 필터 의미 변형. jsonb_build_array 로 tag 를 + # 바인드 파라미터로 전달(@> JSONB containment). 
+ tag_arr = func.jsonb_build_array(tag) base = base.where( - Document.user_tags.op("@>")(f'["{tag}"]') - | Document.ai_tags.op("@>")(f'["{tag}"]') + Document.user_tags.op("@>")(tag_arr) + | Document.ai_tags.op("@>")(tag_arr) ) count_query = select(func.count()).select_from(base.subquery()) diff --git a/app/api/news.py b/app/api/news.py index 199e249..ff33370 100644 --- a/app/api/news.py +++ b/app/api/news.py @@ -65,7 +65,8 @@ async def create_source( ): from core.url_validator import validate_feed_url try: - validate_feed_url(body.feed_url) + # getaddrinfo(DNS) 는 blocking — 이벤트 루프 점유 방지 위해 off-thread (R5) + await asyncio.to_thread(validate_feed_url, body.feed_url) except ValueError as e: raise HTTPException(status_code=422, detail=f"feed_url 검증 실패: {e}") source = NewsSource(**body.model_dump()) @@ -194,10 +195,17 @@ async def trigger_collect( if _collect_lock.locked(): raise HTTPException(status_code=429, detail="수집이 이미 진행 중입니다") + # TOCTOU 제거 (R9) — 기존엔 locked() 체크 후 실제 acquire 가 별도 task 안에서 일어나, 그 + # 사이 다른 요청이 끼어들어 이중 수집 task 가 생길 수 있었다. 핸들러에서 동기적으로(uncontended + # Lock.acquire 는 이벤트루프 양보 없이 즉시 완료) acquire 하고 task 의 finally 에서 release. + await _collect_lock.acquire() + async def _run_with_lock(): - async with _collect_lock: + try: from workers.news_collector import run await run() + finally: + _collect_lock.release() asyncio.create_task(_run_with_lock()) return {"message": "뉴스 수집 시작됨"} diff --git a/app/api/search.py b/app/api/search.py index 2cd43ed..b781986 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -291,7 +291,7 @@ async def search( content={ "error_reason": "unknown_embedding_backend", "backend_requested": embedding_backend, - "allowed": ["baseline", "cand_me5_large_inst", "cand_snowflake_l_v2"], + "allowed": ["baseline"], "detail": msg, }, ) @@ -710,7 +710,9 @@ async def ask( # 30s 로 align → classifier 동작 안정. ask 응답 latency 상한 ↑ 의도. 
try: classifier_result = await asyncio.wait_for(classifier_task, timeout=30.0) - except (asyncio.TimeoutError, Exception): + except asyncio.CancelledError: + raise # 요청 취소는 전파 — broad except 가 삼키지 않게 명시 (R3) + except Exception: classifier_result = ClassifierResult("timeout", None, [], [], 0.0) defense_log["classifier"] = { @@ -872,7 +874,9 @@ async def ask( # → classifier 와 동일 패턴 (search.py:522 가 6s→15s swap 했던 case). 10s 로 align. try: verifier_result = await asyncio.wait_for(verifier_task, timeout=10.0) - except (asyncio.TimeoutError, Exception): + except asyncio.CancelledError: + raise # 요청 취소는 전파 — broad except 가 삼키지 않게 명시 (R3) + except Exception: verifier_result = VerifierResult("timeout", [], 0.0) # Verifier contradictions → grounding flags 머지 (prefix 로 구분, severity 3단계) diff --git a/app/api/study_questions.py b/app/api/study_questions.py index 86ed340..fcef421 100644 --- a/app/api/study_questions.py +++ b/app/api/study_questions.py @@ -1009,7 +1009,16 @@ async def submit_attempt( # PR-10: 세션 연동. 기본은 None. quiz_session: StudyQuizSession | None = None if body.quiz_session_id is not None: - quiz_session = await session.get(StudyQuizSession, body.quiz_session_id) + # FOR UPDATE 로 행 잠금 (R9) — 모바일 더블탭/재시도로 같은 세션에 동시 제출이 들어오면 + # 둘 다 cursor=N 을 읽고 둘 다 cursor+1·count 가산하는 race(이중 가산). 잠금으로 직렬화 → + # 두 번째 제출은 첫 commit 후 cursor=N+1 을 보고 cursor 불일치 409 로 거부된다. 
+ quiz_session = ( + await session.execute( + select(StudyQuizSession) + .where(StudyQuizSession.id == body.quiz_session_id) + .with_for_update() + ) + ).scalar_one_or_none() if quiz_session is None or quiz_session.user_id != user.id: raise HTTPException(status_code=404, detail="quiz_session 을 찾을 수 없습니다") if quiz_session.study_topic_id != q.study_topic_id: diff --git a/app/core/database.py b/app/core/database.py index 9dca470..cbf4052 100644 --- a/app/core/database.py +++ b/app/core/database.py @@ -72,6 +72,55 @@ def _validate_sql_content(name: str, sql: str) -> None: ) +# R1: baseline 스냅샷이 대표하는 마지막 마이그레이션 버전 (이하 버전은 baseline 에 포함). +# 새 baseline 재생성 시 이 값을 갱신한다 (migrations/_baseline/_schema_baseline.sql). +_BASELINE_CUTOFF = 358 + + +async def _load_baseline_if_fresh(conn, migrations_dir: Path) -> None: + """fresh DB(documents 부재)면 baseline 스키마 스냅샷 적재 + schema_migrations 1..cutoff 스탬프. + + 기존 DB(documents 존재)는 즉시 반환 — baseline 미적재, 무영향. baseline 파일 부재 시도 + 기존 replay 경로 유지(하위호환). + """ + from sqlalchemy import text + + baseline_dir = migrations_dir / "_baseline" + baseline_files = ( + sorted(baseline_dir.glob("*_schema_baseline.sql")) if baseline_dir.is_dir() else [] + ) + if not baseline_files: + return + + docs_exists = ( + await conn.execute(text("SELECT to_regclass('public.documents') IS NOT NULL")) + ).scalar() + if docs_exists: + return # 기존 DB — baseline skip + + baseline_path = baseline_files[-1] + logger.info(f"[migration] fresh DB 감지 — baseline 적재: {baseline_path.name}") + # baseline 은 multi-statement 덤프 — exec_driver_sql(asyncpg prepared)은 multi-statement + # 불허("cannot insert multiple commands into a prepared statement"). raw asyncpg 의 simple + # 프로토콜 execute() 로 적재한다(같은 connection = 현재 트랜잭션 내). psql 스모크는 이 제약을 + # 못 잡으므로 init_db 런타임 검증으로 확인됨. + raw = await conn.get_raw_connection() + await raw.driver_connection.execute(baseline_path.read_text(encoding="utf-8")) + # baseline = cutoff 까지의 스키마 → 실제 파일 버전 기준으로 schema_migrations 스탬프. 
+ versions = [v for v, _, _ in _parse_migration_files(migrations_dir) if v <= _BASELINE_CUTOFF] + for v in versions: + await conn.execute( + text( + "INSERT INTO schema_migrations (version, name) " + "VALUES (:v, :n) ON CONFLICT DO NOTHING" + ), + {"v": v, "n": f"baseline:{v}"}, + ) + logger.info( + f"[migration] baseline 적재 + schema_migrations {len(versions)}건 스탬프 (cutoff {_BASELINE_CUTOFF})" + ) + + async def _run_migrations(conn) -> None: """미적용 migration 실행 (호출자가 트랜잭션 관리)""" from sqlalchemy import text @@ -90,10 +139,6 @@ async def _run_migrations(conn) -> None: f"SELECT pg_advisory_xact_lock({_MIGRATION_LOCK_KEY})" )) - # 적용 이력 조회 - result = await conn.execute(text("SELECT version FROM schema_migrations")) - applied = {row[0] for row in result} - # migration 파일 스캔 # /app/core/database.py → parent.parent = /app → /app/migrations (volume mount 위치) migrations_dir = Path(__file__).resolve().parent.parent / "migrations" @@ -101,6 +146,15 @@ async def _run_migrations(conn) -> None: logger.info("[migration] migrations/ 디렉토리 없음, 스킵") return + # R1: fresh DB(documents 부재)면 baseline 스냅샷 먼저 적재 + schema_migrations 스탬프. + # migrations/ 전체 replay 는 누적 비-replayable(011 view 의존·326 enum-same-txn 등)로 + # 깨지므로 신규/DR 환경은 prod 스키마 스냅샷에서 출발한다. 기존 DB 는 skip(무영향). 
+ await _load_baseline_if_fresh(conn, migrations_dir) + + # 적용 이력 조회 (baseline 스탬프 반영 — fresh DB 는 1..cutoff 가 이미 applied) + result = await conn.execute(text("SELECT version FROM schema_migrations")) + applied = {row[0] for row in result} + files = _parse_migration_files(migrations_dir) pending = [(v, name, path) for v, name, path in files if v not in applied] diff --git a/app/main.py b/app/main.py index 3c240ac..99320d0 100644 --- a/app/main.py +++ b/app/main.py @@ -51,6 +51,7 @@ async def lifespan(app: FastAPI): from workers.briefing_worker import run as morning_briefing_run from workers.daily_digest import run as daily_digest_run from workers.dedup_reconcile import run as dedup_reconcile_run + from workers.document_purge_sweep import run as purge_sweep_run from workers.digest_worker import run as global_digest_run from workers.file_watcher import watch_inbox from workers.mailplus_archive import run as mailplus_run @@ -150,6 +151,9 @@ async def lifespan(app: FastAPI): # plan ds-s1-backend-1 B-4: dedup 컬럼(duplicate_of/duplicate_count) 야간 절대 재계산. # soft-delete 잔여 드리프트 정리(멱등, 드리프트 없으면 no-op). cron 03:30 (다른 잡과 비충돌). scheduler.add_job(dedup_reconcile_run, CronTrigger(hour=3, minute=30, timezone=KST), id="dedup_reconcile") + # R7: delete_file=true purge 요청 문서의 NAS 원본 grace(30일) 후 물리삭제 + audit. + # purge_requested_at 마커 기준(단순 숨김은 보존). 03:20 = 다른 새벽 잡과 비충돌 슬롯. + scheduler.add_job(purge_sweep_run, CronTrigger(hour=3, minute=20, timezone=KST), id="purge_sweep") # B-3 PR4: 레거시 paper 행 arXiv DataCite DOI 스탬프(재유입 차단). keyless·in-DB·enqueue 0. # dedup_reconcile(03:30)·fulltext_reconcile(03:40) 와 별 worker·비충돌 슬롯. scheduler.add_job(paper_doi_reconcile_run, CronTrigger(hour=3, minute=50, timezone=KST), id="paper_doi_reconcile") @@ -236,21 +240,27 @@ SETUP_BYPASS_PREFIXES = ( "/api/setup", "/api/config", "/setup", "/health", "/docs", "/openapi.json", "/redoc", ) +# R10: 셋업 완료(user 존재)는 단조(monotonic) — 한 번 확인되면 영구. 
매 요청 COUNT 쿼리 +# 대신 캐시 플래그로 전환 (setup 후 모든 요청이 users COUNT 하던 per-request 비용 제거). +_setup_complete = False + @app.middleware("http") async def setup_redirect_middleware(request: Request, call_next): + global _setup_complete # 함수 내 read+assign 둘 다 모듈 전역 참조 (UnboundLocalError 방지) path = request.url.path - # 바이패스 경로는 항상 통과 - if any(path.startswith(p) for p in SETUP_BYPASS_PREFIXES): + # 셋업 완료됐거나 바이패스 경로면 즉시 통과 (DB 쿼리 없음) + if _setup_complete or any(path.startswith(p) for p in SETUP_BYPASS_PREFIXES): return await call_next(request) - # 유저 존재 여부 확인 + # 유저 존재 여부 확인 (셋업 완료 전 1회성 — 완료 확인되면 플래그 set 후 영구 skip) try: async with async_session() as session: result = await session.execute(select(func.count(User.id))) user_count = result.scalar() if user_count == 0: return RedirectResponse(url="/setup") + _setup_complete = True except Exception: pass # DB 연결 실패 시 통과 (health에서 확인 가능) diff --git a/app/models/document.py b/app/models/document.py index f5b0abf..8436da8 100644 --- a/app/models/document.py +++ b/app/models/document.py @@ -52,7 +52,8 @@ class Document(Base): # 2계층: AI 가공 ai_summary: Mapped[str | None] = mapped_column(Text) - ai_tags: Mapped[dict | None] = mapped_column(JSONB, default=[]) + # R11a: 주석 dict→list 정정(실제 list 적재), 공유 가변 default=[] → callable default=list. 
+ ai_tags: Mapped[list | None] = mapped_column(JSONB, default=list) ai_domain: Mapped[str | None] = mapped_column(String(100)) ai_sub_group: Mapped[str | None] = mapped_column(String(100)) ai_model_version: Mapped[str | None] = mapped_column(String(50)) @@ -79,7 +80,7 @@ class Document(Base): user_note: Mapped[str | None] = mapped_column(Text) # 사용자 태그 (ai_tags와 분리, #태그 파싱 결과 또는 수동 입력) - user_tags: Mapped[list | None] = mapped_column(JSONB, default=[]) + user_tags: Mapped[list | None] = mapped_column(JSONB, default=list) # R11a: 공유 가변 default 제거 # 핀 고정 pinned: Mapped[bool] = mapped_column(Boolean, default=False) @@ -105,6 +106,9 @@ class Document(Base): # 승인/삭제 review_status: Mapped[str | None] = mapped_column(String(20), default="pending") deleted_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) + # delete_file=true 명시 삭제 요청 마커 (R7) — retention sweep(document_purge_sweep)이 + # grace 후 NAS 원본 물리삭제. deleted_at(단순 숨김, 파일 보존)과 분리. + purge_requested_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True)) # 외부 편집 URL edit_url: Mapped[str | None] = mapped_column(Text) diff --git a/app/models/study_question.py b/app/models/study_question.py index 040fab1..402e3f8 100644 --- a/app/models/study_question.py +++ b/app/models/study_question.py @@ -7,7 +7,7 @@ PR-2 가드레일: - correct_choice 변경 시 기존 attempt.is_correct 재계산 안 함 (기록은 그 시점의 사실). """ -from datetime import datetime +from datetime import datetime, timezone from pgvector.sqlalchemy import Vector from sqlalchemy import BigInteger, Boolean, DateTime, ForeignKey, Integer, SmallInteger, String, Text @@ -128,7 +128,9 @@ class StudyQuestionAttempt(Base): # PR-9: outcome 권장값 (correct/wrong/unsure). 강한 enum 미사용. outcome: Mapped[str] = mapped_column(String(20), nullable=False) answered_at: Mapped[datetime] = mapped_column( - DateTime(timezone=True), default=datetime.now, nullable=False + # TZ-aware 명시 (R8) — naive datetime.now() 는 컨테이너 TZ 의존. 
현 컨테이너=UTC 라 + # 값 동일(백필 불요)이나, 컨테이너 TZ 가 바뀌면 9시간 어긋나는 잠복 의존 제거. + DateTime(timezone=True), default=lambda: datetime.now(timezone.utc), nullable=False ) # PR-10: 어떤 quiz 세션의 attempt 인지 (NULL = 세션 외 직접 입력 또는 세션 삭제됨). quiz_session_id: Mapped[int | None] = mapped_column( diff --git a/app/services/papers/holder.py b/app/services/papers/holder.py index 2455dc5..1bc601a 100644 --- a/app/services/papers/holder.py +++ b/app/services/papers/holder.py @@ -32,7 +32,8 @@ async def find_paper_holder(session, raw_or_normalized_doi): return None result = await session.execute( select(Document) - .where(Document.material_type == "paper", _DOI_EXPR == doi) + .where(Document.material_type == "paper", _DOI_EXPR == doi, + Document.deleted_at.is_(None)) .limit(1) ) return result.scalars().first() diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py index f1539ef..4f22eec 100644 --- a/app/services/search/retrieval_service.py +++ b/app/services/search/retrieval_service.py @@ -54,42 +54,10 @@ QUERY_EMBED_MAXSIZE = 500 # server-side allowlist map. query parameter 가 raw table name 받지 않음. CANDIDATE_BACKEND_MAP: dict[str, dict[str, str] | None] = { "baseline": None, - "cand_me5_large_inst": { - "docs_table": "documents_cand_me5_large_inst", - "chunks_table": "document_chunks_cand_me5_large_inst", - "embed_endpoint": "http://embedding-cand-me5-inst:80/embed", - }, - "cand_snowflake_l_v2": { - "docs_table": "documents_cand_snowflake_l_v2", - "chunks_table": "document_chunks_cand_snowflake_l_v2", - "embed_endpoint": "http://embedding-cand-snowflake-l-v2:80/embed", - }, - # ─── Phase 2A (embedding-phase2a-1, 2026-06-12): Qwen3-Embedding 후보 3종 ─── - # embed_kind="ollama" = /api/embed 호출 + 쿼리측 instruct prefix (비대칭 사용, - # G-1 fixture 실측: prefix 가 관련쌍 cos +0.016). 문서측은 backfill 이 plain 으로 적재. - # qwen4m = 4B 의 MRL 1024d (dimensions 옵션 — Ollama 가 truncate+재정규화 수행, G-1 실측). 
- "cand_qwen06": { - "docs_table": "documents_cand_qwen06", - "chunks_table": "document_chunks_cand_qwen06", - "embed_endpoint": "http://ollama:11434/api/embed", - "embed_kind": "ollama", - "embed_model": "qwen3-embedding:0.6b", - }, - "cand_qwen4": { - "docs_table": "documents_cand_qwen4", - "chunks_table": "document_chunks_cand_qwen4", - "embed_endpoint": "http://ollama:11434/api/embed", - "embed_kind": "ollama", - "embed_model": "qwen3-embedding:4b", - }, - "cand_qwen4m": { - "docs_table": "documents_cand_qwen4m", - "chunks_table": "document_chunks_cand_qwen4m", - "embed_endpoint": "http://ollama:11434/api/embed", - "embed_kind": "ollama", - "embed_model": "qwen3-embedding:4b", - "embed_dimensions": 1024, - }, + # Phase 2A 임베딩 후보(me5_large_inst·snowflake_l_v2·qwen06·qwen4·qwen4m) 전량 no-go + # 종결(2026-06-12, 후보 전부 -0.03~-0.04) → cand 슬러그·테이블 제거 (R13, 마이그 360 + # DROP). read-path 슬러그를 먼저 빼야 embedding_backend=cand_X /search 가 dropped 테이블을 + # 읽어 500 나지 않는다. baseline(production)만 잔존. } # G-1 핀 고정 instruct 문자열 (inventory 2026-06-12-c 기록과 동일해야 함 — diff --git a/app/services/search/search_pipeline.py b/app/services/search/search_pipeline.py index 996737f..b9273b2 100644 --- a/app/services/search/search_pipeline.py +++ b/app/services/search/search_pipeline.py @@ -32,6 +32,8 @@ from typing import TYPE_CHECKING, Literal from sqlalchemy.ext.asyncio import AsyncSession +from core.database import async_session + from . import query_analyzer, query_rewriter from .fusion_service import ( DEFAULT_FUSION, @@ -188,6 +190,7 @@ async def run_search( snapshot_chunk_id_max=snapshot_chunk_id_max, reranker_backend=reranker_backend, rewrite_backend=rewrite_backend, + axis=axis, ) timing: dict[str, float] = {} @@ -536,6 +539,7 @@ async def search_with_rewrite( snapshot_chunk_id_max: int | None, reranker_backend: str | None, rewrite_backend: str, + axis: "AxisFilter | None" = None, ) -> PipelineResult: """Phase 2Q multi-query retrieval 합성 path (plan v6 §5.5). 
@@ -579,13 +583,20 @@ async def search_with_rewrite( async def _variant_retrieve( v: str, ) -> "tuple[list[SearchResult], list[SearchResult], dict[int, list[SearchResult]]]": - text = await search_text(session, v, per_variant_k) - raw_chunks = await search_vector( - session, v, per_variant_k, - embedding_backend=embedding_backend, - snapshot_doc_id_max=snapshot_doc_id_max, - snapshot_chunk_id_max=snapshot_chunk_id_max, - ) + # 변형별 독립 AsyncSession (fan-out). 공유 session 을 asyncio.gather 로 동시 + # execute 에 넘기면 SQLAlchemy async 가 'another operation in progress' 로 + # 부하 의존적 비결정 크래시 — variant 마다 독립 연결로 분리한다. + # axis(material_type/jurisdiction/year) 도 single-query path 와 동일하게 전달 + # (rewrite 경로가 axis 필터를 조용히 누락하던 결함 수정). + async with async_session() as vsession: + text = await search_text(vsession, v, per_variant_k, axis=axis) + raw_chunks = await search_vector( + vsession, v, per_variant_k, + embedding_backend=embedding_backend, + snapshot_doc_id_max=snapshot_doc_id_max, + snapshot_chunk_id_max=snapshot_chunk_id_max, + axis=axis, + ) vector, chunks_by_doc = compress_chunks_to_docs(raw_chunks, per_variant_k) return text, vector, chunks_by_doc diff --git a/app/services/search/synthesis_service.py b/app/services/search/synthesis_service.py index a6b7aed..b9d198c 100644 --- a/app/services/search/synthesis_service.py +++ b/app/services/search/synthesis_service.py @@ -95,8 +95,10 @@ except FileNotFoundError: ) -# ─── in-memory LRU (FIFO 근사, query_analyzer 패턴 복제) ─ -_CACHE: dict[str, SynthesisResult] = {} +# ─── in-memory 캐시 (FIFO eviction + TTL, query_analyzer 패턴 복제) ─ +# R10: (ts, result) 저장 — TTL 미적용으로 원문 수정돼도 CACHE_MAXSIZE 찰 때까지 stale answer +# 반환하던 결함 수정. query_rewriter 의 expire_at TTL enforce 정본 복제. 
+_CACHE: dict[str, tuple[float, SynthesisResult]] = {} def _model_version() -> str: @@ -122,10 +124,11 @@ def get_cached(query: str, chunk_ids: list[int], backend_name: str = "gemma-macm entry = _CACHE.get(key) if entry is None: return None - # TTL 체크는 elapsed_ms 를 악용할 수 없으므로 별도 저장 - # 여기서는 단순 policy 로 처리: entry 가 있으면 반환 (eviction 은 FIFO 시점) - # 정확한 TTL 이 필요하면 (ts, result) tuple 로 저장해야 함. - return entry + ts, result = entry + if time.time() - ts > CACHE_TTL: + _CACHE.pop(key, None) # 만료 — 삭제 후 miss + return None + return result def _should_cache(result: SynthesisResult) -> bool: @@ -143,8 +146,9 @@ def set_cached(query: str, chunk_ids: list[int], result: SynthesisResult, backen if not _should_cache(result): return key = _cache_key(query, chunk_ids, backend_name) + now = time.time() if key in _CACHE: - _CACHE[key] = result + _CACHE[key] = (now, result) return if len(_CACHE) >= CACHE_MAXSIZE: try: @@ -152,7 +156,7 @@ def set_cached(query: str, chunk_ids: list[int], result: SynthesisResult, backen _CACHE.pop(oldest, None) except StopIteration: pass - _CACHE[key] = result + _CACHE[key] = (now, result) def cache_stats() -> dict[str, int]: diff --git a/app/services/storage/local.py b/app/services/storage/local.py index 1dcaec5..f3f3e94 100644 --- a/app/services/storage/local.py +++ b/app/services/storage/local.py @@ -2,6 +2,7 @@ from __future__ import annotations +import asyncio import os from collections.abc import AsyncIterator from pathlib import Path @@ -42,7 +43,7 @@ class LocalBackend(StorageBackend): to_read = _STREAM_CHUNK if remaining is None else min(_STREAM_CHUNK, remaining) if to_read <= 0: break - data = f.read(to_read) + data = await asyncio.to_thread(f.read, to_read) if not data: break yield data diff --git a/app/services/study/explanation_rag.py b/app/services/study/explanation_rag.py index dc088b1..4e3616d 100644 --- a/app/services/study/explanation_rag.py +++ b/app/services/study/explanation_rag.py @@ -252,12 +252,15 @@ async def 
gather_explanation_context( client = AIClient() query = _build_query(question) try: - # 두 조회 병렬화 (rerank 호출이 별개라 lock 충돌 없음) - docs, questions = await asyncio.gather( - _gather_document_evidence(session, user_id, question.study_topic_id, query, client), - _gather_question_evidence( - session, user_id, question.study_topic_id, question.id, query, client - ), + # 같은 AsyncSession 을 asyncio.gather 로 동시 execute 에 넘기면 SQLAlchemy async 가 + # 'another operation in progress' 로 부하 의존적 비결정 크래시(이전 주석 'lock 충돌 + # 없음' 은 rerank HTTP 만 보고 DB execute 동시성을 간과한 오인). 백그라운드 prefetch + # 라 순차 직렬화 — 사용자 대면 rewrite 경로(독립 세션 fan-out)와는 다른 처방. + docs = await _gather_document_evidence( + session, user_id, question.study_topic_id, query, client + ) + questions = await _gather_question_evidence( + session, user_id, question.study_topic_id, question.id, query, client ) return ExplanationContext(documents=docs, questions=questions) finally: diff --git a/app/services/study/subject_note_rag.py b/app/services/study/subject_note_rag.py index 17bcfeb..0f398bb 100644 --- a/app/services/study/subject_note_rag.py +++ b/app/services/study/subject_note_rag.py @@ -238,9 +238,13 @@ async def gather_subject_note_context( client = AIClient() query = _build_query(subject, scope) try: - docs, questions = await asyncio.gather( - _gather_document_evidence(session, user_id, study_topic_id, query, client), - _gather_question_evidence(session, user_id, study_topic_id, subject, scope, query, client), + # 같은 AsyncSession 동시 execute 회피 — 순차 직렬화(백그라운드 prefetch). + # explanation_rag.gather_explanation_context 와 동형(R2 공유세션 동시성 수정). 
+ docs = await _gather_document_evidence( + session, user_id, study_topic_id, query, client + ) + questions = await _gather_question_evidence( + session, user_id, study_topic_id, subject, scope, query, client ) return SubjectNoteContext(documents=docs, questions=questions) finally: diff --git a/app/workers/arxiv_collector.py b/app/workers/arxiv_collector.py index 386733f..65f9327 100644 --- a/app/workers/arxiv_collector.py +++ b/app/workers/arxiv_collector.py @@ -303,10 +303,12 @@ async def run(bulk: bool = False, limit: int = 0) -> None: src = await session.get(NewsSource, source_id) watermark = _watermark(src, category) newest_seen: datetime | None = None + capped = False # 이번 run 이 cap 으로 카테고리 중도 절단됐는지 (R4) max_pages = (10**6 if bulk else _MAX_PAGES_PER_CAT) try: for page in range(max_pages): if inserted >= run_cap: + capped = True break xml_text = await _fetch(client, query, page * _PAGE_SIZE) total, entries = parse_arxiv_feed(xml_text) @@ -329,12 +331,18 @@ async def run(bulk: bool = False, limit: int = 0) -> None: else: await session.rollback() if inserted >= run_cap: + capped = True break await asyncio.sleep(_REQ_SLEEP) if stop or (page + 1) * _PAGE_SIZE >= total: break - # 카테고리 워터마크 전진(이번 run 최신 발행일) - if newest_seen: + # 카테고리 워터마크 전진 — cap 으로 절단된 run 은 미전진 (R4). + # 절단 시 newest_seen 으로 전진하면 [oldest-ingested, 옛 watermark] 사이 + # 미적재 항목이 다음 run 의 watermark 필터(entry.published <= watermark)에 + # 영구 배제(silent data loss). 미전진하면 다음 run 이 최신부터 재스캔하며 + # 적재분은 dedup-skip(_ingest_entry False, cap 미소모)하고 gap 까지 내려가 + # 이어 적재 → 백로그가 run 당 cap 씩 소화(livelock 회피). bulk 은 cap 무관. 
+ if newest_seen and not capped: async with async_session() as session: src = await session.get(NewsSource, source_id) _set_watermark(src, category, newest_seen) diff --git a/app/workers/chunk_worker.py b/app/workers/chunk_worker.py index 8eead6e..a8105cc 100644 --- a/app/workers/chunk_worker.py +++ b/app/workers/chunk_worker.py @@ -272,15 +272,20 @@ async def _lookup_news_source( if not source_name: return None, None, None - # news_sources에서 이름이 일치하는 레코드 찾기 (prefix match) - result = await session.execute(select(NewsSource)) - sources = result.scalars().all() - for src in sources: - if source_name and ( - src.name.split(" ")[0] == source_name - or src.name.startswith(source_name + " ") - ): - return src.country, src.name, src.language + # news_sources prefix 매칭 — R10: 전체 로드+Python 루프 대신 DB 필터 푸시다운. + # (name == source_name) OR (name 이 "source_name " 로 시작) = 기존 split[0]==source_name 동치 + # (첫 토큰 일치 = 정확일치 또는 'source_name ' prefix). autoescape 로 %/_ 안전. + result = await session.execute( + select(NewsSource) + .where( + (NewsSource.name == source_name) + | NewsSource.name.startswith(source_name + " ", autoescape=True) + ) + .limit(1) + ) + src = result.scalars().first() + if src is not None: + return src.country, src.name, src.language logger.warning( f"[chunk] news_source 매핑 실패: doc_id={doc.id} ai_sub_group={source_name!r} " diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py index f0f0a96..8a14982 100644 --- a/app/workers/classify_worker.py +++ b/app/workers/classify_worker.py @@ -563,7 +563,9 @@ async def process( doc.facet_doctype = ai_doctype # ─── ai_suggestion 저장 (자료실 승인 대기함 제안, §1) ─── - if ai_doctype in LIBRARY_SUGGESTION_DOCTYPES: + # R9: 기존 제안(material_type 제안 등) 우선 — doc.ai_suggestion is None 가드 추가 + # (material 제안 블록과 대칭). 없으면 거래문서 제안이 기존 제안을 clobber('기존 제안 우선' 위반). 
+ if ai_doctype in LIBRARY_SUGGESTION_DOCTYPES and doc.ai_suggestion is None: year = doc.facet_year or datetime.now(timezone.utc).year doc.ai_suggestion = { "proposed_category": "library", diff --git a/app/workers/daily_digest.py b/app/workers/daily_digest.py index be68abd..a9ee500 100644 --- a/app/workers/daily_digest.py +++ b/app/workers/daily_digest.py @@ -5,7 +5,8 @@ DEVONthink/OmniFocus → PostgreSQL/CalDAV 쿼리로 전환. SMTP 발송은 2026-06-10 제거 (한 번도 전달 성공한 적 없는 기능 — 폐기 결정). """ -from datetime import datetime, timezone +import asyncio +from datetime import datetime, time, timedelta, timezone from zoneinfo import ZoneInfo from pathlib import Path @@ -20,17 +21,36 @@ from models.queue import ProcessingQueue logger = setup_logger("daily_digest") +def _write_and_rotate(digest_dir: Path, today: str, markdown: str) -> Path: + """digest 파일 저장 + 90일 초과 아카이브 이동 (blocking — caller 가 to_thread, R8).""" + digest_dir.mkdir(parents=True, exist_ok=True) + digest_path = digest_dir / f"{today}_digest.md" + digest_path.write_text(markdown, encoding="utf-8") + archive_dir = digest_dir / "archive" + archive_dir.mkdir(exist_ok=True) + cutoff = datetime.now(timezone.utc).timestamp() - (90 * 86400) + for old in digest_dir.glob("*_digest.md"): + if old.stat().st_mtime < cutoff: + old.rename(archive_dir / old.name) + return digest_path + + async def run(): """일일 다이제스트 생성 + 저장 + 발송""" - # KST 기준 오늘 (cron 이 KST timezone fix 후 20:00 KST 에 fire). date 객체로 비교 — Document.created_at::date 와 직접 매칭. - today = datetime.now(ZoneInfo("Asia/Seoul")).date() + # KST 기준 오늘 (cron 이 KST timezone fix 후 20:00 KST 에 fire). + kst = ZoneInfo("Asia/Seoul") + today = datetime.now(kst).date() + # KST 하루를 UTC 범위로 변환 (R8) — func.date(created_at)는 pg TimeZone(UTC) 기준 날짜라 + # KST 0~9시 생성 문서(UTC 전날)가 누락되던 경계 버그. created_at(UTC저장) 범위 비교로. + start_utc = datetime.combine(today, time.min, tzinfo=kst).astimezone(timezone.utc) + end_utc = start_utc + timedelta(days=1) sections = [] async with async_session() as session: # ─── 1. 
오늘 추가된 문서 ─── added = await session.execute( select(Document.ai_domain, func.count(Document.id)) - .where(func.date(Document.created_at) == today) + .where(Document.created_at >= start_utc, Document.created_at < end_utc) .group_by(Document.ai_domain) ) added_rows = added.all() @@ -49,7 +69,8 @@ async def run(): select(Document.title) .where( Document.source_channel == "law_monitor", - func.date(Document.created_at) == today, + Document.created_at >= start_utc, + Document.created_at < end_utc, ) ) law_rows = law_docs.scalars().all() @@ -66,7 +87,8 @@ async def run(): select(func.count(Document.id)) .where( Document.source_channel == "email", - func.date(Document.created_at) == today, + Document.created_at >= start_utc, + Document.created_at < end_utc, ) ) email_total = email_count.scalar() or 0 @@ -101,7 +123,7 @@ async def run(): ) failed_count = failed.scalar() or 0 if failed_count > 0: - section += f"\n⚠️ **실패 {failed_count}건** — 수동 확인 필요\n" + section += f"\n**[주의] 실패 {failed_count}건** — 수동 확인 필요\n" sections.append(section) # ─── 5. 
Inbox 미분류 ─── @@ -119,18 +141,8 @@ async def run(): markdown += "\n".join(sections) markdown += f"\n---\n*생성: {datetime.now(timezone.utc).isoformat()}*\n" - # ─── NAS 저장 ─── + # ─── NAS 저장 + 90일 아카이브 (blocking 파일 I/O off-thread, R8/R5 일관) ─── digest_dir = Path(settings.nas_mount_path) / "PKM" / "Archive" / "digests" - digest_dir.mkdir(parents=True, exist_ok=True) - digest_path = digest_dir / f"{today}_digest.md" - digest_path.write_text(markdown, encoding="utf-8") - - # ─── 90일 초과 아카이브 ─── - archive_dir = digest_dir / "archive" - archive_dir.mkdir(exist_ok=True) - cutoff = datetime.now(timezone.utc).timestamp() - (90 * 86400) - for old in digest_dir.glob("*_digest.md"): - if old.stat().st_mtime < cutoff: - old.rename(archive_dir / old.name) + digest_path = await asyncio.to_thread(_write_and_rotate, digest_dir, str(today), markdown) logger.info(f"다이제스트 생성 완료: {digest_path}") diff --git a/app/workers/deep_summary_worker.py b/app/workers/deep_summary_worker.py index 728c181..077ff54 100644 --- a/app/workers/deep_summary_worker.py +++ b/app/workers/deep_summary_worker.py @@ -144,9 +144,13 @@ async def process( logger.info(f"[deep] id={document_id} 맥북 일시 불가 — 보류 (deferred)") raise except Exception as exc: + # 호출 실패(네트워크/API 5xx 등)는 삼키지 않고 전파 (R3) — queue_consumer 가 + # attempts 소진까지 재시도 후 status=failed(dead-letter)로 가시화한다. 삼키면 + # worker_fn 이 정상 반환 → 큐가 completed 로 확정 → ai_detail_summary 영구 누락 + + # tier 가 triage 에 고착(silent 영구 손실). extract/marker/fulltext/stt 정본과 일치. + # 완주 전 doc 쓰기(168~)는 일어나지 않으므로 부분 쓰기 0 (sleep-안전). logger.warning(f"[deep] 호출 실패 id={document_id} model={used_cfg.model}: {exc}") - parse_error = "call_failed" - raw = "" + raise finally: await client.close() diff --git a/app/workers/document_purge_sweep.py b/app/workers/document_purge_sweep.py new file mode 100644 index 0000000..77dc473 --- /dev/null +++ b/app/workers/document_purge_sweep.py @@ -0,0 +1,65 @@ +"""delete_file=true 로 요청된 문서의 NAS 원본을 grace 후 물리삭제 (R7 retention sweep). 
+ +purge_requested_at 마커 기준(deleted_at 아님 — 일반 soft-delete/숨김은 파일 보존, undelete +가능). grace(30일) 경과 + 파일 존재 시 unlink + AUDIT 로그. 파일 존재 체크로 멱등 +(재실행 시 이미 삭제된 건 skip). 요청 경로(DELETE)엔 동기 비가역 op 0 — 모두 이 cron 으로. +""" +import asyncio +import logging +from datetime import datetime, timedelta, timezone +from pathlib import Path + +from sqlalchemy import select + +from core.config import settings +from core.database import async_session +from models.document import Document + +logger = logging.getLogger("purge_sweep") + +PURGE_GRACE_DAYS = 30 + + +def _unlink_if_exists(p: Path) -> bool: + """파일이 있으면 unlink (blocking — caller 가 to_thread). 존재 여부 반환(멱등).""" + if p.exists(): + p.unlink() + return True + return False + + +async def run() -> int: + """purge 요청 + grace 경과 문서의 NAS 원본 물리삭제. 삭제 건수 반환.""" + cutoff = datetime.now(timezone.utc) - timedelta(days=PURGE_GRACE_DAYS) + async with async_session() as session: + rows = ( + await session.execute( + select(Document.id, Document.file_path, Document.purge_requested_at).where( + Document.purge_requested_at.is_not(None), + Document.purge_requested_at < cutoff, + Document.file_path.is_not(None), + ) + ) + ).all() + + purged = 0 + for doc_id, file_path, requested_at in rows: + nas_path = Path(settings.nas_mount_path) / file_path + try: + existed = await asyncio.to_thread(_unlink_if_exists, nas_path) + if existed: + purged += 1 + # AUDIT — 물리삭제 기록 (가시화). doc_id / 경로 / 요청일 / grace. 
+ logger.warning( + "PURGE doc_id=%s file=%s requested_at=%s grace_days=%s", + doc_id, + file_path, + requested_at.isoformat() if requested_at else None, + PURGE_GRACE_DAYS, + ) + except OSError as e: + logger.error("PURGE 실패 doc_id=%s file=%s: %s", doc_id, file_path, e) + + if purged: + logger.info("[purge_sweep] NAS 원본 %d건 물리삭제 (grace %d일)", purged, PURGE_GRACE_DAYS) + return purged diff --git a/app/workers/file_watcher.py b/app/workers/file_watcher.py index f1987cc..99c7694 100644 --- a/app/workers/file_watcher.py +++ b/app/workers/file_watcher.py @@ -17,6 +17,7 @@ Web/Blog ingest (devonagent 트랙, plan db-snuggly-petal.md): - sidecar (.json) 누락 시: skip 안 하고 ingest, web_meta.sidecar_missing=true """ +import asyncio import hashlib import json from pathlib import Path @@ -136,6 +137,10 @@ def _canonicalize_url(url: str) -> str: 같은 글의 utm 변형 (`?utm_source=foo`) 과 fragment 변형 (`#section`) 을 한 row 로 수렴시키기 위해 file_hash 산출 전 반드시 거친다. + + ★R11c: news_collector._normalize_url(news 채널)과 의도적으로 다르다 — 이쪽(web_clip)은 + query-sort/trailing-slash/소문자화로 공격적 정규화하지만, news 쪽은 query-식별 사이트의 + 별개 기사 붕괴 방지를 위해 보수적이다. 두 함수 통합 금지(채널별 dedup 의도가 다름). """ if not url: return "" @@ -246,7 +251,8 @@ async def watch_inbox(): async with async_session() as session: # ─── Web/ 트랙 (devonagent) — DEVONthink Smart Rule 이 떨군 .html 만 진입 ─── if web_root.exists(): - for file_path in web_root.rglob("*.html"): + # rglob NFS 디렉토리 walk(blocking stat 다발)를 off-thread 로 수집 (R5). + for file_path in await asyncio.to_thread(lambda: list(web_root.rglob("*.html"))): if not file_path.is_file() or should_skip(file_path): continue rel_path = str(file_path.relative_to(nas_root)) @@ -264,7 +270,8 @@ async def watch_inbox(): Path(sub).name, (None, None, None) ) - for file_path in scan_root.rglob("*"): + # NFS 디렉토리 walk(blocking) off-thread 수집 (R5). 
+ for file_path in await asyncio.to_thread(lambda: list(scan_root.rglob("*"))): if not file_path.is_file() or should_skip(file_path): continue @@ -278,7 +285,11 @@ async def watch_inbox(): continue rel_path = str(file_path.relative_to(nas_root)) - fhash = file_hash(file_path) + # GB 파일 SHA-256 은 이벤트 루프를 점유 → 같은 루프의 모든 1분 주기 consumer + # + FastAPI 요청이 수십초~분 동시 정지. to_thread 오프로드. 스캔 루프가 이미 + # 순차라 file_hash 는 한 번에 하나만 실행(직렬화) — 병렬 해싱 X = NFS 2.5GbE + # 대역폭·버퍼 메모리 blowup 방지 (R5). + fhash = await asyncio.to_thread(file_hash, file_path) result = await session.execute( select(Document).where(Document.file_path == rel_path) diff --git a/app/workers/kosha_collector.py b/app/workers/kosha_collector.py index 0ece796..0d873c3 100644 --- a/app/workers/kosha_collector.py +++ b/app/workers/kosha_collector.py @@ -297,6 +297,10 @@ async def collect_disaster_cases(session) -> int: await _ingest_attachment(session, boardno, filenm, filepath) except FeedError as e: logger.warning(f"[kosha] 첨부 실패 skip ({boardno}/{filenm}): {e}") + + # 케이스 단위 commit (R4) — 이후 페이지/케이스의 _api_get 실패가 앞서 적재한 + # 케이스까지 전체 rollback 하지 않게 부분 적재 보존 (csb/api_standards idiom). + await session.commit() if page_all_dup: break # 등록일 역순 — 페이지 전체가 기존이면 이후 페이지도 기존 @@ -374,6 +378,8 @@ async def collect_fatal_accidents(session) -> int: await enqueue_stage(session, doc.id, "embed") await enqueue_stage(session, doc.id, "chunk") new_count += 1 + # 케이스 단위 commit (R4) — 이후 페이지 실패가 앞 케이스 전체 rollback 방지. + await session.commit() if page_all_dup: break # 등록일 역순 — 페이지 전체가 기존이면 이후 페이지도 기존 @@ -450,6 +456,8 @@ async def collect_kosha_guide(session, cap: int = _GUIDE_DAILY_CAP) -> int: await session.flush() await enqueue_stage(session, doc.id, "extract") ingested += 1 + # 항목 단위 commit (R4) — 다운로드 실패가 앞서 적재한 GUIDE 항목 전체 rollback 방지. 
+ await session.commit() # silent cap 금지 — 잔량 가시화 (자동 점진 백필: 내일 cap 만큼 또 소화) logger.info(f"[kosha] GUIDE 신규/개정 {len(new_specs)}건 중 {ingested}건 ingest" diff --git a/app/workers/news_collector.py b/app/workers/news_collector.py index ea6945a..3a4a6df 100644 --- a/app/workers/news_collector.py +++ b/app/workers/news_collector.py @@ -83,6 +83,10 @@ def _normalize_url(url: str) -> str: query 전체 제거 금지: hada.io/topic?id= · aitimes articleView.html?idxno= · HN item?id= 등 query-식별 사이트에서 별개 기사가 같은 URL 로 붕괴된다. 저장(edit_url)·조회 양쪽이 이 함수를 공유해야 dedup 이 성립. + + ★R11c: file_watcher._canonicalize_url(web_clip 채널)과 의도적으로 다르다 — 이쪽은 콘텐츠 + 식별 query 보존(별개 기사 붕괴 방지)이 핵심이라 query-sort/trailing-slash/소문자화를 안 한다. + 두 함수 통합 금지(news dedup 가 깨짐). 채널별 normalization 은 의도된 설계. """ parsed = urlparse(url) kept = [ @@ -397,6 +401,55 @@ def _doc_identity(source: NewsSource, source_short: str, category: str) -> dict: } +async def _already_ingested(session, article_id: str, normalized_url: str, link: str) -> bool: + """이미 적재된 기사인지 — file_hash 또는 정규화/raw edit_url 매칭 (3 fetch 공통, R11c). + + 레거시 raw URL + 교차 게시 다중 매칭 내성(first). _fetch_rss/_fetch_api_guardian/ + _fetch_api_nyt 가 복제하던 동일 존재체크를 단일화. + """ + existing = await session.execute( + select(Document).where( + (Document.file_hash == article_id) + | (Document.edit_url.in_([normalized_url, link])) + ).limit(1) + ) + return existing.scalars().first() is not None + + +def _build_news_doc(source, ident, source_short, article_id, title, body, + extractor_version, normalized_url, pub_dt) -> Document: + """3 fetch 공통 뉴스 Document 빌더 (R11c). 채널별 차이는 인자로만 — body(NYT=summary)· + extractor_version·ident(category 계산 차이 흡수)만 다르고 22 필드 구조는 정적 동일. 
+ edit_url 은 조회와 동일 정규화 저장(raw 저장 시 URL dedup 무력화).""" + return Document( + file_path=f"{ident['path_prefix']}/{source.name}/{article_id}", + file_hash=article_id, + file_format="article", + file_size=len(body.encode()), + file_type="note", + title=title, + extracted_text=f"{title}\n\n{body}", + extracted_at=datetime.now(timezone.utc), + extractor_version=extractor_version, + # article = 텍스트 네이티브 → 생성 시점 terminal 'skipped' 명시(markdown 변환 비대상, + # 미명시 시 'pending' 영구 비수렴 → backlog 지표 오염). page 정책은 fulltext_worker 승격. + md_status="skipped", + md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상", + source_channel=source.source_channel, + data_origin="external", + edit_url=normalized_url, + review_status="approved", + ai_domain=ident["ai_domain"], + ai_sub_group=source_short, + ai_tags=ident["ai_tags"], + # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수) + material_type=ident["material_type"], + jurisdiction=ident["jurisdiction"], + published_date=pub_dt.date() if pub_dt else None, + extract_meta=_build_extract_meta(source, pub_dt), + ) + + async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]: """RSS 피드 수집 — redirect 재검증 + 크기/content-type 제한 + 조건부 GET (A-1). 
@@ -515,13 +568,7 @@ async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]: article_id = _article_hash(title, pub_dt.strftime("%Y%m%d"), source.name) normalized_url = _normalize_url(link) - existing = await session.execute( - select(Document).where( - (Document.file_hash == article_id) | - (Document.edit_url.in_([normalized_url, link])) - ).limit(1) - ) - if existing.scalars().first(): + if await _already_ingested(session, article_id, normalized_url, link): continue # A-6 2차: 포털 전재 dedup (first-wins — 먼저 적재된 쪽이 정본) @@ -533,35 +580,9 @@ async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]: source_short = source.name.split(" ")[0] # "경향신문 문화" → "경향신문" ident = _doc_identity(source, source_short, category) - doc = Document( - file_path=f"{ident['path_prefix']}/{source.name}/{article_id}", - file_hash=article_id, - file_format="article", - file_size=len(body.encode()), - file_type="note", - title=title, - extracted_text=f"{title}\n\n{body}", - extracted_at=datetime.now(timezone.utc), - extractor_version=extractor_version, - # article = 텍스트 네이티브(본문=extracted_text). markdown 단계 미enqueue 라 - # 기본값 'pending' 이면 영구 비수렴 → backlog 지표 오염 + md_status_pending partial - # 인덱스 비대. 생성 시점에 terminal 'skipped' 로 명시(변환 비대상). - # fulltext_policy='page' 소스는 fulltext_worker 가 승격 시 success 로 갱신. 
- md_status="skipped", - md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상", - source_channel=source.source_channel, - data_origin="external", - # 조회와 동일하게 정규화해 저장 — raw(tracking param 포함) 저장 시 URL dedup 무력화 - edit_url=normalized_url, - review_status="approved", - ai_domain=ident["ai_domain"], - ai_sub_group=source_short, - ai_tags=ident["ai_tags"], - # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수) - material_type=ident["material_type"], - jurisdiction=ident["jurisdiction"], - published_date=pub_dt.date() if pub_dt else None, - extract_meta=_build_extract_meta(source, pub_dt), + doc = _build_news_doc( + source, ident, source_short, article_id, title, body, + extractor_version, normalized_url, pub_dt, ) session.add(doc) await session.flush() @@ -658,13 +679,7 @@ async def _fetch_api_guardian(session, source: NewsSource) -> tuple[int, str]: normalized_url = _normalize_url(link) # RSS 수집부와 동일: 레거시 raw URL + 교차 게시 다중 매칭 내성 (first) - existing = await session.execute( - select(Document).where( - (Document.file_hash == article_id) | - (Document.edit_url.in_([normalized_url, link])) - ).limit(1) - ) - if existing.scalars().first(): + if await _already_ingested(session, article_id, normalized_url, link): continue if await _is_portal_duplicate(session, title): @@ -675,30 +690,9 @@ async def _fetch_api_guardian(session, source: NewsSource) -> tuple[int, str]: source_short = source.name.split(" ")[0] ident = _doc_identity(source, source_short, category) - doc = Document( - file_path=f"{ident['path_prefix']}/{source.name}/{article_id}", - file_hash=article_id, - file_format="article", - file_size=len(body.encode()), - file_type="note", - title=title, - extracted_text=f"{title}\n\n{body}", - extracted_at=datetime.now(timezone.utc), - extractor_version="guardian_api_full" if is_full else "guardian_api", - md_status="skipped", - md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상", - source_channel=source.source_channel, - 
data_origin="external", - edit_url=normalized_url, - review_status="approved", - ai_domain=ident["ai_domain"], - ai_sub_group=source_short, - ai_tags=ident["ai_tags"], - # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수) - material_type=ident["material_type"], - jurisdiction=ident["jurisdiction"], - published_date=pub_dt.date() if pub_dt else None, - extract_meta=_build_extract_meta(source, pub_dt), + doc = _build_news_doc( + source, ident, source_short, article_id, title, body, + "guardian_api_full" if is_full else "guardian_api", normalized_url, pub_dt, ) session.add(doc) await session.flush() @@ -755,13 +749,7 @@ async def _fetch_api_nyt(session, source: NewsSource) -> tuple[int, str]: normalized_url = _normalize_url(link) # RSS 수집부와 동일: 레거시 raw URL + 교차 게시 다중 매칭 내성 (first) - existing = await session.execute( - select(Document).where( - (Document.file_hash == article_id) | - (Document.edit_url.in_([normalized_url, link])) - ).limit(1) - ) - if existing.scalars().first(): + if await _already_ingested(session, article_id, normalized_url, link): continue if await _is_portal_duplicate(session, title): @@ -772,33 +760,9 @@ async def _fetch_api_nyt(session, source: NewsSource) -> tuple[int, str]: source_short = source.name.split(" ")[0] ident = _doc_identity(source, source_short, category) - doc = Document( - file_path=f"{ident['path_prefix']}/{source.name}/{article_id}", - file_hash=article_id, - file_format="article", - file_size=len(summary.encode()), - file_type="note", - title=title, - extracted_text=f"{title}\n\n{summary}", - extracted_at=datetime.now(timezone.utc), - extractor_version="nyt_api", - # article = 텍스트 네이티브(본문=extracted_text). markdown 단계 미enqueue 라 - # 기본값 'pending' 이면 영구 비수렴 → backlog 지표 오염 + md_status_pending partial - # 인덱스 비대. 생성 시점에 terminal 'skipped' 로 명시(변환 비대상). 
- md_status="skipped", - md_extraction_error="news article: 텍스트 네이티브, markdown 변환 비대상", - source_channel=source.source_channel, - data_origin="external", - edit_url=normalized_url, - review_status="approved", - ai_domain=ident["ai_domain"], - ai_sub_group=source_short, - ai_tags=ident["ai_tags"], - # 안전 자료실 A-2 — 레지스트리 deterministic (classify-skip 경로라 ingest 시점 필수) - material_type=ident["material_type"], - jurisdiction=ident["jurisdiction"], - published_date=pub_dt.date() if pub_dt else None, - extract_meta=_build_extract_meta(source, pub_dt), + doc = _build_news_doc( + source, ident, source_short, article_id, title, summary, + "nyt_api", normalized_url, pub_dt, ) session.add(doc) await session.flush() diff --git a/app/workers/openalex_collector.py b/app/workers/openalex_collector.py index a52c867..b31d5e4 100644 --- a/app/workers/openalex_collector.py +++ b/app/workers/openalex_collector.py @@ -331,11 +331,13 @@ async def run(bulk: bool = False, limit: int = 0) -> None: filter_str = (build_issn_filter(wm_key, watermark) if kind == "issn" else build_filter(wm_key, watermark)) newest: str | None = None + capped = False # 이번 run 이 cap 으로 시드 중도 절단됐는지 (R4) cursor = "*" max_pages = (10**6 if bulk else _MAX_PAGES_PER_KW) try: for _page in range(max_pages): if inserted >= run_cap: + capped = True break text = await _fetch(client, key, filter_str, cursor) _count, next_cursor, works = parse_openalex_works(text) @@ -353,12 +355,17 @@ async def run(bulk: bool = False, limit: int = 0) -> None: else: await session.rollback() if inserted >= run_cap: + capped = True break await asyncio.sleep(_REQ_SLEEP) if not next_cursor: break cursor = next_cursor - if newest: + # cap 절단 시 워터마크 미전진 — 미페치 works 가 다음 run 의 watermark 필터 + # (publication_date > watermark)에 영구 배제되는 silent loss 방지. 미전진하면 + # 다음 run 이 옛 watermark 부터 재페치하며 적재분 dedup-skip(cap 미소모) 후 + # 이어 적재 → 백로그 run 당 cap 소화 (R4). bulk 은 cap 무관. 
+ if newest and not capped: async with async_session() as session: src = await session.get(NewsSource, source_id) _set_watermark(src, wm_key, newest) diff --git a/app/workers/phase2a_cand_backfill.py b/app/workers/phase2a_cand_backfill.py deleted file mode 100644 index 4c734d2..0000000 --- a/app/workers/phase2a_cand_backfill.py +++ /dev/null @@ -1,142 +0,0 @@ -"""Phase 2A 후보 임베딩 백필 CLI (embedding-phase2a-1 E-1). - - docker compose exec -T fastapi python -m workers.phase2a_cand_backfill \ - --target qwen06 --doc-id-max 41944 --chunk-id-max 104140 [--batch 32] - -설계 원칙 (plan r3): - - resumable/idempotent: 대상 = NOT EXISTS(후보 테이블) — 중단/재실행 시 이어서. - 배치 단위 커밋. C-1 백필 게이트 = "후보 카운트 == 동결셋 카운트". - - 동결셋: id <= *_id_max AND 베이스라인 embedding IS NOT NULL (AND docs.deleted_at IS NULL). - cand 테이블은 동결 범위로만 INSERT (retrieval cand path 가 snapshot filter 를 안 타는 전제). - - 문서/청크 입력 = production 경로와 동일 구성(embed_worker._build_embed_input / - chunk_worker 의 [제목][섹션][본문]) + plain (instruct prefix 는 쿼리 측 전용 — G-1 불변식). - - 임베딩 = Ollama /api/embed 배치 호출 (G-1 fixture: 정규화 출력). - - qwen4m 은 본 CLI 대상이 아님 — qwen4 적재 후 SQL 파생(subvector+l2_normalize), plan E-1. 
-""" - -import argparse -import asyncio -import hashlib -import time - -import httpx -from sqlalchemy import text - -from core.database import async_session -from core.utils import setup_logger -from models.document import Document -from workers.embed_worker import _build_embed_input - -logger = setup_logger("phase2a_cand_backfill") - -OLLAMA_EMBED = "http://ollama:11434/api/embed" - -TARGETS = { - "qwen06": { - "model": "qwen3-embedding:0.6b", "dim": 1024, - "docs": "documents_cand_qwen06", "chunks": "document_chunks_cand_qwen06", - }, - "qwen4": { - "model": "qwen3-embedding:4b", "dim": 2560, - "docs": "documents_cand_qwen4", "chunks": "document_chunks_cand_qwen4", - }, -} - - -async def _embed_batch(client: httpx.AsyncClient, model: str, texts: list[str]) -> list[list[float]]: - r = await client.post(OLLAMA_EMBED, json={"model": model, "input": texts}, timeout=600) - r.raise_for_status() - embs = r.json()["embeddings"] - if len(embs) != len(texts): - raise RuntimeError(f"embed count mismatch: {len(embs)} != {len(texts)}") - return embs - - -async def backfill_docs(target: dict, doc_id_max: int, batch: int, http: httpx.AsyncClient) -> int: - total = 0 - while True: - async with async_session() as session: - rows = (await session.execute(text(f""" - SELECT d.id FROM documents d - WHERE d.id <= :m AND d.embedding IS NOT NULL AND d.deleted_at IS NULL - AND NOT EXISTS (SELECT 1 FROM {target['docs']} c WHERE c.doc_id = d.id) - ORDER BY d.id LIMIT :b - """), {"m": doc_id_max, "b": batch})).scalars().all() - if not rows: - break - docs = [(await session.get(Document, i)) for i in rows] - inputs = [_build_embed_input(d) for d in docs] - embs = await _embed_batch(http, target["model"], inputs) - for d, inp, e in zip(docs, inputs, embs): - await session.execute(text(f""" - INSERT INTO {target['docs']} (doc_id, embed_input_hash, embedding) - VALUES (:i, :h, cast(:e AS vector)) - ON CONFLICT (doc_id) DO NOTHING - """), {"i": d.id, "h": 
hashlib.sha256(inp.encode()).hexdigest()[:16], "e": str(e)}) - await session.commit() - total += len(rows) - if total % (batch * 10) < batch: - logger.info(f"[{target['docs']}] +{total} (last id={rows[-1]})") - return total - - -async def backfill_chunks(target: dict, chunk_id_max: int, batch: int, http: httpx.AsyncClient) -> int: - total = 0 - while True: - async with async_session() as session: - rows = (await session.execute(text(f""" - SELECT c.id, c.doc_id, c.chunk_index, c.section_title, c.text, d.title - FROM corpus_chunks c JOIN documents d ON d.id = c.doc_id - WHERE c.id <= :m AND c.embedding IS NOT NULL AND d.deleted_at IS NULL - AND NOT EXISTS (SELECT 1 FROM {target['chunks']} k WHERE k.id = c.id) - ORDER BY c.id LIMIT :b - """), {"m": chunk_id_max, "b": batch})).all() - if not rows: - break - inputs = [ - f"[제목] {r.title or ''}\n[섹션] {r.section_title or ''}\n[본문] {r.text}" - for r in rows - ] - embs = await _embed_batch(http, target["model"], inputs) - for r, e in zip(rows, embs): - await session.execute(text(f""" - INSERT INTO {target['chunks']} (id, doc_id, chunk_index, section_title, text, embedding) - VALUES (:i, :d, :x, :s, :t, cast(:e AS vector)) - ON CONFLICT (id) DO NOTHING - """), {"i": r.id, "d": r.doc_id, "x": r.chunk_index, - "s": r.section_title, "t": r.text, "e": str(e)}) - await session.commit() - total += len(rows) - if total % (batch * 10) < batch: - logger.info(f"[{target['chunks']}] +{total} (last id={rows[-1]})") - return total - - -async def run(target_key: str, doc_id_max: int, chunk_id_max: int, batch: int) -> None: - target = TARGETS[target_key] - start = time.monotonic() - async with httpx.AsyncClient() as http: - nd = await backfill_docs(target, doc_id_max, batch, http) - nc = await backfill_chunks(target, chunk_id_max, batch, http) - mins = (time.monotonic() - start) / 60 - async with async_session() as session: - cd = (await session.execute(text(f"SELECT count(*) FROM {target['docs']}"))).scalar_one() - cc = (await 
session.execute(text(f"SELECT count(*) FROM {target['chunks']}"))).scalar_one() - logger.info( - f"[{target_key}] 완료 — 이번 run docs +{nd} chunks +{nc} ({mins:.1f}분) · " - f"누적 docs {cd} / chunks {cc} (동결 게이트 = 베이스라인 동결셋 카운트와 일치 확인)" - ) - - -def main() -> None: - p = argparse.ArgumentParser(description="Phase 2A 후보 임베딩 백필 (resumable)") - p.add_argument("--target", required=True, choices=sorted(TARGETS)) - p.add_argument("--doc-id-max", type=int, required=True) - p.add_argument("--chunk-id-max", type=int, required=True) - p.add_argument("--batch", type=int, default=32) - a = p.parse_args() - asyncio.run(run(a.target, a.doc_id_max, a.chunk_id_max, a.batch)) - - -if __name__ == "__main__": - main() diff --git a/app/workers/queue_consumer.py b/app/workers/queue_consumer.py index 4903ff1..c7c2ebb 100644 --- a/app/workers/queue_consumer.py +++ b/app/workers/queue_consumer.py @@ -275,7 +275,15 @@ async def _process_stage(stage, worker_fn): item.status = "completed" item.completed_at = datetime.now(timezone.utc) await skip_session.commit() - await enqueue_next_stage(document_id, stage) + # 완료 커밋 후 enqueue — 실패가 outer except 로 전파돼 completed 재오픈 + # 되지 않게 격리 (R3, 정상 완료 경로와 동일 처리). + try: + await enqueue_next_stage(document_id, stage) + except Exception as enq_err: + logger.error( + f"[{stage}] document_id={document_id} skip(note) 완료됐으나 " + f"다음 단계 enqueue 실패: {enq_err}" + ) logger.info(f"[{stage}] document_id={document_id} skip (note)") continue @@ -293,7 +301,15 @@ async def _process_stage(stage, worker_fn): item.completed_at = datetime.now(timezone.utc) await session.commit() - await enqueue_next_stage(document_id, stage) + # 완료는 이미 커밋됨. enqueue_next_stage 실패가 outer except 로 전파되면 + # completed 항목을 재오픈(pending/failed)해 같은 단계를 재실행 = 비싼 작업 중복 + # + 부분 재쓰기. 자체 try 로 격리하고 ERROR 로 가시화한다 (R3). 
+ try: + await enqueue_next_stage(document_id, stage) + except Exception as enq_err: + logger.error( + f"[{stage}] document_id={document_id} 완료됐으나 다음 단계 enqueue 실패: {enq_err}" + ) logger.info(f"[{stage}] document_id={document_id} 완료") except StageDeferred as defer: diff --git a/app/workers/study_question_embed_worker.py b/app/workers/study_question_embed_worker.py index 016da7b..28fdbf1 100644 --- a/app/workers/study_question_embed_worker.py +++ b/app/workers/study_question_embed_worker.py @@ -102,7 +102,9 @@ async def _process_one(session: AsyncSession, qid: int, client: AIClient) -> boo try: async with asyncio.timeout(EMBED_TIMEOUT_S): vec = await client.embed(text) - except (asyncio.TimeoutError, Exception) as e: + except asyncio.CancelledError: + raise # 취소는 전파 — broad except 가 삼키지 않게 명시 (R3) + except Exception as e: logger.warning("study_q_embed_failed qid=%s err=%s: %s", qid, type(e).__name__, e) # 실패 — status='failed'. 직전 embedding 보존. q.embedding_status = "failed" diff --git a/app/workers/thumbnail_worker.py b/app/workers/thumbnail_worker.py index 89bd3eb..8a1b360 100644 --- a/app/workers/thumbnail_worker.py +++ b/app/workers/thumbnail_worker.py @@ -121,7 +121,12 @@ async def process(document_id: int, session: AsyncSession) -> None: ok = _extract_thumbnail(source, output, seek) if not ok: - return + # 썸네일 추출 실패(ffmpeg)는 삼키지 않고 raise (R3) — queue_consumer 가 attempts + # 소진까지 재시도 후 status=failed 로 가시화. silent return 이면 큐가 completed 로 + # 확정 + 썸네일 영구 누락 + 재시도/추적 0 (silent skip). 손상 영상이면 failed 로 안착. 
+ raise RuntimeError( + f"thumbnail 추출 실패: document_id={document_id} source={source}" + ) doc.thumbnail_path = str(output) doc.updated_at = datetime.now(timezone.utc) diff --git a/app/workers/tier_backfill.py b/app/workers/tier_backfill.py index f2f8ec0..cfd60bc 100644 --- a/app/workers/tier_backfill.py +++ b/app/workers/tier_backfill.py @@ -52,6 +52,11 @@ DOMAIN_PRIORITY: list[tuple[str, str]] = [ ("manual", "source_channel = 'manual'"), ] +# R12: filter_clause 는 SQL 에 직접 보간되므로 이 allowlist(DOMAIN_PRIORITY 출처) 통과분만 +# 허용 — 현재 모듈 상수라 injection 경로 0 이나, 외부 입력화 시 즉시 차단하는 final gate +# (retrieval_service 의 _VALID_DOCS_TABLE allowlist 정본 대비 비대칭 해소). +_ALLOWED_FILTER_CLAUSES: frozenset[str] = frozenset(c for _, c in DOMAIN_PRIORITY) + async def _classify_pending(session: AsyncSession) -> int: return int(await session.scalar(text(""" @@ -66,6 +71,9 @@ async def _enqueue_domain(session: AsyncSession, filter_clause: str, limit: int) extracted_text 빈 문자열 (LENGTH=0) 도 제외 — classify_worker 는 not doc.extracted_text truthy 체크라 빈 문자열에서 ValueError raise. 무한 retry 루프 방지. """ + # R12: SQL 직접 보간 전 allowlist final gate. + if filter_clause not in _ALLOWED_FILTER_CLAUSES: + raise ValueError(f"비허용 filter_clause (allowlist 외): {filter_clause!r}") sql = text(f""" INSERT INTO processing_queue (document_id, stage, status, attempts, max_attempts) SELECT id, 'classify', 'pending', 0, 3 diff --git a/migrations/359_documents_purge_requested_at.sql b/migrations/359_documents_purge_requested_at.sql new file mode 100644 index 0000000..0fb9d6d --- /dev/null +++ b/migrations/359_documents_purge_requested_at.sql @@ -0,0 +1,6 @@ +-- 359: delete_file=true 명시 삭제 요청 마커 (R7 delete_file 큐드삭제). +-- retention sweep(document_purge_sweep) 이 이 컬럼 + grace(30일) 기준으로 NAS 원본을 +-- 물리삭제한다. deleted_at(단순 숨김)과 분리 — 숨김(delete_file=false)은 파일 보존(undelete +-- 가능). sweep 가 deleted_at 기준이면 모든 숨김이 30일 후 물리삭제되는 데이터 손실이 되므로 +-- 명시 purge 요청만 대상으로 한다. 
+ALTER TABLE documents ADD COLUMN IF NOT EXISTS purge_requested_at TIMESTAMPTZ; diff --git a/migrations/360_drop_phase2a_cand_tables.sql b/migrations/360_drop_phase2a_cand_tables.sql new file mode 100644 index 0000000..03924e0 --- /dev/null +++ b/migrations/360_drop_phase2a_cand_tables.sql @@ -0,0 +1,11 @@ +-- 360: Phase 2A 임베딩 후보 cand 섀도 테이블 제거 (R13). +-- Phase 2A no-go 종결(2026-06-12, 후보 전부 -0.03~-0.04) + phase2a_cand_backfill 워커 dormant. +-- retrieval_service.CANDIDATE_BACKEND_MAP / api.search allowed 슬러그 선제거 후 DROP. +-- ★single statement(콤마 구분) — init_db 의 exec_driver_sql(asyncpg)은 multi-statement 불허. +-- IF EXISTS — me5/snowflake 는 ad-hoc 생성분이라 환경별 존재 여부 다를 수 있음(멱등). +DROP TABLE IF EXISTS + document_chunks_cand_me5_large_inst, documents_cand_me5_large_inst, + document_chunks_cand_snowflake_l_v2, documents_cand_snowflake_l_v2, + document_chunks_cand_qwen06, documents_cand_qwen06, + document_chunks_cand_qwen4, documents_cand_qwen4, + document_chunks_cand_qwen4m, documents_cand_qwen4m; diff --git a/migrations/361_attempt_session_question_unique.sql b/migrations/361_attempt_session_question_unique.sql new file mode 100644 index 0000000..ee80a36 --- /dev/null +++ b/migrations/361_attempt_session_question_unique.sql @@ -0,0 +1,9 @@ +-- 361: quiz 세션 내 같은 문제 이중 attempt 방지 partial UNIQUE (R9). +-- submit_attempt 의 FOR UPDATE 행잠금이 1차 방어, 이 제약은 DB 레벨 belt-and-suspenders. +-- prod 실측 중복 0 (GROUP BY (quiz_session_id, study_question_id) HAVING count>1 = 0) + fresh DB +-- 빈 테이블이라 dedup DELETE 불요 → ★single statement(init_db exec_driver_sql 은 multi-statement +-- 불허). 혹시 중복이 생긴 환경이면 이 마이그가 실패하므로(IntegrityError) 수동 dedup 후 재적용. +-- quiz_session_id IS NULL(세션 외 직접 입력)은 비대상 → partial index. 
+CREATE UNIQUE INDEX IF NOT EXISTS uq_attempt_session_question +ON study_question_attempts (quiz_session_id, study_question_id) +WHERE quiz_session_id IS NOT NULL; diff --git a/migrations/_baseline/0358_schema_baseline.sql b/migrations/_baseline/0358_schema_baseline.sql new file mode 100644 index 0000000..05baf49 --- /dev/null +++ b/migrations/_baseline/0358_schema_baseline.sql @@ -0,0 +1,5212 @@ +-- +-- PostgreSQL database dump +-- + + +-- Dumped from database version 16.13 (Debian 16.13-1.pgdg12+1) +-- Dumped by pg_dump version 16.13 (Debian 16.13-1.pgdg12+1) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +-- +-- Name: pg_trgm; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS pg_trgm WITH SCHEMA public; + + +-- +-- Name: EXTENSION pg_trgm; Type: COMMENT; Schema: -; Owner: - +-- + +COMMENT ON EXTENSION pg_trgm IS 'text similarity measurement and index searching based on trigrams'; + + +-- +-- Name: vector; Type: EXTENSION; Schema: -; Owner: - +-- + +CREATE EXTENSION IF NOT EXISTS vector WITH SCHEMA public; + + +-- +-- Name: EXTENSION vector; Type: COMMENT; Schema: -; Owner: - +-- + +COMMENT ON EXTENSION vector IS 'vector data type and ivfflat and hnsw access methods'; + + +-- +-- Name: data_origin; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.data_origin AS ENUM ( + 'work', + 'external' +); + + +-- +-- Name: doc_category; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.doc_category AS ENUM ( + 'document', + 'library', + 'news', + 'memo', + 'audio', + 'video', + 'mail', + 'calendar', + 'plex', + 'law' +); + + +-- +-- Name: doc_type; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.doc_type AS ENUM ( + 'immutable', + 'editable', + 'note' +); + + +-- 
+-- Name: document_purpose; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.document_purpose AS ENUM ( + 'business', + 'knowledge' +); + + +-- +-- Name: event_actor; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.event_actor AS ENUM ( + 'manual', + 'eid', + 'email_ingest', + 'system' +); + + +-- +-- Name: event_kind; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.event_kind AS ENUM ( + 'task', + 'calendar_event', + 'activity_log' +); + + +-- +-- Name: event_kind_hint; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.event_kind_hint AS ENUM ( + 'note', + 'task', + 'calendar_event', + 'activity_log', + 'reference' +); + + +-- +-- Name: event_source; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.event_source AS ENUM ( + 'manual', + 'memo', + 'email', + 'chat', + 'webhook', + 'git_commit', + 'claude_code' +); + + +-- +-- Name: event_status; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.event_status AS ENUM ( + 'inbox', + 'next', + 'scheduled', + 'in_progress', + 'done', + 'cancelled', + 'deferred' +); + + +-- +-- Name: history_change_kind; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.history_change_kind AS ENUM ( + 'create', + 'reschedule', + 'defer', + 'reactivate', + 'complete', + 'cancel' +); + + +-- +-- Name: process_stage; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.process_stage AS ENUM ( + 'extract', + 'classify', + 'embed', + 'preview', + 'summarize', + 'chunk', + 'stt', + 'thumbnail', + 'deep_summary', + 'markdown', + 'fulltext' +); + + +-- +-- Name: process_status; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.process_status AS ENUM ( + 'pending', + 'processing', + 'completed', + 'failed' +); + + +-- +-- Name: source_channel; Type: TYPE; Schema: public; Owner: - +-- + +CREATE TYPE public.source_channel AS ENUM ( + 'law_monitor', + 'devonagent', + 'email', + 'web_clip', + 'tksafety', + 'inbox_route', + 'manual', + 
'drive_sync', + 'news', + 'memo', + 'voice', + 'hermes', + 'crawl' +); + + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: documents; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.documents ( + id bigint NOT NULL, + file_path text, + file_hash character(64) NOT NULL, + file_format character varying(20) NOT NULL, + file_size bigint, + file_type public.doc_type DEFAULT 'immutable'::public.doc_type NOT NULL, + import_source text, + extracted_text text, + extracted_at timestamp with time zone, + extractor_version character varying(50), + ai_summary text, + ai_tags jsonb DEFAULT '[]'::jsonb, + ai_domain character varying(100), + ai_sub_group character varying(100), + ai_model_version character varying(50), + ai_processed_at timestamp with time zone, + embed_model_version character varying(50), + embedded_at timestamp with time zone, + source_channel public.source_channel, + data_origin public.data_origin, + title text, + created_at timestamp with time zone DEFAULT now(), + updated_at timestamp with time zone DEFAULT now(), + user_note text, + preview_status character varying(20) DEFAULT 'none'::character varying, + preview_hash character varying(64), + preview_at timestamp with time zone, + edit_url text, + original_path text, + original_format character varying(20), + original_hash character varying(64), + conversion_status character varying(20) DEFAULT 'none'::character varying, + document_type character varying(50), + importance character varying(20) DEFAULT 'medium'::character varying, + ai_confidence double precision, + review_status character varying(20) DEFAULT 'pending'::character varying, + derived_path text, + deleted_at timestamp with time zone, + embedding public.vector(1024), + is_read boolean DEFAULT false, + user_tags jsonb DEFAULT '[]'::jsonb, + pinned boolean DEFAULT false, + ask_includable boolean DEFAULT true, + archived boolean DEFAULT false, + doc_purpose public.document_purpose, + 
facet_company text, + facet_topic text, + facet_year integer, + facet_doctype text, + extract_meta jsonb DEFAULT '{}'::jsonb, + category public.doc_category, + ai_suggestion jsonb, + thumbnail_path text, + needs_conversion boolean DEFAULT false NOT NULL, + ai_tldr text, + ai_bullets jsonb, + ai_detail_summary text, + ai_inconsistencies jsonb, + ai_analysis_tier text, + memo_task_state jsonb DEFAULT '{}'::jsonb NOT NULL, + md_content text, + md_frontmatter jsonb DEFAULT '{}'::jsonb NOT NULL, + md_format_version text DEFAULT '1.0'::text NOT NULL, + md_status text DEFAULT 'pending'::text NOT NULL, + md_extraction_engine text, + md_extraction_engine_version text, + md_extraction_quality jsonb, + md_extraction_error text, + md_content_hash text, + md_source_hash text, + md_generated_at timestamp with time zone, + content_origin text DEFAULT 'extracted'::text NOT NULL, + md_draft_status text, + ai_event_kind public.event_kind_hint, + ai_event_confidence numeric(3,2), + source_external_id text, + email_metadata jsonb, + source_metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + ocr_derived boolean DEFAULT false NOT NULL, + original_filename text, + duplicate_of bigint, + duplicate_count integer DEFAULT 0 NOT NULL, + material_type text, + jurisdiction text, + published_date date, + CONSTRAINT chk_documents_jurisdiction CHECK ((jurisdiction = ANY (ARRAY['KR'::text, 'US'::text, 'EU'::text, 'JP'::text, 'GB'::text, 'INT'::text]))), + CONSTRAINT chk_documents_law_jurisdiction CHECK (((material_type <> 'law'::text) OR (jurisdiction IS NOT NULL))), + CONSTRAINT chk_documents_material_type CHECK ((material_type = ANY (ARRAY['law'::text, 'paper'::text, 'book'::text, 'incident'::text, 'manual'::text, 'standard'::text, 'guide'::text]))), + CONSTRAINT documents_ai_event_confidence_check CHECK (((ai_event_confidence IS NULL) OR ((ai_event_confidence >= (0)::numeric) AND (ai_event_confidence <= (1)::numeric)))), + CONSTRAINT documents_content_origin_check CHECK ((content_origin = ANY 
(ARRAY['extracted'::text, 'manual'::text, 'ai_drafted'::text, 'imported'::text]))), + CONSTRAINT documents_md_draft_status_check CHECK (((md_draft_status IS NULL) OR (md_draft_status = ANY (ARRAY['draft'::text, 'pending_review'::text, 'approved'::text, 'revised'::text, 'rejected'::text])))), + CONSTRAINT documents_md_draft_status_only_ai CHECK (((md_draft_status IS NULL) OR (content_origin = 'ai_drafted'::text))), + CONSTRAINT documents_md_status_check CHECK ((md_status = ANY (ARRAY['pending'::text, 'processing'::text, 'success'::text, 'partial'::text, 'failed'::text, 'skipped'::text]))) +); + + +-- +-- Name: active_documents; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.active_documents AS + SELECT id, + file_path, + file_hash, + file_format, + file_size, + file_type, + import_source, + extracted_text, + extracted_at, + extractor_version, + ai_summary, + ai_tags, + ai_domain, + ai_sub_group, + ai_model_version, + ai_processed_at, + embed_model_version, + embedded_at, + source_channel, + data_origin, + title, + created_at, + updated_at, + user_note, + preview_status, + preview_hash, + preview_at, + edit_url, + original_path, + original_format, + original_hash, + conversion_status, + document_type, + importance, + ai_confidence, + review_status, + derived_path, + deleted_at, + embedding + FROM public.documents + WHERE (deleted_at IS NULL); + + +-- +-- Name: analyze_events; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.analyze_events ( + id bigint NOT NULL, + doc_id bigint NOT NULL, + user_id bigint, + mode text DEFAULT 'quick'::text NOT NULL, + text_limit integer, + truncated boolean DEFAULT false, + layers_returned jsonb DEFAULT '[]'::jsonb, + cached boolean DEFAULT false, + latency_ms integer, + model_name text, + prompt_version text, + error_code text, + source text DEFAULT 'document_server'::text NOT NULL, + created_at timestamp with time zone DEFAULT now(), + subject_domain text, + risk_flags text[], + high_impact_task 
boolean, + escalated_to_26b boolean, + escalation_reasons text[], + confidence real, + policy_violation boolean, + policy_violation_ids text[], + shadow_would_route_to text, + policy_version text, + tier text, + suppressed_reason text, + answerability text, + partial_basis boolean, + suggested_query_count integer +); + + +-- +-- Name: analyze_events_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.analyze_events_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: analyze_events_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.analyze_events_id_seq OWNED BY public.analyze_events.id; + + +-- +-- Name: approval_requests; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.approval_requests ( + id bigint NOT NULL, + user_id bigint NOT NULL, + request_type character varying(40) NOT NULL, + payload jsonb NOT NULL, + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + requester character varying(20) NOT NULL, + decided_by character varying(40), + decided_at timestamp with time zone, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: approval_requests_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.approval_requests_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: approval_requests_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.approval_requests_id_seq OWNED BY public.approval_requests.id; + + +-- +-- Name: ask_events; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.ask_events ( + id bigint NOT NULL, + query text NOT NULL, + user_id bigint, + completeness text, + synthesis_status text, + confidence text, + refused boolean DEFAULT false, + classifier_verdict text, + max_rerank_score real, + 
aggregate_score real, + hallucination_flags jsonb DEFAULT '[]'::jsonb, + evidence_count integer, + citation_count integer, + defense_layers jsonb, + total_ms integer, + created_at timestamp with time zone DEFAULT now(), + answer_length integer, + covered_aspects jsonb, + missing_aspects jsonb, + model_name text, + prompt_version text, + source text DEFAULT 'document_server'::text NOT NULL, + eval_case_id text +); + + +-- +-- Name: ask_events_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.ask_events_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: ask_events_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.ask_events_id_seq OWNED BY public.ask_events.id; + + +-- +-- Name: audio_segments; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.audio_segments ( + id bigint NOT NULL, + document_id bigint NOT NULL, + start_s real NOT NULL, + end_s real NOT NULL, + text text NOT NULL +); + + +-- +-- Name: audio_segments_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.audio_segments_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: audio_segments_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.audio_segments_id_seq OWNED BY public.audio_segments.id; + + +-- +-- Name: automation_state; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.automation_state ( + id bigint NOT NULL, + job_name character varying(50) NOT NULL, + last_check_value text, + last_run_at timestamp with time zone, + updated_at timestamp with time zone DEFAULT now() +); + + +-- +-- Name: automation_state_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.automation_state_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: automation_state_id_seq; Type: SEQUENCE OWNED BY; Schema: 
public; Owner: - +-- + +ALTER SEQUENCE public.automation_state_id_seq OWNED BY public.automation_state.id; + + +-- +-- Name: background_jobs; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.background_jobs ( + id bigint NOT NULL, + kind text NOT NULL, + label text, + state text DEFAULT 'running'::text NOT NULL, + processed integer DEFAULT 0 NOT NULL, + total integer, + detail jsonb DEFAULT '{}'::jsonb NOT NULL, + error text, + started_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + finished_at timestamp with time zone, + CONSTRAINT background_jobs_state_check CHECK ((state = ANY (ARRAY['running'::text, 'done'::text, 'failed'::text]))) +); + + +-- +-- Name: background_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.background_jobs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: background_jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.background_jobs_id_seq OWNED BY public.background_jobs.id; + + +-- +-- Name: briefing_topics; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.briefing_topics ( + id bigint NOT NULL, + briefing_id bigint NOT NULL, + topic_rank integer NOT NULL, + topic_label character varying(120) NOT NULL, + headline text NOT NULL, + country_perspectives jsonb DEFAULT '[]'::jsonb NOT NULL, + divergences jsonb DEFAULT '[]'::jsonb NOT NULL, + convergences jsonb DEFAULT '[]'::jsonb NOT NULL, + key_quotes jsonb DEFAULT '[]'::jsonb NOT NULL, + historical_article_ids jsonb, + historical_context text, + historical_window_days integer, + cluster_members jsonb DEFAULT '[]'::jsonb NOT NULL, + article_count integer NOT NULL, + country_count integer NOT NULL, + importance_score double precision NOT NULL, + raw_weight_sum double precision NOT NULL, + llm_model character varying(100), + llm_fallback_used boolean DEFAULT false NOT NULL, + 
created_at timestamp with time zone DEFAULT now() NOT NULL, + is_read boolean DEFAULT false NOT NULL, + read_at timestamp with time zone, + highlighted boolean DEFAULT false NOT NULL, + highlighted_at timestamp with time zone +); + + +-- +-- Name: briefing_topics_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.briefing_topics_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: briefing_topics_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.briefing_topics_id_seq OWNED BY public.briefing_topics.id; + + +-- +-- Name: chunk_section_analysis; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.chunk_section_analysis ( + id bigint NOT NULL, + chunk_id bigint NOT NULL, + status text NOT NULL, + summary text, + section_type text, + domain text, + confidence real, + model text, + prompt_version text NOT NULL, + source_content_hash text, + error text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: chunk_section_analysis_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.chunk_section_analysis_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: chunk_section_analysis_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.chunk_section_analysis_id_seq OWNED BY public.chunk_section_analysis.id; + + +-- +-- Name: document_chunks; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_chunks ( + id bigint NOT NULL, + doc_id bigint NOT NULL, + chunk_index integer NOT NULL, + chunk_type character varying(30) NOT NULL, + section_title text, + heading_path text, + page integer, + language character varying(10), + country character varying(10), + source character varying(100), + domain_category character varying(20) NOT NULL, + text text NOT 
NULL, + embedding public.vector(1024), + created_at timestamp with time zone DEFAULT now(), + updated_at timestamp with time zone DEFAULT now(), + page_start integer, + page_end integer, + source_type text, + chunker_version text, + source_hash text, + chunk_content_hash text, + parent_id bigint, + level smallint, + node_type text, + is_leaf boolean DEFAULT false NOT NULL, + in_corpus boolean DEFAULT true NOT NULL, + char_start integer +); + + +-- +-- Name: corpus_chunks; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.corpus_chunks AS + SELECT id, + doc_id, + chunk_index, + chunk_type, + section_title, + heading_path, + page, + language, + country, + source, + domain_category, + text, + embedding, + created_at, + updated_at, + page_start, + page_end, + source_type, + chunker_version, + source_hash, + chunk_content_hash, + parent_id, + level, + node_type, + is_leaf, + in_corpus + FROM public.document_chunks + WHERE (in_corpus = true); + + +-- +-- Name: corpus_chunks_hier_sim_clean; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.corpus_chunks_hier_sim_clean AS + SELECT id, + doc_id, + chunk_index, + chunk_type, + section_title, + heading_path, + page, + language, + country, + source, + domain_category, + text, + embedding, + created_at, + updated_at, + page_start, + page_end, + source_type, + chunker_version, + source_hash, + chunk_content_hash, + parent_id, + level, + node_type, + is_leaf, + in_corpus + FROM public.document_chunks dc + WHERE ((embedding IS NOT NULL) AND (((source_type = 'hier_section'::text) AND (is_leaf = true) AND ((length(TRIM(BOTH FROM text)) >= 30) OR (EXISTS ( SELECT 1 + FROM public.document_chunks ch + WHERE (ch.parent_id = dc.id))))) OR ((source_type IS DISTINCT FROM 'hier_section'::text) AND (NOT (EXISTS ( SELECT 1 + FROM public.document_chunks h + WHERE ((h.doc_id = dc.doc_id) AND (h.source_type = 'hier_section'::text) AND (h.is_leaf = true) AND (h.embedding IS NOT NULL) AND ((length(TRIM(BOTH FROM h.text)) 
>= 30) OR (EXISTS ( SELECT 1 + FROM public.document_chunks ch2 + WHERE (ch2.parent_id = h.id))))))))))); + + +-- +-- Name: VIEW corpus_chunks_hier_sim_clean; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON VIEW public.corpus_chunks_hier_sim_clean IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(childless-tiny 제외). ?corpus_variant=hier_sim_clean 전용. production 검색 미사용.'; + + +-- +-- Name: corpus_chunks_hier_sim_raw; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.corpus_chunks_hier_sim_raw AS + SELECT id, + doc_id, + chunk_index, + chunk_type, + section_title, + heading_path, + page, + language, + country, + source, + domain_category, + text, + embedding, + created_at, + updated_at, + page_start, + page_end, + source_type, + chunker_version, + source_hash, + chunk_content_hash, + parent_id, + level, + node_type, + is_leaf, + in_corpus + FROM public.document_chunks dc + WHERE ((embedding IS NOT NULL) AND (((source_type = 'hier_section'::text) AND (is_leaf = true)) OR ((source_type IS DISTINCT FROM 'hier_section'::text) AND (NOT (EXISTS ( SELECT 1 + FROM public.document_chunks h + WHERE ((h.doc_id = dc.doc_id) AND (h.source_type = 'hier_section'::text) AND (h.is_leaf = true) AND (h.embedding IS NOT NULL)))))))); + + +-- +-- Name: VIEW corpus_chunks_hier_sim_raw; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON VIEW public.corpus_chunks_hier_sim_raw IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). post-replace 시뮬(raw). ?corpus_variant=hier_sim_raw 전용. 
production 검색 미사용.'; + + +-- +-- Name: corpus_chunks_prehier; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.corpus_chunks_prehier AS + SELECT id, + doc_id, + chunk_index, + chunk_type, + section_title, + heading_path, + page, + language, + country, + source, + domain_category, + text, + embedding, + created_at, + updated_at, + page_start, + page_end, + source_type, + chunker_version, + source_hash, + chunk_content_hash, + parent_id, + level, + node_type, + is_leaf, + in_corpus + FROM public.document_chunks + WHERE ((source_type IS DISTINCT FROM 'hier_section'::text) AND (embedding IS NOT NULL)); + + +-- +-- Name: VIEW corpus_chunks_prehier; Type: COMMENT; Schema: public; Owner: - +-- + +COMMENT ON VIEW public.corpus_chunks_prehier IS 'EVAL-ONLY (Hier-Replace-Diagnose-1, NO-GO 2026-05-25). pre-hier baseline. ?corpus_variant=prehier 전용. default retrieval 은 corpus_chunks 만.'; + + +-- +-- Name: csa_snapshot_20260609; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.csa_snapshot_20260609 ( + id bigint, + chunk_id bigint, + status text, + summary text, + section_type text, + domain text, + confidence real, + model text, + prompt_version text, + source_content_hash text, + error text, + created_at timestamp with time zone, + updated_at timestamp with time zone +); + + +-- +-- Name: digest_topics; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.digest_topics ( + id bigint NOT NULL, + digest_id bigint NOT NULL, + country character varying(10) NOT NULL, + topic_rank integer NOT NULL, + topic_label text NOT NULL, + summary text NOT NULL, + article_ids jsonb NOT NULL, + article_count integer NOT NULL, + importance_score double precision NOT NULL, + raw_weight_sum double precision NOT NULL, + centroid_sample jsonb, + llm_model character varying(100), + llm_fallback_used boolean DEFAULT false NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: digest_topics_id_seq; Type: SEQUENCE; Schema: 
public; Owner: - +-- + +CREATE SEQUENCE public.digest_topics_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: digest_topics_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.digest_topics_id_seq OWNED BY public.digest_topics.id; + + +-- +-- Name: document_chunks_cand_qwen06; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_chunks_cand_qwen06 ( + id bigint NOT NULL, + doc_id bigint NOT NULL, + chunk_index integer, + section_title text, + text text, + embedding public.vector(1024) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_chunks_cand_qwen4; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_chunks_cand_qwen4 ( + id bigint NOT NULL, + doc_id bigint NOT NULL, + chunk_index integer, + section_title text, + text text, + embedding public.vector(2560) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_chunks_cand_qwen4m; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_chunks_cand_qwen4m ( + id bigint NOT NULL, + doc_id bigint NOT NULL, + chunk_index integer, + section_title text, + text text, + embedding public.vector(1024) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_chunks_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.document_chunks_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: document_chunks_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.document_chunks_id_seq OWNED BY public.document_chunks.id; + + +-- +-- Name: document_images; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_images ( + id bigint NOT NULL, + document_id bigint NOT NULL, + image_key character varying(32) NOT NULL, + relative_path text 
NOT NULL, + file_path text NOT NULL, + mime_type text NOT NULL, + file_size bigint NOT NULL, + content_hash character varying(64) NOT NULL, + width integer, + height integer, + page_index integer, + alt_text text, + source_slug text, + extraction_engine character varying(32) DEFAULT 'marker'::character varying NOT NULL, + extraction_engine_version character varying(32), + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_images_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.document_images_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: document_images_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.document_images_id_seq OWNED BY public.document_images.id; + + +-- +-- Name: document_lineage; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_lineage ( + id bigint NOT NULL, + source_document_id bigint NOT NULL, + derived_document_id bigint NOT NULL, + relation_type text NOT NULL, + metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + CONSTRAINT document_lineage_no_self CHECK ((source_document_id <> derived_document_id)), + CONSTRAINT document_lineage_relation_type_check CHECK ((relation_type = ANY (ARRAY['cited'::text, 'summarized_from'::text, 'generated_from'::text, 'revised_from'::text]))) +); + + +-- +-- Name: document_lineage_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.document_lineage_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: document_lineage_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.document_lineage_id_seq OWNED BY public.document_lineage.id; + + +-- +-- Name: document_notes; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_notes ( + id bigint NOT NULL, + user_id bigint 
NOT NULL, + document_id bigint NOT NULL, + strokes_json jsonb, + canvas_width integer, + canvas_height integer, + schema_version integer DEFAULT 1 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_notes_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.document_notes_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: document_notes_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.document_notes_id_seq OWNED BY public.document_notes.id; + + +-- +-- Name: document_reads; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.document_reads ( + id bigint NOT NULL, + user_id bigint NOT NULL, + document_id bigint NOT NULL, + read_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: document_reads_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.document_reads_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: document_reads_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.document_reads_id_seq OWNED BY public.document_reads.id; + + +-- +-- Name: documents_cand_qwen06; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.documents_cand_qwen06 ( + doc_id bigint NOT NULL, + embed_input_hash text, + embedding public.vector(1024) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: documents_cand_qwen4; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.documents_cand_qwen4 ( + doc_id bigint NOT NULL, + embed_input_hash text, + embedding public.vector(2560) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: documents_cand_qwen4m; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE 
public.documents_cand_qwen4m ( + doc_id bigint NOT NULL, + embed_input_hash text, + embedding public.vector(1024) NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: documents_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.documents_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: documents_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.documents_id_seq OWNED BY public.documents.id; + + +-- +-- Name: eid_review_set_draft; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.eid_review_set_draft ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint, + question_ids jsonb NOT NULL, + reason character varying(40) NOT NULL, + actor character varying(20) NOT NULL, + source_weakness_id bigint, + source_generated_at timestamp with time zone NOT NULL, + supersedes_id bigint, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: eid_review_set_draft_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.eid_review_set_draft_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: eid_review_set_draft_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.eid_review_set_draft_id_seq OWNED BY public.eid_review_set_draft.id; + + +-- +-- Name: eid_study_weakness; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.eid_study_weakness ( + id bigint NOT NULL, + user_id bigint NOT NULL, + weaknesses jsonb NOT NULL, + habit_signals jsonb NOT NULL, + trend_label character varying(20) NOT NULL, + sample_attempts integer DEFAULT 0 NOT NULL, + is_shallow_sample boolean DEFAULT false NOT NULL, + status character varying(20) DEFAULT 'active'::character varying NOT NULL, + supersedes_id bigint, + actor character varying(20) NOT NULL, + source_generated_at timestamp 
with time zone NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: eid_study_weakness_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.eid_study_weakness_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: eid_study_weakness_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.eid_study_weakness_id_seq OWNED BY public.eid_study_weakness.id; + + +-- +-- Name: eid_weekly_recap; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.eid_weekly_recap ( + id bigint NOT NULL, + user_id bigint NOT NULL, + period_start date NOT NULL, + period_end date NOT NULL, + recap jsonb NOT NULL, + trend_label character varying(20), + status character varying(20) DEFAULT 'active'::character varying NOT NULL, + supersedes_id bigint, + actor character varying(20) NOT NULL, + source_generated_at timestamp with time zone NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: eid_weekly_recap_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.eid_weekly_recap_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: eid_weekly_recap_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.eid_weekly_recap_id_seq OWNED BY public.eid_weekly_recap.id; + + +-- +-- Name: events; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.events ( + id bigint NOT NULL, + title text NOT NULL, + description text, + kind public.event_kind NOT NULL, + status public.event_status DEFAULT 'inbox'::public.event_status NOT NULL, + due_at timestamp with time zone, + start_at timestamp with time zone, + end_at timestamp with time zone, + started_at timestamp with time zone, + ended_at timestamp with time zone, + all_day boolean DEFAULT false NOT NULL, + timezone text, + defer_until timestamp with time zone, + 
completed_at timestamp with time zone, + cancelled_at timestamp with time zone, + priority smallint, + project_tag character varying(64), + tags jsonb DEFAULT '[]'::jsonb NOT NULL, + source public.event_source DEFAULT 'manual'::public.event_source NOT NULL, + source_ref text, + raw_metadata jsonb DEFAULT '{}'::jsonb NOT NULL, + memo_document_id bigint, + user_id bigint NOT NULL, + created_by public.event_actor DEFAULT 'manual'::public.event_actor NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + CONSTRAINT events_activity_log_requires_time CHECK (((kind <> 'activity_log'::public.event_kind) OR (started_at IS NOT NULL) OR (ended_at IS NOT NULL))), + CONSTRAINT events_calendar_event_requires_start CHECK (((kind <> 'calendar_event'::public.event_kind) OR (start_at IS NOT NULL))), + CONSTRAINT events_priority_check CHECK (((priority >= 1) AND (priority <= 4))) +); + + +-- +-- Name: events_history; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.events_history ( + id bigint NOT NULL, + event_id bigint NOT NULL, + changed_at timestamp with time zone DEFAULT now() NOT NULL, + changed_by public.event_actor NOT NULL, + change_kind public.history_change_kind NOT NULL, + before jsonb, + after jsonb NOT NULL +); + + +-- +-- Name: events_history_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.events_history_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: events_history_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.events_history_id_seq OWNED BY public.events_history.id; + + +-- +-- Name: events_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.events_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: events_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE 
public.events_id_seq OWNED BY public.events.id; + + +-- +-- Name: facet_values; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.facet_values ( + id bigint NOT NULL, + facet_type text NOT NULL, + value text NOT NULL, + is_system boolean DEFAULT false NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: facet_values_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.facet_values_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: facet_values_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.facet_values_id_seq OWNED BY public.facet_values.id; + + +-- +-- Name: global_digests; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.global_digests ( + id bigint NOT NULL, + digest_date date NOT NULL, + window_start timestamp with time zone NOT NULL, + window_end timestamp with time zone NOT NULL, + decay_lambda double precision NOT NULL, + total_articles integer DEFAULT 0 NOT NULL, + total_countries integer DEFAULT 0 NOT NULL, + total_topics integer DEFAULT 0 NOT NULL, + generation_ms integer, + llm_calls integer DEFAULT 0 NOT NULL, + llm_failures integer DEFAULT 0 NOT NULL, + status character varying(20) DEFAULT 'success'::character varying NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: global_digests_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.global_digests_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: global_digests_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.global_digests_id_seq OWNED BY public.global_digests.id; + + +-- +-- Name: hier_snapshot_20260609; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.hier_snapshot_20260609 ( + id bigint, + doc_id bigint, + chunk_index integer, + chunk_type character 
varying(30), + section_title text, + heading_path text, + page integer, + language character varying(10), + country character varying(10), + source character varying(100), + domain_category character varying(20), + text text, + embedding public.vector(1024), + created_at timestamp with time zone, + updated_at timestamp with time zone, + page_start integer, + page_end integer, + source_type text, + chunker_version text, + source_hash text, + chunk_content_hash text, + parent_id bigint, + level smallint, + node_type text, + is_leaf boolean, + in_corpus boolean, + char_start integer +); + + +-- +-- Name: legal_acts; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.legal_acts ( + family_id text NOT NULL, + jurisdiction text NOT NULL, + law_level text NOT NULL, + title text NOT NULL, + title_ko text, + parent_family_id text, + native_id text NOT NULL, + source_api text NOT NULL, + watch boolean DEFAULT true NOT NULL, + poll_cycle text DEFAULT 'daily'::text NOT NULL, + watermark text, + repeal_detected_at timestamp with time zone, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + CONSTRAINT legal_acts_jurisdiction_check CHECK ((jurisdiction = ANY (ARRAY['KR'::text, 'US'::text, 'EU'::text, 'JP'::text, 'GB'::text, 'INT'::text]))), + CONSTRAINT legal_acts_law_level_check CHECK ((law_level = ANY (ARRAY['statute'::text, 'decree'::text, 'rule'::text, 'admin_rule'::text, 'code'::text]))), + CONSTRAINT legal_acts_poll_cycle_check CHECK ((poll_cycle = ANY (ARRAY['daily'::text, 'weekly'::text, 'monthly'::text, 'quarterly'::text]))) +); + + +-- +-- Name: legal_meta; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.legal_meta ( + document_id bigint NOT NULL, + family_id text NOT NULL, + law_doc_kind text DEFAULT 'primary'::text NOT NULL, + version_key text NOT NULL, + promulgation_date date, + effective_date date, + version_status text DEFAULT 'pending'::text NOT NULL, + 
created_at timestamp with time zone DEFAULT now() NOT NULL, + CONSTRAINT legal_meta_law_doc_kind_check CHECK ((law_doc_kind = ANY (ARRAY['primary'::text, 'annex'::text, 'interpretation'::text]))), + CONSTRAINT legal_meta_version_status_check CHECK ((version_status = ANY (ARRAY['pending'::text, 'current'::text, 'superseded'::text, 'repealed'::text]))) +); + + +-- +-- Name: library_categories; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.library_categories ( + id bigint NOT NULL, + path text NOT NULL, + name text NOT NULL, + parent_path text, + depth integer DEFAULT 1 NOT NULL, + is_system boolean DEFAULT false NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: library_categories_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.library_categories_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: library_categories_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.library_categories_id_seq OWNED BY public.library_categories.id; + + +-- +-- Name: morning_briefings; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.morning_briefings ( + id bigint NOT NULL, + briefing_date date NOT NULL, + window_start timestamp with time zone NOT NULL, + window_end timestamp with time zone NOT NULL, + decay_lambda double precision NOT NULL, + total_articles integer DEFAULT 0 NOT NULL, + total_countries integer DEFAULT 0 NOT NULL, + total_topics integer DEFAULT 0 NOT NULL, + generation_ms integer, + llm_calls integer DEFAULT 0 NOT NULL, + llm_failures integer DEFAULT 0 NOT NULL, + status character varying(20) DEFAULT 'success'::character varying NOT NULL, + headline_oneliner text, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: morning_briefings_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + 
+CREATE SEQUENCE public.morning_briefings_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: morning_briefings_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.morning_briefings_id_seq OWNED BY public.morning_briefings.id; + + +-- +-- Name: news_sources; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.news_sources ( + id integer NOT NULL, + name character varying(100) NOT NULL, + country character varying(10), + feed_url text NOT NULL, + feed_type character varying(20) DEFAULT 'rss'::character varying, + category character varying(50), + language character varying(10), + enabled boolean DEFAULT true, + last_fetched_at timestamp with time zone, + created_at timestamp with time zone DEFAULT now(), + fetch_method character varying(20) DEFAULT 'rss'::character varying NOT NULL, + fulltext_policy character varying(20) DEFAULT 'none'::character varying NOT NULL, + auth_profile character varying(50), + poll_interval_minutes integer, + etag text, + last_modified text, + feed_content_hash character varying(64), + selector_override jsonb, + parser_quirk character varying(30), + source_channel public.source_channel DEFAULT 'news'::public.source_channel NOT NULL, + material_type text, + license_scheme text, + license_redistribute boolean, + CONSTRAINT news_sources_material_type_check CHECK ((material_type = ANY (ARRAY['law'::text, 'paper'::text, 'book'::text, 'incident'::text, 'manual'::text, 'standard'::text, 'guide'::text]))) +); + + +-- +-- Name: news_sources_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.news_sources_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: news_sources_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.news_sources_id_seq OWNED BY public.news_sources.id; + + +-- +-- Name: processing_queue; Type: TABLE; Schema: public; 
Owner: - +-- + +CREATE TABLE public.processing_queue ( + id bigint NOT NULL, + document_id bigint NOT NULL, + stage public.process_stage NOT NULL, + status public.process_status DEFAULT 'pending'::public.process_status, + attempts smallint DEFAULT 0, + max_attempts smallint DEFAULT 3, + error_message text, + created_at timestamp with time zone DEFAULT now(), + started_at timestamp with time zone, + completed_at timestamp with time zone, + payload jsonb +); + + +-- +-- Name: processing_queue_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.processing_queue_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: processing_queue_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.processing_queue_id_seq OWNED BY public.processing_queue.id; + + +-- +-- Name: search_failure_logs; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.search_failure_logs ( + id bigint NOT NULL, + query text NOT NULL, + user_id bigint, + created_at timestamp with time zone DEFAULT now() NOT NULL, + result_count integer NOT NULL, + confidence double precision, + failure_reason character varying(30) NOT NULL, + context jsonb, + reviewed boolean DEFAULT false NOT NULL +); + + +-- +-- Name: search_failure_logs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.search_failure_logs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: search_failure_logs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.search_failure_logs_id_seq OWNED BY public.search_failure_logs.id; + + +-- +-- Name: source_health; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.source_health ( + id integer NOT NULL, + source_id integer NOT NULL, + consecutive_failures integer DEFAULT 0 NOT NULL, + total_fetches bigint DEFAULT 0 NOT NULL, + total_failures bigint DEFAULT 0 NOT NULL, + 
last_success_at timestamp with time zone, + last_error text, + last_error_at timestamp with time zone, + last_fetch_items integer, + empty_streak integer DEFAULT 0 NOT NULL, + circuit_state character varying(10) DEFAULT 'closed'::character varying NOT NULL, + circuit_opened_at timestamp with time zone, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + relogin_requested boolean DEFAULT false NOT NULL, + last_probe_at timestamp with time zone, + last_probe_ok boolean +); + + +-- +-- Name: source_health_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.source_health_id_seq + AS integer + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: source_health_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.source_health_id_seq OWNED BY public.source_health.id; + + +-- +-- Name: study_memo_card_evidence; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_memo_card_evidence ( + id bigint NOT NULL, + card_id bigint NOT NULL, + source_type character varying(40) NOT NULL, + source_id bigint, + chunk_index integer, + snippet text, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_memo_card_evidence_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_memo_card_evidence_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_memo_card_evidence_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_memo_card_evidence_id_seq OWNED BY public.study_memo_card_evidence.id; + + +-- +-- Name: study_memo_card_jobs; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_memo_card_jobs ( + id bigint NOT NULL, + user_id bigint NOT NULL, + source_kind character varying(40) NOT NULL, + source_id bigint NOT NULL, + source_version timestamp with time zone, + kind character varying(40) NOT NULL, 
+ status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + attempts smallint DEFAULT 0 NOT NULL, + max_attempts smallint DEFAULT 2 NOT NULL, + error_code character varying(40), + error_message text, + payload jsonb, + created_at timestamp with time zone DEFAULT now() NOT NULL, + started_at timestamp with time zone, + completed_at timestamp with time zone +); + + +-- +-- Name: study_memo_card_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_memo_card_jobs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_memo_card_jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_memo_card_jobs_id_seq OWNED BY public.study_memo_card_jobs.id; + + +-- +-- Name: study_memo_card_progress; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_memo_card_progress ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + card_id bigint NOT NULL, + last_outcome character varying(20), + last_reviewed_at timestamp with time zone, + due_at timestamp with time zone, + review_stage smallint, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_memo_card_progress_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_memo_card_progress_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_memo_card_progress_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_memo_card_progress_id_seq OWNED BY public.study_memo_card_progress.id; + + +-- +-- Name: study_memo_cards; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_memo_cards ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + source_kind character varying(40) NOT NULL, + 
source_question_id bigint, + source_subject_note_id bigint, + format character varying(20) NOT NULL, + cue text NOT NULL, + fact text NOT NULL, + cloze_text text, + extra jsonb, + source_generated_at timestamp with time zone, + dedup_hash character varying(64) NOT NULL, + needs_review boolean DEFAULT true NOT NULL, + flagged_at timestamp with time zone, + flagged_by character varying(40), + model character varying(120), + generated_at timestamp with time zone, + created_at timestamp with time zone DEFAULT now() NOT NULL, + deleted_at timestamp with time zone, + view_count integer DEFAULT 0 NOT NULL, + last_viewed_at timestamp with time zone +); + + +-- +-- Name: study_memo_cards_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_memo_cards_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_memo_cards_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_memo_cards_id_seq OWNED BY public.study_memo_cards.id; + + +-- +-- Name: study_question_attempts; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_question_attempts ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_question_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + selected_choice smallint, + correct_choice smallint NOT NULL, + is_correct boolean NOT NULL, + answered_at timestamp with time zone DEFAULT now() NOT NULL, + outcome character varying(20) NOT NULL, + quiz_session_id bigint, + reviewed_at timestamp with time zone, + CONSTRAINT study_question_attempts_correct_choice_check CHECK (((correct_choice >= 1) AND (correct_choice <= 4))), + CONSTRAINT study_question_attempts_selected_choice_check CHECK (((selected_choice IS NULL) OR ((selected_choice >= 1) AND (selected_choice <= 4)))) +); + + +-- +-- Name: study_question_attempts_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_question_attempts_id_seq + START 
WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_question_attempts_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_question_attempts_id_seq OWNED BY public.study_question_attempts.id; + + +-- +-- Name: study_question_images; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_question_images ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_question_id bigint NOT NULL, + file_path text NOT NULL, + file_size bigint NOT NULL, + mime_type character varying(80) NOT NULL, + sort_order integer DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_question_images_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_question_images_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_question_images_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_question_images_id_seq OWNED BY public.study_question_images.id; + + +-- +-- Name: study_question_jobs; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_question_jobs ( + id bigint NOT NULL, + study_question_id bigint NOT NULL, + user_id bigint NOT NULL, + kind character varying(40) NOT NULL, + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + attempts smallint DEFAULT 0 NOT NULL, + max_attempts smallint DEFAULT 2 NOT NULL, + error_code character varying(40), + error_message text, + payload jsonb, + created_at timestamp with time zone DEFAULT now() NOT NULL, + started_at timestamp with time zone, + completed_at timestamp with time zone +); + + +-- +-- Name: study_question_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_question_jobs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_question_jobs_id_seq; 
Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_question_jobs_id_seq OWNED BY public.study_question_jobs.id; + + +-- +-- Name: study_question_progress; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_question_progress ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + study_question_id bigint NOT NULL, + last_outcome character varying(20), + last_attempted_at timestamp with time zone, + last_attempt_id bigint, + last_reviewed_at timestamp with time zone, + due_at timestamp with time zone, + review_stage smallint, + pattern_state character varying(30), + pattern_updated_at timestamp with time zone, + pattern_window_attempts smallint, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_question_progress_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_question_progress_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_question_progress_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_question_progress_id_seq OWNED BY public.study_question_progress.id; + + +-- +-- Name: study_questions; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_questions ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + question_text text NOT NULL, + choice_1 text NOT NULL, + choice_2 text NOT NULL, + choice_3 text NOT NULL, + choice_4 text NOT NULL, + correct_choice smallint NOT NULL, + subject character varying(120), + scope character varying(200), + exam_name character varying(120), + exam_round character varying(120), + explanation text, + source_note text, + is_active boolean DEFAULT true NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() 
NOT NULL, + deleted_at timestamp with time zone, + ai_explanation text, + ai_explanation_status character varying(20) DEFAULT 'none'::character varying NOT NULL, + ai_explanation_generated_at timestamp with time zone, + ai_explanation_model character varying(120), + embedding public.vector(1024), + embedding_status character varying(20) DEFAULT 'none'::character varying NOT NULL, + embedding_updated_at timestamp with time zone, + embedding_model character varying(120), + exam_question_number smallint, + related_repeat jsonb, + related_similar jsonb, + related_repeat_round_count integer, + related_similar_round_count integer, + related_repeat_grade character varying(50), + related_computed_at timestamp with time zone, + related_threshold_version character varying(20), + needs_review boolean DEFAULT false NOT NULL, + flagged_at timestamp with time zone, + flagged_by character varying(40), + CONSTRAINT study_questions_correct_choice_check CHECK (((correct_choice >= 1) AND (correct_choice <= 4))), + CONSTRAINT study_questions_exam_question_number_check CHECK (((exam_question_number IS NULL) OR (exam_question_number > 0))) +); + + +-- +-- Name: study_questions_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_questions_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_questions_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_questions_id_seq OWNED BY public.study_questions.id; + + +-- +-- Name: study_quiz_session_analysis; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_quiz_session_analysis ( + study_quiz_session_id bigint NOT NULL, + user_id bigint NOT NULL, + summary_md text NOT NULL, + confidence character varying(10), + model_name character varying(120), + generated_at timestamp with time zone DEFAULT now() NOT NULL, + is_stale boolean DEFAULT false NOT NULL +); + + +-- +-- Name: study_quiz_session_jobs; Type: 
TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_quiz_session_jobs ( + id bigint NOT NULL, + study_quiz_session_id bigint NOT NULL, + user_id bigint NOT NULL, + status character varying(20) DEFAULT 'pending'::character varying NOT NULL, + attempts smallint DEFAULT 0 NOT NULL, + max_attempts smallint DEFAULT 2 NOT NULL, + error_code character varying(40), + error_message text, + payload jsonb, + created_at timestamp with time zone DEFAULT now() NOT NULL, + started_at timestamp with time zone, + completed_at timestamp with time zone +); + + +-- +-- Name: study_quiz_session_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_quiz_session_jobs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_quiz_session_jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_quiz_session_jobs_id_seq OWNED BY public.study_quiz_session_jobs.id; + + +-- +-- Name: study_quiz_sessions; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_quiz_sessions ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + target_per_subject integer DEFAULT 20 NOT NULL, + subject_filter character varying(120), + wrong_only boolean DEFAULT false NOT NULL, + question_ids jsonb NOT NULL, + subject_distribution jsonb DEFAULT '{}'::jsonb NOT NULL, + status character varying(20) DEFAULT 'in_progress'::character varying NOT NULL, + cursor integer DEFAULT 0 NOT NULL, + correct_count integer DEFAULT 0 NOT NULL, + wrong_count integer DEFAULT 0 NOT NULL, + unsure_count integer DEFAULT 0 NOT NULL, + finished_at timestamp with time zone, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + quiz_mode character varying(30) DEFAULT 'random'::character varying NOT NULL, + newly_correct_count integer DEFAULT 0 NOT NULL, + relapsed_count integer DEFAULT 0 NOT 
NULL, + recovered_count integer DEFAULT 0 NOT NULL, + chronic_remaining_count integer DEFAULT 0 NOT NULL +); + + +-- +-- Name: study_quiz_sessions_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_quiz_sessions_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_quiz_sessions_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_quiz_sessions_id_seq OWNED BY public.study_quiz_sessions.id; + + +-- +-- Name: study_reminders; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_reminders ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint, + due_count integer, + focus_topic_names jsonb, + fired_at timestamp with time zone NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_reminders_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_reminders_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_reminders_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_reminders_id_seq OWNED BY public.study_reminders.id; + + +-- +-- Name: study_session_assets; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_session_assets ( + id bigint NOT NULL, + study_session_id bigint NOT NULL, + document_id bigint NOT NULL, + asset_type character varying(30) NOT NULL, + role character varying(40), + sort_order integer DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_session_assets_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_session_assets_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_session_assets_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE 
public.study_session_assets_id_seq OWNED BY public.study_session_assets.id; + + +-- +-- Name: study_sessions; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_sessions ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_type character varying(30) DEFAULT 'certification'::character varying NOT NULL, + certification character varying(120), + language_code character varying(20), + learning_level character varying(80), + subject character varying(120), + topic character varying(200), + source_text text, + source_page integer, + mode character varying(30) DEFAULT 'copy'::character varying NOT NULL, + prompt_question text, + expected_answer text, + metadata jsonb, + target_count integer, + repetition_count integer DEFAULT 0 NOT NULL, + strokes_json jsonb, + canvas_width integer, + canvas_height integer, + schema_version integer DEFAULT 1 NOT NULL, + ocr_text text, + user_corrected_text text, + ai_summary text, + review_state character varying(20), + next_review_at timestamp with time zone, + last_quiz_at timestamp with time zone, + correct_count integer DEFAULT 0 NOT NULL, + incorrect_count integer DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + study_topic_id bigint +); + + +-- +-- Name: study_sessions_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_sessions_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_sessions_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_sessions_id_seq OWNED BY public.study_sessions.id; + + +-- +-- Name: study_topic_documents; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_topic_documents ( + study_topic_id bigint NOT NULL, + document_id bigint NOT NULL, + user_id bigint NOT NULL, + sort_order integer DEFAULT 0 NOT NULL, + created_at timestamp with time zone 
DEFAULT now() NOT NULL +); + + +-- +-- Name: study_topic_subject_notes; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_topic_subject_notes ( + id bigint NOT NULL, + user_id bigint NOT NULL, + study_topic_id bigint NOT NULL, + subject character varying(120) NOT NULL, + scope character varying(200) DEFAULT ''::character varying NOT NULL, + content text, + status character varying(20) DEFAULT 'none'::character varying NOT NULL, + generated_at timestamp with time zone, + model character varying(120), + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: study_topic_subject_notes_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_topic_subject_notes_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_topic_subject_notes_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_topic_subject_notes_id_seq OWNED BY public.study_topic_subject_notes.id; + + +-- +-- Name: study_topics; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.study_topics ( + id bigint NOT NULL, + user_id bigint NOT NULL, + name character varying(120) NOT NULL, + description text, + color character varying(20), + study_type character varying(40), + sort_order integer DEFAULT 0 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + updated_at timestamp with time zone DEFAULT now() NOT NULL, + deleted_at timestamp with time zone, + exam_round_size integer, + exam_subjects jsonb DEFAULT '[]'::jsonb NOT NULL, + focused_at timestamp with time zone, + CONSTRAINT study_topics_exam_round_size_check CHECK (((exam_round_size IS NULL) OR ((exam_round_size >= 1) AND (exam_round_size <= 300)))) +); + + +-- +-- Name: study_topics_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.study_topics_id_seq + START WITH 1 + 
INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: study_topics_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.study_topics_id_seq OWNED BY public.study_topics.id; + + +-- +-- Name: tasks; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.tasks ( + id bigint NOT NULL, + caldav_uid text, + title text NOT NULL, + description text, + due_date timestamp with time zone, + priority smallint DEFAULT 0, + completed boolean DEFAULT false, + completed_at timestamp with time zone, + document_id bigint, + source character varying(50), + created_at timestamp with time zone DEFAULT now(), + updated_at timestamp with time zone DEFAULT now() +); + + +-- +-- Name: tasks_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.tasks_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: tasks_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.tasks_id_seq OWNED BY public.tasks.id; + + +-- +-- Name: users; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.users ( + id bigint NOT NULL, + username character varying(50) NOT NULL, + password_hash text NOT NULL, + totp_secret character varying(64), + is_active boolean DEFAULT true, + created_at timestamp with time zone DEFAULT now(), + last_login_at timestamp with time zone, + is_admin boolean DEFAULT false NOT NULL, + password_changed_at timestamp with time zone +); + + +-- +-- Name: users_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.users_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: users_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.users_id_seq OWNED BY public.users.id; + + +-- +-- Name: v_schedule_defer_pattern; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.v_schedule_defer_pattern AS + SELECT event_id, + 
(count(*))::integer AS defer_reschedule_count, + max(changed_at) AS last_changed_at, + (count(*) >= 3) AS is_repeat_defer + FROM public.events_history eh + WHERE (change_kind = ANY (ARRAY['defer'::public.history_change_kind, 'reschedule'::public.history_change_kind])) + GROUP BY event_id; + + +-- +-- Name: v_schedule_today; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.v_schedule_today AS + SELECT e.id, + e.user_id, + e.title, + e.kind, + e.status, + e.priority, + e.due_at, + e.start_at, + e.end_at, + e.started_at, + e.defer_until, + e.project_tag + FROM (public.events e + CROSS JOIN LATERAL ( SELECT (date_trunc('day'::text, (now() AT TIME ZONE 'Asia/Seoul'::text)) AT TIME ZONE 'Asia/Seoul'::text) AS lo) b) + WHERE (((e.status = ANY (ARRAY['inbox'::public.event_status, 'next'::public.event_status, 'scheduled'::public.event_status, 'in_progress'::public.event_status])) OR ((e.status = 'deferred'::public.event_status) AND (e.defer_until IS NOT NULL) AND (e.defer_until <= now()))) AND (((e.kind = 'task'::public.event_kind) AND (e.due_at >= b.lo) AND (e.due_at < (b.lo + '1 day'::interval))) OR ((e.kind = 'calendar_event'::public.event_kind) AND (e.start_at >= b.lo) AND (e.start_at < (b.lo + '1 day'::interval))) OR ((e.kind = 'activity_log'::public.event_kind) AND (e.started_at >= b.lo) AND (e.started_at < (b.lo + '1 day'::interval))))); + + +-- +-- Name: worker_capabilities; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.worker_capabilities ( + worker_id text NOT NULL, + user_id bigint NOT NULL, + device_label text NOT NULL, + worker_class text NOT NULL, + tier text NOT NULL, + capabilities jsonb DEFAULT '[]'::jsonb NOT NULL, + models_loaded jsonb DEFAULT '[]'::jsonb NOT NULL, + endpoint text, + created_at timestamp with time zone DEFAULT now() NOT NULL, + last_registered_at timestamp with time zone DEFAULT now() NOT NULL +); + + +-- +-- Name: worker_heartbeats; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE 
public.worker_heartbeats ( + id bigint NOT NULL, + worker_id text NOT NULL, + heartbeat_at timestamp with time zone DEFAULT now() NOT NULL, + status text NOT NULL, + current_job_id bigint, + battery text, + thermal text, + raw_payload jsonb DEFAULT '{}'::jsonb NOT NULL +); + + +-- +-- Name: worker_heartbeats_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.worker_heartbeats_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: worker_heartbeats_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.worker_heartbeats_id_seq OWNED BY public.worker_heartbeats.id; + + +-- +-- Name: worker_jobs; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.worker_jobs ( + id bigint NOT NULL, + user_id bigint NOT NULL, + job_type text NOT NULL, + status text DEFAULT 'pending'::text NOT NULL, + worker_id text, + payload jsonb DEFAULT '{}'::jsonb NOT NULL, + result jsonb, + error_message text, + attempts smallint DEFAULT 0 NOT NULL, + max_attempts smallint DEFAULT 3 NOT NULL, + created_at timestamp with time zone DEFAULT now() NOT NULL, + claimed_at timestamp with time zone, + completed_at timestamp with time zone, + CONSTRAINT worker_jobs_status_check CHECK ((status = ANY (ARRAY['pending'::text, 'processing'::text, 'completed'::text, 'failed'::text]))) +); + + +-- +-- Name: worker_jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: - +-- + +CREATE SEQUENCE public.worker_jobs_id_seq + START WITH 1 + INCREMENT BY 1 + NO MINVALUE + NO MAXVALUE + CACHE 1; + + +-- +-- Name: worker_jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: - +-- + +ALTER SEQUENCE public.worker_jobs_id_seq OWNED BY public.worker_jobs.id; + + +-- +-- Name: analyze_events id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.analyze_events ALTER COLUMN id SET DEFAULT nextval('public.analyze_events_id_seq'::regclass); + + +-- +-- Name: approval_requests id; Type: 
DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.approval_requests ALTER COLUMN id SET DEFAULT nextval('public.approval_requests_id_seq'::regclass); + + +-- +-- Name: ask_events id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.ask_events ALTER COLUMN id SET DEFAULT nextval('public.ask_events_id_seq'::regclass); + + +-- +-- Name: audio_segments id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.audio_segments ALTER COLUMN id SET DEFAULT nextval('public.audio_segments_id_seq'::regclass); + + +-- +-- Name: automation_state id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.automation_state ALTER COLUMN id SET DEFAULT nextval('public.automation_state_id_seq'::regclass); + + +-- +-- Name: background_jobs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.background_jobs ALTER COLUMN id SET DEFAULT nextval('public.background_jobs_id_seq'::regclass); + + +-- +-- Name: briefing_topics id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.briefing_topics ALTER COLUMN id SET DEFAULT nextval('public.briefing_topics_id_seq'::regclass); + + +-- +-- Name: chunk_section_analysis id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.chunk_section_analysis ALTER COLUMN id SET DEFAULT nextval('public.chunk_section_analysis_id_seq'::regclass); + + +-- +-- Name: digest_topics id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.digest_topics ALTER COLUMN id SET DEFAULT nextval('public.digest_topics_id_seq'::regclass); + + +-- +-- Name: document_chunks id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks ALTER COLUMN id SET DEFAULT nextval('public.document_chunks_id_seq'::regclass); + + +-- +-- Name: document_images id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_images ALTER COLUMN id SET DEFAULT 
nextval('public.document_images_id_seq'::regclass); + + +-- +-- Name: document_lineage id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_lineage ALTER COLUMN id SET DEFAULT nextval('public.document_lineage_id_seq'::regclass); + + +-- +-- Name: document_notes id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_notes ALTER COLUMN id SET DEFAULT nextval('public.document_notes_id_seq'::regclass); + + +-- +-- Name: document_reads id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_reads ALTER COLUMN id SET DEFAULT nextval('public.document_reads_id_seq'::regclass); + + +-- +-- Name: documents id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents ALTER COLUMN id SET DEFAULT nextval('public.documents_id_seq'::regclass); + + +-- +-- Name: eid_review_set_draft id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft ALTER COLUMN id SET DEFAULT nextval('public.eid_review_set_draft_id_seq'::regclass); + + +-- +-- Name: eid_study_weakness id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_study_weakness ALTER COLUMN id SET DEFAULT nextval('public.eid_study_weakness_id_seq'::regclass); + + +-- +-- Name: eid_weekly_recap id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_weekly_recap ALTER COLUMN id SET DEFAULT nextval('public.eid_weekly_recap_id_seq'::regclass); + + +-- +-- Name: events id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events ALTER COLUMN id SET DEFAULT nextval('public.events_id_seq'::regclass); + + +-- +-- Name: events_history id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events_history ALTER COLUMN id SET DEFAULT nextval('public.events_history_id_seq'::regclass); + + +-- +-- Name: facet_values id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.facet_values ALTER 
COLUMN id SET DEFAULT nextval('public.facet_values_id_seq'::regclass); + + +-- +-- Name: global_digests id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.global_digests ALTER COLUMN id SET DEFAULT nextval('public.global_digests_id_seq'::regclass); + + +-- +-- Name: library_categories id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.library_categories ALTER COLUMN id SET DEFAULT nextval('public.library_categories_id_seq'::regclass); + + +-- +-- Name: morning_briefings id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.morning_briefings ALTER COLUMN id SET DEFAULT nextval('public.morning_briefings_id_seq'::regclass); + + +-- +-- Name: news_sources id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.news_sources ALTER COLUMN id SET DEFAULT nextval('public.news_sources_id_seq'::regclass); + + +-- +-- Name: processing_queue id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.processing_queue ALTER COLUMN id SET DEFAULT nextval('public.processing_queue_id_seq'::regclass); + + +-- +-- Name: search_failure_logs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.search_failure_logs ALTER COLUMN id SET DEFAULT nextval('public.search_failure_logs_id_seq'::regclass); + + +-- +-- Name: source_health id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.source_health ALTER COLUMN id SET DEFAULT nextval('public.source_health_id_seq'::regclass); + + +-- +-- Name: study_memo_card_evidence id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_evidence ALTER COLUMN id SET DEFAULT nextval('public.study_memo_card_evidence_id_seq'::regclass); + + +-- +-- Name: study_memo_card_jobs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_jobs ALTER COLUMN id SET DEFAULT nextval('public.study_memo_card_jobs_id_seq'::regclass); + + +-- +-- Name: 
study_memo_card_progress id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress ALTER COLUMN id SET DEFAULT nextval('public.study_memo_card_progress_id_seq'::regclass); + + +-- +-- Name: study_memo_cards id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_cards ALTER COLUMN id SET DEFAULT nextval('public.study_memo_cards_id_seq'::regclass); + + +-- +-- Name: study_question_attempts id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts ALTER COLUMN id SET DEFAULT nextval('public.study_question_attempts_id_seq'::regclass); + + +-- +-- Name: study_question_images id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_images ALTER COLUMN id SET DEFAULT nextval('public.study_question_images_id_seq'::regclass); + + +-- +-- Name: study_question_jobs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_jobs ALTER COLUMN id SET DEFAULT nextval('public.study_question_jobs_id_seq'::regclass); + + +-- +-- Name: study_question_progress id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress ALTER COLUMN id SET DEFAULT nextval('public.study_question_progress_id_seq'::regclass); + + +-- +-- Name: study_questions id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_questions ALTER COLUMN id SET DEFAULT nextval('public.study_questions_id_seq'::regclass); + + +-- +-- Name: study_quiz_session_jobs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_jobs ALTER COLUMN id SET DEFAULT nextval('public.study_quiz_session_jobs_id_seq'::regclass); + + +-- +-- Name: study_quiz_sessions id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_sessions ALTER COLUMN id SET DEFAULT nextval('public.study_quiz_sessions_id_seq'::regclass); + + +-- +-- Name: 
study_reminders id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_reminders ALTER COLUMN id SET DEFAULT nextval('public.study_reminders_id_seq'::regclass); + + +-- +-- Name: study_session_assets id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_session_assets ALTER COLUMN id SET DEFAULT nextval('public.study_session_assets_id_seq'::regclass); + + +-- +-- Name: study_sessions id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_sessions ALTER COLUMN id SET DEFAULT nextval('public.study_sessions_id_seq'::regclass); + + +-- +-- Name: study_topic_subject_notes id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_subject_notes ALTER COLUMN id SET DEFAULT nextval('public.study_topic_subject_notes_id_seq'::regclass); + + +-- +-- Name: study_topics id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topics ALTER COLUMN id SET DEFAULT nextval('public.study_topics_id_seq'::regclass); + + +-- +-- Name: tasks id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.tasks ALTER COLUMN id SET DEFAULT nextval('public.tasks_id_seq'::regclass); + + +-- +-- Name: users id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.users ALTER COLUMN id SET DEFAULT nextval('public.users_id_seq'::regclass); + + +-- +-- Name: worker_heartbeats id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_heartbeats ALTER COLUMN id SET DEFAULT nextval('public.worker_heartbeats_id_seq'::regclass); + + +-- +-- Name: worker_jobs id; Type: DEFAULT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_jobs ALTER COLUMN id SET DEFAULT nextval('public.worker_jobs_id_seq'::regclass); + + +-- +-- Name: analyze_events analyze_events_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.analyze_events + ADD CONSTRAINT analyze_events_pkey PRIMARY KEY (id); + + +-- 
+-- Name: approval_requests approval_requests_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.approval_requests + ADD CONSTRAINT approval_requests_pkey PRIMARY KEY (id); + + +-- +-- Name: ask_events ask_events_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.ask_events + ADD CONSTRAINT ask_events_pkey PRIMARY KEY (id); + + +-- +-- Name: audio_segments audio_segments_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.audio_segments + ADD CONSTRAINT audio_segments_pkey PRIMARY KEY (id); + + +-- +-- Name: automation_state automation_state_job_name_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.automation_state + ADD CONSTRAINT automation_state_job_name_key UNIQUE (job_name); + + +-- +-- Name: automation_state automation_state_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.automation_state + ADD CONSTRAINT automation_state_pkey PRIMARY KEY (id); + + +-- +-- Name: background_jobs background_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.background_jobs + ADD CONSTRAINT background_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: briefing_topics briefing_topics_briefing_id_topic_rank_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.briefing_topics + ADD CONSTRAINT briefing_topics_briefing_id_topic_rank_key UNIQUE (briefing_id, topic_rank); + + +-- +-- Name: briefing_topics briefing_topics_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.briefing_topics + ADD CONSTRAINT briefing_topics_pkey PRIMARY KEY (id); + + +-- +-- Name: chunk_section_analysis chunk_section_analysis_chunk_id_prompt_version_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.chunk_section_analysis + ADD CONSTRAINT chunk_section_analysis_chunk_id_prompt_version_key UNIQUE (chunk_id, prompt_version); + + +-- +-- Name: 
chunk_section_analysis chunk_section_analysis_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.chunk_section_analysis + ADD CONSTRAINT chunk_section_analysis_pkey PRIMARY KEY (id); + + +-- +-- Name: digest_topics digest_topics_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.digest_topics + ADD CONSTRAINT digest_topics_pkey PRIMARY KEY (id); + + +-- +-- Name: document_chunks_cand_qwen06 document_chunks_cand_qwen06_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks_cand_qwen06 + ADD CONSTRAINT document_chunks_cand_qwen06_pkey PRIMARY KEY (id); + + +-- +-- Name: document_chunks_cand_qwen4 document_chunks_cand_qwen4_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks_cand_qwen4 + ADD CONSTRAINT document_chunks_cand_qwen4_pkey PRIMARY KEY (id); + + +-- +-- Name: document_chunks_cand_qwen4m document_chunks_cand_qwen4m_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks_cand_qwen4m + ADD CONSTRAINT document_chunks_cand_qwen4m_pkey PRIMARY KEY (id); + + +-- +-- Name: document_chunks document_chunks_doc_id_chunk_index_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks + ADD CONSTRAINT document_chunks_doc_id_chunk_index_key UNIQUE (doc_id, chunk_index); + + +-- +-- Name: document_chunks document_chunks_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks + ADD CONSTRAINT document_chunks_pkey PRIMARY KEY (id); + + +-- +-- Name: document_images document_images_document_id_image_key_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_images + ADD CONSTRAINT document_images_document_id_image_key_key UNIQUE (document_id, image_key); + + +-- +-- Name: document_images document_images_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY 
public.document_images + ADD CONSTRAINT document_images_pkey PRIMARY KEY (id); + + +-- +-- Name: document_lineage document_lineage_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_lineage + ADD CONSTRAINT document_lineage_pkey PRIMARY KEY (id); + + +-- +-- Name: document_lineage document_lineage_uq; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_lineage + ADD CONSTRAINT document_lineage_uq UNIQUE (source_document_id, derived_document_id, relation_type); + + +-- +-- Name: document_notes document_notes_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_notes + ADD CONSTRAINT document_notes_pkey PRIMARY KEY (id); + + +-- +-- Name: document_notes document_notes_user_id_document_id_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_notes + ADD CONSTRAINT document_notes_user_id_document_id_key UNIQUE (user_id, document_id); + + +-- +-- Name: document_reads document_reads_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_reads + ADD CONSTRAINT document_reads_pkey PRIMARY KEY (id); + + +-- +-- Name: documents_cand_qwen06 documents_cand_qwen06_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents_cand_qwen06 + ADD CONSTRAINT documents_cand_qwen06_pkey PRIMARY KEY (doc_id); + + +-- +-- Name: documents_cand_qwen4 documents_cand_qwen4_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents_cand_qwen4 + ADD CONSTRAINT documents_cand_qwen4_pkey PRIMARY KEY (doc_id); + + +-- +-- Name: documents_cand_qwen4m documents_cand_qwen4m_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents_cand_qwen4m + ADD CONSTRAINT documents_cand_qwen4m_pkey PRIMARY KEY (doc_id); + + +-- +-- Name: documents documents_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents + ADD 
CONSTRAINT documents_pkey PRIMARY KEY (id); + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft + ADD CONSTRAINT eid_review_set_draft_pkey PRIMARY KEY (id); + + +-- +-- Name: eid_study_weakness eid_study_weakness_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_study_weakness + ADD CONSTRAINT eid_study_weakness_pkey PRIMARY KEY (id); + + +-- +-- Name: eid_weekly_recap eid_weekly_recap_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_weekly_recap + ADD CONSTRAINT eid_weekly_recap_pkey PRIMARY KEY (id); + + +-- +-- Name: events_history events_history_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events_history + ADD CONSTRAINT events_history_pkey PRIMARY KEY (id); + + +-- +-- Name: events events_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events + ADD CONSTRAINT events_pkey PRIMARY KEY (id); + + +-- +-- Name: facet_values facet_values_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.facet_values + ADD CONSTRAINT facet_values_pkey PRIMARY KEY (id); + + +-- +-- Name: global_digests global_digests_digest_date_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.global_digests + ADD CONSTRAINT global_digests_digest_date_key UNIQUE (digest_date); + + +-- +-- Name: global_digests global_digests_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.global_digests + ADD CONSTRAINT global_digests_pkey PRIMARY KEY (id); + + +-- +-- Name: legal_acts legal_acts_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_acts + ADD CONSTRAINT legal_acts_pkey PRIMARY KEY (family_id); + + +-- +-- Name: legal_meta legal_meta_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_meta + ADD CONSTRAINT 
legal_meta_pkey PRIMARY KEY (document_id); + + +-- +-- Name: library_categories library_categories_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.library_categories + ADD CONSTRAINT library_categories_pkey PRIMARY KEY (id); + + +-- +-- Name: morning_briefings morning_briefings_briefing_date_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.morning_briefings + ADD CONSTRAINT morning_briefings_briefing_date_key UNIQUE (briefing_date); + + +-- +-- Name: morning_briefings morning_briefings_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.morning_briefings + ADD CONSTRAINT morning_briefings_pkey PRIMARY KEY (id); + + +-- +-- Name: news_sources news_sources_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.news_sources + ADD CONSTRAINT news_sources_pkey PRIMARY KEY (id); + + +-- +-- Name: processing_queue processing_queue_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.processing_queue + ADD CONSTRAINT processing_queue_pkey PRIMARY KEY (id); + + +-- +-- Name: search_failure_logs search_failure_logs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.search_failure_logs + ADD CONSTRAINT search_failure_logs_pkey PRIMARY KEY (id); + + +-- +-- Name: source_health source_health_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.source_health + ADD CONSTRAINT source_health_pkey PRIMARY KEY (id); + + +-- +-- Name: study_memo_card_evidence study_memo_card_evidence_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_evidence + ADD CONSTRAINT study_memo_card_evidence_pkey PRIMARY KEY (id); + + +-- +-- Name: study_memo_card_jobs study_memo_card_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_jobs + ADD CONSTRAINT study_memo_card_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: 
study_memo_card_progress study_memo_card_progress_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress + ADD CONSTRAINT study_memo_card_progress_pkey PRIMARY KEY (id); + + +-- +-- Name: study_memo_cards study_memo_cards_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_cards + ADD CONSTRAINT study_memo_cards_pkey PRIMARY KEY (id); + + +-- +-- Name: study_question_attempts study_question_attempts_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts + ADD CONSTRAINT study_question_attempts_pkey PRIMARY KEY (id); + + +-- +-- Name: study_question_images study_question_images_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_images + ADD CONSTRAINT study_question_images_pkey PRIMARY KEY (id); + + +-- +-- Name: study_question_jobs study_question_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_jobs + ADD CONSTRAINT study_question_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: study_question_progress study_question_progress_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT study_question_progress_pkey PRIMARY KEY (id); + + +-- +-- Name: study_questions study_questions_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_questions + ADD CONSTRAINT study_questions_pkey PRIMARY KEY (id); + + +-- +-- Name: study_quiz_session_analysis study_quiz_session_analysis_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_analysis + ADD CONSTRAINT study_quiz_session_analysis_pkey PRIMARY KEY (study_quiz_session_id); + + +-- +-- Name: study_quiz_session_jobs study_quiz_session_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_jobs + ADD CONSTRAINT 
study_quiz_session_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: study_quiz_sessions study_quiz_sessions_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_sessions + ADD CONSTRAINT study_quiz_sessions_pkey PRIMARY KEY (id); + + +-- +-- Name: study_reminders study_reminders_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_reminders + ADD CONSTRAINT study_reminders_pkey PRIMARY KEY (id); + + +-- +-- Name: study_session_assets study_session_assets_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_session_assets + ADD CONSTRAINT study_session_assets_pkey PRIMARY KEY (id); + + +-- +-- Name: study_session_assets study_session_assets_study_session_id_document_id_asset_typ_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_session_assets + ADD CONSTRAINT study_session_assets_study_session_id_document_id_asset_typ_key UNIQUE (study_session_id, document_id, asset_type, role); + + +-- +-- Name: study_sessions study_sessions_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_sessions + ADD CONSTRAINT study_sessions_pkey PRIMARY KEY (id); + + +-- +-- Name: study_topic_documents study_topic_documents_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_documents + ADD CONSTRAINT study_topic_documents_pkey PRIMARY KEY (study_topic_id, document_id); + + +-- +-- Name: study_topic_subject_notes study_topic_subject_notes_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_subject_notes + ADD CONSTRAINT study_topic_subject_notes_pkey PRIMARY KEY (id); + + +-- +-- Name: study_topics study_topics_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topics + ADD CONSTRAINT study_topics_pkey PRIMARY KEY (id); + + +-- +-- Name: tasks tasks_caldav_uid_key; Type: CONSTRAINT; Schema: public; Owner: - 
+-- + +ALTER TABLE ONLY public.tasks + ADD CONSTRAINT tasks_caldav_uid_key UNIQUE (caldav_uid); + + +-- +-- Name: tasks tasks_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.tasks + ADD CONSTRAINT tasks_pkey PRIMARY KEY (id); + + +-- +-- Name: study_memo_card_progress uq_card_progress_user_card; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress + ADD CONSTRAINT uq_card_progress_user_card UNIQUE (user_id, card_id); + + +-- +-- Name: legal_meta uq_legal_meta_version; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_meta + ADD CONSTRAINT uq_legal_meta_version UNIQUE (family_id, law_doc_kind, version_key); + + +-- +-- Name: study_question_progress uq_progress_user_topic_question; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT uq_progress_user_topic_question UNIQUE (user_id, study_topic_id, study_question_id); + + +-- +-- Name: study_reminders uq_study_reminders_user_fired; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_reminders + ADD CONSTRAINT uq_study_reminders_user_fired UNIQUE (user_id, fired_at); + + +-- +-- Name: users users_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.users + ADD CONSTRAINT users_pkey PRIMARY KEY (id); + + +-- +-- Name: users users_username_key; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.users + ADD CONSTRAINT users_username_key UNIQUE (username); + + +-- +-- Name: worker_capabilities worker_capabilities_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_capabilities + ADD CONSTRAINT worker_capabilities_pkey PRIMARY KEY (worker_id); + + +-- +-- Name: worker_heartbeats worker_heartbeats_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_heartbeats + ADD CONSTRAINT worker_heartbeats_pkey PRIMARY KEY 
(id); + + +-- +-- Name: worker_jobs worker_jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_jobs + ADD CONSTRAINT worker_jobs_pkey PRIMARY KEY (id); + + +-- +-- Name: events_source_ref_uq; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX events_source_ref_uq ON public.events USING btree (source, source_ref) WHERE (source_ref IS NOT NULL); + + +-- +-- Name: idx_analyze_events_answerability; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_analyze_events_answerability ON public.analyze_events USING btree (answerability, created_at DESC) WHERE (answerability IS NOT NULL); + + +-- +-- Name: idx_analyze_events_policy_violation; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_analyze_events_policy_violation ON public.analyze_events USING btree (created_at) WHERE (policy_violation = true); + + +-- +-- Name: idx_analyze_events_shadow_ts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_analyze_events_shadow_ts ON public.analyze_events USING btree (created_at) WHERE (shadow_would_route_to IS NOT NULL); + + +-- +-- Name: idx_analyze_events_suppressed; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_analyze_events_suppressed ON public.analyze_events USING btree (created_at) WHERE (suppressed_reason IS NOT NULL); + + +-- +-- Name: idx_approval_requests_status; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_approval_requests_status ON public.approval_requests USING btree (status, created_at); + + +-- +-- Name: idx_ask_events_created; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_ask_events_created ON public.ask_events USING btree (created_at); + + +-- +-- Name: idx_ask_events_eval_case_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_ask_events_eval_case_id ON public.ask_events USING btree (eval_case_id) WHERE (eval_case_id IS NOT NULL); + + +-- +-- Name: idx_ask_events_prompt_version; Type: INDEX; Schema: public; 
Owner: - +-- + +CREATE INDEX idx_ask_events_prompt_version ON public.ask_events USING btree (prompt_version); + + +-- +-- Name: idx_ask_events_source_created; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_ask_events_source_created ON public.ask_events USING btree (source, created_at DESC); + + +-- +-- Name: idx_audio_segments_doc_start; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_audio_segments_doc_start ON public.audio_segments USING btree (document_id, start_s); + + +-- +-- Name: idx_briefing_topics_briefing_rank; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_briefing_topics_briefing_rank ON public.briefing_topics USING btree (briefing_id, topic_rank); + + +-- +-- Name: idx_card_progress_due; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_card_progress_due ON public.study_memo_card_progress USING btree (user_id, due_at) WHERE (due_at IS NOT NULL); + + +-- +-- Name: idx_chunks_country; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_country ON public.document_chunks USING btree (country) WHERE (country IS NOT NULL); + + +-- +-- Name: idx_chunks_doc_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_doc_id ON public.document_chunks USING btree (doc_id); + + +-- +-- Name: idx_chunks_domain_category; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_domain_category ON public.document_chunks USING btree (domain_category); + + +-- +-- Name: idx_chunks_embedding; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_embedding ON public.document_chunks USING ivfflat (embedding public.vector_cosine_ops) WITH (lists='100') WHERE (in_corpus = true); + + +-- +-- Name: idx_chunks_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_fts ON public.document_chunks USING gin (to_tsvector('simple'::regconfig, text)); + + +-- +-- Name: idx_chunks_language; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX 
idx_chunks_language ON public.document_chunks USING btree (language) WHERE (language IS NOT NULL); + + +-- +-- Name: idx_chunks_source; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_source ON public.document_chunks USING btree (source) WHERE (source IS NOT NULL); + + +-- +-- Name: idx_chunks_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_chunks_trgm ON public.document_chunks USING gin (text public.gin_trgm_ops); + + +-- +-- Name: idx_digest_topics_country; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_digest_topics_country ON public.digest_topics USING btree (country); + + +-- +-- Name: idx_digest_topics_digest; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_digest_topics_digest ON public.digest_topics USING btree (digest_id); + + +-- +-- Name: idx_digest_topics_rank; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_digest_topics_rank ON public.digest_topics USING btree (digest_id, country, topic_rank); + + +-- +-- Name: idx_document_images_content_hash; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_images_content_hash ON public.document_images USING btree (content_hash); + + +-- +-- Name: idx_document_images_document_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_images_document_id ON public.document_images USING btree (document_id); + + +-- +-- Name: idx_document_lineage_derived; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_lineage_derived ON public.document_lineage USING btree (derived_document_id); + + +-- +-- Name: idx_document_lineage_source; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_lineage_source ON public.document_lineage USING btree (source_document_id); + + +-- +-- Name: idx_document_notes_user_doc; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_notes_user_doc ON public.document_notes USING btree (user_id, document_id); + + +-- +-- Name: 
idx_document_reads_doc_time; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_reads_doc_time ON public.document_reads USING btree (document_id, read_at DESC); + + +-- +-- Name: idx_document_reads_user_doc; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_document_reads_user_doc ON public.document_reads USING btree (user_id, document_id); + + +-- +-- Name: idx_documents_ai_event_kind; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_ai_event_kind ON public.documents USING btree (ai_event_kind, created_at DESC) WHERE (ai_event_kind IS NOT NULL); + + +-- +-- Name: idx_documents_ai_summary_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_ai_summary_trgm ON public.documents USING gin (ai_summary public.gin_trgm_ops) WHERE ((ai_summary IS NOT NULL) AND (length(ai_summary) > 0)); + + +-- +-- Name: idx_documents_ai_version; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_ai_version ON public.documents USING btree (ai_model_version); + + +-- +-- Name: idx_documents_category; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_category ON public.documents USING btree (category); + + +-- +-- Name: idx_documents_content_origin; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_content_origin ON public.documents USING btree (content_origin); + + +-- +-- Name: idx_documents_embed_version; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_embed_version ON public.documents USING btree (embed_model_version); + + +-- +-- Name: idx_documents_embedding_hnsw; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_embedding_hnsw ON public.documents USING hnsw (embedding public.vector_cosine_ops) WHERE ((deleted_at IS NULL) AND (embedding IS NOT NULL)); + + +-- +-- Name: idx_documents_extracted_text_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_extracted_text_trgm ON 
public.documents USING gin (extracted_text public.gin_trgm_ops) WHERE ((extracted_text IS NOT NULL) AND (length(extracted_text) > 0)); + + +-- +-- Name: idx_documents_extractor_version; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_extractor_version ON public.documents USING btree (extractor_version); + + +-- +-- Name: idx_documents_facet_company; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_facet_company ON public.documents USING btree (facet_company) WHERE (facet_company IS NOT NULL); + + +-- +-- Name: idx_documents_facet_doctype; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_facet_doctype ON public.documents USING btree (facet_doctype) WHERE (facet_doctype IS NOT NULL); + + +-- +-- Name: idx_documents_facet_topic; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_facet_topic ON public.documents USING btree (facet_topic) WHERE (facet_topic IS NOT NULL); + + +-- +-- Name: idx_documents_facet_year; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_facet_year ON public.documents USING btree (facet_year) WHERE (facet_year IS NOT NULL); + + +-- +-- Name: idx_documents_fts; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_fts ON public.documents USING gin (to_tsvector('simple'::regconfig, ((COALESCE(title, ''::text) || ' '::text) || COALESCE(extracted_text, ''::text)))); + + +-- +-- Name: idx_documents_fts_full; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_fts_full ON public.documents USING gin (to_tsvector('simple'::regconfig, ((((((((COALESCE(title, ''::text) || ' '::text) || COALESCE((ai_tags)::text, ''::text)) || ' '::text) || COALESCE(ai_summary, ''::text)) || ' '::text) || COALESCE(user_note, ''::text)) || ' '::text) || COALESCE(extracted_text, ''::text)))); + + +-- +-- Name: idx_documents_has_suggestion; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_has_suggestion ON 
public.documents USING btree (id) WHERE (ai_suggestion IS NOT NULL); + + +-- +-- Name: idx_documents_hash; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_hash ON public.documents USING btree (file_hash); + + +-- +-- Name: idx_documents_is_read; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_is_read ON public.documents USING btree (is_read) WHERE (source_channel = 'news'::public.source_channel); + + +-- +-- Name: idx_documents_jurisdiction; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_jurisdiction ON public.documents USING btree (jurisdiction) WHERE (jurisdiction IS NOT NULL); + + +-- +-- Name: idx_documents_material_type; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_material_type ON public.documents USING btree (material_type) WHERE (material_type IS NOT NULL); + + +-- +-- Name: idx_documents_md_draft_status; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_md_draft_status ON public.documents USING btree (md_draft_status) WHERE (md_draft_status IS NOT NULL); + + +-- +-- Name: idx_documents_md_frontmatter_gin; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_md_frontmatter_gin ON public.documents USING gin (md_frontmatter); + + +-- +-- Name: idx_documents_md_status_pending; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_md_status_pending ON public.documents USING btree (md_status) WHERE (md_status = ANY (ARRAY['pending'::text, 'processing'::text])); + + +-- +-- Name: idx_documents_not_deleted; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_not_deleted ON public.documents USING btree (deleted_at) WHERE (deleted_at IS NULL); + + +-- +-- Name: idx_documents_notes; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_notes ON public.documents USING btree (pinned DESC, created_at DESC) WHERE ((file_type = 'note'::public.doc_type) AND (deleted_at IS NULL)); 
+ + +-- +-- Name: idx_documents_review_status; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_review_status ON public.documents USING btree (review_status); + + +-- +-- Name: idx_documents_title_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_title_trgm ON public.documents USING gin (title public.gin_trgm_ops); + + +-- +-- Name: idx_documents_trgm; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_trgm ON public.documents USING gin ((((COALESCE(title, ''::text) || ' '::text) || COALESCE(extracted_text, ''::text))) public.gin_trgm_ops); + + +-- +-- Name: idx_documents_user_tags_gin; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_documents_user_tags_gin ON public.documents USING gin (user_tags jsonb_path_ops) WHERE (user_tags IS NOT NULL); + + +-- +-- Name: idx_eid_recap_user_current; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_eid_recap_user_current ON public.eid_weekly_recap USING btree (user_id, created_at DESC) WHERE ((status)::text = 'active'::text); + + +-- +-- Name: idx_eid_review_draft_user; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_eid_review_draft_user ON public.eid_review_set_draft USING btree (user_id, created_at DESC); + + +-- +-- Name: idx_eid_weakness_user_current; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_eid_weakness_user_current ON public.eid_study_weakness USING btree (user_id, created_at DESC) WHERE ((status)::text = 'active'::text); + + +-- +-- Name: idx_events_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_events_active ON public.events USING btree (user_id, due_at, start_at) WHERE (status = ANY (ARRAY['inbox'::public.event_status, 'next'::public.event_status, 'scheduled'::public.event_status, 'deferred'::public.event_status])); + + +-- +-- Name: idx_events_activity_user_started; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_events_activity_user_started ON 
public.events USING btree (user_id, started_at DESC) WHERE (kind = 'activity_log'::public.event_kind); + + +-- +-- Name: idx_events_history_event; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_events_history_event ON public.events_history USING btree (event_id, changed_at); + + +-- +-- Name: idx_global_digests_date; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_global_digests_date ON public.global_digests USING btree (digest_date DESC); + + +-- +-- Name: idx_legal_meta_family; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_legal_meta_family ON public.legal_meta USING btree (family_id, effective_date DESC); + + +-- +-- Name: idx_library_categories_parent; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_library_categories_parent ON public.library_categories USING btree (parent_path); + + +-- +-- Name: idx_morning_briefings_date; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_morning_briefings_date ON public.morning_briefings USING btree (briefing_date DESC); + + +-- +-- Name: idx_news_feed; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_news_feed ON public.documents USING btree (source_channel, created_at DESC); + + +-- +-- Name: idx_progress_due; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_progress_due ON public.study_question_progress USING btree (user_id, due_at) WHERE (due_at IS NOT NULL); + + +-- +-- Name: idx_progress_pending_review; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_progress_pending_review ON public.study_question_progress USING btree (user_id, study_topic_id, last_attempted_at DESC) WHERE ((last_outcome)::text = ANY ((ARRAY['wrong'::character varying, 'unsure'::character varying])::text[])); + + +-- +-- Name: idx_progress_topic_pattern; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_progress_topic_pattern ON public.study_question_progress USING btree (user_id, study_topic_id, pattern_state); + + +-- +-- Name: 
idx_queue_pending; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_queue_pending ON public.processing_queue USING btree (stage, status) WHERE (status = 'pending'::public.process_status); + + +-- +-- Name: idx_search_failure_reason; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_search_failure_reason ON public.search_failure_logs USING btree (failure_reason, created_at DESC); + + +-- +-- Name: idx_search_failure_unreviewed; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_search_failure_unreviewed ON public.search_failure_logs USING btree (created_at DESC) WHERE (reviewed = false); + + +-- +-- Name: idx_search_failure_user_time; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_search_failure_user_time ON public.search_failure_logs USING btree (user_id, created_at DESC); + + +-- +-- Name: idx_session_assets_document; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_session_assets_document ON public.study_session_assets USING btree (document_id); + + +-- +-- Name: idx_session_assets_session; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_session_assets_session ON public.study_session_assets USING btree (study_session_id, sort_order); + + +-- +-- Name: idx_session_assets_type; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_session_assets_type ON public.study_session_assets USING btree (study_session_id, asset_type); + + +-- +-- Name: idx_study_memo_card_jobs_lookup; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_memo_card_jobs_lookup ON public.study_memo_card_jobs USING btree (source_kind, source_id, source_version); + + +-- +-- Name: idx_study_memo_cards_source_q; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_memo_cards_source_q ON public.study_memo_cards USING btree (source_question_id) WHERE (deleted_at IS NULL); + + +-- +-- Name: idx_study_q_related_stale; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX 
idx_study_q_related_stale ON public.study_questions USING btree (study_topic_id, related_computed_at) WHERE ((deleted_at IS NULL) AND ((embedding_status)::text = 'ready'::text) AND (related_computed_at IS NULL)); + + +-- +-- Name: idx_study_question_attempts_question; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_question_attempts_question ON public.study_question_attempts USING btree (study_question_id, answered_at DESC); + + +-- +-- Name: idx_study_question_attempts_user_topic; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_question_attempts_user_topic ON public.study_question_attempts USING btree (user_id, study_topic_id, answered_at DESC); + + +-- +-- Name: idx_study_question_images_qid; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_question_images_qid ON public.study_question_images USING btree (study_question_id, sort_order, id); + + +-- +-- Name: idx_study_questions_ai_status; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_questions_ai_status ON public.study_questions USING btree (study_topic_id, ai_explanation_status) WHERE ((deleted_at IS NULL) AND ((ai_explanation_status)::text <> 'none'::text)); + + +-- +-- Name: idx_study_questions_embedding_hnsw; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_questions_embedding_hnsw ON public.study_questions USING hnsw (embedding public.vector_cosine_ops) WHERE ((deleted_at IS NULL) AND (embedding IS NOT NULL)); + + +-- +-- Name: idx_study_questions_needs_review; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_questions_needs_review ON public.study_questions USING btree (study_topic_id) WHERE ((deleted_at IS NULL) AND needs_review); + + +-- +-- Name: idx_study_questions_topic; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_questions_topic ON public.study_questions USING btree (study_topic_id, created_at DESC, id) WHERE (deleted_at IS NULL); + + +-- +-- Name: 
idx_study_questions_topic_round_qnum; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_questions_topic_round_qnum ON public.study_questions USING btree (study_topic_id, exam_round, exam_question_number) WHERE ((deleted_at IS NULL) AND (exam_round IS NOT NULL)); + + +-- +-- Name: idx_study_sessions_cert; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_cert ON public.study_sessions USING btree (user_id, certification, subject, topic) WHERE ((study_type)::text = 'certification'::text); + + +-- +-- Name: idx_study_sessions_lang; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_lang ON public.study_sessions USING btree (user_id, language_code, learning_level, subject, topic) WHERE ((study_type)::text = 'language'::text); + + +-- +-- Name: idx_study_sessions_quiz_stats; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_quiz_stats ON public.study_sessions USING btree (user_id, study_type, last_quiz_at) WHERE (last_quiz_at IS NOT NULL); + + +-- +-- Name: idx_study_sessions_review; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_review ON public.study_sessions USING btree (user_id, review_state, next_review_at) WHERE (review_state IS NOT NULL); + + +-- +-- Name: idx_study_sessions_topic; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_topic ON public.study_sessions USING btree (study_topic_id, created_at DESC) WHERE (study_topic_id IS NOT NULL); + + +-- +-- Name: idx_study_sessions_type_user_created; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_sessions_type_user_created ON public.study_sessions USING btree (user_id, study_type, created_at DESC); + + +-- +-- Name: idx_study_topic_documents_doc; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_study_topic_documents_doc ON public.study_topic_documents USING btree (document_id); + + +-- +-- Name: idx_study_topics_user; Type: INDEX; 
Schema: public; Owner: - +-- + +CREATE INDEX idx_study_topics_user ON public.study_topics USING btree (user_id, sort_order, id) WHERE (deleted_at IS NULL); + + +-- +-- Name: idx_worker_capabilities_class; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_worker_capabilities_class ON public.worker_capabilities USING btree (worker_class); + + +-- +-- Name: idx_worker_capabilities_tier; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_worker_capabilities_tier ON public.worker_capabilities USING btree (tier); + + +-- +-- Name: idx_worker_heartbeats_worker_at; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_worker_heartbeats_worker_at ON public.worker_heartbeats USING btree (worker_id, heartbeat_at DESC); + + +-- +-- Name: idx_worker_jobs_pending_type; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX idx_worker_jobs_pending_type ON public.worker_jobs USING btree (job_type, created_at) WHERE (status = 'pending'::text); + + +-- +-- Name: uq_document_chunks_source_version_index; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_document_chunks_source_version_index ON public.document_chunks USING btree (doc_id, source_type, chunker_version, chunk_index); + + +-- +-- Name: uq_documents_email_source_external_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_documents_email_source_external_id ON public.documents USING btree (source_external_id) WHERE ((source_channel = 'email'::public.source_channel) AND (source_external_id IS NOT NULL)); + + +-- +-- Name: uq_documents_file_path; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_documents_file_path ON public.documents USING btree (file_path) WHERE (file_path IS NOT NULL); + + +-- +-- Name: uq_documents_paper_doi; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_documents_paper_doi ON public.documents USING btree (lower((extract_meta #>> '{paper,doi}'::text[]))) WHERE ((material_type = 'paper'::text) AND 
((extract_meta #>> '{paper,doi}'::text[]) IS NOT NULL)); + + +-- +-- Name: uq_facet_values_type_value; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_facet_values_type_value ON public.facet_values USING btree (facet_type, value); + + +-- +-- Name: uq_library_categories_path; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_library_categories_path ON public.library_categories USING btree (path); + + +-- +-- Name: uq_queue_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_queue_active ON public.processing_queue USING btree (document_id, stage) WHERE (status = ANY (ARRAY['pending'::public.process_status, 'processing'::public.process_status])); + + +-- +-- Name: uq_source_health_source_id; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_source_health_source_id ON public.source_health USING btree (source_id); + + +-- +-- Name: uq_study_memo_card_jobs_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_memo_card_jobs_active ON public.study_memo_card_jobs USING btree (source_kind, source_id) WHERE ((status)::text = ANY ((ARRAY['pending'::character varying, 'processing'::character varying])::text[])); + + +-- +-- Name: uq_study_memo_cards_dedup; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_memo_cards_dedup ON public.study_memo_cards USING btree (dedup_hash) WHERE (deleted_at IS NULL); + + +-- +-- Name: uq_study_q_jobs_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_q_jobs_active ON public.study_question_jobs USING btree (study_question_id, kind) WHERE ((status)::text = ANY ((ARRAY['pending'::character varying, 'processing'::character varying])::text[])); + + +-- +-- Name: uq_study_quiz_sessions_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_quiz_sessions_active ON public.study_quiz_sessions USING btree (user_id, study_topic_id) WHERE ((status)::text = 
'in_progress'::text); + + +-- +-- Name: uq_study_session_jobs_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_session_jobs_active ON public.study_quiz_session_jobs USING btree (study_quiz_session_id) WHERE ((status)::text = ANY ((ARRAY['pending'::character varying, 'processing'::character varying])::text[])); + + +-- +-- Name: uq_study_topic_subject_notes; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_topic_subject_notes ON public.study_topic_subject_notes USING btree (user_id, study_topic_id, subject, scope); + + +-- +-- Name: uq_study_topics_user_name_active; Type: INDEX; Schema: public; Owner: - +-- + +CREATE UNIQUE INDEX uq_study_topics_user_name_active ON public.study_topics USING btree (user_id, name) WHERE (deleted_at IS NULL); + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_no_delete; Type: RULE; Schema: public; Owner: - +-- + +CREATE RULE eid_review_set_draft_no_delete AS + ON DELETE TO public.eid_review_set_draft DO INSTEAD NOTHING; + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_no_update; Type: RULE; Schema: public; Owner: - +-- + +CREATE RULE eid_review_set_draft_no_update AS + ON UPDATE TO public.eid_review_set_draft DO INSTEAD NOTHING; + + +-- +-- Name: eid_study_weakness eid_study_weakness_no_delete; Type: RULE; Schema: public; Owner: - +-- + +CREATE RULE eid_study_weakness_no_delete AS + ON DELETE TO public.eid_study_weakness DO INSTEAD NOTHING; + + +-- +-- Name: eid_study_weakness eid_study_weakness_no_update; Type: RULE; Schema: public; Owner: - +-- + +CREATE RULE eid_study_weakness_no_update AS + ON UPDATE TO public.eid_study_weakness DO INSTEAD NOTHING; + + +-- +-- Name: eid_weekly_recap eid_weekly_recap_no_delete; Type: RULE; Schema: public; Owner: - +-- + +CREATE RULE eid_weekly_recap_no_delete AS + ON DELETE TO public.eid_weekly_recap DO INSTEAD NOTHING; + + +-- +-- Name: eid_weekly_recap eid_weekly_recap_no_update; Type: RULE; Schema: public; Owner: - +-- 
+ +CREATE RULE eid_weekly_recap_no_update AS + ON UPDATE TO public.eid_weekly_recap DO INSTEAD NOTHING; + + +-- +-- Name: analyze_events analyze_events_doc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.analyze_events + ADD CONSTRAINT analyze_events_doc_id_fkey FOREIGN KEY (doc_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: analyze_events analyze_events_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.analyze_events + ADD CONSTRAINT analyze_events_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE SET NULL; + + +-- +-- Name: approval_requests approval_requests_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.approval_requests + ADD CONSTRAINT approval_requests_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: ask_events ask_events_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.ask_events + ADD CONSTRAINT ask_events_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id); + + +-- +-- Name: audio_segments audio_segments_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.audio_segments + ADD CONSTRAINT audio_segments_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: briefing_topics briefing_topics_briefing_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.briefing_topics + ADD CONSTRAINT briefing_topics_briefing_id_fkey FOREIGN KEY (briefing_id) REFERENCES public.morning_briefings(id) ON DELETE CASCADE; + + +-- +-- Name: chunk_section_analysis chunk_section_analysis_chunk_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.chunk_section_analysis + ADD CONSTRAINT chunk_section_analysis_chunk_id_fkey FOREIGN KEY (chunk_id) 
REFERENCES public.document_chunks(id) ON DELETE CASCADE; + + +-- +-- Name: digest_topics digest_topics_digest_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.digest_topics + ADD CONSTRAINT digest_topics_digest_id_fkey FOREIGN KEY (digest_id) REFERENCES public.global_digests(id) ON DELETE CASCADE; + + +-- +-- Name: document_chunks document_chunks_doc_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_chunks + ADD CONSTRAINT document_chunks_doc_id_fkey FOREIGN KEY (doc_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: document_images document_images_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_images + ADD CONSTRAINT document_images_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: document_lineage document_lineage_derived_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_lineage + ADD CONSTRAINT document_lineage_derived_document_id_fkey FOREIGN KEY (derived_document_id) REFERENCES public.documents(id) ON DELETE RESTRICT; + + +-- +-- Name: document_lineage document_lineage_source_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_lineage + ADD CONSTRAINT document_lineage_source_document_id_fkey FOREIGN KEY (source_document_id) REFERENCES public.documents(id) ON DELETE RESTRICT; + + +-- +-- Name: document_notes document_notes_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_notes + ADD CONSTRAINT document_notes_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: document_notes document_notes_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_notes + ADD CONSTRAINT 
document_notes_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: document_reads document_reads_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_reads + ADD CONSTRAINT document_reads_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: document_reads document_reads_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.document_reads + ADD CONSTRAINT document_reads_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: documents documents_duplicate_of_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.documents + ADD CONSTRAINT documents_duplicate_of_fkey FOREIGN KEY (duplicate_of) REFERENCES public.documents(id) ON DELETE SET NULL; + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_source_weakness_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft + ADD CONSTRAINT eid_review_set_draft_source_weakness_id_fkey FOREIGN KEY (source_weakness_id) REFERENCES public.eid_study_weakness(id) ON DELETE SET NULL; + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft + ADD CONSTRAINT eid_review_set_draft_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_supersedes_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft + ADD CONSTRAINT eid_review_set_draft_supersedes_id_fkey FOREIGN KEY (supersedes_id) REFERENCES public.eid_review_set_draft(id) ON DELETE SET NULL; + + +-- +-- Name: eid_review_set_draft eid_review_set_draft_user_id_fkey; Type: FK CONSTRAINT; 
Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_review_set_draft + ADD CONSTRAINT eid_review_set_draft_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: eid_study_weakness eid_study_weakness_supersedes_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_study_weakness + ADD CONSTRAINT eid_study_weakness_supersedes_id_fkey FOREIGN KEY (supersedes_id) REFERENCES public.eid_study_weakness(id) ON DELETE SET NULL; + + +-- +-- Name: eid_study_weakness eid_study_weakness_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_study_weakness + ADD CONSTRAINT eid_study_weakness_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: eid_weekly_recap eid_weekly_recap_supersedes_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_weekly_recap + ADD CONSTRAINT eid_weekly_recap_supersedes_id_fkey FOREIGN KEY (supersedes_id) REFERENCES public.eid_weekly_recap(id) ON DELETE SET NULL; + + +-- +-- Name: eid_weekly_recap eid_weekly_recap_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.eid_weekly_recap + ADD CONSTRAINT eid_weekly_recap_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: events_history events_history_event_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events_history + ADD CONSTRAINT events_history_event_id_fkey FOREIGN KEY (event_id) REFERENCES public.events(id) ON DELETE RESTRICT; + + +-- +-- Name: events events_memo_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.events + ADD CONSTRAINT events_memo_document_id_fkey FOREIGN KEY (memo_document_id) REFERENCES public.documents(id) ON DELETE SET NULL; + + +-- +-- Name: events events_user_id_fkey; Type: FK CONSTRAINT; Schema: 
public; Owner: - +-- + +ALTER TABLE ONLY public.events + ADD CONSTRAINT events_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id); + + +-- +-- Name: legal_acts legal_acts_parent_family_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_acts + ADD CONSTRAINT legal_acts_parent_family_id_fkey FOREIGN KEY (parent_family_id) REFERENCES public.legal_acts(family_id); + + +-- +-- Name: legal_meta legal_meta_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_meta + ADD CONSTRAINT legal_meta_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: legal_meta legal_meta_family_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.legal_meta + ADD CONSTRAINT legal_meta_family_id_fkey FOREIGN KEY (family_id) REFERENCES public.legal_acts(family_id); + + +-- +-- Name: processing_queue processing_queue_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.processing_queue + ADD CONSTRAINT processing_queue_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id); + + +-- +-- Name: search_failure_logs search_failure_logs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.search_failure_logs + ADD CONSTRAINT search_failure_logs_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE SET NULL; + + +-- +-- Name: source_health source_health_source_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.source_health + ADD CONSTRAINT source_health_source_id_fkey FOREIGN KEY (source_id) REFERENCES public.news_sources(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_card_evidence study_memo_card_evidence_card_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_evidence + ADD CONSTRAINT 
study_memo_card_evidence_card_id_fkey FOREIGN KEY (card_id) REFERENCES public.study_memo_cards(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_card_jobs study_memo_card_jobs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_jobs + ADD CONSTRAINT study_memo_card_jobs_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_card_progress study_memo_card_progress_card_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress + ADD CONSTRAINT study_memo_card_progress_card_id_fkey FOREIGN KEY (card_id) REFERENCES public.study_memo_cards(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_card_progress study_memo_card_progress_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress + ADD CONSTRAINT study_memo_card_progress_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_card_progress study_memo_card_progress_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_card_progress + ADD CONSTRAINT study_memo_card_progress_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_cards study_memo_cards_source_question_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_cards + ADD CONSTRAINT study_memo_cards_source_question_id_fkey FOREIGN KEY (source_question_id) REFERENCES public.study_questions(id) ON DELETE CASCADE; + + +-- +-- Name: study_memo_cards study_memo_cards_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_cards + ADD CONSTRAINT study_memo_cards_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + 
+-- +-- Name: study_memo_cards study_memo_cards_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_memo_cards + ADD CONSTRAINT study_memo_cards_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_attempts study_question_attempts_quiz_session_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts + ADD CONSTRAINT study_question_attempts_quiz_session_id_fkey FOREIGN KEY (quiz_session_id) REFERENCES public.study_quiz_sessions(id) ON DELETE SET NULL; + + +-- +-- Name: study_question_attempts study_question_attempts_study_question_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts + ADD CONSTRAINT study_question_attempts_study_question_id_fkey FOREIGN KEY (study_question_id) REFERENCES public.study_questions(id) ON DELETE RESTRICT; + + +-- +-- Name: study_question_attempts study_question_attempts_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts + ADD CONSTRAINT study_question_attempts_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_attempts study_question_attempts_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_attempts + ADD CONSTRAINT study_question_attempts_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_images study_question_images_study_question_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_images + ADD CONSTRAINT study_question_images_study_question_id_fkey FOREIGN KEY (study_question_id) REFERENCES public.study_questions(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_images 
study_question_images_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_images + ADD CONSTRAINT study_question_images_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_jobs study_question_jobs_study_question_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_jobs + ADD CONSTRAINT study_question_jobs_study_question_id_fkey FOREIGN KEY (study_question_id) REFERENCES public.study_questions(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_jobs study_question_jobs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_jobs + ADD CONSTRAINT study_question_jobs_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_progress study_question_progress_last_attempt_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT study_question_progress_last_attempt_id_fkey FOREIGN KEY (last_attempt_id) REFERENCES public.study_question_attempts(id) ON DELETE SET NULL; + + +-- +-- Name: study_question_progress study_question_progress_study_question_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT study_question_progress_study_question_id_fkey FOREIGN KEY (study_question_id) REFERENCES public.study_questions(id) ON DELETE RESTRICT; + + +-- +-- Name: study_question_progress study_question_progress_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT study_question_progress_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_question_progress study_question_progress_user_id_fkey; Type: FK CONSTRAINT; 
Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_question_progress + ADD CONSTRAINT study_question_progress_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_questions study_questions_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_questions + ADD CONSTRAINT study_questions_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_questions study_questions_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_questions + ADD CONSTRAINT study_questions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_session_analysis study_quiz_session_analysis_study_quiz_session_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_analysis + ADD CONSTRAINT study_quiz_session_analysis_study_quiz_session_id_fkey FOREIGN KEY (study_quiz_session_id) REFERENCES public.study_quiz_sessions(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_session_analysis study_quiz_session_analysis_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_analysis + ADD CONSTRAINT study_quiz_session_analysis_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_session_jobs study_quiz_session_jobs_study_quiz_session_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_session_jobs + ADD CONSTRAINT study_quiz_session_jobs_study_quiz_session_id_fkey FOREIGN KEY (study_quiz_session_id) REFERENCES public.study_quiz_sessions(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_session_jobs study_quiz_session_jobs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY 
public.study_quiz_session_jobs + ADD CONSTRAINT study_quiz_session_jobs_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_sessions study_quiz_sessions_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_sessions + ADD CONSTRAINT study_quiz_sessions_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_quiz_sessions study_quiz_sessions_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_quiz_sessions + ADD CONSTRAINT study_quiz_sessions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_reminders study_reminders_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_reminders + ADD CONSTRAINT study_reminders_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE SET NULL; + + +-- +-- Name: study_reminders study_reminders_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_reminders + ADD CONSTRAINT study_reminders_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_session_assets study_session_assets_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_session_assets + ADD CONSTRAINT study_session_assets_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: study_session_assets study_session_assets_study_session_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_session_assets + ADD CONSTRAINT study_session_assets_study_session_id_fkey FOREIGN KEY (study_session_id) REFERENCES public.study_sessions(id) ON DELETE CASCADE; + + +-- +-- Name: study_sessions 
study_sessions_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_sessions + ADD CONSTRAINT study_sessions_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE SET NULL; + + +-- +-- Name: study_sessions study_sessions_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_sessions + ADD CONSTRAINT study_sessions_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_topic_documents study_topic_documents_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_documents + ADD CONSTRAINT study_topic_documents_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id) ON DELETE CASCADE; + + +-- +-- Name: study_topic_documents study_topic_documents_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_documents + ADD CONSTRAINT study_topic_documents_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_topic_documents study_topic_documents_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_documents + ADD CONSTRAINT study_topic_documents_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_topic_subject_notes study_topic_subject_notes_study_topic_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_subject_notes + ADD CONSTRAINT study_topic_subject_notes_study_topic_id_fkey FOREIGN KEY (study_topic_id) REFERENCES public.study_topics(id) ON DELETE CASCADE; + + +-- +-- Name: study_topic_subject_notes study_topic_subject_notes_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topic_subject_notes + ADD 
CONSTRAINT study_topic_subject_notes_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: study_topics study_topics_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.study_topics + ADD CONSTRAINT study_topics_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE CASCADE; + + +-- +-- Name: tasks tasks_document_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.tasks + ADD CONSTRAINT tasks_document_id_fkey FOREIGN KEY (document_id) REFERENCES public.documents(id); + + +-- +-- Name: worker_capabilities worker_capabilities_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_capabilities + ADD CONSTRAINT worker_capabilities_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE RESTRICT; + + +-- +-- Name: worker_heartbeats worker_heartbeats_worker_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_heartbeats + ADD CONSTRAINT worker_heartbeats_worker_id_fkey FOREIGN KEY (worker_id) REFERENCES public.worker_capabilities(worker_id) ON DELETE CASCADE; + + +-- +-- Name: worker_jobs worker_jobs_user_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_jobs + ADD CONSTRAINT worker_jobs_user_id_fkey FOREIGN KEY (user_id) REFERENCES public.users(id) ON DELETE RESTRICT; + + +-- +-- Name: worker_jobs worker_jobs_worker_id_fkey; Type: FK CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.worker_jobs + ADD CONSTRAINT worker_jobs_worker_id_fkey FOREIGN KEY (worker_id) REFERENCES public.worker_capabilities(worker_id) ON DELETE SET NULL; + + +-- +-- PostgreSQL database dump complete +-- + + diff --git a/scripts/ci/boot_smoke.py b/scripts/ci/boot_smoke.py new file mode 100644 index 0000000..b924a1a --- /dev/null +++ b/scripts/ci/boot_smoke.py @@ -0,0 +1,122 @@ +"""전체 app 부팅 런타임 스모크 (GPU 
격리) — deploy-blocker 게이트. + +init_db 자체는 initdb_runtime_test.py(R1)·migration_smoke.sh 가 검증한다. +본 스모크는 그 위에서 **실제 컨테이너 부팅 경로**(main:app + lifespan startup)를 실행해 +py_compile 이 못 잡는 deploy-blocker 클래스를 잡는다: + + ① `import main` = 전 router import + FastAPI app 빌드 (router 심볼누락·순환 검출) + ② lifespan startup = lifespan 안의 전 worker import(≈35) + init_db + 전 add_job 실행 + (worker import-time 오류·잡 등록 오류 검출, **drift 0** = 실제 경로) + ③ /health (health_check 직접 호출) = DB connected + +prod/AI/NAS 무접촉을 위해 부작용 3개만 외과적으로 중립화한다 (검증 대상 로직은 그대로): + - NAS 마운트 체크 → 임시 디렉토리(+PKM/) 로 통과 (실 NAS 의존 제거) + - scheduler.start() → no-op (잡은 등록되지만 실행 안 됨 = 워커 폴링·외부 API 호출 0) + - scheduler.shutdown() → no-op (start 안 했으니 __aexit__ 의 shutdown 이 raise 안 하도록) + - prewarm_analyzer() → no-op (AI 라우터 :8890 미호출 = 검색실험 soft-lock 안전) + +실행 (worktree 루트를 마운트한 prod fastapi 이미지 컨테이너 안): + docker run --rm --network -v :/work -w /work \ + -e PYTHONPATH=/work/app -e BOOT_SMOKE=1 \ + -e DATABASE_URL="postgresql+asyncpg://postgres@ds-bootsmoke-pg:5432/pkm" \ + python scripts/ci/boot_smoke.py + +기대: IMPORTS OK → LIFESPAN startup OK (jobs=N, purge_sweep 포함) → schema OK → HEALTH ok → PASS +""" +import asyncio +import os +import tempfile +from pathlib import Path + +from sqlalchemy import text + + +async def main() -> None: + # ── 0) 안전 가드: prod DB 오접속 차단 ───────────────────────────────── + from core.config import settings + + url = settings.database_url + print("DATABASE_URL:", url) + assert os.getenv("BOOT_SMOKE") == "1", "SAFETY ABORT: BOOT_SMOKE=1 미설정" + # prod = '...@postgres:5432/pkm' (user pkm). ephemeral = bootsmoke 호스트 / localhost / postgres user. 
+ assert "@postgres:" not in url and "@postgres/" not in url, f"SAFETY ABORT: prod DB 로 보임: {url}" + assert ("bootsmoke" in url) or ("localhost" in url) or ("127.0.0.1" in url), \ + f"SAFETY ABORT: ephemeral 마커(bootsmoke/localhost) 없음: {url}" + + # ── 1) 부작용 3개 중립화 (검증 대상 로직 보존) ─────────────────────── + # prewarm: AI 라우터 미호출 + import services.search.query_analyzer as qa + + async def _noop_prewarm(*a, **k): + return None + + qa.prewarm_analyzer = _noop_prewarm + + # scheduler.start/shutdown no-op + start 캡처로 잡 개수 집계 + from apscheduler.schedulers.asyncio import AsyncIOScheduler + + captured: dict = {} + _orig_init = AsyncIOScheduler.__init__ + + def _init(self, *a, **k): + _orig_init(self, *a, **k) + captured["sched"] = self + + AsyncIOScheduler.__init__ = _init + AsyncIOScheduler.start = lambda self, *a, **k: None + AsyncIOScheduler.shutdown = lambda self, *a, **k: None + + # NAS 체크 통과용 임시 마운트 + tmp = tempfile.mkdtemp(prefix="bootsmoke-nas-") + (Path(tmp) / "PKM").mkdir(parents=True, exist_ok=True) + settings.nas_mount_path = tmp + print("nas_mount_path(override):", tmp) + + # ── 2) import main = 전 router import + app 빌드 ────────────────────── + import main + + route_count = len(main.app.routes) + print(f"IMPORTS OK — main 빌드, app.routes={route_count}") + assert route_count > 50, f"라우트 수 비정상({route_count}) — 라우터 누락 의심" + + # ── 3) lifespan startup 실행 (init_db + 전 worker import + 전 add_job) ─ + cm = main.lifespan(main.app) + await cm.__aenter__() + sched = captured.get("sched") + jobs = sched.get_jobs() if sched else [] + job_ids = sorted(j.id for j in jobs) + print(f"LIFESPAN startup OK — 등록 잡 {len(jobs)}건") + print(" job_ids:", ", ".join(job_ids)) + assert len(jobs) >= 30, f"잡 등록 수 비정상({len(jobs)})" + for required in ("purge_sweep", "auto_review", "queue_consumer", "statute_collector"): + assert required in job_ids, f"필수 잡 누락: {required}" + + # ── 4) 스키마 상태 (lifespan 의 실 init_db 가 359/360/361 적용했는지) ── + from core.database import async_session, engine + + async 
with async_session() as s: + docs = (await s.execute(text("SELECT to_regclass('public.documents') IS NOT NULL"))).scalar() + purge = (await s.execute(text( + "SELECT count(*) FROM information_schema.columns " + "WHERE table_name='documents' AND column_name='purge_requested_at'"))).scalar() + cand = (await s.execute(text( + "SELECT count(*) FROM information_schema.tables " + "WHERE table_name LIKE 'documents_cand_qwen%'"))).scalar() + uq = (await s.execute(text( + "SELECT count(*) FROM pg_indexes WHERE indexname='uq_attempt_session_question'"))).scalar() + mx = (await s.execute(text("SELECT max(version) FROM schema_migrations"))).scalar() + print(f"SCHEMA OK — max_migration={mx} documents={docs} purge_col={purge} cand_qwen={cand} attempt_uq={uq}") + assert docs and purge == 1 and cand == 0 and uq == 1 and mx == 361, "FAIL: 기대 스키마 상태 불일치" + + # ── 5) /health 직접 호출 ────────────────────────────────────────────── + health = await main.health_check() + print("HEALTH:", health) + assert health["status"] == "ok" and health["database"] == "connected", "FAIL: health degraded" + + # ── 6) 정리 ─────────────────────────────────────────────────────────── + await cm.__aexit__(None, None, None) + await engine.dispose() + print("RESULT: PASS — 전체 app 부팅(import·init_db·잡등록·health) 검증") + + +asyncio.run(main()) diff --git a/scripts/ci/initdb_runtime_test.py b/scripts/ci/initdb_runtime_test.py new file mode 100644 index 0000000..e3f75de --- /dev/null +++ b/scripts/ci/initdb_runtime_test.py @@ -0,0 +1,51 @@ +"""init_db() baseline 부팅 런타임 검증 (R1) — psql migration_smoke 가 못 잡는 asyncpg 경로 확인. + +migration_smoke.sh(psql)는 SQL 유효성만 검증한다. init_db 는 asyncpg exec_driver_sql(prepared) +경로라 ① multi-statement 불허 ② baseline 의 raw asyncpg 적재 ③ skip/stamp/멱등 — 이걸 실측한다. 
+ +실행 (worktree 루트): + python3.11 -m venv /tmp/v && /tmp/v/bin/pip install -q "sqlalchemy[asyncio]>=2" asyncpg pydantic pyyaml + docker run -d --name idb -p 55432:5432 -e POSTGRES_HOST_AUTH_METHOD=trust pgvector/pgvector:pg16 + docker exec idb psql -U postgres -c "CREATE DATABASE pkm" + ln -sfn ../migrations app/migrations # Docker 의 /app/migrations 레이아웃 모사 (테스트 후 rm) + PYTHONPATH=app DATABASE_URL="postgresql+asyncpg://postgres@localhost:55432/pkm" /tmp/v/bin/python scripts/ci/initdb_runtime_test.py + rm -f app/migrations; docker rm -f idb + +기대: 1st OK(documents=True·purge_col=1·cand_qwen=0·attempt_unique=1), 2nd 멱등동일=True. +""" +import asyncio +from sqlalchemy import text + + +async def main(): + from core.config import settings + url = settings.database_url + print("effective DATABASE_URL:", url) + assert "localhost" in url or "127.0.0.1" in url, f"SAFETY ABORT non-local: {url}" + from core.database import init_db, async_session, engine + + print("=== 1st init_db (fresh DB) ===") + await init_db() + async with async_session() as s: + cnt = (await s.execute(text("SELECT count(*) FROM schema_migrations"))).scalar() + mx = (await s.execute(text("SELECT max(version) FROM schema_migrations"))).scalar() + bl = (await s.execute(text("SELECT count(*) FROM schema_migrations WHERE name LIKE 'baseline:%'"))).scalar() + docs = (await s.execute(text("SELECT to_regclass('public.documents') IS NOT NULL"))).scalar() + purge = (await s.execute(text("SELECT count(*) FROM information_schema.columns WHERE table_name='documents' AND column_name='purge_requested_at'"))).scalar() + cand = (await s.execute(text("SELECT count(*) FROM information_schema.tables WHERE table_name LIKE 'documents_cand_qwen%'"))).scalar() + uq = (await s.execute(text("SELECT count(*) FROM pg_indexes WHERE indexname='uq_attempt_session_question'"))).scalar() + print(f" schema_migrations count={cnt} max={mx} baseline_stamped={bl}") + print(f" documents={docs} purge_col={purge} cand_qwen_tables={cand} 
attempt_unique={uq}") + assert docs and purge == 1 and cand == 0 and uq == 1, "FAIL: 기대 스키마 상태 불일치" + + print("=== 2nd init_db (rerun = baseline skip + 멱등) ===") + await init_db() + async with async_session() as s: + cnt2 = (await s.execute(text("SELECT count(*) FROM schema_migrations"))).scalar() + assert cnt == cnt2, "FAIL: 멱등 아님 (재실행이 schema_migrations 변경)" + print(f" count={cnt2} 멱등동일={cnt == cnt2}") + print("RESULT: PASS — init_db baseline 부팅/멱등 검증") + await engine.dispose() + + +asyncio.run(main()) diff --git a/scripts/ci/migration_smoke.sh b/scripts/ci/migration_smoke.sh new file mode 100755 index 0000000..1266988 --- /dev/null +++ b/scripts/ci/migration_smoke.sh @@ -0,0 +1,138 @@ +#!/usr/bin/env bash +# migration_smoke.sh — fresh-DB + DR enum-same-txn 게이트 (plan ds-backend-audit-1 R0) +# +# app/core/database.py 의 init_db() 는 모든 pending migration 을 단일 트랜잭션 +# (`async with engine.begin()`) 으로 적용한다. 이 스크립트는 그 경로를 미러해 +# migrations/ 전체가 빈 DB / DR 업그레이드에서 한 트랜잭션으로 적용 가능한지 검증한다. +# +# 시나리오: +# FRESH — 빈 DB 에 migrations/ 전체를 단일 트랜잭션으로 적용 (신규 환경 부팅 경로) +# DR — 001~319 를 커밋(과거 운영 DB 모사) 후 320~end 를 단일 트랜잭션으로 적용 +# (pre-320 백업/지연 복제를 320 경계 너머로 catch-up 업그레이드하는 재해복구 경로) +# +# enum-same-txn 결함(ALTER TYPE ADD VALUE 한 값을 같은 트랜잭션에서 사용)이 있으면 +# 두 시나리오 모두 'unsafe use of new value' 로 abort 한다. +# R1(enum-barrier) fix 후에는 두 시나리오 모두 PASS 해야 한다. +# +# prod 동일 이미지(pg16)로 핀. 의존: docker. 
# Usage: scripts/ci/migration_smoke.sh   (an ephemeral container is started and removed automatically)
#
# NOTE(review): the scenarios implemented below are baseline-based:
#   FRESH       - schema baseline + every post-baseline migration in ONE transaction
#   INCREMENTAL - baseline committed first, then the post-baseline migrations in a
#                 separate single transaction (reported under the label "DR")
# The file header still describes an older 001~319 / 320~end split; confirm and update it.
#
# -e is not enabled: failures are handled explicitly (run_scenario reads the scenario's
# exit status, and the main flow uses `|| fail=1`), so both scenarios always run.
set -uo pipefail

IMAGE="pgvector/pgvector:pg16"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
MIG_DIR="$(cd "$SCRIPT_DIR/../../migrations" && pwd)"
CNAME="ds-mig-smoke-$$"
DB="pkm"   # migration 358 runs ALTER DATABASE pkm, so the database must carry this name

BASELINE_CUTOFF=358
BASELINE_FILE="$MIG_DIR/_baseline/0358_schema_baseline.sql"

# Remove the container on every exit path; errors (e.g. container never created) are ignored.
cleanup() { docker rm -f "$CNAME" >/dev/null 2>&1 || true; }
trap cleanup EXIT

# Migration files in version order. NNN_ prefixes are zero-padded to 3 digits, so lexical
# order equals numeric order. Pathname expansion is already sorted, which avoids parsing
# `ls` output. bash 3.2 (macOS) compatible - no mapfile.
MIGS=()
for _f in "$MIG_DIR"/[0-9]*.sql; do
  # An unmatched glob stays literal; the -f test filters that case out.
  [ -f "$_f" ] && MIGS+=("$_f")
done
[ "${#MIGS[@]}" -gt 0 ] || { echo "FATAL: migrations 없음 ($MIG_DIR)"; exit 2; }
# Fail early: a missing baseline would otherwise only make `cat` fail inside a pipeline
# whose status is taken from psql, which can yield a false PASS on an empty transaction.
[ -f "$BASELINE_FILE" ] || { echo "FATAL: baseline 없음 ($BASELINE_FILE)"; exit 2; }
echo "migrations: ${#MIGS[@]}건 ($(basename "${MIGS[0]}") ~ $(basename "${MIGS[$((${#MIGS[@]}-1))]}"))"

# Run psql inside the container (SQL usually arrives on stdin); stop at the first SQL error.
psql_exec() { docker exec -i "$CNAME" psql -U postgres -v ON_ERROR_STOP=1 "$@"; }

# Print the given files wrapped in a single BEGIN/COMMIT stream (for psql stdin).
# Each file is preceded by an \echo marker: on failure the last marker names the
# migration that broke. Accepts zero files (emits an empty transaction).
emit_single_txn() {
  local f
  printf '%s\n' '\set ON_ERROR_STOP on' 'BEGIN;'
  for f; do   # iterates the positional parameters; safe with zero arguments under set -u
    printf '%s\n' "\\echo >>>APPLY $(basename "$f")"
    cat "$f"
    echo      # guarantee a newline after files that lack a trailing one
  done
  printf '%s\n' 'COMMIT;'
}

# Same stream without the transaction wrapper, so each statement autocommits.
# NOTE(review): no scenario in this script calls this helper at the moment.
emit_autocommit() {
  local f
  printf '%s\n' '\set ON_ERROR_STOP on'
  for f; do
    printf '%s\n' "\\echo >>>APPLY $(basename "$f")"
    cat "$f"
    echo
  done
}

# Drop and recreate the scratch database. Returns the status of CREATE DATABASE.
reset_db() {
  psql_exec -d postgres -c "DROP DATABASE IF EXISTS $DB" >/dev/null 2>&1
  psql_exec -d postgres -c "CREATE DATABASE $DB" >/dev/null
}

# run_scenario NAME CMD [ARGS...]
# Runs CMD, capturing stdout+stderr, and prints a PASS/FAIL line for NAME. On failure the
# last ">>>APPLY" marker and up to three error lines are shown. Returns 0 on PASS, 1 on FAIL.
run_scenario() {
  local name="$1"; shift
  local out rc last_apply
  # `local` is declared separately above so that $? reflects the command, not `local`.
  out="$( "$@" 2>&1 )"; rc=$?
  last_apply="$(printf '%s\n' "$out" | grep '>>>APPLY' | tail -1 | sed 's/>>>APPLY //')"
  if [ "$rc" -eq 0 ]; then
    echo " [$name] PASS — 전체 적용 성공"
    return 0
  else
    echo " [$name] FAIL — 깨진 지점: ${last_apply:-?}"
    printf '%s\n' "$out" | grep -iE 'ERROR|unsafe|HINT' | head -3 | sed 's/^/ /'
    return 1
  fi
}

# Print, one per line, the migration files whose version is greater than BASELINE_CUTOFF.
_post_baseline() {
  local f base ver
  for f in "${MIGS[@]}"; do
    base="$(basename "$f")"; ver="${base%%_*}"
    # Skip names whose prefix is not purely numeric instead of failing in arithmetic.
    case "$ver" in ''|*[!0-9]*) continue ;; esac
    ver="$((10#$ver))"   # force base 10: zero-padded prefixes would otherwise parse as octal
    if [ "$ver" -gt "$BASELINE_CUTOFF" ]; then printf '%s\n' "$f"; fi
  done
}

# FRESH - baseline load + post-baseline migrations in one transaction.
scenario_fresh() {
  local f post=()
  reset_db || return 1
  while IFS= read -r f; do post+=("$f"); done < <(_post_baseline)
  {
    printf '%s\n' '\set ON_ERROR_STOP on' 'BEGIN;'
    printf '%s\n' '\echo >>>APPLY _baseline'
    cat "$BASELINE_FILE"; echo
    # ${arr[@]+...}: bash < 4.4 treats an empty array as unset under `set -u`.
    for f in ${post[@]+"${post[@]}"}; do
      printf '%s\n' "\\echo >>>APPLY $(basename "$f")"; cat "$f"; echo
    done
    printf '%s\n' 'COMMIT;'
  } | psql_exec -d "$DB"
}

# INCREMENTAL - baseline committed first, then post-baseline in a separate transaction.
scenario_dr() {
  local f out post=()
  reset_db || return 1
  if ! out="$( { printf '%s\n' '\set ON_ERROR_STOP on'; cat "$BASELINE_FILE"; } | psql_exec -d "$DB" 2>&1 )"; then
    printf '%s\n' ">>>APPLY _baseline"; echo "baseline 적재 실패"
    # Surface the captured psql output so run_scenario can show the actual error lines.
    printf '%s\n' "$out"
    return 1
  fi
  while IFS= read -r f; do post+=("$f"); done < <(_post_baseline)
  emit_single_txn ${post[@]+"${post[@]}"} | psql_exec -d "$DB"
}

# ── Container start-up ──
echo "기동: $IMAGE ($CNAME)"
docker run -d --name "$CNAME" -e POSTGRES_PASSWORD=x -e POSTGRES_HOST_AUTH_METHOD=trust "$IMAGE" >/dev/null \
  || { echo "FATAL: docker run 실패 ($IMAGE)"; exit 2; }

# Probe over TCP: the official postgres image (which pgvector/pgvector is presumably built
# on - confirm) runs a temporary init-time server that listens on the Unix socket only and
# is then restarted, so a socket probe can report "ready" just before that restart.
ready=0
for _ in $(seq 1 40); do
  if docker exec "$CNAME" pg_isready -h 127.0.0.1 -U postgres -q 2>/dev/null; then
    ready=1
    break
  fi
  sleep 0.5
done
[ "$ready" -eq 1 ] || { echo "FATAL: postgres 준비 시간 초과 ($CNAME)"; exit 2; }
echo "pg: $(docker exec "$CNAME" psql -U postgres -tAc 'show server_version' 2>/dev/null)"
echo

fail=0
echo "── FRESH (baseline 적재 + post-baseline 단일 트랜잭션 = init_db fresh 경로) ──"
run_scenario FRESH scenario_fresh || fail=1
echo
echo "── INCREMENTAL (baseline 커밋 후 post-baseline 별 트랜잭션 = 기존 DB 증분) ──"
run_scenario DR scenario_dr || fail=1
echo

if [ "$fail" -eq 0 ]; then
  echo "RESULT: PASS — fresh/incremental 모두 baseline+post-baseline 적용 가능"
  exit 0
else
  echo "RESULT: FAIL — baseline/post-baseline 적용 불가 (위 지점)"
  exit 1
fi