From 6490050b0419b315109db9bf21b7e0de84291a41 Mon Sep 17 00:00:00 2001
From: Hyungi Ahn
Date: Mon, 11 May 2026 12:05:03 +0900
Subject: [PATCH] feat(memos): promote memo to event + voice memo upload endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR-2B/2C backend 2/2. plan v9 commit 분할 2~3 통합 (memos.py 단일 파일 변경).

PR-2B promote-to-event:
- POST /api/memos/{memo_id}/promote-to-event — 메모 → events 1-click 승급
  · kind 결정: body.kind > documents.ai_event_kind > 400
  · activity_log 면 status=done + ended_at=now() 자동 (5초 행동 기록 UX)
  · calendar_event + start_at 있으면 status=scheduled
  · Event row + events_history(create) 자동 생성
  · memo_document_id 자동 link + source='memo' + raw_metadata 에 AI 추천값 보존
  · 한 메모 → N events 가능 (사용자 의도에 따라 dedup 없음)
- POST /api/memos/{memo_id}/dismiss-event-suggestion — '그냥 메모' (ai_event_kind='note' 강제)
  · MVP: AI 추천값과 사용자 확정값 같은 컬럼 (정확도 측정 흐려질 수 있음)
  · 백로그: user_event_kind 별 컬럼 분리 (plan Memo Intake Upgrade 백로그)
- MemoResponse 확장: ai_event_kind / ai_event_confidence / source_channel / file_type / file_path
- list_memos 필터 완화: file_type IN (note, audio) + source_channel IN (memo, voice)
  → voice 메모도 같은 inbox list 에 표시 (사용자 의도: 메모 = 모든 입력의 inbox)

PR-2C voice upload:
- migration 254: ALTER TYPE source_channel ADD VALUE 'voice'
- POST /api/memos/voice (multipart audio + recorded_at + device_hint)
  · 검증: Content-Type audio/* + size ≤ 50MB + 확장자 화이트리스트
  · NAS 저장: /documents/PKM/Recordings/{YYYY-MM}/{uuid}.{ext}
  · fsync + rename(atomic) 패턴 (NAS soft mount 안전)
  · Document row: file_type='audio' + source_channel='voice' + category='audio'
  · enqueue stt 큐 → 기존 stt_worker → classify (PR-2B triage) → embed → chunk
  · extract_meta 에 device_hint / recorded_at 보존
- 응답: MemoResponse (file_path 포함, frontend audio player 용)

원칙: AI worker 는 events row 직접 생성 X. 본 endpoint 가 사용자 의도 channel.
---
Reviewer notes (text between the three-dash line and the diff is not part of
the commit message):
  * The one-line-per-diff-line structure was restored from a whitespace-collapsed
    copy; blank context lines and indentation are best-effort reconstructions.
  * upload_voice_memo: target_dir.mkdir() now runs inside the try block so an
    OSError from it reaches the existing 503 handler instead of escaping it.
  * upload_voice_memo: the 503 HTTPException is raised "from e" to keep the cause.

 app/api/memos.py                        | 305 +++++++++++++++++++++++-
 migrations/254_source_channel_voice.sql |   5 +
 2 files changed, 303 insertions(+), 7 deletions(-)
 create mode 100644 migrations/254_source_channel_voice.sql

diff --git a/app/api/memos.py b/app/api/memos.py
index f5fa749..8444b88 100644
--- a/app/api/memos.py
+++ b/app/api/memos.py
@@ -1,22 +1,34 @@
-"""메모 CRUD API — 파일 없는 문서(file_type='note')"""
+"""메모 CRUD API — 파일 없는 문서(file_type='note') + voice 메모 (file_type='audio', source_channel='voice')"""
 
 import hashlib
 import logging
+import os
 import re
+import uuid
 from datetime import datetime, timezone
-from typing import Annotated
+from pathlib import Path
+from typing import Annotated, Any
 
-from fastapi import APIRouter, Depends, HTTPException, Query
-from pydantic import BaseModel
+from fastapi import APIRouter, Depends, File, Form, HTTPException, Query, UploadFile
+from pydantic import BaseModel, Field
 from sqlalchemy import delete, func, select
 from sqlalchemy.ext.asyncio import AsyncSession
 
 from core.auth import get_current_user
+from core.config import settings
 from core.database import get_session
 from models.document import Document
+from models.event import Event
+from models.event_history import EventHistory
 from models.queue import ProcessingQueue, enqueue_stage
 from models.user import User
 
+# Voice upload 제한 (plan v9 결정 — 10분 / 50MB)
+VOICE_MAX_BYTES = 50 * 1024 * 1024
+VOICE_ALLOWED_EXTS = {".m4a", ".mp3", ".wav", ".webm", ".ogg", ".opus", ".aac"}
+VOICE_ALLOWED_CONTENT_PREFIXES = ("audio/",)
+VOICE_NAS_SUBDIR = "PKM/Recordings"  # /mnt/nas/Document_Server/PKM/Recordings/{YYYY-MM}/{uuid}.{ext}
+
 logger = logging.getLogger(__name__)
 
 router = APIRouter()
@@ -156,6 +168,12 @@ class MemoResponse(BaseModel):
     archived: bool
     ask_includable: bool
     memo_task_state: dict  # {"": {"checked_at": ""}}
+    # Memo Intake Upgrade PR-2B — AI 추천 분류 (사용자 1-click promote 의 hint)
+    ai_event_kind: str | None = None
+    ai_event_confidence: float | None = None
+    source_channel: str | None = None  # voice/memo 등 진입점 식별 (UI 배지)
+    file_type: str | None = None  # audio (voice 메모) vs note (text 메모)
+    file_path: str | None = None  # voice 메모의 NAS audio 경로 (audio player 용)
     created_at: datetime
     updated_at: datetime
 
@@ -185,6 +203,11 @@ def _to_memo_response(doc: Document) -> MemoResponse:
         archived=doc.archived,
         ask_includable=doc.ask_includable,
         memo_task_state=dict(doc.memo_task_state or {}),
+        ai_event_kind=doc.ai_event_kind,
+        ai_event_confidence=doc.ai_event_confidence,
+        source_channel=doc.source_channel,
+        file_type=doc.file_type,
+        file_path=doc.file_path,
         created_at=doc.created_at,
         updated_at=doc.updated_at,
     )
@@ -241,10 +264,13 @@ async def list_memos(
     archived: bool = Query(False, description="true면 아카이브 목록"),
     pinned: bool | None = Query(None, description="true면 핀 고정된 메모만"),
 ):
-    """메모 목록 — 활성: 핀 우선 + 최신순 / 아카이브: 최신순 (핀 무시)"""
+    """메모 목록 — 활성: 핀 우선 + 최신순 / 아카이브: 최신순 (핀 무시)
+
+    PR-2C: source_channel='voice' (음성 메모) 도 포함. 사용자 의도 = 메모는 모든 입력의 inbox.
+    """
     base = select(Document).where(
-        Document.file_type == "note",
-        Document.source_channel == "memo",
+        Document.file_type.in_(("note", "audio")),
+        Document.source_channel.in_(("memo", "voice")),
         Document.deleted_at == None,  # noqa: E711
         Document.archived == archived,
     )
@@ -483,3 +509,268 @@ async def toggle_ask_includable(
     await session.refresh(doc)
 
     return _to_memo_response(doc)
+
+
+# ─── Memo Intake Upgrade PR-2B: promote to event ───
+
+
+class PromotePayload(BaseModel):
+    """메모 → events 승급. kind 미지정 시 documents.ai_event_kind 사용.
+
+    AI worker 는 events row 직접 생성 X — 본 endpoint 만이 사용자 의도 channel.
+    """
+    kind: str | None = None  # 'task' | 'calendar_event' | 'activity_log'
+    due_at: datetime | None = None
+    start_at: datetime | None = None
+    end_at: datetime | None = None
+    started_at: datetime | None = None
+    ended_at: datetime | None = None
+    priority: int | None = None
+    project_tag: str | None = None
+
+
+_PROMOTE_KIND_MAP = {
+    # AI 추천 (event_kind_hint) → events.kind
+    "task": "task",
+    "calendar_event": "calendar_event",
+    "activity_log": "activity_log",
+    # 'note' / 'reference' 는 promote 대상 아님 (사용자가 명시 kind 지정 필요)
+}
+
+
+@router.post("/{memo_id}/promote-to-event", status_code=201)
+async def promote_memo_to_event(
+    memo_id: int,
+    body: PromotePayload,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """메모 1건 → events row 1건 생성. memo_document_id 자동 link.
+
+    kind 결정 순서: body.kind > documents.ai_event_kind > 400 거부.
+    한 메모 → N events 가능 (정책: dedup 없음, 사용자 의도 따라).
+    """
+    doc = await session.get(Document, memo_id)
+    if (
+        not doc
+        or doc.deleted_at is not None
+        or doc.source_channel not in ("memo", "voice")
+    ):
+        raise HTTPException(status_code=404, detail="메모를 찾을 수 없습니다")
+
+    # kind 결정
+    requested = (body.kind or "").strip().lower() or None
+    ai_hint = (doc.ai_event_kind or "").strip().lower() or None
+    chosen = requested or ai_hint
+    event_kind = _PROMOTE_KIND_MAP.get(chosen or "")
+    if not event_kind:
+        raise HTTPException(
+            status_code=400,
+            detail="promote 할 kind 가 명확하지 않습니다 (task/calendar_event/activity_log 중 1개 지정 또는 ai_event_kind 필요)",
+        )
+
+    # 시간 필드 default — activity_log 는 빠른 행동 기록 UX 그대로
+    now = datetime.now(timezone.utc)
+    started_at = body.started_at
+    ended_at = body.ended_at
+    completed_at: datetime | None = None
+    status_val = "inbox"
+    if event_kind == "activity_log":
+        ended_at = ended_at or now
+        started_at = started_at or ended_at
+        completed_at = now
+        status_val = "done"
+    elif event_kind == "calendar_event":
+        status_val = "scheduled" if body.start_at else "inbox"
+
+    title = (doc.title or "").strip() or "메모"
+    description = doc.extracted_text
+
+    ev = Event(
+        title=title,
+        description=description,
+        kind=event_kind,
+        status=status_val,
+        due_at=body.due_at,
+        start_at=body.start_at,
+        end_at=body.end_at,
+        started_at=started_at,
+        ended_at=ended_at,
+        completed_at=completed_at,
+        priority=body.priority,
+        project_tag=body.project_tag,
+        source="memo",
+        source_ref=str(doc.id),  # 같은 메모 N promote 시 별 row → dedup 의도 X
+        raw_metadata={
+            "memo_id": doc.id,
+            "ai_event_kind": doc.ai_event_kind,
+            "ai_event_confidence": doc.ai_event_confidence,
+            "promoted_at": now.isoformat(),
+        },
+        memo_document_id=doc.id,
+        user_id=user.id,
+        created_by="manual",
+    )
+    session.add(ev)
+    await session.flush()
+
+    # events_history.create row (events 도메인 패턴 — events/api/events.py 의 _record_history 와 동일 형태)
+    history = EventHistory(
+        event_id=ev.id,
+        changed_by="manual",
+        change_kind="create",
+        before=None,
+        after={
+            "id": ev.id,
+            "title": ev.title,
+            "kind": ev.kind,
+            "status": ev.status,
+            "source": ev.source,
+            "memo_document_id": ev.memo_document_id,
+        },
+    )
+    session.add(history)
+    await session.commit()
+    await session.refresh(ev)
+
+    return {
+        "event_id": ev.id,
+        "kind": ev.kind,
+        "status": ev.status,
+        "memo_document_id": ev.memo_document_id,
+    }
+
+
+@router.post("/{memo_id}/dismiss-event-suggestion", response_model=MemoResponse)
+async def dismiss_event_suggestion(
+    memo_id: int,
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+):
+    """'그냥 메모' — AI 추천 무시 + ai_event_kind='note' 강제. 4 버튼 숨김 신호.
+
+    MVP: AI 추천값과 사용자 확정값을 같은 컬럼에 저장 (정확도 측정 흐려짐 가능).
+    백로그: user_event_kind 별 컬럼 분리 (plan Memo Intake Upgrade 백로그).
+    """
+    doc = await session.get(Document, memo_id)
+    if (
+        not doc
+        or doc.deleted_at is not None
+        or doc.source_channel not in ("memo", "voice")
+    ):
+        raise HTTPException(status_code=404, detail="메모를 찾을 수 없습니다")
+
+    doc.ai_event_kind = "note"
+    doc.updated_at = datetime.now(timezone.utc)
+    await session.commit()
+    await session.refresh(doc)
+    return _to_memo_response(doc)
+
+
+# ─── Memo Intake Upgrade PR-2C: voice upload ───
+
+
+@router.post("/voice", response_model=MemoResponse, status_code=201)
+async def upload_voice_memo(
+    user: Annotated[User, Depends(get_current_user)],
+    session: Annotated[AsyncSession, Depends(get_session)],
+    audio: UploadFile = File(...),
+    recorded_at: str | None = Form(None),
+    device_hint: str | None = Form(None),
+):
+    """애플워치 / 모바일 / 기타 음성 메모 업로드 → STT 큐 → 자동 분류.
+
+    PR-2C: source_channel='voice' + file_type='audio'. 기존 stt_worker → classify
+    파이프라인 자동 통과. plan 원칙: AI worker 는 events 직접 생성 X.
+    """
+    # Content-Type 검증
+    if audio.content_type and not audio.content_type.startswith(VOICE_ALLOWED_CONTENT_PREFIXES):
+        raise HTTPException(status_code=415, detail=f"지원되지 않는 Content-Type: {audio.content_type}")
+
+    # 확장자 결정
+    orig_name = audio.filename or ""
+    ext = (Path(orig_name).suffix or "").lower()
+    if ext and ext not in VOICE_ALLOWED_EXTS:
+        raise HTTPException(status_code=415, detail=f"지원되지 않는 확장자: {ext}")
+    if not ext:
+        # content_type 으로 추정 (audio/m4a 등)
+        ext = ".m4a"
+
+    # 본문 읽기 + size 검증
+    payload: bytes = await audio.read()
+    if len(payload) > VOICE_MAX_BYTES:
+        raise HTTPException(status_code=413, detail=f"50MB 초과 ({len(payload)//1024//1024}MB)")
+    if len(payload) == 0:
+        raise HTTPException(status_code=400, detail="빈 audio")
+
+    # 저장 경로 (NAS) — fastapi 컨테이너 안 /documents = NAS mount
+    nas_root = Path(settings.nas_mount_path)
+    yyyy_mm = datetime.now(timezone.utc).astimezone().strftime("%Y-%m")
+    target_dir = nas_root / VOICE_NAS_SUBDIR / yyyy_mm
+    file_uuid = uuid.uuid4().hex
+    target_path = target_dir / f"{file_uuid}{ext}"
+
+    # fsync + rename(atomic) 패턴 — NAS soft mount 안전 (feedback_nfs_korean_path_normalize 결)
+    tmp_path = target_path.with_suffix(target_path.suffix + ".tmp")
+    try:
+        target_dir.mkdir(parents=True, exist_ok=True)
+        with open(tmp_path, "wb") as fh:
+            fh.write(payload)
+            fh.flush()
+            os.fsync(fh.fileno())
+        os.replace(tmp_path, target_path)
+    except OSError as e:
+        # NAS 쓰기 실패 graceful — DB row 미생성
+        if tmp_path.exists():
+            try:
+                tmp_path.unlink()
+            except OSError:
+                pass
+        logger.error("voice upload NAS write 실패: %s", e)
+        raise HTTPException(status_code=503, detail="NAS 저장 실패 (재시도 권장)") from e
+
+    # recorded_at 파싱
+    rec_at: datetime | None = None
+    if recorded_at:
+        try:
+            rec_at = datetime.fromisoformat(recorded_at.replace("Z", "+00:00"))
+        except ValueError:
+            rec_at = None
+
+    raw_metadata: dict[str, Any] = {}
+    if device_hint:
+        raw_metadata["device_hint"] = device_hint
+    if rec_at:
+        raw_metadata["recorded_at"] = rec_at.isoformat()
+
+    # file_path 는 NAS root 기준 상대 경로 (다른 documents 컨벤션, /api/documents/{id}/file endpoint 호환)
+    relative_path = target_path.relative_to(nas_root)
+
+    # Document row — file_type='audio', source_channel='voice'
+    title_seed = (orig_name or "음성 메모").rsplit(".", 1)[0]
+    doc = Document(
+        file_path=str(relative_path),
+        file_hash=hashlib.sha256(payload).hexdigest(),
+        file_format=ext.lstrip(".") or "m4a",
+        file_size=len(payload),
+        file_type="audio",
+        title=title_seed[:80] or "음성 메모",
+        extracted_text=None,  # STT 후 채움
+        review_status="approved",
+        source_channel="voice",
+        category="audio",
+        ask_includable=True,
+        pinned=False,
+        archived=False,
+        memo_task_state={},
+        extract_meta=raw_metadata or None,
+    )
+    session.add(doc)
+    await session.flush()
+
+    # STT 큐 등록 — 기존 stt_worker → classify → embed → chunk 파이프라인 자동
+    await enqueue_stage(session, doc.id, "stt")
+    await session.commit()
+    await session.refresh(doc)
+
+    return _to_memo_response(doc)
diff --git a/migrations/254_source_channel_voice.sql b/migrations/254_source_channel_voice.sql
new file mode 100644
index 0000000..e479ab0
--- /dev/null
+++ b/migrations/254_source_channel_voice.sql
@@ -0,0 +1,5 @@
+-- Memo Intake Upgrade PR-2C — source_channel enum 에 'voice' 추가
+-- 음성 메모 진입점 (애플워치 / 기타 mic) 의 source 식별.
+-- file_type='audio' + category='audio' + source_channel='voice' 조합으로 메모 UI 에 노출.
+
+ALTER TYPE source_channel ADD VALUE IF NOT EXISTS 'voice';