diff --git a/app/ai/client.py b/app/ai/client.py index eadf245..ed69740 100644 --- a/app/ai/client.py +++ b/app/ai/client.py @@ -89,19 +89,42 @@ class AIClient: raise async def _request(self, model_config, prompt: str) -> str: - """단일 모델 API 호출""" - response = await self._http.post( - model_config.endpoint, - json={ - "model": model_config.model, - "messages": [{"role": "user", "content": prompt}], - "max_tokens": model_config.max_tokens, - }, - timeout=model_config.timeout, - ) - response.raise_for_status() - data = response.json() - return data["choices"][0]["message"]["content"] + """단일 모델 API 호출 (OpenAI 호환 + Anthropic Messages API)""" + is_anthropic = "anthropic.com" in model_config.endpoint + + if is_anthropic: + import os + headers = { + "x-api-key": os.getenv("CLAUDE_API_KEY", ""), + "anthropic-version": "2023-06-01", + "content-type": "application/json", + } + response = await self._http.post( + model_config.endpoint, + headers=headers, + json={ + "model": model_config.model, + "max_tokens": model_config.max_tokens, + "messages": [{"role": "user", "content": prompt}], + }, + timeout=model_config.timeout, + ) + response.raise_for_status() + data = response.json() + return data["content"][0]["text"] + else: + response = await self._http.post( + model_config.endpoint, + json={ + "model": model_config.model, + "messages": [{"role": "user", "content": prompt}], + "max_tokens": model_config.max_tokens, + }, + timeout=model_config.timeout, + ) + response.raise_for_status() + data = response.json() + return data["choices"][0]["message"]["content"] async def close(self): await self._http.aclose() diff --git a/app/api/documents.py b/app/api/documents.py index 6530867..9755b21 100644 --- a/app/api/documents.py +++ b/app/api/documents.py @@ -127,16 +127,25 @@ async def upload_document( if not file.filename: raise HTTPException(status_code=400, detail="파일명이 필요합니다") + # 파일명 정규화 (경로 이탈 방지) + safe_name = Path(file.filename).name + if not safe_name or safe_name.startswith("."): + raise HTTPException(status_code=400, detail="유효하지 않은 파일명") + # Inbox에 파일 저장 inbox_dir = Path(settings.nas_mount_path) / "PKM" / "Inbox" inbox_dir.mkdir(parents=True, exist_ok=True) - target = inbox_dir / file.filename + target = (inbox_dir / safe_name).resolve() + + # Inbox 하위 경로 검증 + if not str(target).startswith(str(inbox_dir.resolve())): + raise HTTPException(status_code=400, detail="잘못된 파일 경로") # 중복 파일명 처리 counter = 1 stem, suffix = target.stem, target.suffix while target.exists(): - target = inbox_dir / f"{stem}_{counter}{suffix}" + target = inbox_dir.resolve() / f"{stem}_{counter}{suffix}" counter += 1 content = await file.read() diff --git a/app/api/search.py b/app/api/search.py index c6fc519..d1efcc9 100644 --- a/app/api/search.py +++ b/app/api/search.py @@ -165,23 +165,26 @@ async def _search_hybrid(session: AsyncSession, query: str, limit: int) -> list[ result = await session.execute( text(f""" - SELECT d.id, d.title, d.ai_domain, d.ai_summary, d.file_format, - ( - :w_fts * coalesce(ts_rank( - to_tsvector('simple', coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, '')), - plainto_tsquery('simple', :query) - ), 0) - + :w_trgm * coalesce(similarity( - coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, ''), - :query - ), 0) - + :w_vector * {vector_score} - ) AS score, - left(d.extracted_text, 200) AS snippet - FROM documents d - {vector_clause} - WHERE coalesce(d.extracted_text, '') != '' - ORDER BY score DESC + SELECT * FROM ( + SELECT d.id, d.title, d.ai_domain, d.ai_summary, d.file_format, + ( + :w_fts * coalesce(ts_rank( + to_tsvector('simple', coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, '')), + plainto_tsquery('simple', :query) + ), 0) + + :w_trgm * coalesce(similarity( + coalesce(d.title, '') || ' ' || coalesce(d.extracted_text, ''), + :query + ), 0) + + :w_vector * {vector_score} + ) AS score, + left(d.extracted_text, 200) AS snippet + FROM documents d + {vector_clause} + WHERE coalesce(d.extracted_text, '') != '' + ) sub + WHERE sub.score > 0.01 + ORDER BY sub.score DESC LIMIT :limit """), params, diff --git a/app/api/setup.py b/app/api/setup.py index 1cb29bb..16e8f26 100644 --- a/app/api/setup.py +++ b/app/api/setup.py @@ -154,8 +154,7 @@ async def totp_init( session: Annotated[AsyncSession, Depends(get_session)], ): """TOTP 시크릿 생성 + otpauth URI 반환 (DB에 저장하지 않음)""" - # 셋업 중이거나 인증된 유저만 사용 가능 - # 셋업 중에는 admin 생성 직후 호출됨 + await _require_setup(session) secret = pyotp.random_base32() totp = pyotp.TOTP(secret) uri = totp.provisioning_uri( @@ -171,7 +170,7 @@ async def totp_verify( session: Annotated[AsyncSession, Depends(get_session)], ): """TOTP 코드 검증 후 DB에 시크릿 저장""" - # 코드 검증 + await _require_setup(session) totp = pyotp.TOTP(body.secret) if not totp.verify(body.code): raise HTTPException( @@ -194,8 +193,12 @@ async def totp_verify( @router.post("/verify-nas", response_model=VerifyNASResponse) -async def verify_nas(body: VerifyNASRequest): +async def verify_nas( + body: VerifyNASRequest, + session: Annotated[AsyncSession, Depends(get_session)], +): """NAS 마운트 경로 읽기/쓰기 테스트""" + await _require_setup(session) path = Path(body.path) exists = path.exists() readable = path.is_dir() and any(True for _ in path.iterdir()) if exists else False diff --git a/app/workers/classify_worker.py b/app/workers/classify_worker.py index 10f5140..ae16441 100644 --- a/app/workers/classify_worker.py +++ b/app/workers/classify_worker.py @@ -1,10 +1,13 @@ -"""AI 분류 워커 — Qwen3.5로 도메인/태그/요약 생성""" +"""AI 분류 워커 — Qwen3.5로 도메인/태그/요약 생성 + Inbox→Knowledge 이동""" +import shutil from datetime import datetime, timezone +from pathlib import Path from sqlalchemy.ext.asyncio import AsyncSession from ai.client import AIClient, parse_json_response +from core.config import settings from core.utils import setup_logger from models.document import Document @@ -67,6 +70,10 @@ async def process(document_id: int, session: AsyncSession) -> None: doc.ai_model_version = "qwen3.5-35b-a3b" doc.ai_processed_at = datetime.now(timezone.utc) + # ─── Inbox → Knowledge 폴더 이동 ─── + if doc.file_path.startswith("PKM/Inbox/") and domain: + _move_to_knowledge(doc, domain) + logger.info( f"[분류] document_id={document_id}: " f"domain={domain}, tags={doc.ai_tags}, summary={len(summary)}자" @@ -74,3 +81,35 @@ async def process(document_id: int, session: AsyncSession) -> None: finally: await client.close() + + +def _move_to_knowledge(doc: Document, domain: str): + """분류 완료 후 Inbox에서 Knowledge 폴더로 파일 이동""" + nas_root = Path(settings.nas_mount_path) + src = nas_root / doc.file_path + + if not src.exists(): + logger.warning(f"[이동] 원본 파일 없음: {src}") + return + + # 대상 경로: PKM/{domain}/{파일명} + sub_group = doc.ai_sub_group + if sub_group: + new_rel = f"PKM/{domain}/{sub_group}/{src.name}" + else: + new_rel = f"PKM/{domain}/{src.name}" + + dst = nas_root / new_rel + dst.parent.mkdir(parents=True, exist_ok=True) + + # 중복 파일명 처리 + counter = 1 + stem, suffix = dst.stem, dst.suffix + while dst.exists(): + dst = dst.parent / f"{stem}_{counter}{suffix}" + new_rel = str(dst.relative_to(nas_root)) + counter += 1 + + shutil.move(str(src), str(dst)) + doc.file_path = new_rel + logger.info(f"[이동] {doc.file_path} → {new_rel}")