""" 검색 API 라우터 """ from fastapi import APIRouter, Depends, Query from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy import select, or_, and_, text from sqlalchemy.orm import joinedload, selectinload from typing import List, Optional, Dict, Any from datetime import datetime from ...core.database import get_db from ...models.user import User from ...models.document import Document, Tag from ...models.highlight import Highlight from ...models.note import Note from ...models.memo_tree import MemoTree, MemoNode from ...models.note_document import NoteDocument from ..dependencies import get_current_active_user from pydantic import BaseModel class SearchResult(BaseModel): """검색 결과""" type: str # "document", "note", "highlight" id: str title: str content: str document_id: str document_title: str created_at: datetime relevance_score: float = 0.0 highlight_info: Optional[Dict[str, Any]] = None class Config: from_attributes = True class SearchResponse(BaseModel): """검색 응답""" query: str total_results: int results: List[SearchResult] facets: Dict[str, List[Dict[str, Any]]] = {} router = APIRouter() @router.get("/", response_model=SearchResponse) async def search_all( q: str = Query(..., description="검색어"), type_filter: Optional[str] = Query(None, description="검색 타입 필터: document, note, memo, highlight"), document_id: Optional[str] = Query(None, description="특정 문서 내 검색"), tag: Optional[str] = Query(None, description="태그 필터"), skip: int = Query(0, ge=0), limit: int = Query(50, ge=1, le=100), current_user: User = Depends(get_current_active_user), db: AsyncSession = Depends(get_db) ): """통합 검색 (문서 + 메모 + 하이라이트)""" results = [] # 1. 문서 검색 if not type_filter or type_filter == "document": document_results = await search_documents(q, document_id, tag, current_user, db) results.extend(document_results) # 2. 노트 문서 검색 if not type_filter or type_filter == "note": note_results = await search_note_documents(q, current_user, db) results.extend(note_results) # 3. 메모 트리 노드 검색 if not type_filter or type_filter == "memo": memo_results = await search_memo_nodes(q, current_user, db) results.extend(memo_results) # 4. 기존 메모 검색 (하위 호환성) if not type_filter or type_filter == "note": old_note_results = await search_notes(q, document_id, tag, current_user, db) results.extend(old_note_results) # 5. 하이라이트 검색 if not type_filter or type_filter == "highlight": highlight_results = await search_highlights(q, document_id, current_user, db) results.extend(highlight_results) # 6. 하이라이트 메모 검색 if not type_filter or type_filter == "highlight_note": highlight_note_results = await search_highlight_notes(q, document_id, current_user, db) results.extend(highlight_note_results) # 7. 문서 본문 검색 (OCR 데이터) if not type_filter or type_filter == "document_content": content_results = await search_document_content(q, document_id, current_user, db) results.extend(content_results) # 관련성 점수로 정렬 results.sort(key=lambda x: x.relevance_score, reverse=True) # 페이지네이션 total_results = len(results) paginated_results = results[skip:skip + limit] # 패싯 정보 생성 facets = await generate_search_facets(results, current_user, db) return SearchResponse( query=q, total_results=total_results, results=paginated_results, facets=facets ) async def search_documents( query: str, document_id: Optional[str], tag: Optional[str], current_user: User, db: AsyncSession ) -> List[SearchResult]: """문서 검색""" query_obj = select(Document).options( selectinload(Document.uploader), selectinload(Document.tags) ) # 권한 필터링 if not current_user.is_admin: query_obj = query_obj.where( or_( Document.is_public == True, Document.uploaded_by == current_user.id ) ) # 특정 문서 필터 if document_id: query_obj = query_obj.where(Document.id == document_id) # 태그 필터 if tag: query_obj = query_obj.join(Document.tags).where(Tag.name == tag) # 텍스트 검색 search_condition = or_( Document.title.ilike(f"%{query}%"), Document.description.ilike(f"%{query}%") ) query_obj = query_obj.where(search_condition) result = await db.execute(query_obj) documents = result.scalars().all() search_results = [] for doc in documents: # 관련성 점수 계산 (제목 매치가 더 높은 점수) score = 0.0 if query.lower() in doc.title.lower(): score += 2.0 if doc.description and query.lower() in doc.description.lower(): score += 1.0 search_results.append(SearchResult( type="document", id=str(doc.id), title=doc.title, content=doc.description or "", document_id=str(doc.id), document_title=doc.title, created_at=doc.created_at, relevance_score=score )) return search_results async def search_notes( query: str, document_id: Optional[str], tag: Optional[str], current_user: User, db: AsyncSession ) -> List[SearchResult]: """메모 검색""" query_obj = ( select(Note) .options( joinedload(Note.highlight).joinedload(Highlight.document) ) .join(Highlight) .where(Highlight.user_id == current_user.id) ) # 특정 문서 필터 if document_id: query_obj = query_obj.where(Highlight.document_id == document_id) # 태그 필터 if tag: query_obj = query_obj.where(Note.tags.contains([tag])) # 텍스트 검색 (메모 내용 + 하이라이트된 텍스트) search_condition = or_( Note.content.ilike(f"%{query}%"), Highlight.selected_text.ilike(f"%{query}%") ) query_obj = query_obj.where(search_condition) result = await db.execute(query_obj) notes = result.scalars().all() search_results = [] for note in notes: # 관련성 점수 계산 score = 0.0 if query.lower() in note.content.lower(): score += 2.0 if query.lower() in note.highlight.selected_text.lower(): score += 1.5 search_results.append(SearchResult( type="note", id=str(note.id), title=f"메모: {note.highlight.selected_text[:50]}...", content=note.content, document_id=str(note.highlight.document.id), document_title=note.highlight.document.title, created_at=note.created_at, relevance_score=score, highlight_info={ "highlight_id": str(note.highlight.id), "selected_text": note.highlight.selected_text, "start_offset": note.highlight.start_offset, "end_offset": note.highlight.end_offset } )) return search_results async def search_highlights( query: str, document_id: Optional[str], current_user: User, db: AsyncSession ) -> List[SearchResult]: """하이라이트 검색""" query_obj = ( select(Highlight) .options(joinedload(Highlight.document)) .where(Highlight.user_id == current_user.id) ) # 특정 문서 필터 if document_id: query_obj = query_obj.where(Highlight.document_id == document_id) # 텍스트 검색 query_obj = query_obj.where(Highlight.selected_text.ilike(f"%{query}%")) result = await db.execute(query_obj) highlights = result.scalars().all() search_results = [] for highlight in highlights: # 관련성 점수 계산 score = 1.0 if query.lower() in highlight.selected_text.lower() else 0.5 search_results.append(SearchResult( type="highlight", id=str(highlight.id), title=f"하이라이트: {highlight.selected_text[:50]}...", content=highlight.selected_text, document_id=str(highlight.document.id), document_title=highlight.document.title, created_at=highlight.created_at, relevance_score=score, highlight_info={ "highlight_id": str(highlight.id), "selected_text": highlight.selected_text, "start_offset": highlight.start_offset, "end_offset": highlight.end_offset, "highlight_color": highlight.highlight_color } )) return search_results async def generate_search_facets( results: List[SearchResult], current_user: User, db: AsyncSession ) -> Dict[str, List[Dict[str, Any]]]: """검색 결과 패싯 생성""" facets = {} # 타입별 개수 type_counts = {} for result in results: type_counts[result.type] = type_counts.get(result.type, 0) + 1 facets["types"] = [ {"name": type_name, "count": count} for type_name, count in type_counts.items() ] # 문서별 개수 document_counts = {} for result in results: doc_title = result.document_title document_counts[doc_title] = document_counts.get(doc_title, 0) + 1 facets["documents"] = [ {"name": doc_title, "count": count} for doc_title, count in sorted(document_counts.items(), key=lambda x: x[1], reverse=True)[:10] ] return facets @router.get("/suggestions") async def get_search_suggestions( q: str = Query(..., min_length=2, description="검색어 (최소 2글자)"), current_user: User = Depends(get_current_active_user), db: AsyncSession = Depends(get_db) ): """검색어 자동완성 제안""" suggestions = [] # 문서 제목에서 제안 doc_result = await db.execute( select(Document.title) .where( and_( Document.title.ilike(f"%{q}%"), or_( Document.is_public == True, Document.uploaded_by == current_user.id ) if not current_user.is_admin else text("true") ) ) .limit(5) ) doc_titles = doc_result.scalars().all() suggestions.extend([{"text": title, "type": "document"} for title in doc_titles]) # 태그에서 제안 tag_result = await db.execute( select(Tag.name) .where(Tag.name.ilike(f"%{q}%")) .limit(5) ) tag_names = tag_result.scalars().all() suggestions.extend([{"text": name, "type": "tag"} for name in tag_names]) # 메모 태그에서 제안 note_result = await db.execute( select(Note.tags) .join(Highlight) .where(Highlight.user_id == current_user.id) ) notes = note_result.scalars().all() note_tags = set() for note in notes: if note and isinstance(note, list): for tag in note: if q.lower() in tag.lower(): note_tags.add(tag) suggestions.extend([{"text": tag, "type": "note_tag"} for tag in list(note_tags)[:5]]) return {"suggestions": suggestions[:10]} async def search_highlight_notes( query: str, document_id: Optional[str], current_user: User, db: AsyncSession ) -> List[SearchResult]: """하이라이트 메모 내용 검색""" query_obj = select(Note).options( selectinload(Note.highlight).selectinload(Highlight.document) ) # 하이라이트가 있는 노트만 query_obj = query_obj.where(Note.highlight_id.isnot(None)) # 권한 필터링 - 사용자의 노트만 query_obj = query_obj.where(Note.created_by == current_user.id) # 특정 문서 필터 if document_id: query_obj = query_obj.join(Highlight).where(Highlight.document_id == document_id) # 메모 내용에서 검색 query_obj = query_obj.where(Note.content.ilike(f"%{query}%")) result = await db.execute(query_obj) notes = result.scalars().all() search_results = [] for note in notes: if not note.highlight or not note.highlight.document: continue # 관련성 점수 계산 score = 1.5 # 메모 내용 매치는 높은 점수 content_lower = (note.content or "").lower() if query.lower() in content_lower: score += 2.0 search_results.append(SearchResult( type="highlight_note", id=str(note.id), title=f"하이라이트 메모: {note.highlight.selected_text[:30]}...", content=note.content or "", document_id=str(note.highlight.document.id), document_title=note.highlight.document.title, created_at=note.created_at, relevance_score=score, highlight_info={ "highlight_id": str(note.highlight.id), "selected_text": note.highlight.selected_text, "start_offset": note.highlight.start_offset, "end_offset": note.highlight.end_offset, "note_content": note.content } )) return search_results async def search_note_documents( query: str, current_user: User, db: AsyncSession ) -> List[SearchResult]: """노트 문서 검색""" query_obj = select(NoteDocument).where( or_( NoteDocument.title.ilike(f"%{query}%"), NoteDocument.content.ilike(f"%{query}%") ) ) # 권한 필터링 - 사용자의 노트만 query_obj = query_obj.where(NoteDocument.created_by == current_user.email) result = await db.execute(query_obj) notes = result.scalars().all() search_results = [] for note in notes: # 관련성 점수 계산 score = 1.0 if query.lower() in note.title.lower(): score += 2.0 if note.content and query.lower() in note.content.lower(): score += 1.0 search_results.append(SearchResult( type="note", id=str(note.id), title=note.title, content=note.content or "", document_id=str(note.id), # 노트 자체가 문서 document_title=note.title, created_at=note.created_at, relevance_score=score )) return search_results async def search_memo_nodes( query: str, current_user: User, db: AsyncSession ) -> List[SearchResult]: """메모 트리 노드 검색""" query_obj = select(MemoNode).options( selectinload(MemoNode.tree) ).where( or_( MemoNode.title.ilike(f"%{query}%"), MemoNode.content.ilike(f"%{query}%") ) ) # 권한 필터링 - 사용자의 트리에 속한 노드만 query_obj = query_obj.join(MemoTree).where(MemoTree.user_id == current_user.id) result = await db.execute(query_obj) nodes = result.scalars().all() search_results = [] for node in nodes: # 관련성 점수 계산 score = 1.0 if query.lower() in node.title.lower(): score += 2.0 if node.content and query.lower() in node.content.lower(): score += 1.0 search_results.append(SearchResult( type="memo", id=str(node.id), title=node.title, content=node.content or "", document_id=str(node.tree.id), # 트리 ID를 문서 ID로 사용 document_title=f"📚 {node.tree.title}", created_at=node.created_at, relevance_score=score )) return search_results async def search_document_content( query: str, document_id: Optional[str], current_user: User, db: AsyncSession ) -> List[SearchResult]: """문서 본문 내용 검색 (OCR 데이터 포함)""" # 문서 권한 확인 doc_query = select(Document) if not current_user.is_admin: doc_query = doc_query.where( or_( Document.is_public == True, Document.uploaded_by == current_user.id ) ) if document_id: doc_query = doc_query.where(Document.id == document_id) result = await db.execute(doc_query) documents = result.scalars().all() search_results = [] for doc in documents: # HTML 파일에서 텍스트 검색 if doc.html_path: try: import os from bs4 import BeautifulSoup html_file_path = os.path.join("/app/data/documents", doc.html_path) if os.path.exists(html_file_path): with open(html_file_path, 'r', encoding='utf-8') as f: html_content = f.read() # HTML에서 텍스트 추출 soup = BeautifulSoup(html_content, 'html.parser') text_content = soup.get_text() # 검색어가 포함된 경우 if query.lower() in text_content.lower(): # 검색어 주변 컨텍스트 추출 context = extract_search_context(text_content, query) # 관련성 점수 계산 score = 2.0 # 본문 매치는 높은 점수 search_results.append(SearchResult( type="document_content", id=str(doc.id), title=f"📄 {doc.title} (본문)", content=context, document_id=str(doc.id), document_title=doc.title, created_at=doc.created_at, relevance_score=score )) except Exception as e: print(f"문서 본문 검색 오류: {e}") continue return search_results def extract_search_context(text: str, query: str, context_length: int = 200) -> str: """검색어 주변 컨텍스트 추출""" text_lower = text.lower() query_lower = query.lower() # 첫 번째 매치 위치 찾기 match_pos = text_lower.find(query_lower) if match_pos == -1: return text[:context_length] + "..." # 컨텍스트 시작/끝 위치 계산 start = max(0, match_pos - context_length // 2) end = min(len(text), match_pos + len(query) + context_length // 2) context = text[start:end] # 앞뒤에 ... 추가 if start > 0: context = "..." + context if end < len(text): context = context + "..." return context