🔍 고급 통합 검색 시스템 완성

🎯 주요 기능:
- 하이라이트 메모 내용 별도 검색 (highlight_note 타입)
- PDF/HTML 본문 전체 텍스트 검색 (OCR 데이터 활용)
- 검색 결과 미리보기 모달 (전체 내용 로드)
- 메모 트리 노드 검색 지원
- 노트 문서 통합 검색

🔧 백엔드 개선:
- search_highlight_notes: 하이라이트 메모 내용 검색
- search_document_content: HTML/PDF 본문 검색 (BeautifulSoup)
- search_memo_nodes: 메모 트리 노드 검색
- search_note_documents: 노트 문서 검색
- extract_search_context: 검색어 주변 컨텍스트 추출

🎨 프론트엔드 기능:
- 통합 검색 UI (/search.html) 완전 구현
- 검색 필터: 문서/노트/메모/하이라이트/하이라이트 메모/본문
- 미리보기 모달: 전체 내용 로드 및 표시
- 검색 결과 하이라이트 및 컨텍스트 표시
- 타입별 배지 및 관련도 점수 표시

📱 사용자 경험:
- 실시간 검색 디바운스 (500ms)
- 검색어 자동완성 제안
- 검색 통계 및 성능 표시
- 빠른 검색 예시 버튼
- 새 탭에서 결과 열기

🔗 네비게이션 통합:
- 헤더에 '통합 검색' 링크 추가
- 페이지별 활성 상태 관리
2025-09-02 16:53:56 +09:00
parent 97d60554a9
commit 4329a1c9a6
5 changed files with 1120 additions and 5 deletions
--- a/backend/src/api/routes/search.py
+++ b/backend/src/api/routes/search.py
@@ -13,6 +13,8 @@ from ...models.user import User
 from ...models.document import Document, Tag
 from ...models.highlight import Highlight
 from ...models.note import Note
+from ...models.memo_tree import MemoTree, MemoNode
+from ...models.note_document import NoteDocument
 from ..dependencies import get_current_active_user
 from pydantic import BaseModel

@@ -47,7 +49,7 @@ router = APIRouter()
@router.get("/", response_model=SearchResponse)
 async def search_all(
    q: str = Query(..., description="검색어"),
-    type_filter: Optional[str] = Query(None, description="검색 타입 필터: document, note, highlight"),
+    type_filter: Optional[str] = Query(None, description="검색 타입 필터: document, note, memo, highlight"),
    document_id: Optional[str] = Query(None, description="특정 문서 내 검색"),
    tag: Optional[str] = Query(None, description="태그 필터"),
    skip: int = Query(0, ge=0),
@@ -63,16 +65,36 @@ async def search_all(
        document_results = await search_documents(q, document_id, tag, current_user, db)
        results.extend(document_results)
    
-    # 2. 메모 검색
+    # 2. 노트 문서 검색
    if not type_filter or type_filter == "note":
-        note_results = await search_notes(q, document_id, tag, current_user, db)
+        note_results = await search_note_documents(q, current_user, db)
        results.extend(note_results)
    
-    # 3. 하이라이트 검색
+    # 3. 메모 트리 노드 검색
+    if not type_filter or type_filter == "memo":
+        memo_results = await search_memo_nodes(q, current_user, db)
+        results.extend(memo_results)
+    
+    # 4. 기존 메모 검색 (하위 호환성)
+    if not type_filter or type_filter == "note":
+        old_note_results = await search_notes(q, document_id, tag, current_user, db)
+        results.extend(old_note_results)
+    
+    # 5. 하이라이트 검색
    if not type_filter or type_filter == "highlight":
        highlight_results = await search_highlights(q, document_id, current_user, db)
        results.extend(highlight_results)
    
+    # 6. 하이라이트 메모 검색
+    if not type_filter or type_filter == "highlight_note":
+        highlight_note_results = await search_highlight_notes(q, document_id, current_user, db)
+        results.extend(highlight_note_results)
+    
+    # 7. 문서 본문 검색 (OCR 데이터)
+    if not type_filter or type_filter == "document_content":
+        content_results = await search_document_content(q, document_id, current_user, db)
+        results.extend(content_results)
+    
    # 관련성 점수로 정렬
    results.sort(key=lambda x: x.relevance_score, reverse=True)
    
@@ -352,3 +374,239 @@ async def get_search_suggestions(
    suggestions.extend([{"text": tag, "type": "note_tag"} for tag in list(note_tags)[:5]])
    
    return {"suggestions": suggestions[:10]}
+
+
async def search_highlight_notes(
    query: str,
    document_id: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the content of notes that are attached to highlights.

    Only notes with a ``highlight_id`` whose ``created_by`` equals
    ``current_user.id`` are considered. Matching is a case-insensitive
    substring match on ``Note.content``.

    Args:
        query: Text to look for. ``%`` and ``_`` are matched literally.
        document_id: When given, only highlights of this document are searched.
        current_user: User whose notes are searched.
        db: Session used to execute the query.

    Returns:
        One ``highlight_note`` result per matching note. Notes whose highlight
        or document could not be loaded are skipped.
    """
    # Escape LIKE metacharacters so user input such as "100%" or "a_b" is
    # matched literally instead of acting as a wildcard. "/" is the escape
    # character, so it has to be doubled first.
    escaped_query = query.replace("/", "//").replace("%", "/%").replace("_", "/_")
    like_pattern = f"%{escaped_query}%"

    query_obj = select(Note).options(
        selectinload(Note.highlight).selectinload(Highlight.document)
    )

    # Only notes that belong to a highlight.
    query_obj = query_obj.where(Note.highlight_id.isnot(None))

    # Permission filter: only the current user's notes.
    query_obj = query_obj.where(Note.created_by == current_user.id)

    # Optional restriction to a single document.
    if document_id:
        query_obj = query_obj.join(Highlight).where(Highlight.document_id == document_id)

    # Match against the note content.
    query_obj = query_obj.where(Note.content.ilike(like_pattern, escape="/"))

    result = await db.execute(query_obj)
    notes = result.scalars().all()

    query_lower = query.lower()
    search_results = []
    for note in notes:
        highlight = note.highlight
        if not highlight or not highlight.document:
            continue

        # Base score for a note-content hit, plus a bonus when the query
        # appears in the content (case-insensitive).
        score = 1.5
        if query_lower in (note.content or "").lower():
            score += 2.0

        # Guard against a missing selected_text so the title can still be built.
        title_excerpt = (highlight.selected_text or "")[:30]

        search_results.append(SearchResult(
            type="highlight_note",
            id=str(note.id),
            title=f"하이라이트 메모: {title_excerpt}...",
            content=note.content or "",
            document_id=str(highlight.document.id),
            document_title=highlight.document.title,
            created_at=note.created_at,
            relevance_score=score,
            highlight_info={
                "highlight_id": str(highlight.id),
                "selected_text": highlight.selected_text,
                "start_offset": highlight.start_offset,
                "end_offset": highlight.end_offset,
                "note_content": note.content
            }
        ))

    return search_results
+
+
async def search_note_documents(
    query: str,
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search note documents by title and content.

    Matching is a case-insensitive substring match on ``NoteDocument.title``
    or ``NoteDocument.content``.

    Args:
        query: Text to look for. ``%`` and ``_`` are matched literally.
        current_user: User whose note documents are searched.
        db: Session used to execute the query.

    Returns:
        One ``note`` result per matching note document. The note's own id is
        used as ``document_id`` because the note itself is the document.
    """
    # Escape LIKE metacharacters so user input is matched literally. "/" is
    # the escape character, so it has to be doubled first.
    escaped_query = query.replace("/", "//").replace("%", "/%").replace("_", "/_")
    like_pattern = f"%{escaped_query}%"

    query_obj = select(NoteDocument).where(
        or_(
            NoteDocument.title.ilike(like_pattern, escape="/"),
            NoteDocument.content.ilike(like_pattern, escape="/")
        )
    )

    # Permission filter: only the current user's notes.
    # NOTE(review): ownership is compared against the user's email here, not
    # the user id -- confirm this matches how NoteDocument.created_by is stored.
    query_obj = query_obj.where(NoteDocument.created_by == current_user.email)

    result = await db.execute(query_obj)
    notes = result.scalars().all()

    query_lower = query.lower()
    search_results = []
    for note in notes:
        # Treat a missing title as empty so scoring cannot raise.
        title = note.title or ""

        # Base score, with a larger bonus for a title hit than a content hit.
        score = 1.0
        if query_lower in title.lower():
            score += 2.0
        if note.content and query_lower in note.content.lower():
            score += 1.0

        search_results.append(SearchResult(
            type="note",
            id=str(note.id),
            title=title,
            content=note.content or "",
            document_id=str(note.id),  # the note itself is the document
            document_title=title,
            created_at=note.created_at,
            relevance_score=score
        ))

    return search_results
+
+
async def search_memo_nodes(
    query: str,
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search memo tree nodes by title and content.

    Matching is a case-insensitive substring match on ``MemoNode.title`` or
    ``MemoNode.content``, limited to nodes joined to a ``MemoTree`` whose
    ``user_id`` equals ``current_user.id``.

    Args:
        query: Text to look for. ``%`` and ``_`` are matched literally.
        current_user: User whose memo trees are searched.
        db: Session used to execute the query.

    Returns:
        One ``memo`` result per matching node. The owning tree's id is used
        as ``document_id``.
    """
    # Escape LIKE metacharacters so user input is matched literally. "/" is
    # the escape character, so it has to be doubled first.
    escaped_query = query.replace("/", "//").replace("%", "/%").replace("_", "/_")
    like_pattern = f"%{escaped_query}%"

    query_obj = select(MemoNode).options(
        selectinload(MemoNode.tree)
    ).where(
        or_(
            MemoNode.title.ilike(like_pattern, escape="/"),
            MemoNode.content.ilike(like_pattern, escape="/")
        )
    )

    # Permission filter: only nodes that belong to the user's trees.
    query_obj = query_obj.join(MemoTree).where(MemoTree.user_id == current_user.id)

    result = await db.execute(query_obj)
    nodes = result.scalars().all()

    query_lower = query.lower()
    search_results = []
    for node in nodes:
        # Treat a missing title as empty so scoring cannot raise.
        title = node.title or ""

        # Base score, with a larger bonus for a title hit than a content hit.
        score = 1.0
        if query_lower in title.lower():
            score += 2.0
        if node.content and query_lower in node.content.lower():
            score += 1.0

        search_results.append(SearchResult(
            type="memo",
            id=str(node.id),
            title=title,
            content=node.content or "",
            document_id=str(node.tree.id),  # the tree id stands in for a document id
            document_title=f"📚 {node.tree.title}",
            created_at=node.created_at,
            relevance_score=score
        ))

    return search_results
+
+
def _read_html_text(html_file_path: str) -> Optional[str]:
    """Return the text extracted from an HTML file, or None if it does not exist.

    Deliberately synchronous: it performs file I/O and HTML parsing, so the
    async caller runs it in an executor instead of on the event loop.
    """
    import os
    from bs4 import BeautifulSoup

    if not os.path.exists(html_file_path):
        return None
    with open(html_file_path, 'r', encoding='utf-8') as f:
        html_content = f.read()
    return BeautifulSoup(html_content, 'html.parser').get_text()


async def search_document_content(
    query: str,
    document_id: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the body text of documents' HTML files.

    Non-admin users only see documents that are public or that they uploaded.
    For every candidate document with an ``html_path``, the HTML file under
    ``/app/data/documents`` is read, reduced to plain text, and checked for a
    case-insensitive occurrence of ``query``.

    Args:
        query: Text to look for.
        document_id: When given, only this document is searched.
        current_user: User on whose behalf the search runs.
        db: Session used to load the candidate documents.

    Returns:
        One ``document_content`` result per document whose text contains the
        query, with a snippet around the first match as ``content``. A
        document that fails to load or parse is logged and skipped.
    """
    import asyncio
    import logging
    import os

    logger = logging.getLogger(__name__)

    # Restrict candidates to documents the user is allowed to read.
    doc_query = select(Document)
    if not current_user.is_admin:
        doc_query = doc_query.where(
            or_(
                Document.is_public == True,
                Document.uploaded_by == current_user.id
            )
        )

    if document_id:
        doc_query = doc_query.where(Document.id == document_id)

    result = await db.execute(doc_query)
    documents = result.scalars().all()

    query_lower = query.lower()
    loop = asyncio.get_running_loop()
    search_results = []

    for doc in documents:
        if not doc.html_path:
            continue

        try:
            html_file_path = os.path.join("/app/data/documents", doc.html_path)

            # Reading and parsing the file blocks, so keep it off the event loop.
            text_content = await loop.run_in_executor(
                None, _read_html_text, html_file_path
            )
            if text_content is None or query_lower not in text_content.lower():
                continue

            # Snippet of text surrounding the first match.
            context = extract_search_context(text_content, query)

            # Fixed score for a body-text hit.
            score = 2.0

            search_results.append(SearchResult(
                type="document_content",
                id=str(doc.id),
                title=f"📄 {doc.title} (본문)",
                content=context,
                document_id=str(doc.id),
                document_title=doc.title,
                created_at=doc.created_at,
                relevance_score=score
            ))
        except Exception as e:
            # Best effort: one unreadable document must not fail the whole search.
            logger.exception("문서 본문 검색 오류: %s", e)
            continue

    return search_results
+
+
def extract_search_context(text: str, query: str, context_length: int = 200) -> str:
    """Return a snippet of ``text`` around the first match of ``query``.

    The match is case-insensitive. The snippet spans ``context_length // 2``
    characters on each side of the match, and ``...`` is added on each side
    that was cut off.

    Args:
        text: Full text to take the snippet from.
        query: Search term to locate.
        context_length: Total number of surrounding characters to keep.

    Returns:
        The snippet around the first match. If there is no match, the first
        ``context_length`` characters of ``text`` followed by ``...``.
    """
    from bisect import bisect_left, bisect_right

    text_lower = text.lower()
    query_lower = query.lower()

    # Position of the first match in the lowercased text.
    match_pos = text_lower.find(query_lower)
    if match_pos == -1:
        return text[:context_length] + "..."
    match_end = match_pos + len(query_lower)

    if len(text_lower) != len(text):
        # Lowercasing can expand a character (e.g. "İ" becomes two code
        # points), so offsets in text_lower no longer line up with text.
        # offsets[k] is the lowercased length of the first k characters,
        # which lets us translate the match span back to original indices.
        offsets = [0]
        for ch in text:
            offsets.append(offsets[-1] + len(ch.lower()))
        match_pos = bisect_right(offsets, match_pos) - 1
        match_end = bisect_left(offsets, match_end)

    # Window boundaries, clamped to the text.
    half_window = context_length // 2
    start = max(0, match_pos - half_window)
    end = min(len(text), match_end + half_window)

    context = text[start:end]

    # Mark truncation on either side.
    if start > 0:
        context = "..." + context
    if end < len(text):
        context = context + "..."

    return context