- FastAPI 라우터에서 슬래시 문제로 인한 307 리다이렉트 수정 - Nginx 프록시 설정에서 경로 중복 문제 해결 - 계정 관리 시스템 구현 (로그인, 사용자 관리, 권한 설정) - 노트북 연결 기능 수정 (notebook_id 필드 추가) - 메모 트리 UI 개선 (수평 레이아웃, 드래그 기능 제거) - 헤더 UI 개선 및 고정 위치 설정 - 백업/복원 스크립트 추가 - PDF 미리보기 토큰 인증 지원
672 lines
21 KiB
Python
672 lines
21 KiB
Python
"""
검색 API 라우터 (Search API router)
"""
|
|
from fastapi import APIRouter, Depends, Query
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from sqlalchemy import select, or_, and_, text
|
|
from sqlalchemy.orm import joinedload, selectinload
|
|
from typing import List, Optional, Dict, Any
|
|
from datetime import datetime
|
|
|
|
from ...core.database import get_db
|
|
from ...models.user import User
|
|
from ...models.document import Document, Tag
|
|
from ...models.highlight import Highlight
|
|
from ...models.note import Note
|
|
from ...models.memo_tree import MemoTree, MemoNode
|
|
from ...models.note_document import NoteDocument
|
|
from ..dependencies import get_current_active_user
|
|
from pydantic import BaseModel
|
|
|
|
|
|
class SearchResult(BaseModel):
    """검색 결과"""

    # NOTE: the docstring above ("search result") is intentionally left as-is;
    # model docstrings may be surfaced in the generated API schema.

    # Kind of hit. Values produced in this module: "document", "note", "memo",
    # "highlight", "highlight_note" and "document_content".
    type: str
    # Primary key of the matched row, stringified by the search helpers.
    id: str
    # Short display title of the hit.
    title: str
    # Matched text (description, note body, highlighted text or excerpt).
    content: str
    # Identifier and title of the container the hit belongs to. Memo hits put
    # the tree id/title here and note documents reuse their own id/title.
    document_id: str
    document_title: str
    created_at: datetime
    # Ranking weight; search_all orders hits by this value, highest first.
    relevance_score: float = 0.0
    # Optional per-hit extras (highlight offsets, match counts, ...).
    highlight_info: Optional[Dict[str, Any]] = None

    class Config:
        from_attributes = True
|
|
|
|
|
|
class SearchResponse(BaseModel):
    """검색 응답"""

    # NOTE: the docstring above ("search response") is intentionally left
    # as-is; model docstrings may be surfaced in the generated API schema.

    # The search term exactly as received from the client.
    query: str
    # Number of hits before pagination is applied.
    total_results: int
    # The requested page of hits.
    results: List[SearchResult]
    # Facet name -> list of {"name": ..., "count": ...} entries.
    facets: Dict[str, List[Dict[str, Any]]] = {}
|
|
|
|
|
|
# Router that the search endpoints below register themselves on via
# @router.get(...); presumably included into the application elsewhere.
router = APIRouter()
|
|
|
|
|
|
@router.get("/", response_model=SearchResponse)
async def search_all(
    q: str = Query(..., description="검색어"),
    type_filter: Optional[str] = Query(None, description="검색 타입 필터: document, note, memo, highlight"),
    document_id: Optional[str] = Query(None, description="특정 문서 내 검색"),
    tag: Optional[str] = Query(None, description="태그 필터"),
    skip: int = Query(0, ge=0),
    limit: int = Query(50, ge=1, le=100),
    current_user: User = Depends(get_current_active_user),
    db: AsyncSession = Depends(get_db)
):
    """통합 검색 (문서 + 메모 + 하이라이트)"""
    # The docstring and the Query descriptions are left untouched because they
    # are runtime-visible text (English: "unified search over documents, notes
    # and highlights").
    #
    # Flow: run every per-type search that the optional type_filter allows,
    # merge the hits, rank them by relevance_score (descending), build facets
    # over the full hit list, and return the [skip, skip + limit) page.

    def wants(kind: str) -> bool:
        # No filter means "search everything".
        return not type_filter or type_filter == kind

    hits = []

    # 1. Document title/description.
    if wants("document"):
        hits += await search_documents(q, document_id, tag, current_user, db)

    # 2. Note documents.
    if wants("note"):
        hits += await search_note_documents(q, current_user, db)

    # 3. Memo tree nodes.
    if wants("memo"):
        hits += await search_memo_nodes(q, current_user, db)

    # 4. Legacy highlight-attached notes (kept for backward compatibility).
    if wants("note"):
        hits += await search_notes(q, document_id, tag, current_user, db)

    # 5. Highlights.
    if wants("highlight"):
        hits += await search_highlights(q, document_id, current_user, db)

    # 6. Notes attached to highlights.
    if wants("highlight_note"):
        hits += await search_highlight_notes(q, document_id, current_user, db)

    # 7. Document body text (OCR / extracted content).
    if wants("document_content"):
        hits += await search_document_content(q, document_id, current_user, db)

    # Stable sort: hits with equal scores keep the collection order above.
    ranked = sorted(hits, key=lambda hit: hit.relevance_score, reverse=True)

    # Facets describe the whole result set, not just the returned page.
    facets = await generate_search_facets(ranked, current_user, db)

    return SearchResponse(
        query=q,
        total_results=len(ranked),
        results=ranked[skip:skip + limit],
        facets=facets
    )
|
|
|
|
|
|
async def search_documents(
    query: str,
    document_id: Optional[str],
    tag: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search documents whose title or description contains ``query``.

    Non-admin users only see public documents and documents they uploaded.
    ``document_id`` and ``tag`` narrow the search when given. A title match
    contributes 2.0 to the score and a description match 1.0.
    """
    pattern = f"%{query}%"
    needle = query.lower()

    stmt = select(Document).options(
        selectinload(Document.uploader),
        selectinload(Document.tags)
    )

    # Visibility: admins see everything, others public or own uploads.
    if not current_user.is_admin:
        visible = or_(
            Document.is_public == True,  # noqa: E712 - SQL expression
            Document.uploaded_by == current_user.id
        )
        stmt = stmt.where(visible)

    # Optional restriction to one document.
    if document_id:
        stmt = stmt.where(Document.id == document_id)

    # Optional restriction to documents carrying the given tag.
    if tag:
        stmt = stmt.join(Document.tags).where(Tag.name == tag)

    # Case-insensitive substring match on title or description.
    stmt = stmt.where(
        or_(
            Document.title.ilike(pattern),
            Document.description.ilike(pattern)
        )
    )

    rows = (await db.execute(stmt)).scalars().all()

    def score_of(doc) -> float:
        # Title matches weigh more than description matches.
        points = 0.0
        if needle in doc.title.lower():
            points += 2.0
        if doc.description and needle in doc.description.lower():
            points += 1.0
        return points

    return [
        SearchResult(
            type="document",
            id=str(doc.id),
            title=doc.title,
            content=doc.description or "",
            document_id=str(doc.id),
            document_title=doc.title,
            created_at=doc.created_at,
            relevance_score=score_of(doc)
        )
        for doc in rows
    ]
|
|
|
|
|
|
async def search_notes(
    query: str,
    document_id: Optional[str],
    tag: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the current user's highlight-attached notes.

    A note matches when ``query`` occurs (case-insensitively) in the note
    body or in the text of the highlight it is attached to. ``document_id``
    limits the search to one document and ``tag`` to notes whose tag list
    contains that tag. A body match contributes 2.0 to the score and a
    highlighted-text match 1.5.

    Notes with no body are tolerated: a note can match through its highlight
    text alone, so the body is treated as an empty string in that case (the
    same convention ``search_highlight_notes`` uses).
    """
    pattern = f"%{query}%"
    needle = query.lower()

    stmt = (
        select(Note)
        .options(
            joinedload(Note.highlight).joinedload(Highlight.document)
        )
        .join(Highlight)
        # Only notes on the caller's own highlights.
        .where(Highlight.user_id == current_user.id)
    )

    # Optional restriction to one document.
    if document_id:
        stmt = stmt.where(Highlight.document_id == document_id)

    # Optional restriction to notes tagged with ``tag``.
    if tag:
        stmt = stmt.where(Note.tags.contains([tag]))

    # Match on the note body or on the highlighted text.
    stmt = stmt.where(
        or_(
            Note.content.ilike(pattern),
            Highlight.selected_text.ilike(pattern)
        )
    )

    notes = (await db.execute(stmt)).scalars().all()

    search_results = []
    for note in notes:
        highlight = note.highlight
        # Guard against NULL columns before calling str methods on them.
        body = note.content or ""
        selected = highlight.selected_text or ""

        score = 0.0
        if needle in body.lower():
            score += 2.0
        if needle in selected.lower():
            score += 1.5

        search_results.append(SearchResult(
            type="note",
            id=str(note.id),
            title=f"메모: {selected[:50]}...",
            content=body,
            document_id=str(highlight.document.id),
            document_title=highlight.document.title,
            created_at=note.created_at,
            relevance_score=score,
            highlight_info={
                "highlight_id": str(highlight.id),
                "selected_text": highlight.selected_text,
                "start_offset": highlight.start_offset,
                "end_offset": highlight.end_offset
            }
        ))

    return search_results
|
|
|
|
|
|
async def search_highlights(
    query: str,
    document_id: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the current user's highlights by their selected text.

    ``document_id`` optionally limits the search to one document. Each hit
    carries the highlight offsets and colour in ``highlight_info``.
    """
    needle = query.lower()

    stmt = (
        select(Highlight)
        .options(joinedload(Highlight.document))
        # Only the caller's own highlights.
        .where(Highlight.user_id == current_user.id)
    )
    if document_id:
        stmt = stmt.where(Highlight.document_id == document_id)

    # Case-insensitive substring match on the highlighted text.
    stmt = stmt.where(Highlight.selected_text.ilike(f"%{query}%"))

    rows = (await db.execute(stmt)).scalars().all()

    return [
        SearchResult(
            type="highlight",
            id=str(item.id),
            title=f"하이라이트: {item.selected_text[:50]}...",
            content=item.selected_text,
            document_id=str(item.document.id),
            document_title=item.document.title,
            created_at=item.created_at,
            # 1.0 when Python's lower-case comparison confirms the match,
            # 0.5 otherwise.
            relevance_score=1.0 if needle in item.selected_text.lower() else 0.5,
            highlight_info={
                "highlight_id": str(item.id),
                "selected_text": item.selected_text,
                "start_offset": item.start_offset,
                "end_offset": item.end_offset,
                "highlight_color": item.highlight_color
            }
        )
        for item in rows
    ]
|
|
|
|
|
|
async def generate_search_facets(
    results: List[SearchResult],
    current_user: User,
    db: AsyncSession
) -> Dict[str, List[Dict[str, Any]]]:
    """Summarise search hits into facet counts.

    Returns a dict with two facets, each a list of
    ``{"name": ..., "count": ...}`` entries:

    * ``"types"``     - hits per result type, in first-seen order.
    * ``"documents"`` - hits per document title, the ten most frequent
      titles, highest count first (ties keep first-seen order).

    ``current_user`` and ``db`` are accepted but not used here.
    """
    per_type: Dict[str, int] = {}
    per_document: Dict[str, int] = {}

    def tally(counter: Dict[str, int], key: str) -> None:
        counter[key] = counter.get(key, 0) + 1

    # One pass fills both tallies; dicts remember insertion order.
    for hit in results:
        tally(per_type, hit.type)
        tally(per_document, hit.document_title)

    # sorted() is stable, so equal counts stay in first-seen order.
    busiest = sorted(per_document.items(), key=lambda pair: pair[1], reverse=True)[:10]

    return {
        "types": [
            {"name": kind, "count": total}
            for kind, total in per_type.items()
        ],
        "documents": [
            {"name": title, "count": total}
            for title, total in busiest
        ],
    }
|
|
|
|
|
|
@router.get("/suggestions")
async def get_search_suggestions(
    q: str = Query(..., min_length=2, description="검색어 (최소 2글자)"),
    current_user: User = Depends(get_current_active_user),
    db: AsyncSession = Depends(get_db)
):
    """검색어 자동완성 제안"""
    # The docstring and the Query description are left untouched because they
    # are runtime-visible text (English: "search term autocomplete
    # suggestions").
    #
    # Collects up to 5 matching document titles, up to 5 matching tag names
    # and up to 5 matching tags from the caller's notes, in that order, and
    # returns at most 10 of them as
    # {"suggestions": [{"text": ..., "type": ...}, ...]}.
    pattern = f"%{q}%"
    needle = q.lower()
    suggestions = []

    # Document titles. Non-admins only see public or self-uploaded documents;
    # for admins no visibility clause is added at all.
    title_stmt = select(Document.title).where(Document.title.ilike(pattern))
    if not current_user.is_admin:
        title_stmt = title_stmt.where(
            or_(
                Document.is_public == True,  # noqa: E712 - SQL expression
                Document.uploaded_by == current_user.id
            )
        )
    doc_titles = (await db.execute(title_stmt.limit(5))).scalars().all()
    suggestions.extend({"text": title, "type": "document"} for title in doc_titles)

    # Tag names.
    tag_stmt = select(Tag.name).where(Tag.name.ilike(pattern)).limit(5)
    tag_names = (await db.execute(tag_stmt)).scalars().all()
    suggestions.extend({"text": name, "type": "tag"} for name in tag_names)

    # Tags stored on the caller's notes. Each row is the note's tag list, so
    # the substring match is done here in Python.
    note_tag_stmt = (
        select(Note.tags)
        .join(Highlight)
        .where(Highlight.user_id == current_user.id)
    )
    tag_lists = (await db.execute(note_tag_stmt)).scalars().all()

    matching_tags = {
        tag
        for tags in tag_lists
        if isinstance(tags, list)
        for tag in tags
        # Skip malformed (non-string) entries instead of failing on .lower().
        if isinstance(tag, str) and needle in tag.lower()
    }
    # Sort before slicing: set iteration order is not stable across
    # processes, so an unsorted slice would make the suggestions flicker.
    suggestions.extend(
        {"text": tag, "type": "note_tag"} for tag in sorted(matching_tags)[:5]
    )

    return {"suggestions": suggestions[:10]}
|
|
|
|
|
|
async def search_highlight_notes(
    query: str,
    document_id: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the body text of notes attached to the user's highlights.

    ``document_id`` optionally limits the search to one document. Notes whose
    highlight or document could not be loaded are skipped.
    """
    needle = query.lower()

    stmt = (
        select(Note)
        .options(selectinload(Note.highlight).selectinload(Highlight.document))
        # Only notes that are attached to a highlight.
        .where(Note.highlight_id.isnot(None))
        # Join Highlight so ownership and document filters can be applied.
        .join(Highlight)
        # Only the caller's own highlights.
        .where(Highlight.user_id == current_user.id)
    )
    if document_id:
        stmt = stmt.where(Highlight.document_id == document_id)

    # Case-insensitive substring match on the note body.
    stmt = stmt.where(Note.content.ilike(f"%{query}%"))

    rows = (await db.execute(stmt)).scalars().all()

    hits = []
    for note in rows:
        highlight = note.highlight
        if not highlight or not highlight.document:
            continue

        # Base score of 1.5, plus 2.0 when the lower-cased body contains the
        # lower-cased query.
        score = 1.5
        if needle in (note.content or "").lower():
            score += 2.0

        hits.append(SearchResult(
            type="highlight_note",
            id=str(note.id),
            title=f"하이라이트 메모: {highlight.selected_text[:30]}...",
            content=note.content or "",
            document_id=str(highlight.document.id),
            document_title=highlight.document.title,
            created_at=note.created_at,
            relevance_score=score,
            highlight_info={
                "highlight_id": str(highlight.id),
                "selected_text": highlight.selected_text,
                "start_offset": highlight.start_offset,
                "end_offset": highlight.end_offset,
                "note_content": note.content
            }
        ))

    return hits
|
|
|
|
|
|
async def search_note_documents(
    query: str,
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the user's note documents by title and content.

    Only note documents whose ``created_by`` equals the user's e-mail are
    considered. Every hit starts at score 1.0; a title match adds 2.0 and a
    content match adds 1.0.
    """
    pattern = f"%{query}%"
    needle = query.lower()

    stmt = (
        select(NoteDocument)
        .where(
            or_(
                NoteDocument.title.ilike(pattern),
                NoteDocument.content.ilike(pattern)
            )
        )
        # Ownership filter: note documents are keyed by the creator's e-mail.
        .where(NoteDocument.created_by == current_user.email)
    )
    rows = (await db.execute(stmt)).scalars().all()

    def score_of(note) -> float:
        points = 1.0
        if needle in note.title.lower():
            points += 2.0
        if note.content and needle in note.content.lower():
            points += 1.0
        return points

    return [
        SearchResult(
            type="note",
            id=str(note.id),
            title=note.title,
            content=note.content or "",
            # A note document is its own container, so its id/title double
            # as the document id/title.
            document_id=str(note.id),
            document_title=note.title,
            created_at=note.created_at,
            relevance_score=score_of(note)
        )
        for note in rows
    ]
|
|
|
|
|
|
async def search_memo_nodes(
    query: str,
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search memo-tree nodes by title and content.

    Only nodes belonging to trees owned by the current user are returned.
    Every hit starts at score 1.0; a title match adds 2.0 and a content match
    adds 1.0.
    """
    pattern = f"%{query}%"
    needle = query.lower()

    stmt = (
        select(MemoNode)
        .options(selectinload(MemoNode.tree))
        .where(
            or_(
                MemoNode.title.ilike(pattern),
                MemoNode.content.ilike(pattern)
            )
        )
        # Ownership filter: restrict to nodes of the caller's trees.
        .join(MemoTree)
        .where(MemoTree.user_id == current_user.id)
    )
    rows = (await db.execute(stmt)).scalars().all()

    def score_of(node) -> float:
        points = 1.0
        if needle in node.title.lower():
            points += 2.0
        if node.content and needle in node.content.lower():
            points += 1.0
        return points

    return [
        SearchResult(
            type="memo",
            id=str(node.id),
            title=node.title,
            content=node.content or "",
            # The owning tree stands in for the "document" of a memo hit.
            document_id=str(node.tree.id),
            document_title=f"📚 {node.tree.title}",
            created_at=node.created_at,
            relevance_score=score_of(node)
        )
        for node in rows
    ]
|
|
|
|
|
|
def _resolve_storage_path(stored_path: str) -> str:
    """Return ``stored_path`` unchanged if absolute, else placed under /app."""
    import os

    if stored_path.startswith('/'):
        return stored_path
    return os.path.join("/app", stored_path)


def _load_html_text(html_path: str) -> str:
    """Return the visible text of an HTML file, or "" if the file is missing."""
    import os
    from bs4 import BeautifulSoup

    file_path = _resolve_storage_path(html_path)
    if not os.path.exists(file_path):
        return ""
    with open(file_path, 'r', encoding='utf-8') as f:
        return BeautifulSoup(f.read(), 'html.parser').get_text()


def _load_pdf_text(pdf_path: str) -> str:
    """Return page-labelled text of a PDF file, or "" if the file is missing."""
    import os
    import PyPDF2

    file_path = _resolve_storage_path(pdf_path)
    if not os.path.exists(file_path):
        return ""
    text_pages = []
    with open(file_path, 'rb') as f:
        pdf_reader = PyPDF2.PdfReader(f)
        for page_num in range(len(pdf_reader.pages)):
            # Treat a missing text layer as empty so that one such page does
            # not discard the whole document.
            page_text = pdf_reader.pages[page_num].extract_text() or ""
            if page_text.strip():
                text_pages.append(f"[페이지 {page_num + 1}]\n{page_text}")
    return "\n\n".join(text_pages)


async def search_document_content(
    query: str,
    document_id: Optional[str],
    current_user: User,
    db: AsyncSession
) -> List[SearchResult]:
    """Search the body text of documents (HTML/OCR output or raw PDF text).

    Documents visible to the user (all for admins, otherwise public or
    self-uploaded) are loaded, optionally limited to ``document_id``. For
    each one the HTML file is preferred; the PDF is read directly only when
    no HTML path is recorded. A document whose file cannot be read is logged
    via ``print`` and skipped (best effort).

    Each hit carries an excerpt around the first match, a score of 2.0 plus
    0.1 per occurrence (capped at +1.0), and file details in
    ``highlight_info``.

    NOTE(review): files are read and parsed synchronously for every visible
    document on every call; consider caching or pre-indexing extracted text.
    """
    # Same visibility rule as the metadata search.
    doc_query = select(Document)
    if not current_user.is_admin:
        doc_query = doc_query.where(
            or_(
                Document.is_public == True,  # noqa: E712 - SQL expression
                Document.uploaded_by == current_user.id
            )
        )
    if document_id:
        doc_query = doc_query.where(Document.id == document_id)

    documents = (await db.execute(doc_query)).scalars().all()

    # Lower-case the query once instead of once per document.
    needle = query.lower()
    search_results = []

    for doc in documents:
        text_content = ""
        file_type = ""

        if doc.html_path:
            # HTML text: OCR output of a PDF, or a book stored as HTML.
            try:
                text_content = _load_html_text(doc.html_path)
            except Exception as e:
                print(f"HTML 파일 읽기 오류 ({doc.html_path}): {e}")
                continue
            # A PDF alongside the HTML means the HTML came from that PDF.
            file_type = "PDF" if doc.pdf_path else "HTML"
        elif doc.pdf_path:
            # No HTML available: extract text straight from the PDF.
            try:
                text_content = _load_pdf_text(doc.pdf_path)
            except Exception as e:
                print(f"PDF 파일 읽기 오류 ({doc.pdf_path}): {e}")
                continue
            file_type = "PDF (직접추출)"

        if not text_content:
            continue

        # Lower-case the (possibly large) body once and reuse it.
        haystack = text_content.lower()
        if needle not in haystack:
            continue

        context = extract_search_context(text_content, query, context_length=300)

        # Base 2.0 for a body match, plus up to 1.0 for repeated matches.
        match_count = haystack.count(needle)
        score = 2.0 + min(match_count * 0.1, 1.0)

        search_results.append(SearchResult(
            type="document_content",
            id=str(doc.id),
            title=f"📄 {doc.title} ({file_type} 본문)",
            content=context,
            document_id=str(doc.id),
            document_title=doc.title,
            created_at=doc.created_at,
            relevance_score=score,
            highlight_info={
                "file_type": file_type,
                "match_count": match_count,
                "has_pdf": bool(doc.pdf_path),
                "has_html": bool(doc.html_path)
            }
        ))

    return search_results
|
|
|
|
|
|
def extract_search_context(text: str, query: str, context_length: int = 200) -> str:
    """Return an excerpt of ``text`` centred on the first match of ``query``.

    The match is case-insensitive. The excerpt spans roughly
    ``context_length // 2`` characters on each side of the match, and "..."
    is added on whichever side was actually cut off. When ``query`` does not
    occur, the first ``context_length`` characters are returned, with "..."
    appended only if the text was truncated.
    """
    # NOTE(review): offsets come from the lower-cased text; for the rare
    # characters whose lower-case form has a different length the window may
    # be slightly off-centre.
    match_pos = text.lower().find(query.lower())

    if match_pos == -1:
        head = text[:context_length]
        # Only signal truncation when something was really left out.
        return head + "..." if len(text) > context_length else head

    half = context_length // 2
    start = max(0, match_pos - half)
    end = min(len(text), match_pos + len(query) + half)

    prefix = "..." if start > 0 else ""
    suffix = "..." if end < len(text) else ""
    return f"{prefix}{text[start:end]}{suffix}"
|