diff --git a/app/api/search.py b/app/api/search.py
index caf7ce8..5682866 100644
--- a/app/api/search.py
+++ b/app/api/search.py
@@ -1,19 +1,22 @@
-"""하이브리드 검색 API — FTS + ILIKE + 벡터 (필드별 가중치)"""
+"""하이브리드 검색 API — orchestrator (Phase 1.1: thin endpoint).
+
+retrieval / fusion / rerank 등 실제 로직은 services/search/* 모듈로 분리.
+이 파일은 mode 분기, 응답 직렬화, debug 응답 구성, BackgroundTask dispatch만 담당.
+"""
 
 import time
 from typing import Annotated
 
 from fastapi import APIRouter, BackgroundTasks, Depends, Query
 from pydantic import BaseModel
-from sqlalchemy import text
 from sqlalchemy.ext.asyncio import AsyncSession
 
-from ai.client import AIClient
 from core.auth import get_current_user
 from core.database import get_session
 from core.utils import setup_logger
 from models.user import User
-from services.search_fusion import DEFAULT_FUSION, get_strategy, normalize_display_scores
+from services.search.fusion_service import DEFAULT_FUSION, get_strategy, normalize_display_scores
+from services.search.retrieval_service import search_text, search_vector
 from services.search_telemetry import (
     compute_confidence,
     compute_confidence_hybrid,
@@ -102,19 +105,19 @@ async def search(
 
     if mode == "vector":
         t0 = time.perf_counter()
-        vector_results = await _search_vector(session, q, limit)
+        vector_results = await search_vector(session, q, limit)
         timing["vector_ms"] = (time.perf_counter() - t0) * 1000
         if not vector_results:
             notes.append("vector_search_returned_empty (AI client error or no embeddings)")
         results = vector_results
     else:
         t0 = time.perf_counter()
-        text_results = await _search_text(session, q, limit)
+        text_results = await search_text(session, q, limit)
         timing["text_ms"] = (time.perf_counter() - t0) * 1000
 
         if mode == "hybrid":
             t1 = time.perf_counter()
-            vector_results = await _search_vector(session, q, limit)
+            vector_results = await search_vector(session, q, limit)
             timing["vector_ms"] = (time.perf_counter() - t1) * 1000
             if not vector_results:
                 notes.append("vector_search_returned_empty — text-only fallback")
@@ -172,79 +175,3 @@ async def search(
         mode=mode,
         debug=debug_obj,
     )
-
-
-async def _search_text(session: AsyncSession, query: str, limit: int) -> list[SearchResult]:
-    """FTS + ILIKE — 필드별 가중치 적용"""
-    result = await session.execute(
-        text("""
-            SELECT id, title, ai_domain, ai_summary, file_format,
-                   left(extracted_text, 200) AS snippet,
-                   (
-                       -- title 매칭 (가중치 최고)
-                       CASE WHEN coalesce(title, '') ILIKE '%%' || :q || '%%' THEN 3.0 ELSE 0 END
-                       -- ai_tags 매칭 (가중치 높음)
-                       + CASE WHEN coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%' THEN 2.5 ELSE 0 END
-                       -- user_note 매칭 (가중치 높음)
-                       + CASE WHEN coalesce(user_note, '') ILIKE '%%' || :q || '%%' THEN 2.0 ELSE 0 END
-                       -- ai_summary 매칭 (가중치 중상)
-                       + CASE WHEN coalesce(ai_summary, '') ILIKE '%%' || :q || '%%' THEN 1.5 ELSE 0 END
-                       -- extracted_text 매칭 (가중치 중간)
-                       + CASE WHEN coalesce(extracted_text, '') ILIKE '%%' || :q || '%%' THEN 1.0 ELSE 0 END
-                       -- FTS 점수 (보너스)
-                       + coalesce(ts_rank(
-                           to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')),
-                           plainto_tsquery('simple', :q)
-                       ), 0) * 2.0
-                   ) AS score,
-                   -- match reason
-                   CASE
-                       WHEN coalesce(title, '') ILIKE '%%' || :q || '%%' THEN 'title'
-                       WHEN coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%' THEN 'tags'
-                       WHEN coalesce(user_note, '') ILIKE '%%' || :q || '%%' THEN 'note'
-                       WHEN coalesce(ai_summary, '') ILIKE '%%' || :q || '%%' THEN 'summary'
-                       WHEN coalesce(extracted_text, '') ILIKE '%%' || :q || '%%' THEN 'content'
-                       ELSE 'fts'
-                   END AS match_reason
-            FROM documents
-            WHERE deleted_at IS NULL
-              AND (coalesce(title, '') ILIKE '%%' || :q || '%%'
-               OR coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%'
-               OR coalesce(user_note, '') ILIKE '%%' || :q || '%%'
-               OR coalesce(ai_summary, '') ILIKE '%%' || :q || '%%'
-               OR coalesce(extracted_text, '') ILIKE '%%' || :q || '%%'
-               OR to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, ''))
-                  @@ plainto_tsquery('simple', :q))
-            ORDER BY score DESC
-            LIMIT :limit
-        """),
-        {"q": query, "limit": limit},
-    )
-    return [SearchResult(**row._mapping) for row in result]
-
-
-async def _search_vector(session: AsyncSession, query: str, limit: int) -> list[SearchResult]:
-    """벡터 유사도 검색 (코사인 거리)"""
-    try:
-        client = AIClient()
-        query_embedding = await client.embed(query)
-        await client.close()
-    except Exception:
-        return []
-
-    result = await session.execute(
-        text("""
-            SELECT id, title, ai_domain, ai_summary, file_format,
-                   (1 - (embedding <=> cast(:embedding AS vector))) AS score,
-                   left(extracted_text, 200) AS snippet,
-                   'vector' AS match_reason
-            FROM documents
-            WHERE embedding IS NOT NULL AND deleted_at IS NULL
-            ORDER BY embedding <=> cast(:embedding AS vector)
-            LIMIT :limit
-        """),
-        {"embedding": str(query_embedding), "limit": limit},
-    )
-    return [SearchResult(**row._mapping) for row in result]
-
-
diff --git a/app/services/search/__init__.py b/app/services/search/__init__.py
new file mode 100644
index 0000000..2d10089
--- /dev/null
+++ b/app/services/search/__init__.py
@@ -0,0 +1,11 @@
+"""Search service 모듈 — Phase 1.1 분리.
+
+검색 파이프라인의 각 단계를 모듈로 분리해 디버깅/테스트/병목 추적을 용이하게 한다.
+
+- retrieval_service: text/vector/trigram 후보 수집
+- fusion_service: RRF / weighted-sum / boost (Phase 0.5에서 이동)
+- rerank_service: bge-reranker-v2-m3 통합 (Phase 1.3)
+- query_analyzer: 자연어 쿼리 분석 (Phase 2)
+- evidence_service: evidence extraction (Phase 3)
+- synthesis_service: grounded answer synthesis (Phase 3)
+"""
diff --git a/app/services/search/evidence_service.py b/app/services/search/evidence_service.py
new file mode 100644
index 0000000..adc5a0f
--- /dev/null
+++ b/app/services/search/evidence_service.py
@@ -0,0 +1,5 @@
+"""Evidence extraction 서비스 (Phase 3).
+
+reranked chunks에서 query-relevant span을 rule + LLM hybrid로 추출.
+구현은 Phase 3에서 채움.
+"""
diff --git a/app/services/search_fusion.py b/app/services/search/fusion_service.py
similarity index 100%
rename from app/services/search_fusion.py
rename to app/services/search/fusion_service.py
diff --git a/app/services/search/query_analyzer.py b/app/services/search/query_analyzer.py
new file mode 100644
index 0000000..3daff9a
--- /dev/null
+++ b/app/services/search/query_analyzer.py
@@ -0,0 +1,5 @@
+"""Query analyzer — 자연어 쿼리 분석 (Phase 2).
+
+domain_hint, intent, hard/soft filter, normalized_queries 등 추출.
+구현은 Phase 2에서 채움.
+"""
diff --git a/app/services/search/rerank_service.py b/app/services/search/rerank_service.py
new file mode 100644
index 0000000..16d9373
--- /dev/null
+++ b/app/services/search/rerank_service.py
@@ -0,0 +1,5 @@
+"""Reranker 서비스 — bge-reranker-v2-m3 통합 (Phase 1.3).
+
+TEI 컨테이너 호출 + asyncio.Semaphore(2) + soft timeout fallback.
+구현은 Phase 1.3에서 채움.
+"""
diff --git a/app/services/search/retrieval_service.py b/app/services/search/retrieval_service.py
new file mode 100644
index 0000000..3fea96f
--- /dev/null
+++ b/app/services/search/retrieval_service.py
@@ -0,0 +1,111 @@
+"""검색 후보 수집 서비스 (Phase 1.1).
+
+text(documents FTS + 키워드) + vector(documents.embedding) 후보를
+SearchResult 리스트로 반환.
+
+Phase 1.1: search.py의 _search_text/_search_vector를 이전.
+Phase 1.1 후속 substep: ILIKE → trigram `similarity()` + `gin_trgm_ops`.
+Phase 1.2: vector retrieval을 document_chunks 테이블 기반으로 전환.
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from sqlalchemy import text
+from sqlalchemy.ext.asyncio import AsyncSession
+
+from ai.client import AIClient
+
+if TYPE_CHECKING:
+    from api.search import SearchResult
+
+
+async def search_text(
+    session: AsyncSession, query: str, limit: int
+) -> list["SearchResult"]:
+    """Return up to `limit` non-deleted documents matching `query`, best score first.
+
+    Score = ILIKE hits (title 3.0 / ai_tags 2.5 / user_note 2.0 / ai_summary 1.5 /
+    extracted_text 1.0) + ts_rank * 2.0; rows become SearchResult via row._mapping.
+    """
+    from api.search import SearchResult  # deferred import: avoids a circular import with api.search
+
+    result = await session.execute(
+        text("""
+            SELECT id, title, ai_domain, ai_summary, file_format,
+                   left(extracted_text, 200) AS snippet,
+                   (
+                       -- title 매칭 (가중치 최고)
+                       CASE WHEN coalesce(title, '') ILIKE '%%' || :q || '%%' THEN 3.0 ELSE 0 END
+                       -- ai_tags 매칭 (가중치 높음)
+                       + CASE WHEN coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%' THEN 2.5 ELSE 0 END
+                       -- user_note 매칭 (가중치 높음)
+                       + CASE WHEN coalesce(user_note, '') ILIKE '%%' || :q || '%%' THEN 2.0 ELSE 0 END
+                       -- ai_summary 매칭 (가중치 중상)
+                       + CASE WHEN coalesce(ai_summary, '') ILIKE '%%' || :q || '%%' THEN 1.5 ELSE 0 END
+                       -- extracted_text 매칭 (가중치 중간)
+                       + CASE WHEN coalesce(extracted_text, '') ILIKE '%%' || :q || '%%' THEN 1.0 ELSE 0 END
+                       -- FTS 점수 (보너스)
+                       + coalesce(ts_rank(
+                           to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, '')),
+                           plainto_tsquery('simple', :q)
+                       ), 0) * 2.0
+                   ) AS score,
+                   -- match reason
+                   CASE
+                       WHEN coalesce(title, '') ILIKE '%%' || :q || '%%' THEN 'title'
+                       WHEN coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%' THEN 'tags'
+                       WHEN coalesce(user_note, '') ILIKE '%%' || :q || '%%' THEN 'note'
+                       WHEN coalesce(ai_summary, '') ILIKE '%%' || :q || '%%' THEN 'summary'
+                       WHEN coalesce(extracted_text, '') ILIKE '%%' || :q || '%%' THEN 'content'
+                       ELSE 'fts'
+                   END AS match_reason
+            FROM documents
+            WHERE deleted_at IS NULL
+              AND (coalesce(title, '') ILIKE '%%' || :q || '%%'
+               OR coalesce(ai_tags::text, '') ILIKE '%%' || :q || '%%'
+               OR coalesce(user_note, '') ILIKE '%%' || :q || '%%'
+               OR coalesce(ai_summary, '') ILIKE '%%' || :q || '%%'
+               OR coalesce(extracted_text, '') ILIKE '%%' || :q || '%%'
+               OR to_tsvector('simple', coalesce(title, '') || ' ' || coalesce(extracted_text, ''))
+                  @@ plainto_tsquery('simple', :q))
+            ORDER BY score DESC
+            LIMIT :limit
+        """),
+        {"q": query, "limit": limit},
+    )
+    return [SearchResult(**row._mapping) for row in result]
+
+
+async def search_vector(session: AsyncSession, query: str, limit: int) -> list["SearchResult"]:
+    """Return up to `limit` non-deleted documents nearest to `query` by cosine distance.
+
+    Score is 1 - (embedding <=> query embedding). Any embedding failure yields []
+    (best-effort); documents.embedding is used until the planned Phase 1.2 switch.
+    """
+    from api.search import SearchResult  # deferred import: avoids a circular import with api.search
+
+    try:
+        client = AIClient()
+        try:
+            query_embedding = await client.embed(query)
+        finally:
+            await client.close()  # always release the client, even when embed() raises
+    except Exception:
+        return []  # deliberate best-effort: the endpoint reports an empty vector result
+
+    result = await session.execute(
+        text("""
+            SELECT id, title, ai_domain, ai_summary, file_format,
+                   (1 - (embedding <=> cast(:embedding AS vector))) AS score,
+                   left(extracted_text, 200) AS snippet,
+                   'vector' AS match_reason
+            FROM documents
+            WHERE embedding IS NOT NULL AND deleted_at IS NULL
+            ORDER BY embedding <=> cast(:embedding AS vector)
+            LIMIT :limit
+        """),
+        {"embedding": str(query_embedding), "limit": limit},
+    )
+    return [SearchResult(**row._mapping) for row in result]
diff --git a/app/services/search/synthesis_service.py b/app/services/search/synthesis_service.py
new file mode 100644
index 0000000..b9dbfe4
--- /dev/null
+++ b/app/services/search/synthesis_service.py
@@ -0,0 +1,6 @@
+"""Grounded answer synthesis 서비스 (Phase 3).
+
+evidence span을 Gemma 4에 전달해 인용 기반 답변 생성.
+3~4초 soft timeout, 타임아웃 시 결과만 반환 fallback.
+구현은 Phase 3에서 채움.
+"""