diff --git a/app/api/digest.py b/app/api/digest.py index ceb8061..996c830 100644 --- a/app/api/digest.py +++ b/app/api/digest.py @@ -20,7 +20,7 @@ from sqlalchemy import select from sqlalchemy.ext.asyncio import AsyncSession from sqlalchemy.orm import selectinload -from core.auth import get_current_user +from core.auth import get_current_user, require_admin from core.database import get_session from models.digest import DigestTopic, GlobalDigest from models.user import User @@ -155,9 +155,9 @@ async def get_digest( @router.post("/regenerate") async def regenerate( - user: Annotated[User, Depends(get_current_user)], + user: Annotated[User, Depends(require_admin)], ): - """디버그용 수동 트리거 — 백그라운드 태스크로 워커 실행 (auth 필요).""" + """수동 트리거 — 백그라운드 태스크로 워커 실행 (admin 필요).""" from workers.digest_worker import run asyncio.create_task(run()) diff --git a/app/api/news.py b/app/api/news.py index e2eca2a..5c122e7 100644 --- a/app/api/news.py +++ b/app/api/news.py @@ -8,7 +8,7 @@ from pydantic import BaseModel from sqlalchemy import String, select from sqlalchemy.ext.asyncio import AsyncSession -from core.auth import get_current_user +from core.auth import get_current_user, require_admin from core.database import get_session from models.news_source import NewsSource from models.user import User @@ -60,9 +60,14 @@ async def list_sources( @router.post("/sources") async def create_source( body: NewsSourceCreate, - user: Annotated[User, Depends(get_current_user)], + user: Annotated[User, Depends(require_admin)], session: Annotated[AsyncSession, Depends(get_session)], ): + from core.url_validator import validate_feed_url + try: + validate_feed_url(body.feed_url) + except ValueError as e: + raise HTTPException(status_code=422, detail=f"feed_url 검증 실패: {e}") source = NewsSource(**body.model_dump()) session.add(source) await session.commit() @@ -73,12 +78,18 @@ async def create_source( async def update_source( source_id: int, body: NewsSourceUpdate, - user: Annotated[User, Depends(get_current_user)], + user: Annotated[User, Depends(require_admin)], session: Annotated[AsyncSession, Depends(get_session)], ): source = await session.get(NewsSource, source_id) if not source: raise HTTPException(status_code=404) + if body.feed_url is not None: + from core.url_validator import validate_feed_url + try: + validate_feed_url(body.feed_url) + except ValueError as e: + raise HTTPException(status_code=422, detail=f"feed_url 검증 실패: {e}") for field, value in body.model_dump(exclude_unset=True).items(): setattr(source, field, value) await session.commit() @@ -88,7 +99,7 @@ async def update_source( @router.delete("/sources/{source_id}") async def delete_source( source_id: int, - user: Annotated[User, Depends(get_current_user)], + user: Annotated[User, Depends(require_admin)], session: Annotated[AsyncSession, Depends(get_session)], ): source = await session.get(NewsSource, source_id) @@ -162,12 +173,28 @@ async def mark_all_read( return {"marked": result.rowcount} +import asyncio + +_collect_lock = asyncio.Lock() + + @router.post("/collect") async def trigger_collect( - user: Annotated[User, Depends(get_current_user)], + user: Annotated[User, Depends(require_admin)], ): - """수동 수집 트리거""" - from workers.news_collector import run - import asyncio - asyncio.create_task(run()) + """수동 수집 트리거 (admin 전용). + + asyncio.Lock은 단일 프로세스/이벤트루프 기준. + 현재 FastAPI 단일 인스턴스 운영이므로 유효하지만, + scale-out 시 DB advisory lock으로 교체 필요. + """ + if _collect_lock.locked(): + raise HTTPException(status_code=429, detail="수집이 이미 진행 중입니다") + + async def _run_with_lock(): + async with _collect_lock: + from workers.news_collector import run + await run() + + asyncio.create_task(_run_with_lock()) return {"message": "뉴스 수집 시작됨"} diff --git a/app/core/auth.py b/app/core/auth.py index 6c51916..d3646e1 100644 --- a/app/core/auth.py +++ b/app/core/auth.py @@ -83,3 +83,17 @@ async def get_current_user( detail="유저를 찾을 수 없음", ) return user + + +async def require_admin( + credentials: Annotated[HTTPAuthorizationCredentials, Depends(security)], + session: Annotated[AsyncSession, Depends(get_session)], +): + """관리자 권한 확인 — 뉴스 소스 CRUD, 수집 트리거, digest 재생성 등""" + user = await get_current_user(credentials, session) + if not user.is_admin: + raise HTTPException( + status_code=status.HTTP_403_FORBIDDEN, + detail="관리자 권한 필요", + ) + return user diff --git a/app/core/url_validator.py b/app/core/url_validator.py new file mode 100644 index 0000000..b0e115b --- /dev/null +++ b/app/core/url_validator.py @@ -0,0 +1,61 @@ +"""외부 피드 URL 검증 — SSRF 차단 + redirect target 재검증 + +등록 시 validate_feed_url()로 1차 검증, fetch 시 redirect target마다 +동일 함수로 재검증. 완전한 TOCTOU 방어는 httpx transport 레벨 후킹이 +필요하므로 이 이중 검증이 현재 현실적 상한선. +""" + +import ipaddress +import socket +from urllib.parse import urlparse + +ALLOWED_SCHEMES = {"https"} + +# HTTP 예외 도메인 — 여기에 없으면 HTTPS만 허용 +# 추가 시 사유/승인일/재검토일을 주석에 기록 +HTTP_EXCEPTION_DOMAINS: set[str] = set() +# 예: {"www.chinadaily.com.cn"} # 2026-04-14 승인, HTTPS 미지원 확인, 2026-07 재검토 + + +def _is_blocked_ip(ip: ipaddress.IPv4Address | ipaddress.IPv6Address) -> bool: + """ipaddress 내장 속성으로 넓게 차단 (단순 대역 비교보다 안전)""" + return ( + ip.is_private + or ip.is_loopback + or ip.is_link_local + or ip.is_reserved + or ip.is_multicast + or ip.is_unspecified + # Tailscale CGNAT 대역 (is_private에 포함 안 됨) + or ip in ipaddress.ip_network("100.64.0.0/10") + ) + + +def validate_feed_url(url: str, allow_http: bool = False) -> str: + """URL 검증. 실패 시 ValueError raise. + + allow_http는 HTTP_EXCEPTION_DOMAINS allowlist 연동 시에만 사용. + API 파라미터로 노출하지 않는다. + """ + parsed = urlparse(url) + + allowed = ALLOWED_SCHEMES | ({"http"} if allow_http else set()) + if parsed.scheme not in allowed: + raise ValueError(f"허용되지 않은 스킴: {parsed.scheme}") + + if not parsed.hostname: + raise ValueError("호스트명 누락") + + # DNS 해석 후 IP 차단 + try: + addrs = socket.getaddrinfo(parsed.hostname, None) + except socket.gaierror: + raise ValueError(f"DNS 해석 실패: {parsed.hostname}") + + for _, _, _, _, sockaddr in addrs: + ip = ipaddress.ip_address(sockaddr[0]) + if _is_blocked_ip(ip): + # IP 자체를 에러에 노출하지 않음 — hostname만 + raise ValueError(f"차단된 네트워크: {parsed.hostname}") + + return url diff --git a/app/models/user.py b/app/models/user.py index 9f415ef..9029c76 100644 --- a/app/models/user.py +++ b/app/models/user.py @@ -16,6 +16,7 @@ class User(Base): password_hash: Mapped[str] = mapped_column(Text, nullable=False) totp_secret: Mapped[str | None] = mapped_column(String(64)) is_active: Mapped[bool] = mapped_column(Boolean, default=True) + is_admin: Mapped[bool] = mapped_column(Boolean, default=False, server_default="false") created_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), default=datetime.now ) diff --git a/app/workers/news_collector.py b/app/workers/news_collector.py index 228b56f..9945f3a 100644 --- a/app/workers/news_collector.py +++ b/app/workers/news_collector.py @@ -103,13 +103,64 @@ async def run(): logger.info(f"뉴스 수집 완료: {total}건 신규") +MAX_RESPONSE_SIZE = 5 * 1024 * 1024 # 5MB +ALLOWED_CONTENT_TYPES = ("application/rss+xml", "application/atom+xml", + "application/xml", "text/xml") + + async def _fetch_rss(session, source: NewsSource) -> int: - """RSS 피드 수집""" - async with httpx.AsyncClient(timeout=10) as client: + """RSS 피드 수집 — redirect 재검증 + 크기/content-type 제한""" + from urllib.parse import urljoin + from core.url_validator import validate_feed_url, HTTP_EXCEPTION_DOMAINS + + # HTTP allowlist 체크 + if source.feed_url.startswith("http://"): + hostname = urlparse(source.feed_url).hostname + if hostname not in HTTP_EXCEPTION_DOMAINS: + logger.error(f"[{source.name}] HTTP 차단 (allowlist 미등록): {hostname}") + return 0 + + # fetch 전 URL 재검증 (등록 이후 DNS 변경 대비) + try: + validate_feed_url(source.feed_url, allow_http=source.feed_url.startswith("http://")) + except ValueError as e: + logger.error(f"[{source.name}] URL 검증 실패: {e}") + return 0 + + async with httpx.AsyncClient(timeout=10, follow_redirects=False) as client: resp = await client.get(source.feed_url) + + # redirect 수동 처리 (최대 3회, 각 target 재검증) + redirects = 0 + while resp.is_redirect and redirects < 3: + location = resp.headers.get("location", "") + location = urljoin(str(resp.request.url), location) + try: + validate_feed_url(location, allow_http=source.feed_url.startswith("http://")) + except ValueError as e: + logger.error(f"[{source.name}] redirect target 차단: {e}") + return 0 + resp = await client.get(location) + redirects += 1 + if resp.is_redirect: + logger.error(f"[{source.name}] redirect 3회 초과") + return 0 + resp.raise_for_status() + if len(resp.content) > MAX_RESPONSE_SIZE: + logger.warning(f"[{source.name}] 응답 크기 초과: {len(resp.content)} bytes") + return 0 + + ct = resp.headers.get("content-type", "").lower() + if not any(t in ct for t in ALLOWED_CONTENT_TYPES): + logger.warning(f"[{source.name}] 비정상 content-type: {ct}") + return 0 + feed = feedparser.parse(resp.text) + if feed.bozo and not feed.entries: + logger.warning(f"[{source.name}] RSS 파싱 실패: {feed.bozo_exception}") + return 0 count = 0 for entry in feed.entries: @@ -175,19 +226,29 @@ async def _fetch_rss(session, source: NewsSource) -> int: async def _fetch_api(session, source: NewsSource) -> int: - """NYT API 수집""" + """NYT API 수집 — 키 마스킹 + health degradation""" import os nyt_key = os.getenv("NYT_API_KEY", "") if not nyt_key: - logger.warning("NYT_API_KEY 미설정") + logger.error("NYT_API_KEY 미설정 — US 뉴스 수집 불가") return 0 - async with httpx.AsyncClient(timeout=10) as client: - resp = await client.get( - f"https://api.nytimes.com/svc/topstories/v2/{source.category or 'world'}.json", - params={"api-key": nyt_key}, - ) - resp.raise_for_status() + try: + async with httpx.AsyncClient(timeout=10) as client: + resp = await client.get( + f"https://api.nytimes.com/svc/topstories/v2/{source.category or 'world'}.json", + params={"api-key": nyt_key}, + ) + resp.raise_for_status() + except httpx.HTTPStatusError as e: + # 쿼리스트링(api-key 포함) 제거 — path까지만 로깅 + safe_url = str(e.request.url).split("?")[0] + logger.error(f"NYT API 실패: {e.response.status_code} @ {safe_url}") + return 0 + except httpx.RequestError as e: + safe_url = str(e.request.url).split("?")[0] if e.request else "unknown" + logger.error(f"NYT API 연결 실패: {safe_url}") + return 0 data = resp.json() count = 0 diff --git a/migrations/104_user_admin.sql b/migrations/104_user_admin.sql new file mode 100644 index 0000000..ab1a4db --- /dev/null +++ b/migrations/104_user_admin.sql @@ -0,0 +1,2 @@ +-- 관리자 권한 컬럼 추가 (뉴스 소스 CRUD, 수집 트리거, digest 재생성 등) +ALTER TABLE users ADD COLUMN IF NOT EXISTS is_admin BOOLEAN NOT NULL DEFAULT false;