feat: 뉴스 자동 수집 시스템 — 6개국 신문 RSS/API

- news_sources 테이블 (소스 관리, UI 동적 제어)
- news_collector 워커: RSS(feedparser) + NYT API
  - 중복 체크: hash(title+date+source) + URL normalize
  - category 표준화, summary HTML 정제, timezone UTC
  - 30일 이내만 embed, source별 try/catch
- News API: 소스 CRUD + 수동 수집 트리거
- APScheduler: 6시간 간격 자동 수집
- 대상: 경향/아사히/NYT/르몽드/신화/슈피겔

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 13:38:07 +09:00
parent bf8efd1cd3
commit a6c19ef76c
6 changed files with 405 additions and 0 deletions

View File

@@ -9,6 +9,7 @@ from sqlalchemy import func, select, text
from api.auth import router as auth_router
from api.dashboard import router as dashboard_router
from api.documents import router as documents_router
from api.news import router as news_router
from api.search import router as search_router
from api.setup import router as setup_router
from core.config import settings
@@ -25,6 +26,7 @@ async def lifespan(app: FastAPI):
from workers.file_watcher import watch_inbox
from workers.law_monitor import run as law_monitor_run
from workers.mailplus_archive import run as mailplus_run
from workers.news_collector import run as news_collector_run
from workers.queue_consumer import consume_queue
# 시작: DB 연결 확인
@@ -49,6 +51,7 @@ async def lifespan(app: FastAPI):
scheduler.add_job(mailplus_run, CronTrigger(hour=7), id="mailplus_morning")
scheduler.add_job(mailplus_run, CronTrigger(hour=18), id="mailplus_evening")
scheduler.add_job(daily_digest_run, CronTrigger(hour=20), id="daily_digest")
scheduler.add_job(news_collector_run, "interval", hours=6, id="news_collector")
scheduler.start()
yield
@@ -72,6 +75,7 @@ app.include_router(documents_router, prefix="/api/documents", tags=["documents"]
app.include_router(search_router, prefix="/api/search", tags=["search"])
app.include_router(dashboard_router, prefix="/api/dashboard", tags=["dashboard"])
app.include_router(news_router, prefix="/api/news", tags=["news"])
# TODO: Phase 5에서 추가
# app.include_router(tasks.router, prefix="/api/tasks", tags=["tasks"])