feat: 뉴스 자동 수집 시스템 — 6개국 신문 RSS/API

- news_sources 테이블 (소스 관리, UI 동적 제어)
- news_collector 워커: RSS(feedparser) + NYT API
  - 중복 체크: hash(title+date+source) + URL normalize
  - category 표준화, summary HTML 정제, timezone UTC
  - 30일 이내만 embed, source별 try/catch
- News API: 소스 CRUD + 수동 수집 트리거
- APScheduler: 6시간 간격 자동 수집
- 대상: 경향/아사히/NYT/르몽드/신화/슈피겔

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi Ahn
2026-04-06 13:38:07 +09:00
parent bf8efd1cd3
commit a6c19ef76c
6 changed files with 405 additions and 0 deletions

25
app/models/news_source.py Normal file
View File

@@ -0,0 +1,25 @@
"""news_sources 테이블 ORM"""
from datetime import datetime
from sqlalchemy import Boolean, DateTime, String, Text
from sqlalchemy.orm import Mapped, mapped_column
from core.database import Base
class NewsSource(Base):
__tablename__ = "news_sources"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(String(100), nullable=False)
country: Mapped[str | None] = mapped_column(String(10))
feed_url: Mapped[str] = mapped_column(Text, nullable=False)
feed_type: Mapped[str] = mapped_column(String(20), default="rss")
category: Mapped[str | None] = mapped_column(String(50))
language: Mapped[str | None] = mapped_column(String(10))
enabled: Mapped[bool] = mapped_column(Boolean, default=True)
last_fetched_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
created_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), default=datetime.now
)