diff --git a/app/services/briefing/clustering.py b/app/services/briefing/clustering.py index 2b62569..6896f6e 100644 --- a/app/services/briefing/clustering.py +++ b/app/services/briefing/clustering.py @@ -5,7 +5,7 @@ Phase 4 와 axis 반대: country 별 cluster 가 아닌 **전체 doc 합쳐서 t 파라미터 (5h 윈도우용): - LAMBDA = ln(2)/2h ≈ 0.347 (2시간 반감기, 야간 5h 윈도우라 빠른 감쇠) -- threshold = 0.78 고정 (Phase 4 0.75~0.80 중간값) +- threshold = 0.70 (2026-05-13 조정 — 0.78 에서 spread case kept=1 발생 후 완화) - MIN_ARTICLES_PER_TOPIC = 2 (야간 sparse 대비 완화) - MIN_COUNTRIES_PER_TOPIC = 2 (cross-country 가치 핵심) - MAX_TOPICS = 7 (1페이지 분량) @@ -22,7 +22,7 @@ from services.clustering_common import ( logger = setup_logger("briefing_clustering") LAMBDA = math.log(2) / (2.0 / 24.0) # 2시간 반감기 (단위: 일) -THRESHOLD = 0.78 +THRESHOLD = 0.70 CENTROID_ALPHA = 0.7 MIN_ARTICLES_PER_TOPIC = 2 MIN_COUNTRIES_PER_TOPIC = 2 diff --git a/migrations/262_seed_tech_ai_news_sources.sql b/migrations/262_seed_tech_ai_news_sources.sql new file mode 100644 index 0000000..6866a48 --- /dev/null +++ b/migrations/262_seed_tech_ai_news_sources.sql @@ -0,0 +1,26 @@ +-- 2026-05-13 — 기술/AI 뉴스 source seed (14건, 8개국) +-- WHERE NOT EXISTS 로 idempotent. 기존 row 보존, 신규만 insert. +-- briefing/digest 의 cross-country tech 토픽 cluster 다양성 확보. +-- 8 country: CN, DE, FR, GB, IN, JP, KR, US. category = Tech / AI. 
+-- Seed the tech/AI feeds below; a row is inserted only when news_sources has no row with the same name.
+INSERT INTO news_sources (name, country, language, feed_type, feed_url, category, enabled)
+SELECT v.name, v.country, v.language, v.feed_type, v.feed_url, v.category, v.enabled
+FROM (VALUES
+ ('GeekNews (Hada)', 'KR', 'ko', 'rss', 'https://feeds.feedburner.com/geeknews-feed', 'Tech', true),
+ ('AI Times', 'KR', 'ko', 'rss', 'https://www.aitimes.com/rss/S1N1.xml', 'AI', true),
+ ('Hacker News', 'US', 'en', 'rss', 'https://hnrss.org/frontpage?count=30', 'Tech', true),
+ ('ArsTechnica AI', 'US', 'en', 'rss', 'https://arstechnica.com/ai/feed/', 'AI', true),
+ ('The Verge Tech', 'US', 'en', 'rss', 'https://www.theverge.com/rss/index.xml', 'Tech', true),
+ ('TechCrunch', 'US', 'en', 'rss', 'https://techcrunch.com/feed/', 'Tech', true),
+ ('The Register', 'GB', 'en', 'rss', 'https://www.theregister.com/headlines.atom', 'Tech', true), -- NOTE(review): URL looks like an Atom feed but feed_type is 'rss' - confirm the fetcher accepts Atom
+ ('Heise Online', 'DE', 'de', 'rss', 'https://www.heise.de/rss/heise-atom.xml', 'Tech', true), -- NOTE(review): URL looks like an Atom feed but feed_type is 'rss' - confirm the fetcher accepts Atom
+ ('ITmedia News', 'JP', 'ja', 'rss', 'https://rss.itmedia.co.jp/rss/2.0/aiplus.xml', 'AI', true),
+ ('Gigazine', 'JP', 'ja', 'rss', 'https://gigazine.net/news/rss_2.0/', 'Tech', true),
+ ('36Kr', 'CN', 'zh', 'rss', 'https://36kr.com/feed', 'Tech', true),
+ ('Numerama', 'FR', 'fr', 'rss', 'https://www.numerama.com/feed', 'Tech', true),
+ ('YourStory', 'IN', 'en', 'rss', 'https://yourstory.com/feed', 'Tech', true),
+ ('BBC Technology', 'GB', 'en', 'rss', 'https://feeds.bbci.co.uk/news/technology/rss.xml', 'Tech', true)
+) AS v(name, country, language, feed_type, feed_url, category, enabled) -- column aliases for the inline VALUES rows
+WHERE NOT EXISTS ( -- idempotency guard: skip seed rows whose name is already present
+ SELECT 1 FROM news_sources ns WHERE ns.name = v.name -- matches on name only; NOTE(review): an existing row with the same feed_url under a different name is not detected - confirm that is acceptable
+);