feat(news): crawl-24x7 사이클 3 — B-4 시그널·C-4 공학 지속·CSB sitemap·CCPS Beacon (마이그 327)
- B-4 fetch_method='signal-only': 페이지 fetch 0 + summarize 스킵(검색 색인만, 맥미니 부하 0) + 본문 무절단(_entry_body — arXiv 초록 1.6K 보존). 다이제스트는 ai_summary NULL 제외 규칙으로 자연 배제. 레지스트리 오설정(page) 방어 가드. - 시드 9 소스 (전 URL 2026-06-11 live 검증): Bloomberg Markets/Technology(skip-video, 비디오 혼재 실측)·Economist Latest·Nikkei Asia(RDF — feedparser 네이티브, 분기 불요 fixture 박제)·ASME JPVT(site_1000037 실측 매핑)·arXiv 2종·IEEE Spectrum 2종(feed-full, 피드 description 이 전문 7.9~14K자 실측). - csb_collector: sitemap lastmod diff (weekly 월 06:50) — 워터마크(selector_override) + cap 40/회 점진 백필 + diff sanity 300 + 보고서 PDF(/assets/, recommendation 제외) → extract 파이프라인. 초기 일괄 = CLI --bulk. - api_standards_collector: 공지 목록 링크 파싱(실측 — 페이지 diff 아님, 상세 URL 10건/페이지) → 신규 상세만 ingest (monthly 5일 07:05). 초기 백필 = CLI --bulk. - ccps_collector: aiche.org 평문 403(UA 무관 실측) → playwright-fetcher 익명 컨텍스트 + referer 쿠키 승계 /download(base64) 신설로 월간 Beacon PDF (monthly 5일 07:20). 헤드리스 차단 시 CrawlBlocked → health 가시화 (르몽드 PARK 선례). - B-5 잔여: rdf/feed-reader-UA = 코드 분기 불요 실측 박제 (Economist 는 Archiver UA 200). table-strip/gn-redirect 는 해당 소스 미진입 — 백로그 유지. - 테스트 24건 신규 (fixture 9건 live 박제, economist/ieee 는 item trim) — 39 passed. - 마이그 327 단일 statement (PKM 트랙과 번호 경합 주의 — 327 본 트랙 선점). Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
@@ -12,6 +12,7 @@ SSRF 차단은 core.url_validator.validate_feed_url 재사용 (redirect target
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import random
|
||||
import time
|
||||
import urllib.robotparser
|
||||
@@ -206,11 +207,13 @@ async def fetch_page(
|
||||
|
||||
# ── B-3 구독 세션 fetch (Playwright 격리 컨테이너 경유) ──────────────────────
|
||||
|
||||
async def fetch_page_via_browser(url: str, profile: str) -> tuple[str, str]:
|
||||
"""인증 페이지 1건 — playwright-fetcher 에 위임, politeness 는 사람 속도(30~60s).
|
||||
async def fetch_page_via_browser(url: str, profile: str | None) -> tuple[str, str]:
|
||||
"""브라우저 페이지 1건 — playwright-fetcher 에 위임, politeness 는 사람 속도(30~60s).
|
||||
|
||||
(html_text, final_url) 반환. robots 미적용 — 구독 계약 기반 개인 보관 fetch 로
|
||||
공개 크롤러 규약 대상이 아님 (대신 사람 속도 + 동시 1 + 야간 저빈도가 보호 장치).
|
||||
profile=None = 익명 컨텍스트 (사이클 3 — 평문 httpx 를 UA 무관 403 하는 공개
|
||||
사이트의 WAF 우회 전용, CCPS aiche.org 실측). 값 = B-3 구독 세션.
|
||||
(html_text, final_url) 반환. robots 미적용 — 구독 fetch 는 사용자 행위 성격,
|
||||
익명 WAF 우회는 월간 1~2회 저빈도 + 사람 속도가 보호 장치.
|
||||
예외 어휘는 fetch_page 와 동일 (호출측 분기 재사용).
|
||||
"""
|
||||
try:
|
||||
@@ -218,14 +221,16 @@ async def fetch_page_via_browser(url: str, profile: str) -> tuple[str, str]:
|
||||
except ValueError as e:
|
||||
raise CrawlSkip(f"URL 검증 실패: {e}") from e
|
||||
|
||||
payload = {"url": url}
|
||||
if profile:
|
||||
payload["profile"] = profile
|
||||
|
||||
domain = _domain_of(url)
|
||||
async with _get_lock(domain):
|
||||
await _respect_domain_rate(domain, _AUTH_DELAY_MIN, _AUTH_DELAY_MAX)
|
||||
try:
|
||||
async with httpx.AsyncClient(timeout=_FETCHER_TIMEOUT) as client:
|
||||
resp = await client.post(
|
||||
f"{_FETCHER_URL}/fetch", json={"url": url, "profile": profile}
|
||||
)
|
||||
resp = await client.post(f"{_FETCHER_URL}/fetch", json=payload)
|
||||
except httpx.TimeoutException as e:
|
||||
raise CrawlFetchError(f"browser fetch timeout: {url}") from e
|
||||
except httpx.HTTPError as e:
|
||||
@@ -250,6 +255,60 @@ async def fetch_page_via_browser(url: str, profile: str) -> tuple[str, str]:
|
||||
return html_text, data.get("final_url", url)
|
||||
|
||||
|
||||
_MAX_DOWNLOAD_BYTES = 60 * 1024 * 1024  # kept equal to the fetcher-side MAX_DOWNLOAD_BYTES (per original note)


async def download_via_browser(
    url: str, *, referer: str | None = None, profile: str | None = None
) -> tuple[bytes, str]:
    """Download one binary (PDF) by delegating to playwright-fetcher's /download.

    Args:
        url: Target URL; validated with validate_feed_url before anything is sent.
        referer: Optional page the fetcher should use as referer (original note:
            a listing page that first obtains the WAF challenge cookie).
        profile: Optional session profile name; omitted from the request when falsy.

    Returns:
        (content, content_type) — decoded body bytes and the content type string
        reported by the fetcher ("" when absent).

    Raises:
        CrawlSkip: URL validation failed, inner status is another 4xx, or the
            decoded body exceeds _MAX_DOWNLOAD_BYTES.
        CrawlBlocked: fetcher answered 503, or inner status is 403/429.
        CrawlFetchError: timeout / transport error, non-200 fetcher status,
            any other non-200 inner status, or a malformed fetcher response.
    """
    try:
        validate_feed_url(url)
    except ValueError as e:
        raise CrawlSkip(f"URL 검증 실패: {e}") from e

    # Optional keys are left out entirely rather than sent as null.
    payload: dict = {"url": url}
    if referer:
        payload["referer"] = referer
    if profile:
        payload["profile"] = profile

    domain = _domain_of(url)
    async with _get_lock(domain):
        await _respect_domain_rate(domain, _AUTH_DELAY_MIN, _AUTH_DELAY_MAX)
        try:
            async with httpx.AsyncClient(timeout=_FETCHER_TIMEOUT) as client:
                resp = await client.post(f"{_FETCHER_URL}/download", json=payload)
        except httpx.TimeoutException as e:
            raise CrawlFetchError(f"browser download timeout: {url}") from e
        except httpx.HTTPError as e:
            raise CrawlFetchError(f"playwright-fetcher 연결 오류: {e}") from e
        finally:
            # Stamp the attempt even on failure so the next request still waits.
            _domain_last_request[domain] = time.monotonic()

    if resp.status_code == 503:
        raise CrawlBlocked(f"세션 프로필 부재: {profile}")
    if resp.status_code != 200:
        raise CrawlFetchError(f"playwright-fetcher {resp.status_code}: {url}")

    # A malformed reply must surface in the Crawl* vocabulary: callers only
    # catch CrawlBlocked / CrawlSkip / CrawlFetchError.
    try:
        data = resp.json()
        inner = int(data.get("status", 0))
    except (ValueError, TypeError, AttributeError) as e:
        raise CrawlFetchError(f"playwright-fetcher malformed response: {url}") from e

    if inner in (403, 429):
        raise CrawlBlocked(f"{inner} (browser download): {url}")
    if 400 <= inner < 500:
        raise CrawlSkip(f"{inner} (browser download): {url}")
    if inner != 200:
        raise CrawlFetchError(f"{inner} (browser download): {url}")

    try:
        # binascii.Error is a ValueError subclass; TypeError covers a null body.
        content = base64.b64decode(data.get("body_b64", ""))
    except (ValueError, TypeError) as e:
        raise CrawlFetchError(f"playwright-fetcher undecodable body: {url}") from e
    if len(content) > _MAX_DOWNLOAD_BYTES:
        raise CrawlSkip(f"크기 초과 (browser download): {url}")
    # Normalise a null content type to "" so callers can call str methods on it.
    return content, data.get("content_type") or ""
|
||||
|
||||
|
||||
async def probe_session(
|
||||
profile: str, probe_url: str, min_body_chars: int, paywall_markers: list[str]
|
||||
) -> dict:
|
||||
|
||||
@@ -56,6 +56,9 @@ async def lifespan(app: FastAPI):
|
||||
from workers.news_collector import run as news_collector_run
|
||||
from workers.fulltext_worker import reconcile_unresolved as fulltext_reconcile_run
|
||||
from workers.kosha_collector import run as kosha_collector_run
|
||||
from workers.csb_collector import run as csb_collector_run
|
||||
from workers.api_standards_collector import run as api_standards_run
|
||||
from workers.ccps_collector import run as ccps_collector_run
|
||||
from workers.queue_consumer import consume_queue, consume_markdown_queue
|
||||
from workers.study_queue_consumer import consume_study_queue
|
||||
from workers.study_session_queue_consumer import consume_study_session_queue
|
||||
@@ -131,6 +134,12 @@ async def lifespan(app: FastAPI):
|
||||
scheduler.add_job(dedup_reconcile_run, CronTrigger(hour=3, minute=30, timezone=KST), id="dedup_reconcile")
|
||||
# crawl-24x7 C-2: KOSHA 재해사례 diff + GUIDE 점진 백필 (daily, 새벽 잡들과 비충돌 슬롯).
|
||||
scheduler.add_job(kosha_collector_run, CronTrigger(hour=6, minute=40, timezone=KST), id="kosha_collector")
|
||||
# 사이클 3 C-2 잔여: CSB sitemap lastmod diff (weekly 월, cap 40 + 워터마크 점진 백필).
|
||||
scheduler.add_job(csb_collector_run, CronTrigger(day_of_week="mon", hour=6, minute=50, timezone=KST), id="csb_collector")
|
||||
# 사이클 3 C-4: API 표준 공지 목록 diff (monthly — 월 1~2건 공지 페이스).
|
||||
scheduler.add_job(api_standards_run, CronTrigger(day=5, hour=7, minute=5, timezone=KST), id="api_standards_collector")
|
||||
# 사이클 3 C-2 잔여: CCPS Beacon 월간 PDF (playwright 익명 경유 — WAF 차단 시 health 로 가시화).
|
||||
scheduler.add_job(ccps_collector_run, CronTrigger(day=5, hour=7, minute=20, timezone=KST), id="ccps_collector")
|
||||
scheduler.start()
|
||||
|
||||
# Phase 2.1 (async 구조): QueryAnalyzer prewarm.
|
||||
|
||||
@@ -0,0 +1,250 @@
|
||||
"""C-4 ① API 표준 공지(Important Standards Announcements) 수집 워커 (사이클 3).
|
||||
|
||||
RSS 없음. 실측(2026-06-11) 결과 '페이지 diff' 가 아니라 공지별 상세 URL 이 있는
|
||||
목록 페이지(10건/페이지, ?page=N&pageSize=10 페이지네이션 ~12+) — 목록 링크 파싱
|
||||
→ 신규 상세 페이지만 ingest 가 정확하고 dedup 도 자연스럽다 (rss+page 패턴의 HTML 판).
|
||||
510/570/653 개정 공지가 업무 직결 — 표준 본문은 유료라 공지만 수집 (카드 C-4).
|
||||
|
||||
스케줄 = monthly (main.py 5일 07:05 KST) — 최근 2페이지 diff (월 1~2건 공지 페이스).
|
||||
초기 일괄: docker exec hyungi_document_server-fastapi-1 \
|
||||
python -m workers.api_standards_collector --bulk # 전 페이지 (~120건, politeness ~30분)
|
||||
|
||||
멱등: edit_url(정규화)+file_hash dedup — 재실행 = 신규분만.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import hashlib
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.crawl_politeness import (
|
||||
CrawlBlocked,
|
||||
CrawlFetchError,
|
||||
CrawlSkip,
|
||||
fetch_page,
|
||||
)
|
||||
from core.database import async_session
|
||||
from core.utils import setup_logger
|
||||
from models.document import Document
|
||||
from models.news_source import NewsSource
|
||||
from models.queue import enqueue_stage
|
||||
from workers.fulltext_worker import (
|
||||
_WEB_MIN_BODY_LEN,
|
||||
_extract_body,
|
||||
_raw_html_path,
|
||||
_save_raw_html,
|
||||
_strip_article_footer,
|
||||
)
|
||||
from workers.news_collector import (
|
||||
_get_or_create_health,
|
||||
_normalize_url,
|
||||
_record_failure,
|
||||
_record_success,
|
||||
)
|
||||
from workers.static_corpus_ingest import _page_title
|
||||
|
||||
logger = setup_logger("api_standards")
|
||||
|
||||
_BASE = "https://www.api.org"
_LISTING_PATH = "/products-and-services/standards/important-standards-announcements"
_LISTING_URL = _BASE + _LISTING_PATH
_SOURCE_NAME = "API 표준 공지"

# Listing pages scanned by the monthly run (original note: 20 items is ample
# overlap for a 1-2 announcements/month pace).
_SCHEDULED_PAGES = 2
# Upper bound for --bulk (original note: 12 pages observed plus headroom);
# an empty page ends the crawl early.
_BULK_MAX_PAGES = 15

# Detail links live under the listing path; '?' and '#' are excluded from the
# captured tail, so pagination links do not match.
_DETAIL_RE = re.compile('href="(' + re.escape(_LISTING_PATH) + '/[^"?#]+)"')

_MONTH_NAMES = (
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
)
# English month name -> 1-based month number.
_MONTHS = dict(zip(_MONTH_NAMES, range(1, 13)))
# 'Month DD, YYYY' with an optional comma after the day.
_DATE_RE = re.compile("(" + "|".join(_MONTH_NAMES) + r")\s+(\d{1,2}),?\s+(\d{4})")
|
||||
|
||||
|
||||
def _parse_listing(html_text: str) -> list[str]:
    """Return absolute detail-announcement URLs found in a listing page.

    First-occurrence order is kept and repeats are dropped. Pagination links
    never match because _DETAIL_RE excludes '?' from the captured path.
    """
    candidates = (f"{_BASE}{hit.group(1)}" for hit in _DETAIL_RE.finditer(html_text))
    # dict preserves insertion order, giving an order-preserving dedup.
    return list(dict.fromkeys(candidates))
|
||||
|
||||
|
||||
def _parse_pub_date(text: str) -> datetime | None:
    """Parse the first 'Month DD, YYYY' occurrence in *text* as a UTC datetime.

    Returns None when no such date is present or the matched day/month/year
    do not form a valid calendar date.
    """
    found = _DATE_RE.search(text)
    if found is None:
        return None
    month_name, day, year = found.groups()
    try:
        return datetime(int(year), _MONTHS[month_name], int(day), tzinfo=timezone.utc)
    except ValueError:
        # e.g. "February 31, 2026" matches the pattern but is not a real date.
        return None
|
||||
|
||||
|
||||
async def _get_or_create_source(session) -> NewsSource:
    """Fetch the NewsSource row named _SOURCE_NAME, creating it when absent.

    A newly created row is added and flushed but not committed; committing is
    left to the caller.
    """
    lookup = select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
    found = (await session.execute(lookup)).scalars().first()
    if found is not None:
        return found

    created = NewsSource(
        name=_SOURCE_NAME,
        feed_url=_LISTING_URL,
        feed_type="rss",
        fetch_method="page",
        fulltext_policy="none",
        source_channel="crawl",
        category="Engineering",
        language="en",
        country="US",
        # Original note: kept out of the 6h news cycle; this worker polls monthly.
        enabled=False,
    )
    session.add(created)
    await session.flush()
    return created
|
||||
|
||||
|
||||
async def _ingest_detail(session, source: NewsSource, url: str) -> str:
    """Ingest a single announcement detail page.

    Returns:
        "dup"  — a Document with the same hash or edit_url already exists.
        "skip" — the fetch failed, or body extraction produced too little text.
        "ok"   — a Document was added and summarize/embed/chunk were enqueued.
    """
    canonical = _normalize_url(url)
    ann_hash = hashlib.sha256(f"api-ann|{canonical}".encode()).hexdigest()[:32]

    # Dedup on the synthetic hash or on either spelling of the URL.
    dup_query = (
        select(Document)
        .where(
            (Document.file_hash == ann_hash)
            | (Document.edit_url.in_([canonical, url]))
        )
        .limit(1)
    )
    if (await session.execute(dup_query)).scalars().first():
        return "dup"

    try:
        html_text, final_url = await fetch_page(url)
    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
        logger.warning(f"[api-std] fetch 실패 skip: {url} — {type(e).__name__}: {e}")
        return "skip"

    body, engine, engine_ver = _extract_body(html_text)
    if not engine:
        logger.warning(f"[api-std] 추출 실패 skip (< {_WEB_MIN_BODY_LEN}자): {url}")
        return "skip"
    clean_body = _strip_article_footer(body.replace("\x00", ""))
    if len(clean_body) < _WEB_MIN_BODY_LEN:
        return "skip"

    now = datetime.now(timezone.utc)
    raw_path = _raw_html_path(source.id, ann_hash, now)
    try:
        _save_raw_html(raw_path, html_text)
    except OSError as e:
        # Losing the raw copy is logged but does not block ingestion.
        logger.error(f"[api-std] 원본 보존 실패 (ingest 는 진행): {e}")
        stored_raw = None
    else:
        stored_raw = str(raw_path)

    pub_dt = _parse_pub_date(clean_body)
    title = _page_title(html_text, fallback=url.rsplit("/", 1)[-1][:90])
    # Drop a trailing "| API" suffix; keep the raw title if that empties it.
    title = re.sub(r"\s*\|\s*API\s*$", "", title).strip() or title

    fulltext_meta = {
        "status": "api_announcement",
        "engine": engine,
        "final_url": final_url,
        "raw_html_path": stored_raw,
        "body_chars": len(clean_body),
        "resolved_at": now.isoformat(),
    }
    doc = Document(
        file_path=f"crawl/{_SOURCE_NAME}/{ann_hash}",
        file_hash=ann_hash,
        file_format="article",
        file_size=0,
        file_type="note",
        title=title,
        extracted_text=f"{title}\n\n{clean_body}",
        extracted_at=now,
        extractor_version=f"listing+page@{engine}",
        md_content=clean_body,
        md_status="success",
        md_extraction_engine=engine,
        md_extraction_engine_version=engine_ver,
        md_format_version="1.0",
        md_generated_at=now,
        md_source_hash=hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest(),
        md_content_hash=hashlib.sha256(clean_body.encode("utf-8")).hexdigest(),
        content_origin="extracted",
        source_channel="crawl",
        data_origin="external",
        edit_url=canonical,
        review_status="approved",
        ai_domain="Engineering",
        ai_sub_group=_SOURCE_NAME,
        ai_tags=["Engineering/API 표준 공지"],
        extract_meta={
            "source_id": source.id,
            "source_name": _SOURCE_NAME,
            "published_at": pub_dt.isoformat() if pub_dt else None,
            "fulltext": fulltext_meta,
        },
    )
    # Size is the UTF-8 byte length of the stored text (there is no file on disk).
    doc.file_size = len(doc.extracted_text.encode())
    session.add(doc)
    await session.flush()  # populate doc.id before enqueueing stages
    for stage in ("summarize", "embed", "chunk"):
        await enqueue_stage(session, doc.id, stage)
    logger.info(f"[api-std] ingest {len(clean_body)}자 ({engine}): {title[:60]}")
    return "ok"
|
||||
|
||||
|
||||
async def run(bulk: bool = False) -> None:
    """Scheduler entry point; ``bulk=True`` is meant for the CLI backfill.

    Walks listing pages (2 when scheduled, up to _BULK_MAX_PAGES in bulk),
    ingests each detail URL in its own session, then records source health.
    A failure fetching a listing page records a health failure and returns.
    """
    now = datetime.now(timezone.utc)
    async with async_session() as session:
        source = await _get_or_create_source(session)
        await session.commit()
        source_id = source.id

    page_limit = _BULK_MAX_PAGES if bulk else _SCHEDULED_PAGES
    counts = {"ok": 0, "dup": 0, "skip": 0}
    try:
        for page in range(1, page_limit + 1):
            if page == 1:
                listing_url = _LISTING_URL
            else:
                listing_url = f"{_LISTING_URL}?page={page}&pageSize=10"
            html_text, _ = await fetch_page(listing_url)
            detail_urls = _parse_listing(html_text)
            if not detail_urls:
                # An empty page marks the end of the listing (bulk early exit).
                break
            for url in detail_urls:
                # One session/commit per announcement keeps earlier items durable.
                async with async_session() as session:
                    src = await session.get(NewsSource, source_id)
                    outcome = await _ingest_detail(session, src, url)
                    await session.commit()
                counts[outcome] += 1
            logger.info(f"[api-std] 목록 p{page}: 누적 {counts}")
    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
        logger.error(f"[api-std] 목록 수집 실패: {e}")
        async with async_session() as session:
            health = await _get_or_create_health(session, source_id)
            _record_failure(health, str(e) or repr(e), now)
            await session.commit()
        return

    async with async_session() as session:
        health = await _get_or_create_health(session, source_id)
        _record_success(health, counts["ok"], False, now)
        src = await session.get(NewsSource, source_id)
        src.last_fetched_at = now
        await session.commit()
    logger.info(f"[api-std] 완료: {counts}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry: --bulk crawls every listing page instead of the scheduled two.
    cli = argparse.ArgumentParser(description="API 표준 공지 수집")
    cli.add_argument("--bulk", action="store_true", help="전 페이지 일괄 (초기 백필)")
    asyncio.run(run(bulk=cli.parse_args().bulk))
|
||||
@@ -0,0 +1,185 @@
|
||||
"""C-2 잔여 ② CCPS Process Safety Beacon 수집 워커 (사이클 3).
|
||||
|
||||
월간 1페이지 PDF + 한국어 번역판 — RAG 청크로 이상적 크기 (카드 C-2).
|
||||
aiche.org 는 평문 httpx 를 UA 무관 403 (2026-06-11 실측: Archiver UA·브라우저 UA 모두)
|
||||
→ playwright-fetcher 익명 컨텍스트 경유 (B-3 인프라 재사용):
|
||||
목록 페이지 브라우저 fetch → beacon PDF 링크 파싱 → referer 쿠키 승계 다운로드.
|
||||
|
||||
알려진 리스크: WAF 가 헤드리스 자체를 차단하면 _CHALLENGE_MARKERS → CrawlBlocked
|
||||
→ health 실패 기록 후 종료 (르몽드 B-3 PARK 선례 — 그 경우 대안 = 이메일 구독
|
||||
.eml 트랙 결합, [[feedback_antibot_headless_subscription_wall]]).
|
||||
|
||||
스케줄 = monthly (main.py 5일 07:20 KST). 월간 1건 페이스라 diff 는 file_path dedup 으로 충분.
|
||||
수동: docker exec hyungi_document_server-fastapi-1 python -m workers.ccps_collector
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.config import settings
|
||||
from core.crawl_politeness import (
|
||||
CrawlBlocked,
|
||||
CrawlFetchError,
|
||||
CrawlSkip,
|
||||
download_via_browser,
|
||||
fetch_page_via_browser,
|
||||
)
|
||||
from core.database import async_session
|
||||
from core.utils import setup_logger
|
||||
from models.document import Document
|
||||
from models.news_source import NewsSource
|
||||
from models.queue import enqueue_stage
|
||||
from workers.kosha_collector import _safe_filename
|
||||
from workers.news_collector import (
|
||||
_get_or_create_health,
|
||||
_record_failure,
|
||||
_record_success,
|
||||
)
|
||||
|
||||
logger = setup_logger("ccps_collector")
|
||||
|
||||
_BEACON_URL = "https://www.aiche.org/ccps/resources/process-safety-beacon"
_SOURCE_NAME = "CCPS Process Safety Beacon"
# Per-run cap on PDFs. Original note: the expected pace is 1-2 PDFs a month
# (English/Korean); the cap guards against a runaway if the page is misread.
_MAX_PDFS_PER_RUN = 10
|
||||
|
||||
|
||||
def _beacon_pdf_links(html_text: str, base_url: str) -> list[str]:
    """Return absolute URLs of PDF anchors that mention 'beacon'.

    An anchor qualifies when 'beacon' (case-insensitive) appears in its href
    or in its tag-stripped link text. Results keep page order and are
    de-duplicated by URL path, so query-string variants of one file collapse
    to the first occurrence.

    Args:
        html_text: Raw HTML of the listing page.
        base_url: URL used to resolve relative hrefs.
    """
    seen_paths: set[str] = set()
    links: list[str] = []
    for m in re.finditer(
        r'<a\s+[^>]*href="([^"]+\.pdf(?:\?[^"]*)?)"[^>]*>(.*?)</a>',
        html_text, re.I | re.S,
    ):
        # The href comes from raw HTML, so '&' in a query string is written
        # '&amp;'; decode it or the literal entity is sent to the server.
        # Only '&amp;' is decoded: a full entity decoder would also rewrite
        # legacy references such as '&copy=' inside query strings.
        href = m.group(1).replace("&amp;", "&")
        text = re.sub(r"<[^>]+>", " ", m.group(2))
        if "beacon" not in href.lower() and "beacon" not in text.lower():
            continue
        absolute = urljoin(base_url, href)
        path = urlparse(absolute).path
        if path not in seen_paths:
            seen_paths.add(path)
            links.append(absolute)
    return links
|
||||
|
||||
|
||||
def _all_pdf_hrefs(html_text: str) -> list[str]:
    """Return every distinct raw PDF href in *html_text*, sorted.

    Hrefs are returned exactly as written (not resolved, not filtered).
    """
    pdf_href = re.compile(r'href="([^"]+\.pdf(?:\?[^"]*)?)"', re.I)
    unique = set(pdf_href.findall(html_text))
    return sorted(unique)
|
||||
|
||||
|
||||
async def _get_or_create_source(session) -> NewsSource:
    """Fetch the NewsSource row named _SOURCE_NAME, creating it when absent.

    A newly created row is added and flushed but not committed; committing is
    left to the caller.
    """
    lookup = select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
    found = (await session.execute(lookup)).scalars().first()
    if found is not None:
        return found

    created = NewsSource(
        name=_SOURCE_NAME,
        feed_url=_BEACON_URL,
        feed_type="rss",
        fetch_method="page",
        fulltext_policy="none",
        source_channel="crawl",
        category="Safety",
        language="en",
        country="US",
        # Original note: kept out of the 6h news cycle; this worker polls monthly.
        enabled=False,
    )
    session.add(created)
    await session.flush()
    return created
|
||||
|
||||
|
||||
async def _ingest_pdf(session, pdf_url: str) -> bool:
    """Store one Beacon PDF and register it for extraction.

    Returns False when a Document with the same file_path already exists,
    True after the file is written, the Document added and 'extract' enqueued.

    Raises:
        CrawlSkip: the response neither declares a PDF content type nor
            starts with the %PDF magic bytes.
        Whatever download_via_browser raises is propagated unchanged.
    """
    fname = _safe_filename(Path(urlparse(pdf_url).path).name)
    rel_path = f"crawl_raw/ccps_beacon/{fname}"

    lookup = select(Document).where(Document.file_path == rel_path).limit(1)
    if (await session.execute(lookup)).scalars().first():
        return False

    content, content_type = await download_via_browser(pdf_url, referer=_BEACON_URL)
    declared_pdf = "pdf" in content_type.lower()
    has_pdf_magic = content.startswith(b"%PDF")
    if not (declared_pdf or has_pdf_magic):
        raise CrawlSkip(f"PDF 아님 (content-type={content_type[:60]}): {pdf_url}")

    target = Path(settings.nas_mount_path) / rel_path
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_bytes(content)

    # Title = file stem with '_' and '-' turned into spaces.
    stem = fname.rsplit(".", 1)[0]
    doc = Document(
        file_path=rel_path,
        file_hash=hashlib.sha256(content).hexdigest(),
        file_format="pdf",
        file_size=len(content),
        file_type="immutable",
        title=stem.replace("_", " ").replace("-", " "),
        source_channel="crawl",
        data_origin="external",
        import_source="ccps_beacon",
        edit_url=pdf_url,
        ai_tags=["Safety/CCPS Beacon"],
        extract_meta={"ccps": {"kind": "beacon_pdf"}},
    )
    session.add(doc)
    await session.flush()  # populate doc.id before enqueueing
    await enqueue_stage(session, doc.id, "extract")
    logger.info(f"[ccps] Beacon ingest: {rel_path} ({len(content)} bytes)")
    return True
|
||||
|
||||
|
||||
async def run() -> None:
    """Monthly entry point: fetch the Beacon page, ingest its PDFs, record health.

    Flow: browser-fetch the listing anonymously, collect beacon PDF links, then
    ingest at most _MAX_PDFS_PER_RUN of them, each in its own session.

    Health recording:
      * listing fetch fails or yields no beacon links -> failure;
      * every attempted PDF fails -> failure, re-raising the last error so its
        type (e.g. CrawlBlocked) reaches the failure record. Without this a run
        whose downloads are all blocked would be recorded as a success;
      * otherwise -> success with the number of newly ingested PDFs. Individual
        PDF failures are logged and skipped.
    """
    now = datetime.now(timezone.utc)
    async with async_session() as session:
        source = await _get_or_create_source(session)
        await session.commit()
        source_id = source.id

    try:
        html_text, final_url = await fetch_page_via_browser(_BEACON_URL, profile=None)
        links = _beacon_pdf_links(html_text, final_url)
        if not links:
            others = _all_pdf_hrefs(html_text)
            # Zero matches suggests a page/naming change; expose the PDFs that
            # were found so the filter can be corrected.
            raise CrawlFetchError(
                f"beacon PDF 0건 (전체 PDF {len(others)}건: {others[:5]})"
            )

        batch = links[:_MAX_PDFS_PER_RUN]
        new_count = 0
        failed = 0
        last_error: Exception | None = None
        for pdf_url in batch:
            async with async_session() as session:
                try:
                    if await _ingest_pdf(session, pdf_url):
                        new_count += 1
                    await session.commit()
                except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
                    await session.rollback()
                    failed += 1
                    last_error = e
                    logger.warning(f"[ccps] PDF 실패 skip ({pdf_url}): {e}")
        if len(links) > _MAX_PDFS_PER_RUN:
            logger.warning(
                f"[ccps] PDF {len(links)}건 중 {_MAX_PDFS_PER_RUN}건만 처리 "
                f"(월간 1~2건 가정 초과 — 페이지 구조 확인 필요)"
            )

        # Nothing got through (no new PDF, no known duplicate): report the run
        # as failed instead of hiding a blanket download block behind "success".
        if last_error is not None and failed == len(batch):
            raise last_error

        async with async_session() as session:
            health = await _get_or_create_health(session, source_id)
            _record_success(health, new_count, False, now)
            src = await session.get(NewsSource, source_id)
            src.last_fetched_at = now
            await session.commit()
        logger.info(f"[ccps] 완료: 신규 {new_count}건 (링크 {len(links)}건)")
    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
        # Original note: CrawlBlocked signals the WAF is blocking headless access.
        logger.error(f"[ccps] 수집 실패: {type(e).__name__}: {e}")
        async with async_session() as session:
            health = await _get_or_create_health(session, source_id)
            _record_failure(health, str(e) or repr(e), now)
            await session.commit()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual one-off run (python -m workers.ccps_collector); takes no arguments.
    asyncio.run(run())
|
||||
@@ -0,0 +1,390 @@
|
||||
"""C-2 잔여 ① US CSB sitemap diff 수집 워커 (plan crawl-24x7-1, 사이클 3).
|
||||
|
||||
RSS 폐지 → sitemap.xml lastmod diff 폴링이 정석 (정부 사이트라 lastmod 양호 —
|
||||
2026-06-11 실측 1,307 URL, 조사 보고서 페이지는 루트 슬러그). 페이지 본문(4-tier
|
||||
≥200자 게이트) + 보고서 PDF(/assets/, recommendation 상태요약 제외) →
|
||||
기존 extract 파이프라인(marker/kordoc) 재사용.
|
||||
|
||||
스케줄 = weekly (main.py 월 06:50 KST):
|
||||
워터마크(selector_override.sitemap_watermark — B-3 probe 설정과 같은 JSONB 슬롯)
|
||||
이후 lastmod 만, 오래된 것부터 cap(40페이지/회). 워터마크는 처리분까지만 전진
|
||||
= 잔량 자동 점진 백필 (KOSHA GUIDE cap 패턴). cap 미처리 잔량은 매회 로그
|
||||
(silent cap 금지). diff 건수 > sanity(300) = sitemap 부패/lastmod 남발 의심 가시 경고.
|
||||
|
||||
초기 일괄 (cap 해제, politeness 로 수 시간 — docker exec -d, 진행 중 같은 서비스
|
||||
재배포 금지 [[feedback_docker_exec_orphan_kill]] 자매 함정):
|
||||
docker exec hyungi_document_server-fastapi-1 \
|
||||
python -m workers.csb_collector --limit 3 # 검증용
|
||||
docker exec -d hyungi_document_server-fastapi-1 \
|
||||
python -m workers.csb_collector --bulk # 전체
|
||||
|
||||
멱등: 페이지 = edit_url(정규화)+file_hash dedup (first-wins — lastmod 갱신 페이지의
|
||||
본문 재적재는 안 함, 갱신의 실체인 신규 PDF 는 개별 dedup 으로 적재됨).
|
||||
PDF = file_path dedup. 워터마크 경계는 >= 재조회 — 경계 페이지 1회 재fetch 후
|
||||
dedup 이 잡는다 (lastmod 실측 distinct 라 누적 재fetch 없음).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import hashlib
|
||||
import random
|
||||
import re
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import httpx
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.config import settings
|
||||
from core.crawl_politeness import (
|
||||
CRAWL_UA,
|
||||
CrawlBlocked,
|
||||
CrawlFetchError,
|
||||
CrawlSkip,
|
||||
fetch_page,
|
||||
)
|
||||
from core.database import async_session
|
||||
from core.utils import setup_logger
|
||||
from models.document import Document
|
||||
from models.news_source import NewsSource
|
||||
from models.queue import enqueue_stage
|
||||
from workers.fulltext_worker import (
|
||||
_WEB_MIN_BODY_LEN,
|
||||
_extract_body,
|
||||
_raw_html_path,
|
||||
_save_raw_html,
|
||||
_strip_article_footer,
|
||||
)
|
||||
from workers.kosha_collector import _safe_filename
|
||||
from workers.news_collector import (
|
||||
FeedError,
|
||||
_get_or_create_health,
|
||||
_normalize_url,
|
||||
_record_failure,
|
||||
_record_success,
|
||||
)
|
||||
from workers.static_corpus_ingest import _page_title
|
||||
|
||||
logger = setup_logger("csb_collector")
|
||||
|
||||
_SITEMAP_URL = "https://www.csb.gov/sitemap.xml"
_SOURCE_NAME = "US CSB 사고조사보고서"

# Pages handled per weekly run. Original note: the remainder carries over
# because the watermark only advances over what was processed.
_RUN_PAGE_CAP = 40
# Original note: a weekly diff above this suggests lastmod churn or a broken sitemap.
_DIFF_SANITY = 300
_MAX_PDF_BYTES = 50 * 1024 * 1024
# (min, max) seconds between consecutive PDF downloads on the same domain
# (original note: mirrors kosha_collector's _DOWNLOAD_DELAY).
_PDF_DELAY = (2.0, 5.0)

# Sections skipped by their first path segment: administrative or of no value
# as text. Original note: investigation reports and news releases are root
# slugs and so unaffected; /news/ and /investigations/ are only listing pages.
_SKIP_FIRST_SEGMENT = {
    "videos",
    "photos",
    "events",
    "members",
    "disclaimers",
    "media-room",
    "about-the-csb",
    "about-us",
    "foia",
    "news",
    "investigations",
    "site-map",
    "subscribe",
    "unsubscribe",
    "optout",
    "test",
    "privacy-policy",
    "vulnerability-disclosure-policy",
    "en-espanol",
    "newsletter",
    "recom-stats",
    "500.aspx",
    "documents",
    "records-details",
}
|
||||
|
||||
|
||||
def _parse_sitemap(xml_text: str) -> list[tuple[str, datetime]]:
    """Extract (url, lastmod) pairs from sitemap XML.

    Only <url> entries whose <loc> is directly followed by a parseable
    <lastmod> are returned; entries without one are dropped because there is
    nothing to diff on. A lastmod without a timezone is treated as UTC.

    Two input forms are normalised before parsing:
      * a trailing 'Z' (UTC designator), which datetime.fromisoformat only
        accepts from Python 3.11 on — without this every such entry would be
        silently discarded on older interpreters;
      * XML entities in <loc> (&amp; etc.), which the sitemap protocol requires.
    """
    from xml.sax.saxutils import unescape  # local import: stdlib, used only here

    out: list[tuple[str, datetime]] = []
    for m in re.finditer(
        r"<url>\s*<loc>([^<]+)</loc>\s*<lastmod>([^<]+)</lastmod>", xml_text
    ):
        stamp = m.group(2).strip()
        if stamp[-1:] in ("Z", "z"):
            stamp = stamp[:-1] + "+00:00"
        try:
            lastmod = datetime.fromisoformat(stamp)
        except ValueError:
            continue
        if lastmod.tzinfo is None:
            lastmod = lastmod.replace(tzinfo=timezone.utc)
        out.append((unescape(m.group(1).strip()), lastmod))
    return out
|
||||
|
||||
|
||||
def _should_skip(url: str) -> bool:
    """Tell whether *url* is outside the crawl scope.

    True for the site root (empty path) and for any URL whose first path
    segment, lower-cased, is listed in _SKIP_FIRST_SEGMENT.
    """
    trimmed = urlparse(url).path.strip("/")
    if trimmed == "":
        return True  # home page
    first_segment, _, _ = trimmed.partition("/")
    return first_segment.lower() in _SKIP_FIRST_SEGMENT
|
||||
|
||||
|
||||
def _pdf_links(html_text: str, base_url: str) -> list[str]:
    """Return absolute report-PDF URLs linked from a page.

    Links are dropped when their path contains /assets/recommendation/
    (original note: mostly status-change summaries) or when the host is not
    www.csb.gov. The query string is kept in the returned URL, while
    de-duplication uses the path only, so variants of one file collapse to
    the first occurrence.

    Args:
        html_text: Raw HTML of the page.
        base_url: URL used to resolve relative hrefs.
    """
    seen_paths: set[str] = set()
    links: list[str] = []
    for m in re.finditer(r'href="([^"]+\.pdf(?:\?[^"]*)?)"', html_text, re.I):
        # The href comes from raw HTML, so '&' in a query string is written
        # '&amp;'; decode it or the literal entity is sent to the server.
        href = m.group(1).replace("&amp;", "&")
        absolute = urljoin(base_url, href)
        parts = urlparse(absolute)
        if "/assets/recommendation/" in parts.path.lower():
            continue
        if (parts.hostname or "").lower() != "www.csb.gov":
            continue
        if parts.path not in seen_paths:
            seen_paths.add(parts.path)
            links.append(absolute)
    return links
|
||||
|
||||
|
||||
async def _download_pdf(url: str, dest: Path) -> int:
    """Download a PDF to *dest* and return its size in bytes.

    Sleeps a random interval from _PDF_DELAY first (spacing between
    consecutive downloads; callers are expected to run sequentially).

    The body is streamed and the transfer aborted once it exceeds
    _MAX_PDF_BYTES, so an oversized file is never fully buffered. Nothing is
    written to *dest* unless the whole download succeeded.

    Raises:
        FeedError: non-200 status, size cap exceeded, or an httpx transport or
            timeout error. Transport errors are converted so the caller's
            per-PDF ``except FeedError`` skip also covers them.
    """
    await asyncio.sleep(random.uniform(*_PDF_DELAY))
    buf = bytearray()
    try:
        async with httpx.AsyncClient(timeout=60, follow_redirects=True) as client:
            async with client.stream(
                "GET", url, headers={"User-Agent": CRAWL_UA}
            ) as resp:
                if resp.status_code != 200:
                    raise FeedError(f"PDF 다운로드 {resp.status_code}: {url}")
                async for chunk in resp.aiter_bytes():
                    buf += chunk
                    if len(buf) > _MAX_PDF_BYTES:
                        raise FeedError(f"PDF 크기 초과 ({len(buf)} bytes): {url}")
    except httpx.HTTPError as e:
        raise FeedError(f"PDF 다운로드 오류 ({type(e).__name__}): {url}") from e

    dest.parent.mkdir(parents=True, exist_ok=True)
    dest.write_bytes(buf)
    return len(buf)
|
||||
|
||||
|
||||
async def _get_or_create_source(session) -> NewsSource:
    """Fetch the NewsSource row named _SOURCE_NAME, creating it when absent.

    A newly created row is added and flushed but not committed; committing is
    left to the caller.
    """
    lookup = select(NewsSource).where(NewsSource.name == _SOURCE_NAME)
    found = (await session.execute(lookup)).scalars().first()
    if found is not None:
        return found

    created = NewsSource(
        name=_SOURCE_NAME,
        feed_url=_SITEMAP_URL,
        feed_type="rss",
        fetch_method="sitemap+page",
        fulltext_policy="none",
        source_channel="crawl",
        category="Safety",
        language="en",
        country="US",
        # Original note: kept out of the 6h news cycle; this worker polls weekly.
        enabled=False,
    )
    session.add(created)
    await session.flush()
    return created
|
||||
|
||||
|
||||
def _watermark(source: NewsSource) -> datetime | None:
    """Read the sitemap watermark stored in ``source.selector_override``.

    Returns None when the slot is missing or empty, or when the stored value
    is not a parseable ISO-8601 string (a non-string value included). A stored
    value without a timezone is treated as UTC, matching how _parse_sitemap
    treats a naive lastmod.
    NOTE(review): the watermark is presumably compared against those lastmod
    values by the caller, where mixing naive and aware datetimes would raise
    TypeError — confirm at the call site.
    """
    raw = (source.selector_override or {}).get("sitemap_watermark")
    if not raw:
        return None
    try:
        parsed = datetime.fromisoformat(raw)
    except (TypeError, ValueError):
        # TypeError: the JSON slot held a non-string (e.g. a number).
        return None
    if parsed.tzinfo is None:
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed
|
||||
|
||||
|
||||
def _set_watermark(source: NewsSource, value: datetime) -> None:
    """Store *value* (ISO-8601) as the sitemap watermark on *source*.

    A fresh dict is assigned rather than mutating the existing one; the
    original note says this is what makes the JSONB change detectable.
    Other keys in selector_override are preserved.
    """
    source.selector_override = {
        **(source.selector_override or {}),
        "sitemap_watermark": value.isoformat(),
    }
|
||||
|
||||
|
||||
async def _ingest_pdf(session, page_slug: str, pdf_url: str) -> bool:
    """Store one report PDF and register it for extraction.

    Returns False when a Document with the same file_path already exists,
    True after the file is downloaded, the Document added and 'extract'
    enqueued. Errors from _download_pdf propagate to the caller.
    """
    fname = _safe_filename(Path(urlparse(pdf_url).path).name)
    rel_path = f"crawl_raw/csb/{page_slug}/{fname}"

    lookup = select(Document).where(Document.file_path == rel_path).limit(1)
    if (await session.execute(lookup)).scalars().first():
        return False

    target = Path(settings.nas_mount_path) / rel_path
    size = await _download_pdf(pdf_url, target)
    # Hash what actually landed on disk.
    digest = hashlib.sha256(target.read_bytes()).hexdigest()
    stem = fname.rsplit(".", 1)[0]
    doc = Document(
        file_path=rel_path,
        file_hash=digest,
        file_format="pdf",
        file_size=size,
        file_type="immutable",
        title=stem.replace("_", " "),
        source_channel="crawl",
        data_origin="external",
        import_source="csb_sitemap",
        edit_url=pdf_url,
        ai_tags=["Safety/CSB/보고서"],
        extract_meta={"csb": {"page_slug": page_slug, "kind": "report_pdf"}},
    )
    session.add(doc)
    await session.flush()  # populate doc.id before enqueueing
    await enqueue_stage(session, doc.id, "extract")
    logger.info(f"[csb] PDF ingest: {rel_path} ({size} bytes)")
    return True
|
||||
|
||||
|
||||
async def _ingest_url(session, source: NewsSource, url: str, lastmod: datetime) -> dict:
    """Process one changed sitemap URL.

    Fetches the page, scans it for PDFs (each deduplicated on its own) and
    stores the page body as a new Document if it has not been stored yet.

    The PDF scan always runs, even for a page that was ingested before: a
    bumped ``lastmod`` often means a new PDF (e.g. a final report) was
    attached, which is the main value of revisiting.

    Returns:
        A dict with the counters ``"page"``, ``"pdf"`` and ``"skip"``.
    """
    tally = {"page": 0, "pdf": 0, "skip": 0}

    try:
        html_text, final_url = await fetch_page(url)
    except (CrawlBlocked, CrawlSkip, CrawlFetchError) as e:
        logger.warning(f"[csb] fetch 실패 skip: {url} — {type(e).__name__}: {e}")
        tally["skip"] = 1
        return tally

    last_segment = urlparse(url).path.strip("/").split("/")[-1]
    page_slug = _safe_filename(last_segment or "root")

    for pdf_url in _pdf_links(html_text, final_url):
        try:
            is_new = await _ingest_pdf(session, page_slug, pdf_url)
        except FeedError as e:
            logger.warning(f"[csb] PDF 실패 skip ({pdf_url}): {e}")
            continue
        if is_new:
            tally["pdf"] += 1

    # Page body is first-wins: an already stored page is never re-ingested.
    normalized_url = _normalize_url(url)
    page_hash = hashlib.sha256(f"csb-page|{normalized_url}".encode()).hexdigest()[:32]
    already_stored = (
        await session.execute(
            select(Document).where(
                (Document.file_hash == page_hash)
                | (Document.edit_url.in_([normalized_url, url]))
            ).limit(1)
        )
    ).scalars().first()
    if already_stored:
        return tally

    body, engine, engine_ver = _extract_body(html_text)
    if not engine:
        logger.info(f"[csb] 본문 부족 — 페이지 비적재 (PDF 만): {url}")
        return tally
    # NUL characters are removed before the footer is stripped.
    clean_body = _strip_article_footer(body.replace("\x00", ""))
    if len(clean_body) < _WEB_MIN_BODY_LEN:
        return tally

    now = datetime.now(timezone.utc)
    raw_path = _raw_html_path(source.id, page_hash, now)
    try:
        _save_raw_html(raw_path, html_text)
    except OSError as e:
        # Failing to keep the raw HTML does not block ingestion.
        raw_saved = False
        logger.error(f"[csb] 원본 보존 실패 (ingest 는 진행): {e}")
    else:
        raw_saved = True

    title = _page_title(html_text, fallback=page_slug.replace("-", " ")[:90])
    extracted_text = f"{title}\n\n{clean_body}"
    fulltext_meta = {
        "status": "csb_sitemap",
        "engine": engine,
        "final_url": final_url,
        "raw_html_path": str(raw_path) if raw_saved else None,
        "body_chars": len(clean_body),
        "resolved_at": now.isoformat(),
    }
    doc = Document(
        file_path=f"crawl/{_SOURCE_NAME}/{page_hash}",
        file_hash=page_hash,
        file_format="article",
        file_size=len(extracted_text.encode()),
        file_type="note",
        title=title,
        extracted_text=extracted_text,
        extracted_at=now,
        extractor_version=f"sitemap+page@{engine}",
        md_content=clean_body,
        md_status="success",
        md_extraction_engine=engine,
        md_extraction_engine_version=engine_ver,
        md_format_version="1.0",
        md_generated_at=now,
        md_source_hash=hashlib.sha256(html_text.encode("utf-8", errors="replace")).hexdigest(),
        md_content_hash=hashlib.sha256(clean_body.encode("utf-8")).hexdigest(),
        content_origin="extracted",
        source_channel="crawl",
        data_origin="external",
        edit_url=normalized_url,
        review_status="approved",
        ai_domain="Safety",
        ai_sub_group=_SOURCE_NAME,
        ai_tags=["Safety/CSB"],
        extract_meta={
            "source_id": source.id,
            "source_name": _SOURCE_NAME,
            "published_at": lastmod.isoformat(),
            "fulltext": fulltext_meta,
        },
    )
    session.add(doc)
    # Flush so doc.id exists before the processing stages are queued.
    await session.flush()
    for stage in ("summarize", "embed", "chunk"):
        await enqueue_stage(session, doc.id, stage)

    tally["page"] = 1
    logger.info(f"[csb] page ingest {len(clean_body)}자 ({engine}): {title[:60]}")
    return tally
|
||||
|
||||
|
||||
async def run(bulk: bool = False, limit: int = 0) -> None:
    """Weekly entry point (scheduler): ingest sitemap entries changed since the watermark.

    Args:
        bulk: CLI only. Lift the per-run page cap (initial bulk load).
        limit: CLI only. Upper bound on the number of URLs processed in
            this run. ``0`` or a negative value means "no extra limit".

    The sitemap is fetched and parsed; entries that are not skipped by
    ``_should_skip`` and whose ``lastmod`` is at or after the stored
    watermark are processed oldest-first, each in its own session and
    commit. A sitemap fetch/parse failure is recorded on the source health
    row and ends the run.
    """
    now = datetime.now(timezone.utc)
    async with async_session() as session:
        source = await _get_or_create_source(session)
        await session.commit()
        source_id = source.id
        watermark = _watermark(source)

    try:
        xml_text, _ = await fetch_page(
            _SITEMAP_URL, content_types=("text/xml", "application/xml", "text/html")
        )
        entries = _parse_sitemap(xml_text)
        if not entries:
            # An empty parse is treated as a failure, not as "nothing changed".
            raise FeedError("sitemap 파싱 0건 — 포맷 변경/부패 의심")
    except (CrawlBlocked, CrawlSkip, CrawlFetchError, FeedError) as e:
        logger.error(f"[csb] sitemap 수집 실패: {e}")
        async with async_session() as session:
            health = await _get_or_create_health(session, source_id)
            _record_failure(health, str(e) or repr(e), now)
            await session.commit()
        return

    # ">=" (not ">") so entries sharing the watermark's lastmod are revisited.
    changed = sorted(
        (
            (url, lastmod) for url, lastmod in entries
            if not _should_skip(url) and (watermark is None or lastmod >= watermark)
        ),
        key=lambda pair: pair[1],
    )
    if watermark is not None and len(changed) > _DIFF_SANITY:
        # Only reported; capped processing below still continues.
        logger.error(
            f"[csb] diff {len(changed)}건 > sanity {_DIFF_SANITY} — "
            f"sitemap lastmod 남발/부패 의심 (cap 처리는 계속, 관찰 필요)"
        )

    cap = len(changed) if bulk else _RUN_PAGE_CAP
    if limit > 0:
        # Only a positive limit narrows the cap. A negative value used to
        # become a negative slice bound and select the wrong subset.
        cap = min(cap, limit)
    todo, deferred = changed[:cap], max(len(changed) - cap, 0)
    logger.info(
        f"[csb] sitemap {len(entries)}건 중 변경 {len(changed)}건, 처리 {len(todo)}건"
        + (f" (잔여 {deferred}건 — 워터마크 미전진으로 자동 이월)" if deferred else "")
    )

    totals = {"page": 0, "pdf": 0, "skip": 0}
    for i, (url, lastmod) in enumerate(todo, 1):
        # One session and commit per URL, so progress survives a later failure.
        async with async_session() as session:
            src = await session.get(NewsSource, source_id)
            counts = await _ingest_url(session, src, url, lastmod)
            # NOTE(review): the watermark advances even when the URL was
            # skipped on a fetch error — confirm this is intended.
            _set_watermark(src, lastmod)
            await session.commit()
        for k in totals:
            totals[k] += counts[k]
        if i % 10 == 0:
            logger.info(f"[csb] 진행 {i}/{len(todo)} {totals}")

    async with async_session() as session:
        health = await _get_or_create_health(session, source_id)
        _record_success(health, totals["page"] + totals["pdf"], False, now)
        src = await session.get(NewsSource, source_id)
        src.last_fetched_at = now
        await session.commit()
    logger.info(f"[csb] 완료: {totals} (변경 {len(changed)}건 중 {len(todo)}건 처리)")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI entry point: --bulk lifts the per-run cap (initial bulk load),
    # --limit bounds the number of processed URLs (for verification runs).
    cli = argparse.ArgumentParser(description="CSB sitemap diff 수집")
    cli.add_argument("--bulk", action="store_true", help="cap 해제 — 초기 일괄")
    cli.add_argument("--limit", type=int, default=0, help="처리 상한 (검증용)")
    opts = cli.parse_args()
    asyncio.run(run(bulk=opts.bulk, limit=opts.limit))
|
||||
@@ -271,6 +271,15 @@ async def _enqueue_processing(session, doc: Document, source: NewsSource, pub_dt
|
||||
fulltext_worker 가 승격(또는 격하) 확정 후 enqueue (RSS 요약 선요약 → 풀텍스트
|
||||
도착 시 summarize_worker 의 '이미 요약 있음 skip' 에 막히는 순서 함정 회피).
|
||||
"""
|
||||
if source.fetch_method == "signal-only":
|
||||
# B-4: 시그널 = 검색 색인만 (embed/chunk). fulltext/summarize 절대 enqueue 안 함 —
|
||||
# 레지스트리가 fulltext_policy='page' 로 잘못 설정돼도 페이지 fetch 0 (방어 우선).
|
||||
# 요약 LLM 스킵 = 맥미니 부하 0. 다이제스트/브리핑은 ai_summary IS NULL 문서를
|
||||
# 처음부터 제외(services/digest/loader.py)하므로 시그널 문서가 자연 배제된다.
|
||||
if source.source_channel == "crawl" or (datetime.now(timezone.utc) - pub_dt).days <= 30:
|
||||
await enqueue_stage(session, doc.id, "embed")
|
||||
await enqueue_stage(session, doc.id, "chunk")
|
||||
return
|
||||
if source.fulltext_policy == "page" and doc.edit_url:
|
||||
await enqueue_stage(session, doc.id, "fulltext")
|
||||
return
|
||||
@@ -286,6 +295,28 @@ async def _enqueue_processing(session, doc: Document, source: NewsSource, pub_dt
|
||||
await enqueue_stage(session, doc.id, "chunk")
|
||||
|
||||
|
||||
def _entry_body(source: NewsSource, entry, summary: str) -> tuple[str, str]:
    """Pick the body text for one feed entry according to the source policy.

    Pure function (target of the shape tests). Returns
    ``(body, extractor_version)``.

    signal-only: the feed summary is the body and is not truncated (arXiv
        abstracts run 1.3-1.6K characters; a 1000-character cap would cut
        off their tail). No page fetch ever happens for these sources (B-4).
    feed-full: the feed body is trusted as full text only for sources
        marked as such; for ordinary sources summary/content:encoded is
        often truncated or ad-injected and must not be stored as full
        text (A-6).
    otherwise: the already prepared *summary* is used.
    """
    if source.fetch_method == "signal-only":
        feed_text = entry.get("summary", "") or entry.get("description", "")
        cleaned = _clean_html(feed_text, max_len=None)
        return (cleaned if cleaned else summary), "rss-signal"

    if source.fulltext_policy != "feed-full":
        return summary, "rss"

    parts = entry.get("content") or []
    candidate = parts[0].get("value", "") if parts else ""
    feed_body = _clean_html(candidate or entry.get("summary", ""), max_len=None)
    # The feed body is used only when it is longer than the summary.
    if len(feed_body) <= len(summary):
        return summary, "rss"
    return feed_body, "rss-feed-full"
|
||||
|
||||
|
||||
def _build_extract_meta(source: NewsSource, pub_dt: datetime) -> dict:
|
||||
"""fulltext_worker / 패널이 쓰는 출처 메타 (documents 에 source FK 가 없어 여기 기록)."""
|
||||
return {
|
||||
@@ -415,17 +446,8 @@ async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]:
|
||||
if not summary:
|
||||
summary = title
|
||||
|
||||
# A-6: feed-full 소스만 피드 본문을 전문으로 신뢰 (truncate·광고 삽입이 흔해
|
||||
# 일반 소스의 summary/content:encoded 를 전문으로 오인 저장 금지)
|
||||
body = summary
|
||||
is_feed_full = False
|
||||
if source.fulltext_policy == "feed-full":
|
||||
content_list = entry.get("content") or []
|
||||
raw_body = content_list[0].get("value", "") if content_list else ""
|
||||
full_body = _clean_html(raw_body or entry.get("summary", ""), max_len=None)
|
||||
if len(full_body) > len(summary):
|
||||
body = full_body
|
||||
is_feed_full = True
|
||||
# 정책별 본문 선택 — signal-only(무절단 요약) / feed-full(피드 전문) / 기본(요약)
|
||||
body, extractor_version = _entry_body(source, entry, summary)
|
||||
|
||||
link = entry.get("link", "")
|
||||
|
||||
@@ -469,7 +491,7 @@ async def _fetch_rss(session, source: NewsSource) -> tuple[int, str]:
|
||||
title=title,
|
||||
extracted_text=f"{title}\n\n{body}",
|
||||
extracted_at=datetime.now(timezone.utc),
|
||||
extractor_version="rss-feed-full" if is_feed_full else "rss",
|
||||
extractor_version=extractor_version,
|
||||
# article = 텍스트 네이티브(본문=extracted_text). markdown 단계 미enqueue 라
|
||||
# 기본값 'pending' 이면 영구 비수렴 → backlog 지표 오염 + md_status_pending partial
|
||||
# 인덱스 비대. 생성 시점에 terminal 'skipped' 로 명시(변환 비대상).
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
-- crawl-24x7 cycle 3 source seed (B-4 signals + C-4 continuous engineering collection).
-- All URLs were verified live on 2026-06-11.
-- Follows the migration 326 precedent: idempotent via WHERE NOT EXISTS, existing rows
-- are preserved, only new rows are inserted (single statement).
-- fetch_method='signal-only' (B-4): ingest headline + summary only, zero page fetches,
--   summarize skipped (search index only: embed/chunk). Digests exclude rows whose
--   ai_summary is NULL, so these documents drop out of digests naturally.
-- Bloomberg = strongest anti-bot, body collection not recommended -> signal only.
--   The feed was observed to mix in videos -> skip-video.
-- Economist = observed 200 (the Archiver UA is treated as a feed reader; only browser
--   UAs get 403). No subscription = signal.
-- Nikkei Asia = observed RSS 1.0 (RDF); feedparser normalizes it natively (title/link
--   only, no summary or date = title signal). No code branch needed (pinned by the
--   fixture regression in tests/test_crawl_cycle3_shapes.py).
-- arXiv/ASME = the abstract is the body (C-4 step 2: index abstracts first, selected
--   full texts in Phase 3) -> reuse signal-only.
-- IEEE Spectrum = feed description is the full text (observed 7.9-14K chars) -> feed-full.
--   Category filtering = topic feeds.
WITH seed (name, country, language, feed_type, feed_url, category, enabled,
           fetch_method, fulltext_policy, source_channel, parser_quirk) AS (
    VALUES
        -- B-4: signal only (news channel: headline signals)
        ('Bloomberg Markets', 'US', 'en', 'rss', 'https://feeds.bloomberg.com/markets/news.rss', 'Economy', true, 'signal-only', 'none', 'news', 'skip-video'),
        ('Bloomberg Technology', 'US', 'en', 'rss', 'https://feeds.bloomberg.com/technology/news.rss', 'Technology', true, 'signal-only', 'none', 'news', 'skip-video'),
        ('Economist Latest', 'GB', 'en', 'rss', 'https://www.economist.com/latest/rss.xml', 'International', true, 'signal-only', 'none', 'news', NULL),
        ('Nikkei Asia', 'JP', 'en', 'rss', 'https://asia.nikkei.com/rss/feed/nar', 'International', true, 'signal-only', 'none', 'news', NULL),
        -- C-4: continuous engineering collection (crawl channel: domain material.
        -- API announcements / CSB / CCPS are registered at runtime by dedicated workers)
        ('ASME J. Pressure Vessel Technology', 'US', 'en', 'rss', 'https://asmedigitalcollection.asme.org/rss/site_1000037/LatestOpenIssueArticles_1000020.xml', 'Engineering', true, 'signal-only', 'none', 'crawl', NULL),
        ('arXiv cond-mat.mtrl-sci', 'US', 'en', 'rss', 'https://rss.arxiv.org/rss/cond-mat.mtrl-sci', 'Engineering', true, 'signal-only', 'none', 'crawl', NULL),
        ('arXiv physics.app-ph', 'US', 'en', 'rss', 'https://rss.arxiv.org/rss/physics.app-ph', 'Engineering', true, 'signal-only', 'none', 'crawl', NULL),
        ('IEEE Spectrum Energy', 'US', 'en', 'rss', 'https://spectrum.ieee.org/feeds/topic/energy.rss', 'Engineering', true, 'rss', 'feed-full', 'crawl', NULL),
        ('IEEE Spectrum Robotics', 'US', 'en', 'rss', 'https://spectrum.ieee.org/feeds/topic/robotics.rss', 'Engineering', true, 'rss', 'feed-full', 'crawl', NULL)
)
INSERT INTO news_sources
    (name, country, language, feed_type, feed_url, category, enabled,
     fetch_method, fulltext_policy, source_channel, parser_quirk)
SELECT v.name, v.country, v.language, v.feed_type, v.feed_url, v.category, v.enabled,
       v.fetch_method, v.fulltext_policy, v.source_channel::source_channel, v.parser_quirk
FROM seed AS v
WHERE NOT EXISTS (
    SELECT 1 FROM news_sources ns WHERE ns.name = v.name
);
|
||||
@@ -1,4 +1,4 @@
|
||||
"""B-3 구독 세션 Playwright fetcher (plan crawl-24x7-1).
|
||||
"""B-3 구독 세션 Playwright fetcher (plan crawl-24x7-1) + 익명 브라우저 fetch/다운로드 (사이클 3).
|
||||
|
||||
storage_state JSON(쿠키+localStorage 스냅샷) 기반 인증 페이지 fetch + 내용 기반 probe.
|
||||
- 동시 1 인스턴스 (글로벌 세마포어) — 계정 보호 + 사람 속도는 호출측 politeness 가 담당.
|
||||
@@ -7,9 +7,15 @@ storage_state JSON(쿠키+localStorage 스냅샷) 기반 인증 페이지 fetch
|
||||
부재 = 503 profile_missing (silent fallback 없음 — 호출측이 degrade).
|
||||
- 시간 기반 만료 판정 금지 — probe 는 알려진 유료 기사에서 본문 길이 + 페이월 마커 부재 검증
|
||||
(만료 후 200 '페이월 안내문'이 본문으로 저장되는 silent corruption 차단).
|
||||
|
||||
사이클 3 증축 (C-2 CCPS Beacon — aiche.org 가 평문 httpx 를 UA 무관 403):
|
||||
- /fetch profile 생략 = 익명 컨텍스트 (storage_state 없음, 공개 페이지의 WAF 우회 전용).
|
||||
- /download = referer 페이지를 먼저 방문(WAF 쿠키 획득) 후 같은 컨텍스트의
|
||||
request.get 으로 바이너리(PDF) 다운로드 — base64 반환, 60MB cap.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
@@ -23,6 +29,7 @@ logger = logging.getLogger("playwright-fetcher")
|
||||
AUTH_DIR = Path("/auth")
|
||||
NAV_TIMEOUT_MS = 45_000
|
||||
SETTLE_MS = 1_500 # domcontentloaded 후 lazy 본문 settle 대기
|
||||
MAX_DOWNLOAD_BYTES = 60 * 1024 * 1024
|
||||
|
||||
app = FastAPI(title="playwright-fetcher")
|
||||
_browser_slot = asyncio.Semaphore(1) # 동시 1 인스턴스 (B-3 ① persistent 제약과 동일 규율)
|
||||
@@ -30,7 +37,8 @@ _browser_slot = asyncio.Semaphore(1) # 동시 1 인스턴스 (B-3 ① persisten
|
||||
|
||||
class FetchReq(BaseModel):
    # Request body for POST /fetch.
    url: str
    # None = anonymous context (WAF bypass for public pages, e.g. CCPS).
    # A value = B-3 subscription session profile name.
    # The earlier required `profile: str` declaration was shadowed by this
    # one and has been removed as dead code.
    profile: str | None = Field(default=None, pattern=r"^[a-z0-9_-]{1,50}$")
|
||||
|
||||
|
||||
class ProbeReq(BaseModel):
|
||||
@@ -40,6 +48,13 @@ class ProbeReq(BaseModel):
|
||||
paywall_markers: list[str] = []
|
||||
|
||||
|
||||
class DownloadReq(BaseModel):
    # Request body for POST /download.
    url: str
    # When set, this page is visited first so the WAF challenge cookies are
    # loaded into the browser context before the download request is made.
    referer: str | None = None
    # Optional session profile name; None means no profile.
    profile: str | None = Field(
        default=None,
        pattern=r"^[a-z0-9_-]{1,50}$",
    )
|
||||
|
||||
|
||||
def _state_path(profile: str) -> Path:
|
||||
p = AUTH_DIR / f"{profile}.json"
|
||||
if not p.is_file():
|
||||
@@ -47,16 +62,23 @@ def _state_path(profile: str) -> Path:
|
||||
return p
|
||||
|
||||
|
||||
async def _browse(url: str, state: Path) -> tuple[str, str, str]:
|
||||
def _context_kwargs(state: Path | None) -> dict:
|
||||
kwargs = {"viewport": {"width": 1366, "height": 900}}
|
||||
if state is not None:
|
||||
# B-3 르몽드 세션 회귀 방지 — 기존 인증 fetch 의 locale 그대로
|
||||
kwargs["storage_state"] = str(state)
|
||||
kwargs["locale"] = "fr-FR"
|
||||
else:
|
||||
kwargs["locale"] = "en-US"
|
||||
return kwargs
|
||||
|
||||
|
||||
async def _browse(url: str, state: Path | None) -> tuple[str, str, str]:
|
||||
"""(html, final_url, visible_text). 요청당 브라우저 — 종료를 finally 로 보장."""
|
||||
async with async_playwright() as pw:
|
||||
browser = await pw.chromium.launch(headless=True)
|
||||
try:
|
||||
context = await browser.new_context(
|
||||
storage_state=str(state),
|
||||
viewport={"width": 1366, "height": 900},
|
||||
locale="fr-FR",
|
||||
)
|
||||
context = await browser.new_context(**_context_kwargs(state))
|
||||
page = await context.new_page()
|
||||
await page.goto(url, wait_until="domcontentloaded", timeout=NAV_TIMEOUT_MS)
|
||||
await page.wait_for_timeout(SETTLE_MS)
|
||||
@@ -76,17 +98,53 @@ def health():
|
||||
|
||||
@app.post("/fetch")
async def fetch(req: FetchReq):
    """Fetch one page through the browser and return its HTML and final URL.

    When ``req.profile`` is given, its storage-state file is resolved via
    ``_state_path``; when it is omitted the page is fetched in an anonymous
    context. Browser work is serialized through ``_browser_slot``.

    Raises:
        HTTPException: 502 with ``error_reason="browse_failed"`` when
            Playwright fails.
    """
    # Resolve the profile only when one was supplied. The superseded
    # unconditional `_state_path(req.profile)` call has been removed because
    # it would be invoked with None for anonymous requests.
    state = _state_path(req.profile) if req.profile else None
    async with _browser_slot:
        try:
            html, final_url, _ = await _browse(req.url, state)
        except PlaywrightError as e:
            logger.warning("fetch 실패 %s: %s", req.url, e)
            raise HTTPException(502, detail={"error_reason": "browse_failed", "message": str(e)[:300]})
    # Logged once; "-" stands for an anonymous request.
    logger.info("fetch ok profile=%s %s (%d bytes)", req.profile or "-", req.url, len(html))
    return {"html": html, "final_url": final_url}
|
||||
|
||||
|
||||
@app.post("/download")
async def download(req: DownloadReq):
    """Download a binary (PDF etc.) through the browser context.

    When ``req.referer`` is set, that page is visited first to obtain the
    WAF cookies, then the file is requested with a GET from the same
    context.

    Judging the returned ``status`` / ``content_type`` is the caller's
    (crawl_politeness) responsibility; here only transport-level errors
    are reported as 502 (no silent fallback).

    Returns:
        A dict with ``status``, ``content_type`` and the base64-encoded
        body under ``body_b64``.

    Raises:
        HTTPException: 502 ``download_failed`` on a Playwright error,
            502 ``too_large`` when the body exceeds ``MAX_DOWNLOAD_BYTES``.
    """
    state = _state_path(req.profile) if req.profile else None

    async def _pull():
        # One browser per request; closing it is guaranteed by `finally`.
        async with async_playwright() as pw:
            browser = await pw.chromium.launch(headless=True)
            try:
                context = await browser.new_context(**_context_kwargs(state))
                if req.referer:
                    warmup = await context.new_page()
                    await warmup.goto(
                        req.referer,
                        wait_until="domcontentloaded",
                        timeout=NAV_TIMEOUT_MS,
                    )
                    await warmup.wait_for_timeout(SETTLE_MS)
                response = await context.request.get(req.url, timeout=NAV_TIMEOUT_MS)
                # Read the body while the browser is still open.
                return response, await response.body()
            finally:
                await browser.close()

    async with _browser_slot:
        try:
            resp, payload = await _pull()
        except PlaywrightError as e:
            logger.warning("download 실패 %s: %s", req.url, e)
            raise HTTPException(502, detail={"error_reason": "download_failed", "message": str(e)[:300]})

    byte_count = len(payload)
    if byte_count > MAX_DOWNLOAD_BYTES:
        raise HTTPException(502, detail={"error_reason": "too_large", "bytes": byte_count})
    logger.info("download status=%d %s (%d bytes)", resp.status, req.url, byte_count)
    encoded = base64.b64encode(payload).decode()
    return {
        "status": resp.status,
        "content_type": resp.headers.get("content-type", ""),
        "body_b64": encoded,
    }
|
||||
|
||||
|
||||
@app.post("/probe")
|
||||
async def probe(req: ProbeReq):
|
||||
"""내용 기반 세션 probe — ok=False 사유를 명시 반환 (호출측이 health 에 기록)."""
|
||||
|
||||
@@ -0,0 +1,848 @@
|
||||
<!DOCTYPE html>
|
||||
|
||||
<html lang="en" class="no-js">
|
||||
<head>
|
||||
|
||||
<title>American Petroleum Institute | API | Standards News Highlights</title>
|
||||
|
||||
<link rel="apple-touch-icon" sizes="180x180" href="/library/APIWeb/favicon/apple-touch-icon.png">
|
||||
<link rel="icon" type="image/png" sizes="32x32" href="/library/APIWeb/favicon/favicon-32x32.png">
|
||||
<link rel="icon" type="image/png" sizes="16x16" href="/library/APIWeb/favicon/favicon-16x16.png">
|
||||
<link rel="manifest" href="/library/APIWeb/favicon/site.webmanifest">
|
||||
|
||||
|
||||
|
||||
<meta charset="UTF-8">
|
||||
<meta http-equiv="X-UA-Compatible" content="IE=edge">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1">
|
||||
|
||||
<meta name="keywords" content="api, american petroleum institute, what is energy, monogram, ICP, api standards, fossil fuels facts, api 510, api.org, api certification, api 653, api 570, api 610, api 650, api icp, keystone xl, LNG exports, pipeline, refinery, hydraulic fracturing, api 6a, petroleum, shale gas, api q1, api oil, api training, gasoline, natural gas, oil sands, groundwater, ethanol, E15," />
|
||||
|
||||
<meta name="description" content="Stay informed with the latest API standards updates, addenda, and global adoption trends. Explore how these standards support industry safety and innovation.
|
||||
" />
|
||||
|
||||
<link rel="canonical" href="https://www.api.org/products-and-services/standards/important-standards-announcements" />
|
||||
|
||||
<meta property="fb:admins" content="rachidmrad" />
|
||||
|
||||
<meta property="fb:app_id" content="880866755363034" />
|
||||
|
||||
<meta property="og:url" content="https://www.api.org/products-and-services/standards/important-standards-announcements" />
|
||||
|
||||
<meta property="og:type" content="website" />
|
||||
|
||||
<meta property="og:title" content="Standards News Highlights" />
|
||||
|
||||
<meta property="og:description" content="Stay informed with the latest API standards updates, addenda, and global adoption trends. Explore how these standards support industry safety and innovation.
|
||||
" />
|
||||
|
||||
<meta property="og:image" content="https://www.api.org/-/media/APIWebsite/news-policies-and-issues/liveblog/APILogo-liveblog-primary-debate-06262019.jpg" />
|
||||
|
||||
<meta property="og:site_name" content="American Petroleum Institute" />
|
||||
|
||||
<meta name="twitter:card" content="summary_large_image" />
|
||||
|
||||
<meta name="twitter:site" content="@APIenergy" />
|
||||
|
||||
<meta name="twitter:title" content="Standards News Highlights" />
|
||||
|
||||
<meta name="twitter:description" content="Stay informed with the latest API standards updates, addenda, and global adoption trends. Explore how these standards support industry safety and innovation.
|
||||
" />
|
||||
|
||||
<meta name="twitter:image" content="https://www.api.org/-/media/APIWebsite/news-policies-and-issues/liveblog/APILogo-liveblog-primary-debate-06262019.jpg" />
|
||||
|
||||
|
||||
|
||||
<style>
|
||||
.carousel {
|
||||
position: relative;
|
||||
z-index: 2;
|
||||
}
|
||||
.carousel-header {
|
||||
position: relative;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background-size: cover;
|
||||
background-repeat: no-repeat;
|
||||
background-position: top;
|
||||
overflow:hidden;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: flex-start;
|
||||
|
||||
}
|
||||
.carousel-mobile {
|
||||
display: block;
|
||||
width: 120%;
|
||||
height: auto;
|
||||
}
|
||||
.carousel-title {
|
||||
position: absolute;
|
||||
inset: 0;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
justify-content: center;
|
||||
line-height: 1.25;
|
||||
margin:0 auto 0 4rem;
|
||||
width: 70%;
|
||||
}
|
||||
.carousel-heading,
|
||||
.carousel-subtext {
|
||||
margin: 0;
|
||||
color: #fff;
|
||||
text-transform: uppercase;
|
||||
width: 80%;
|
||||
}
|
||||
@media screen and (min-width: 992px) {
|
||||
.carousel-mobile {
|
||||
display: none;
|
||||
}
|
||||
.carousel-title h1, .carousel-title h2 {
|
||||
font-size: 3.3rem;
|
||||
}
|
||||
.carousel-top {
|
||||
height: 600px;
|
||||
}
|
||||
}
|
||||
@media screen and (max-width:991px) {
|
||||
.carousel-top > div:first-child {
|
||||
background-image: none !important;
|
||||
}
|
||||
.carousel-top {
|
||||
height: 700px;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
<!-- Redesign CSS/Fonts -->
|
||||
<link rel="stylesheet" href="/library/APIWeb/css/style.css" />
|
||||
<link rel="stylesheet" href="https://use.typekit.net/wvd1mgg.css" />
|
||||
<script src="https://kit.fontawesome.com/a190867a63.js" crossorigin="anonymous"></script>
|
||||
<!--BEGIN COVEO NOINDEX-->
|
||||
<!-- Google Tag Manager -->
|
||||
<script>
|
||||
(function (w, d, s, l, i) {
|
||||
w[l] = w[l] || []; w[l].push({
|
||||
'gtm.start': new Date().getTime(), event: 'gtm.js'
|
||||
});
|
||||
var f = d.getElementsByTagName(s)[0],
|
||||
j = d.createElement(s), dl = l != 'dataLayer' ? '&l=' + l : '';
|
||||
j.async = true; j.src = 'https://www.googletagmanager.com/gtm.js?id=' + i + dl;
|
||||
f.parentNode.insertBefore(j, f);
|
||||
})(window, document, 'script', 'dataLayer', 'GTM-PR8RWJW');
|
||||
</script>
|
||||
<!-- End Google Tag Manager -->
|
||||
<!--END COVEO NOINDEX-->
|
||||
<script src="https://code.jquery.com/jquery-1.11.0.min.js"></script>
|
||||
<!-- reCAPTCHA -->
|
||||
<script src="https://www.google.com/recaptcha/api.js?render=explicit" async defer></script>
|
||||
<script type="text/javascript">
|
||||
var your_site_key = '6LdAV_ciAAAAAGP_PxfR-AzZcVbyQQXyJc9OV461';
|
||||
var recaptchaRenderAttempted = false;
|
||||
var recaptchaCheckCount = 0;
|
||||
var maxChecks = 20; // ~10 seconds max
|
||||
|
||||
function renderRecaptcha() {
|
||||
// Stop retrying if container doesn't exist
|
||||
var container = document.getElementById('ReCaptchContainer');
|
||||
if (!container) {
|
||||
if (recaptchaCheckCount === 0) {
|
||||
console.log('No reCAPTCHA container found. Skipping checks for this page.');
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// Already rendered?
|
||||
if (recaptchaRenderAttempted) return;
|
||||
|
||||
// Max retries
|
||||
if (recaptchaCheckCount >= maxChecks) {
|
||||
console.warn('reCAPTCHA load timeout after ' + (maxChecks * 500 / 1000) + 's');
|
||||
return;
|
||||
}
|
||||
|
||||
recaptchaCheckCount++;
|
||||
|
||||
if (typeof grecaptcha !== 'undefined' && typeof grecaptcha.render === 'function') {
|
||||
grecaptcha.render('ReCaptchContainer', {
|
||||
'sitekey': your_site_key,
|
||||
'theme': 'light',
|
||||
'type': 'image',
|
||||
'size': 'normal'
|
||||
});
|
||||
recaptchaRenderAttempted = true;
|
||||
console.log('reCAPTCHA rendered successfully');
|
||||
} else {
|
||||
// Only log first few waits to avoid console spam
|
||||
if (recaptchaCheckCount <= 5) {
|
||||
console.log('Waiting for grecaptcha (' + recaptchaCheckCount + ')...');
|
||||
}
|
||||
setTimeout(renderRecaptcha, 500);
|
||||
}
|
||||
}
|
||||
|
||||
// Start after DOM ready
|
||||
document.addEventListener('DOMContentLoaded', function () {
|
||||
renderRecaptcha();
|
||||
});
|
||||
|
||||
// Fallback for late load
|
||||
window.addEventListener('load', function () {
|
||||
setTimeout(renderRecaptcha, 1000);
|
||||
});
|
||||
</script>
|
||||
<!-- reCAPTCHA End -->
|
||||
|
||||
</head>
|
||||
|
||||
|
||||
<body class="">
|
||||
|
||||
|
||||
|
||||
<!-- Google Tag Manager (noscript) -->
|
||||
<noscript>
|
||||
<iframe src="https://www.googletagmanager.com/ns.html?id=GTM-PR8RWJW"
|
||||
height="0" width="0" style="display:none;visibility:hidden"></iframe>
|
||||
</noscript>
|
||||
<!-- End Google Tag Manager (noscript) -->
|
||||
|
||||
|
||||
|
||||
<header>
|
||||
<div class="header">
|
||||
<div class="second-nav">
|
||||
<div class="logo-container">
|
||||
<a href="/">
|
||||
<img id="apiLogo" src="/library/APIWeb/img/apiLogoPrimary.svg" alt="API Logo" />
|
||||
</a>
|
||||
<a href="/">
|
||||
<img id="apiMobileLogo" src="/library/APIWeb/img/apiMobileLogo.svg" alt="API Mobile Logo" />
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<ul class="utilities">
|
||||
|
||||
|
||||
<li>
|
||||
<a href="/about">About</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/membership">Membership</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://events.api.org/" target="_blank" rel="noopener noreferrer" class="external-url">
|
||||
Events
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://www.apiwebstore.org/?utm_campaign=apitowebstore&utm_source=navigation&utm_medium=web" target="_blank" rel="noopener noreferrer" class="external-url">
|
||||
Webstore
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="https://myportal.api.org/" target="_blank" rel="noopener noreferrer" class="external-url">
|
||||
API Portal
|
||||
</a>
|
||||
</li>
|
||||
|
||||
<li><a href="/contact" class="btn-red">Contact</a></li>
|
||||
</ul>
|
||||
|
||||
<div class="mobile-nav">
|
||||
<div class="nav-trigger menu-toggle">
|
||||
<img class="menu-icon"
|
||||
src="/library/APIWeb/img/icons/bars-solid-full.svg"
|
||||
data-cross-src="/library/APIWeb/img/icons/xmark-solid-full.svg"
|
||||
alt="Mobile menu trigger">
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="main-nav">
|
||||
|
||||
|
||||
<div class="parent-nav">
|
||||
<div class="main-item"
|
||||
id="parent-606c005fb35d407da123d1b7f735cfc6"
|
||||
data-haschildren="true"
|
||||
tabindex="0"
|
||||
role="button"
|
||||
aria-haspopup="true"
|
||||
aria-expanded="false"
|
||||
aria-label="Issues menu toggle">
|
||||
Issues
|
||||
</div>
|
||||
<div class="main-item"
|
||||
id="parent-5f629feb57834e2691684a3f5d4b24e6"
|
||||
data-haschildren="true"
|
||||
tabindex="0"
|
||||
role="button"
|
||||
aria-haspopup="true"
|
||||
aria-expanded="false"
|
||||
aria-label="Energy Insights menu toggle">
|
||||
Energy Insights
|
||||
</div>
|
||||
<div class="main-item" id="parent-95ad0674d8244aeb92db777cc4a9c2cf">
|
||||
<a href="/news-policy-and-issues/news" aria-label="News link">News</a>
|
||||
</div>
|
||||
<div class="main-item"
|
||||
id="parent-c374e7ac945b4221917ed410add09145"
|
||||
data-haschildren="true"
|
||||
tabindex="0"
|
||||
role="button"
|
||||
aria-haspopup="true"
|
||||
aria-expanded="false"
|
||||
aria-label="Products + Services menu toggle">
|
||||
Products + Services
|
||||
</div>
|
||||
|
||||
<div class="main-item search-container">
|
||||
<form>
|
||||
<div>
|
||||
|
||||
|
||||
|
||||
<div id="_051E6CD8-A1A3-4CF3-97EA-E3C6386C2F6B"
|
||||
data-search-interface-id='coveo728075ff'
|
||||
>
|
||||
|
||||
<div class='coveo-search-section'>
|
||||
|
||||
|
||||
|
||||
|
||||
<script>
|
||||
document.addEventListener("CoveoSearchEndpointInitialized", function() {
|
||||
var searchboxElement = document.getElementById("_185F67BF-1D73-4AEE-9564-FDE5B0EB24D9");
|
||||
searchboxElement.addEventListener("CoveoComponentInitialized", function() {
|
||||
CoveoForSitecore.initSearchboxIfStandalone(searchboxElement, "/searchresults");
|
||||
});
|
||||
})
|
||||
</script> <div id="_185F67BF-1D73-4AEE-9564-FDE5B0EB24D9_container" class="coveo-for-sitecore-search-box-container"
|
||||
data-prebind-maximum-age='currentMaximumAge'
|
||||
data-pipeline='API Site Search'
|
||||
>
|
||||
<div id="_185F67BF-1D73-4AEE-9564-FDE5B0EB24D9"
|
||||
class="CoveoSearchbox"
|
||||
data-enable-lowercase-operators='true'
|
||||
data-enable-omnibox='true'
|
||||
data-enable-partial-match='true'
|
||||
data-enable-query-suggest-addon='true'
|
||||
data-partial-match-keywords='5'
|
||||
data-partial-match-threshold='75%'
|
||||
data-prebind-maximum-age='currentMaximumAge'
|
||||
data-pipeline='API Site Search'
|
||||
data-placeholder='Find Something'
|
||||
data-query-suggest-character-threshold='0'
|
||||
data-clear-filters-on-new-query='true'
|
||||
>
|
||||
|
||||
|
||||
<script type="text/javascript">
|
||||
document.addEventListener("CoveoSearchEndpointInitialized", function() {
|
||||
var componentId = "_185F67BF-1D73-4AEE-9564-FDE5B0EB24D9";
|
||||
var componentElement = document.getElementById(componentId);
|
||||
|
||||
function showError(error) {
|
||||
console.error(error);
|
||||
}
|
||||
|
||||
function areCoveoResourcesIncluded() {
|
||||
return typeof (Coveo) !== "undefined";
|
||||
}
|
||||
|
||||
if (areCoveoResourcesIncluded()) {
|
||||
var event = document.createEvent("CustomEvent");
|
||||
event.initEvent("CoveoComponentInitialized", false, true);
|
||||
|
||||
setTimeout(function() {
|
||||
componentElement.dispatchEvent(event);
|
||||
}, 0);
|
||||
} else {
|
||||
componentElement.classList.add("invalid");
|
||||
showError("The Coveo Resources component must be included in this page.");
|
||||
}
|
||||
});
|
||||
</script>
|
||||
<div class="CoveoForSitecoreBindWithUserContext"></div>
|
||||
<div class="CoveoForSitecoreExpressions"></div>
|
||||
<div class="CoveoForSitecoreConfigureSearchHub" data-sc-search-hub="searchresults"></div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<script type="text/javascript">
|
||||
document.addEventListener("CoveoSearchEndpointInitialized", function() {
|
||||
var externalComponentsSection = document.getElementById("_051E6CD8-A1A3-4CF3-97EA-E3C6386C2F6B");
|
||||
CoveoForSitecore.initExternalComponentsSection(externalComponentsSection);
|
||||
});
|
||||
</script>
|
||||
|
||||
</form>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="dropdown">
|
||||
<div class="dropdown-panel" for="parent-606c005fb35d407da123d1b7f735cfc6">
|
||||
<div class="dropdown-desc">
|
||||
<p class="h5">Issues</p>
|
||||
<p>Learn how we engage with policy makers to ensure safe, reliable, and affordable energy for the future as demand continues to grow.</p>
|
||||
</div>
|
||||
|
||||
<div class="dropdown-items">
|
||||
<a href="/news-policy-and-issues/access" >Access & Production</a>
|
||||
<a href="/news-policy-and-issues/trade-global-markets" >Trade & Global Markets</a>
|
||||
<a href="/news-policy-and-issues/fuels-refining" >Fuels & Refining</a>
|
||||
<a href="/news-policy-and-issues/infrastructure-permitting" >Infrastructure & Permitting</a>
|
||||
<a href="/news-policy-and-issues/tax" >Tax Policy</a>
|
||||
<a href="/news-policy-and-issues/climate" >Climate Policy</a>
|
||||
<a href="/news-policy-and-issues/safety" >Industry Safety</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="dropdown-panel" for="parent-5f629feb57834e2691684a3f5d4b24e6">
|
||||
<div class="dropdown-desc">
|
||||
<p class="h5">Energy Insights</p>
|
||||
<p>API's Energy Insights Hub provides updated statistics, data visualizations, timely analysis, and in-depth reports on all aspects of the oil and natural gas industry.</p>
|
||||
</div>
|
||||
|
||||
<div class="dropdown-items">
|
||||
<a href="/energy-insights/charts-analysis" >Charts & Analysis</a>
|
||||
<a href="/energy-insights/industry-explained" >Industry Explained </a>
|
||||
<a href="/energy-insights/studies" >Studies</a>
|
||||
<a href="/energy-insights/statistics" >Statistics</a>
|
||||
</div>
|
||||
</div>
|
||||
<div class="dropdown-panel" for="parent-c374e7ac945b4221917ed410add09145">
|
||||
<div class="dropdown-desc">
|
||||
<p class="h5">Global Industry Services</p>
|
||||
<p>API’s Global Industry Services drives safety and efficiency within the oil and gas industry through standards, certifications, assessments, training and more.</p>
|
||||
<p class="add-on">
|
||||
<a href="/products-and-services/get-a-quote" >Request a Quote</a>
|
||||
</p>
|
||||
<p class="add-on">
|
||||
<a href="/products-and-services/certifications-directories" >Certifications Directories</a>
|
||||
</p>
|
||||
<p class="add-on">
|
||||
<a href="/products-and-services/worldwide-representatives" >Worldwide Representatives</a>
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<div class="dropdown-items">
|
||||
<a href="/products-and-services/standards" >Standards</a>
|
||||
<a href="/products-and-services/site-safety" >Process Safety Site Assessment Program</a>
|
||||
<a href="/products-and-services/pipeline-sms-assessment-program" >Pipeline SMS Assessment Program</a>
|
||||
<a href="/products-and-services/contractor-safety" >Pipeline SMS Contractor Assessment Program</a>
|
||||
<a href="/products-and-services/offshore-safety" >Center for Offshore Safety (COS)</a>
|
||||
<a href="/products-and-services/individual-certification-programs" >Individual Certification Programs (ICP)</a>
|
||||
<a href="/products-and-services/training" >Training</a>
|
||||
<a href="/products-and-services/api-monogram-and-apiqr" >API Monogram and APIQR</a>
|
||||
<a href="/products-and-services/api-monogram-and-apiqr#tab-repair-and-remanufacture">API Repair and Remanufacture Program</a>
|
||||
<a href="/products-and-services/witnessing-programs" >19B Perforator Program</a>
|
||||
<a href="/products-and-services/engine-oil" >Engine Oil (EOLCS)</a>
|
||||
<a href="/products-and-services/diesel-exhaust-fluid" >Diesel Exhaust Fluid (DEF)</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</header>
|
||||
|
||||
|
||||
|
||||
<!-- ===== Page content wrapper from redesign ===== -->
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
<div class="page-wrapper">
|
||||
<div class="large-banner" style="background-image:url('/-/media/APIWebsite/Banners/Large/gis-lg-v2.jpg');">
|
||||
<div class="title">
|
||||
<h1>Standards News Highlights</h1>
|
||||
</div>
|
||||
</div>
|
||||
<div class="page-wrapper-inner">
|
||||
<div class="breadcrumbs">
|
||||
|
||||
|
||||
<span class="breadcrumbs-parent"><a href="/products-and-services">Products + Services</a></span>
|
||||
/
|
||||
|
||||
|
||||
<span class="breadcrumbs-child">
|
||||
<a href="/products-and-services/standards">Standards</a>
|
||||
|
||||
</span>
|
||||
/
|
||||
|
||||
|
||||
<span class="breadcrumbs-child">Standards News Highlights</span>
|
||||
|
||||
</div>
|
||||
|
||||
<div class="two-columns">
|
||||
<div class="left">
|
||||
|
||||
|
||||
|
||||
<div class="pagination-wrapper">
|
||||
<div class="row">
|
||||
<div class="col-lg-8">
|
||||
<div class="pagination-container"><ul class="pagination"><li class="page-item active"><a>1</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=2&pageSize=10">2</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=3&pageSize=10">3</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=4&pageSize=10">4</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=5&pageSize=10">5</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=6&pageSize=10">6</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=7&pageSize=10">7</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=8&pageSize=10">8</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=9&pageSize=10">9</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=10&pageSize=10">10</a></li><li class="page-item disabled PagedList-ellipses"><a>…</a></li><li class="page-item PagedList-skipToNext"><a href="/products-and-services/standards/important-standards-announcements?page=2&pageSize=10" rel="next">»</a></li><li class="page-item PagedList-skipToLast"><a href="/products-and-services/standards/important-standards-announcements?page=12&pageSize=10">»»</a></li></ul></div>
|
||||
</div>
|
||||
<div class="col-lg-4">
|
||||
<form action="/products-and-services/standards/important-standards-announcements" method="post"> <div class="form-group">
|
||||
<select class="form-control" id="pageSize" name="pageSize" onchange="this.form.submit();"><option value="0">Show All</option>
|
||||
<option value="50">Show 50</option>
|
||||
<option value="20">Show 20</option>
|
||||
<option selected="selected" value="10">Show 10</option>
|
||||
</select>
|
||||
</div>
|
||||
</form> </div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-announces-47th-edition-of-foundational-line-pipe-standard-5l">API Announces 47th Edition of Foundational Line Pipe Standard</a></h3>
|
||||
|
||||
<p style="margin-bottom: 0in; line-height: normal;">WASHINGTON, June 2, 2026 — The American Petroleum Institute (API) today announced the publication of the <a href="https://nam04.safelinks.protection.outlook.com/?url=https%3A%2F%2Fwww.apiwebstore.org%2Fstandards%2F5L%3Futm_campaign%3D43765133-Standards-5L%26utm_source%3Dpr&data=05%7C02%7CMaxwellC%40api.org%7Cb4ba54a657b049ff58c808dec0a2bad7%7C2df2418fe75f46f0898d65f4eeecb14b%7C0%7C0%7C639160002307544852%7CUnknown%7CTWFpbGZsb3d8eyJFbXB0eU1hcGkiOnRydWUsIlYiOiIwLjAuMDAwMCIsIlAiOiJXaW4zMiIsIkFOIjoiTWFpbCIsIldUIjoyfQ%3D%3D%7C0%7C%7C%7C&sdata=qvuVR2PsJQx5M9oShqTN%2BBc%2FXQQiE9NdPwNm6MZ8VoE%3D&reserved=0" data-auth="NotApplicable" originalsrc="https://www.apiwebstore.org/standards/5L?utm_campaign=43765133-Standards-5L&utm_source=pr" data-outlook-id="e5cfad5e-c8c0-439b-9809-73c2341274b0" data-linkindex="0" title="Original URL: https://www.apiwebstore.org/standards/5L?utm_campaign=43765133-Standards-5L&utm_source=pr. Click or tap if you trust this link." style="color: blue; margin: 0px; padding: 0px; border: 0px; line-height: inherit;"><span style="text-decoration: underline;">47th edition of API Specification 5L (API Spec 5L), </span><em><span style="text-decoration: underline;">Line Pipe</span></em></a>. Originally published in 1924 as API’s first standard, API Spec 5L has supported the safe and reliable manufacture of steel line pipe used to transport oil and gas for more than a century. The 47th edition includes important new requirements across more than 15 topic areas, including high-frequency weld (HFW) pipe quality and pipe used in CO2 transport.</p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-announces-47th-edition-of-foundational-line-pipe-standard-5l">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-spec-5l-47th">API Specification 5L, Line Pipe</a></h3>
|
||||
|
||||
<p>API has published the 47th edition of American Petroleum Institute <a href="https://www.apiwebstore.org/standards/5L">Specification 5L (API Spec 5L), Line Pipe</a>. Originally introduced as API’s first standard in 1924, the updated edition includes new requirements across more than 15 topic areas to support the safe and reliable manufacture of steel line pipe used in energy transportation, including CO2 transport. </p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-spec-5l-47th">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-rp-1192">API Recommended Practice 1192 (RP 1192), Transportation of Carbon Dioxide by Pipeline</a></h3>
|
||||
|
||||
<p>API has published<span style="line-height: 18.4px;"><a href="https://www.apiwebstore.org/standards/1192?utm_campaign=32314451-rp-1192&utm_source=pub-announcement"> Recommended Practice 1192 (RP 1192), <em>Transportation of Carbon Dioxide by Pipeline<strong></strong></em></a><strong><em></em></strong></span><span style="line-height: 18.4px;">.This first edition standard provides performance requirements for the safe and reliable transport of carbon dioxide (CO<sub>2</sub>) by pipeline. It also addresses the design, construction, operation, and management of CO<sub>2</sub> </span><span style="line-height: 18.4px;">pipelines</span></p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-rp-1192">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-strengthens-requirements-for-steel-casing-and-tubing">API Strengthens Requirements for Steel Casing and Tubing</a></h3>
|
||||
|
||||
<p style="margin-bottom: 0in; line-height: normal;"><span>WASHINGTON, May 5, 2025 — The American Petroleum Institute (API) is pleased to announce the publication of an </span><a href="https://www.apiwebstore.org/standards/5CT?utm_campaign=Spec%205ct&utm_source=standardshighlights&utm_medium=PubNotice&__hstc=23321061.e37da81b94fb192a0eca1fd2b60ae651.1745351530524.1745592885893.1745605800922.3&__hssc=23321061.3.1745605800922&__hsfp=509228229">Addendum to the 11th edition of the API 5CT, Casing and Tubing</a><span>. The update strengthens the requirements for the manufacture of steel casing and tubing used in oil and gas drilling and production operations, enhancing safety, environmental protection and operational integrity.</span></p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-strengthens-requirements-for-steel-casing-and-tubing">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/addendum-1-rp-1183">Addendum to API RP 1183 for Improved Dent Screening</a></h3>
|
||||
|
||||
<p><span style="color: black;">In December 2020, American Petroleum Institute (API) published Recommended Practice 1183, First<sup> </sup>Edition (RP 1183), <em>Assessment and Management of Dents in Pipelines</em>. Since being issued, RP 1183 has been applied by pipeline operating companies and engineering consultants providing services to the energy pipeline industry to evaluate dents and deformations on pipeline systems and to support mitigation and repair decisions. RP 1183 includes various screening tools to estimate the remaining fatigue life of a dent in a pipeline. </span></p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/addendum-1-rp-1183">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/2025-international-standards-report-announcement">New API Report Highlights Broader Global Adoption of API Standards </a></h3>
|
||||
|
||||
<p>February 4, 2025 – The American Petroleum Institute (API) today released a new report, <em><a href="/-/media/APIWebsite/products-and-services/2025_Intnl-Usage_Report_web-final.pdf"><em>202</em><em>5 API Standards</em><em></em><em>International Usage Report</em></a></em>, detailing the growing international influence of API standards. The report identifies where governments and standards bodies reference API standards in policies, national and international standards, and technical regulations, highlighting the paramount role of API standards in advancing safety, sustainability, and efficiency across the global natural gas and oil industry.</p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/2025-international-standards-report-announcement">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/20s-3d-printing-update">API Enhances 3D Printing Guidelines with Updated Additive Manufacturing Standard</a></h3>
|
||||
|
||||
<p style="margin-bottom: 0in; line-height: normal;"><span style="color: black;">The American Petroleum Institute (API) is pleased to announce the release of the second edition of API Standard 20S, <em>Qualification of Metal Additive Manufacturing Processes and Components Production Control for Use in the Petroleum and Natural Gas Industries</em>. This update strengthens the industry’s ability to effectively deploy additive manufacturing (AM), or 3D printing, improving efficiency, supply chain resilience and sustainability across oil and natural gas operations.</span></p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/20s-3d-printing-update">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/addendum-1-to-api-rp-2001-10th-ed">Addendum 1 to API RP 2001, 10th Edition: Fire Protection in Refineries</a></h3>
|
||||
|
||||
<p>API has published Addendum 1 to API Recommended Practice 2001, 10th Edition - "Fire Protection in Refineries."<br />
|
||||
<br />
|
||||
This addendum strengthens existing fire safety measures by introducing new protocols for pre-planning and incident response in refineries.</p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/addendum-1-to-api-rp-2001-10th-ed">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-5ct-casing-and-tubing-addendum-1-11th">Addendum to the 11th edition of the API 5CT, Casing and Tubing</a></h3>
|
||||
|
||||
<p><span style="color: black;">The American Petroleum Institute (API) is pleased to announce the publication of an Addendum to the 11th edition of the API 5CT, <em>Casing and Tubing</em>. The update strengthens the requirements for the manufacture of steel casing and tubing used in oil and gas drilling and production operations, enhancing safety, environmental protection and operational integrity.</span></p>
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-5ct-casing-and-tubing-addendum-1-11th">More »</a></p>
|
||||
</div>
|
||||
<div class="media-item">
|
||||
<h3><a href="/products-and-services/standards/important-standards-announcements/api-updates-fire-protection-standard-for-refineries">API Updates Fire Protection Standard for Refineries</a></h3>
|
||||
|
||||
<p>October 24, 2024 – The American Petroleum Institute (API) today announced the publication of Addendum 1 to API RP 2001, 10th Edition, “Fire Protection in Refineries.” This addendum strengthens existing fire safety measures by introducing new protocols for pre-planning and incident response in refineries.</p>
|
||||
<br class="t-last-br" />
|
||||
<p><a class="more" href="/products-and-services/standards/important-standards-announcements/api-updates-fire-protection-standard-for-refineries">More »</a></p>
|
||||
</div>
|
||||
<div class="pagination-wrapper">
|
||||
<div class="row">
|
||||
<div class="col-lg-8">
|
||||
<div class="pagination-container"><ul class="pagination"><li class="page-item active"><a>1</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=2&pageSize=10">2</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=3&pageSize=10">3</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=4&pageSize=10">4</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=5&pageSize=10">5</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=6&pageSize=10">6</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=7&pageSize=10">7</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=8&pageSize=10">8</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=9&pageSize=10">9</a></li><li class="page-item"><a href="/products-and-services/standards/important-standards-announcements?page=10&pageSize=10">10</a></li><li class="page-item disabled PagedList-ellipses"><a>…</a></li><li class="page-item PagedList-skipToNext"><a href="/products-and-services/standards/important-standards-announcements?page=2&pageSize=10" rel="next">»</a></li><li class="page-item PagedList-skipToLast"><a href="/products-and-services/standards/important-standards-announcements?page=12&pageSize=10">»»</a></li></ul></div>
|
||||
</div>
|
||||
<div class="col-lg-4">
|
||||
<form action="/products-and-services/standards/important-standards-announcements" method="post"> <div class="form-group">
|
||||
<select class="form-control" id="pageSize" name="pageSize" onchange="this.form.submit();"><option value="0">Show All</option>
|
||||
<option value="50">Show 50</option>
|
||||
<option value="20">Show 20</option>
|
||||
<option selected="selected" value="10">Show 10</option>
|
||||
</select>
|
||||
</div>
|
||||
</form> </div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
</div>
|
||||
<div class="right">
|
||||
|
||||
|
||||
|
||||
<div class="sidebar">
|
||||
<ul>
|
||||
<li>
|
||||
<a href="/products-and-services/standards">Overview</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="active" href="/products-and-services/standards/important-standards-announcements">Standards News Highlights</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/purchase">Purchase</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/committees">Committees</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/global-standards">Global Standards</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/get-involved">Get Involved</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/standards-plan">Standards Plan</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/standards-inquiries">Requests for Interpretation</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="/products-and-services/standards/rights-and-usage-policy">Copyright Information</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<link rel="stylesheet" href="/library/APIWeb/css/subpage.css" />
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- ===== Footer placeholder (Footer.cshtml renders here) ===== -->
|
||||
<!--BEGIN COVEO NOINDEX-->
|
||||
<footer>
|
||||
|
||||
|
||||
|
||||
<div class="related-products">
|
||||
<div class="wrap">
|
||||
<div class="left">
|
||||
<div class="hubspot-form">
|
||||
<span>Sign-Up for Updates</span>
|
||||
<script charset="utf-8" type="text/javascript" src="//js.hsforms.net/forms/embed/v2.js"></script>
|
||||
<script>
|
||||
hbspt.forms.create({
|
||||
portalId: "20801443",
|
||||
formId: "3fbd74a7-aee1-41a5-9832-d92fee519bad",
|
||||
region: "na1"
|
||||
});
|
||||
</script>
|
||||
</div>
|
||||
</div>
|
||||
<div class="right">
|
||||
<h4>Certification Directories</h4>
|
||||
<p>Free directories listing all of the participants in our certification-related programs.</p>
|
||||
<a href="https://www.api.org/products-and-services/certifications-directories" class="btn-blue">View All</a>
|
||||
<hr />
|
||||
<h4>Request a Quote</h4>
|
||||
<p>Request a quotation for programs like API Monogram, APIQR, API Standards Subscription, PSSAP®, and more.</p>
|
||||
<a href="https://www.api.org/products-and-services/get-a-quote" class="btn-blue">Get a Quote</a>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<div class="footer-wrap">
|
||||
<div class="top-footer">
|
||||
<div class="left-footer">
|
||||
<div class="logo-container">
|
||||
<img src="/-/media/APIWebsite/Thumbs/api-logo-stacked.png" alt="API" />
|
||||
</div>
|
||||
<div class="footer-nav">
|
||||
<a href="/about">About</a>
|
||||
<a href="/about/careers">Careers</a>
|
||||
<a href="/contact">Contact</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="right-footer">
|
||||
<h4>Stay Connected</h4>
|
||||
<h5>API Energy</h5>
|
||||
<ul class="social-nav">
|
||||
<li>
|
||||
<a class="btn-social" href="https://www.facebook.com/TheAmericanPetroleumInstitute/">
|
||||
<i class="fab fa-facebook-f" aria-hidden="true"></i>
|
||||
<span class="visually-hidden">Follow us on Facebook</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="btn-social" href="https://www.instagram.com/americanpetroleum/">
|
||||
<i class="fab fa-instagram" aria-hidden="true"></i>
|
||||
<span class="visually-hidden">Follow us on Instagram</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="btn-social" href="https://twitter.com/APIenergy">
|
||||
<i class="fab fa-x-twitter" aria-hidden="true"></i>
|
||||
<span class="visually-hidden">Follow us on X</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="btn-social" href="https://www.linkedin.com/company/americanpetroleuminstitute/">
|
||||
<i class="fab fa-linkedin-in" aria-hidden="true"></i>
|
||||
<span class="visually-hidden">
|
||||
Follow us on LinkedIn
|
||||
</span>
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a class="btn-social" href="https://www.youtube.com/energy">
|
||||
<i class="fab fa-youtube" aria-hidden="true"></i>
|
||||
<span class="visually-hidden">Follow us on YouTube</span>
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
|
||||
<h5>API Global</h5>
|
||||
<ul class="social-nav">
|
||||
<li><a class="btn-social" href="https://www.facebook.com/OfficialAPIGlobal"><i class="fab fa-facebook-f" aria-hidden="true"></i></a></li>
|
||||
<li><a class="btn-social" href="https://twitter.com/apiglobal"><i class="fab fa-x-twitter" aria-hidden="true"></i></a></li>
|
||||
<li><a class="btn-social" href="https://www.linkedin.com/company/api-global-official-/"><i class="fab fa-linkedin-in" aria-hidden="true"></i></a></li>
|
||||
<li><a class="btn-social" href="https://www.youtube.com/channel/UCitegkCxi2r-GGJRabGpRKg"><i class="fab fa-youtube" aria-hidden="true"></i></a></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="bottom-footer">
|
||||
<p class="small">
|
||||
© Copyright 2026 - API. All Rights Reserved. | <a href="/terms-and-conditions">Terms & Conditions</a> | <a href="/privacy">Privacy</a></p>
|
||||
<div class="group">
|
||||
<p><a href="/privacy">Privacy & Cookies Notice</a></p>
|
||||
<p><a href="/terms-and-conditions">Terms & Conditions</a></p>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<script src="/library/APIWeb/js/menu.js"></script>
|
||||
<script src="/library/APIWeb/js/mobile-menu.js"></script>
|
||||
<script src="https://kit.fontawesome.com/a2be981ec3.js" crossorigin="anonymous"></script>
|
||||
</footer>
|
||||
<!--END COVEO NOINDEX-->
|
||||
|
||||
|
||||
<!--BEGIN COVEO NOINDEX-->
|
||||
<!-- Back to Top Button -->
|
||||
<a href="#" id="backToTop" class="back-to-top" aria-label="Back to Top"></a>
|
||||
|
||||
|
||||
<script>
|
||||
// Picture element HTML5 shiv for older browsers
|
||||
document.createElement("picture");
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
<!-- Coveo Search Resources -->
|
||||
<link rel="stylesheet" href="https://static.cloud.coveo.com/searchui/v2.10116/css/CoveoFullSearch.css" />
|
||||
<link rel="stylesheet" href="https://static.cloud.coveo.com/coveoforsitecore/ui/v0.64.7/css/CoveoForSitecore.css" />
|
||||
<script class="coveo-script" type="text/javascript" src='https://static.cloud.coveo.com/searchui/v2.10116/js/CoveoJsSearch.Lazy.min.js'></script>
|
||||
<script class="coveo-for-sitecore-script" type="text/javascript" src='https://static.cloud.coveo.com/coveoforsitecore/ui/v0.64.7/js/CoveoForSitecore.Lazy.min.js'></script>
|
||||
<script type="text/javascript" src="https://static.cloud.coveo.com/searchui/v2.10116/js/cultures/en.js"></script>
|
||||
|
||||
<div>
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- Resources -->
|
||||
<div id="SearchResourcesProperties_a3a158ca24074042a057852064d1ead7"
|
||||
class="CoveoForSitecoreContext"
|
||||
data-sc-should-have-analytics-component='true'
|
||||
data-sc-analytics-enabled='true'
|
||||
data-sc-current-language='en'
|
||||
data-prebind-sc-language-field-name='fieldTranslator'
|
||||
data-sc-language-field-name='_language'
|
||||
data-sc-labels='{"Created" : "Created" , "Created By" : "Created by" , "Creation Time" : "Creation time." , "Language" : "Language" , "Last Time Modified" : "Last time modified." , "Template" : "Template" , "Uniform resource identifier" : "URI" , "Updated By" : "Updated by" , "If the problem persists contact the administrator." : "If the problem persists contact the administrator." , "Search is currently unavailable" : "Oops! Something went wrong on the server." , "Ascending" : "Ascending" , "Descending" : "Descending"}'
|
||||
data-sc-maximum-age='900000'
|
||||
data-sc-page-name='important-standards-announcements'
|
||||
data-sc-page-name-full-path='/sitecore/content/Sites/API2/Home/products-and-services/standards/important-standards-announcements'
|
||||
data-sc-index-source-name='Coveo_web_index - Prod104'
|
||||
data-sc-is-in-experience-editor='false'
|
||||
data-sc-is-user-anonymous='true'
|
||||
data-sc-item-uri='sitecore://web/{1BA7D892-F03B-45B8-90A2-9F074C53FA6A}?lang=en&ver=1'
|
||||
data-sc-item-id='1ba7d892-f03b-45b8-90a2-9f074c53fa6a'
|
||||
data-prebind-sc-latest-version-field-name='fieldTranslator'
|
||||
data-sc-latest-version-field-name='_latestversion'
|
||||
data-sc-rest-endpoint-uri='/coveo/rest'
|
||||
data-sc-analytics-endpoint-uri='/coveo/rest/ua'
|
||||
data-sc-site-name='api2'
|
||||
data-sc-field-prefix='f'
|
||||
data-sc-field-suffix='7509'
|
||||
data-sc-prefer-source-specific-fields='false'
|
||||
data-sc-external-fields='[{"fieldName":"permanentid","shouldEscape":false}]'
|
||||
data-sc-source-specific-fields='[{"fieldName":"attachmentparentid"},{"fieldName":"author"},{"fieldName":"clickableuri"},{"fieldName":"collection"},{"fieldName":"concepts"},{"fieldName":"date"},{"fieldName":"filetype"},{"fieldName":"indexeddate"},{"fieldName":"isattachment"},{"fieldName":"language"},{"fieldName":"printableuri"},{"fieldName":"rowid"},{"fieldName":"size"},{"fieldName":"source"},{"fieldName":"title"},{"fieldName":"topparent"},{"fieldName":"topparentid"},{"fieldName":"transactionid"},{"fieldName":"uri"},{"fieldName":"urihash"}]'
|
||||
>
|
||||
</div>
|
||||
<script type="text/javascript">
|
||||
var endpointConfiguration = {
|
||||
itemUri: "sitecore://web/{1BA7D892-F03B-45B8-90A2-9F074C53FA6A}?lang=en&ver=1",
|
||||
siteName: "api2",
|
||||
restEndpointUri: "/coveo/rest"
|
||||
};
|
||||
if (typeof (CoveoForSitecore) !== "undefined") {
|
||||
CoveoForSitecore.SearchEndpoint.configureSitecoreEndpoint(endpointConfiguration);
|
||||
CoveoForSitecore.version = "5.0.1368.1";
|
||||
var context = document.getElementById("SearchResourcesProperties_a3a158ca24074042a057852064d1ead7");
|
||||
if (!!context) {
|
||||
CoveoForSitecore.Context.configureContext(context);
|
||||
}
|
||||
}
|
||||
</script>
|
||||
</div>
|
||||
<!--END COVEO NOINDEX-->
|
||||
<!-- news-policy-and-issues/blog -->
|
||||
<script>
|
||||
function formatDateElementsWithCoveo() {
|
||||
Coveo.$$(document).on('newResultDisplayed', function (event, args) {
|
||||
var dateElements = args.item.getElementsByClassName('posted-date');
|
||||
Array.prototype.forEach.call(dateElements, function (elem) {
|
||||
var timestamp = parseInt(elem.textContent.replace('Posted: ', '').trim(), 10);
|
||||
var date = new Date(timestamp);
|
||||
var options = { year: 'numeric', month: 'long', day: 'numeric' };
|
||||
var formattedDate = date.toLocaleDateString("en-US", options);
|
||||
|
||||
if (isNaN(date.getTime())) {
|
||||
elem.textContent = '';
|
||||
} else {
|
||||
elem.textContent = 'Posted: ' + formattedDate;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
function waitForCoveoReady() {
|
||||
if (typeof Coveo !== 'undefined' && Coveo.$) {
|
||||
formatDateElementsWithCoveo();
|
||||
} else {
|
||||
setTimeout(waitForCoveoReady, 100);
|
||||
}
|
||||
}
|
||||
|
||||
document.addEventListener('DOMContentLoaded', waitForCoveoReady);
|
||||
</script>
|
||||
<!--END news-policy-and-issues/blog-->
|
||||
</body>
|
||||
</html>
|
||||
Vendored
+196
@@ -0,0 +1,196 @@
|
||||
<?xml version='1.0' encoding='UTF-8'?>
|
||||
<rss xmlns:arxiv="http://arxiv.org/schemas/atom" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
|
||||
<channel>
|
||||
<title>physics.app-ph updates on arXiv.org</title>
|
||||
<link>http://rss.arxiv.org/rss/physics.app-ph</link>
|
||||
<description>physics.app-ph updates on the arXiv.org e-print archive.</description>
|
||||
<atom:link href="http://rss.arxiv.org/rss/physics.app-ph" rel="self" type="application/rss+xml"/>
|
||||
<docs>http://www.rssboard.org/rss-specification</docs>
|
||||
<language>en-us</language>
|
||||
<lastBuildDate>Wed, 10 Jun 2026 04:00:28 +0000</lastBuildDate>
|
||||
<managingEditor>rss-help@arxiv.org</managingEditor>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<skipDays>
|
||||
<day>Sunday</day>
|
||||
<day>Saturday</day>
|
||||
</skipDays>
|
||||
<item>
|
||||
<title>Limits of Trap-assisted Photomultiplication Gain</title>
|
||||
<link>https://arxiv.org/abs/2606.10236</link>
|
||||
<description>arXiv:2606.10236v1 Announce Type: new
|
||||
Abstract: Photodiodes based on trap-assisted current injection can exhibit internal photomultiplication with apparent quantum efficiencies far exceeding unity, raising the question of whether such gain fundamentally enhances detector sensitivity. We employ a minimal analytical framework based on a single gain-active trapped state coupling photogenerated carriers to contact injection. The gain is intrinsically self-limiting: the injection process that amplifies the current simultaneously accelerates relaxation of the gain-enabling state, producing an inherently nonlinear, operating-point-dependent response. The form of this nonlinearity is not universal -- once the trap level is generalized to an energetic distribution and recombination is allowed to be bimolecular, the same mechanism yields superlinear, linear, or strongly sublinear responses. A single chord gain is therefore not a meaningful device descriptor, and chord-gain comparisons across the literature conflate devices in different regimes. Treating trap occupancy and injection as coupled stochastic processes, we show that internal gain introduces a strictly non-negative fluctuation penalty from the dissipative dynamics that sustain the gain state. A local, small-signal detectivity exhibits a finite optimum yet cannot exceed the intrinsic thermodynamic limit of the underlying unity-gain photodiode. Gain is thus equivalent to driven stochastic amplification: it can suppress downstream readout noise, but cannot reduce the fundamental noise floor set by the primary photodetection process.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10236v1</guid>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>new</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
|
||||
<dc:creator>Ardalan Armin</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Filamentary Transport and Thermoelectric Effects in Mushroom Phase Change Memory Cells</title>
|
||||
<link>https://arxiv.org/abs/2606.10262</link>
|
||||
<description>arXiv:2606.10262v1 Announce Type: new
|
||||
Abstract: We performed a 2D finite-element electrothermal computational study of thermoelectric effects and filamentary electronic transport in Ge$_2$Sb$_2$Te$_5$ mushroom phase change memory cells during Reset and Set operations, accounting for spatial activation energy variations in amorphous Ge$_2$Sb$_2$Te$_5$ and phase-change dynamics. Reset operations with current going from the top electrode to the narrow 4 nm bottom electrode require $\sim$3x less energy and power, and $\sim$2x lower current to achieve the same Reset resistance, compared to the opposite polarity, due to thermoelectric effects. Filamentary conduction, electrical breakdown, thermal runaway, and local crystallization of amorphous Ge$_2$Sb$_2$Te$_5$ depend on current polarity and thermal boundary conditions, and determine the location, shape, and volume of the programming region, which may be significantly smaller than the semi-cylindrical mushroom region. The programming volume does not scale with contact dimensions larger than 10 nm. Larger contact areas introduce increased device-to-device and cycle-to-cycle variability due to filamentary conduction but are expected to lead to higher reliability and endurance.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10262v1</guid>
|
||||
<category>physics.app-ph</category>
|
||||
<category>cond-mat.mtrl-sci</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>new</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
|
||||
<dc:creator>Md Samzid Bin Hafiz, Helena Silva, Ali Gokirmak</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Fast-Neutron Irradiation Effect in Heteroepitaxial $\beta$-Ga$_2$O$_3$ Schottky Diodes Fabricated on Low-Cost Sapphire Substrates</title>
|
||||
<link>https://arxiv.org/abs/2606.10269</link>
|
||||
<description>arXiv:2606.10269v1 Announce Type: new
|
||||
Abstract: In this work, we investigate the response of Ni/$\beta$-Ga$_2$O$_3$ Schottky barrier diodes fabricated on c-plane sapphire to fast-neutron irradiation up to a fluence of $1\times10^{15}$ n$\cdot$cm$^{-2}$. The LPCVD-grown heteroepitaxial structure consists of an unintentionally doped buffer, an n$^{+}$ contact layer, and an n-type drift layer, with mesa isolation realized by plasma-free Ga-assisted LPCVD etching. Prior to irradiation, the devices exhibit a turn-on voltage of 1.20 V, specific on-resistance of 8.43 m$\Omega\cdot$cm$^2$, ideality factor of 1.32, and Schottky barrier height of 1.29 eV. Following irradiation, the devices remain operational, although the forward current decreases, the turn-on voltage increases to 2.40 V, and the barrier height increases to 1.34 eV. Capacitance-voltage measurements reveal a $\sim$50% reduction in net donor concentration, corresponding to a carrier-removal rate of $\sim$105 cm$^{-1}$. Temperature-dependent measurements from 25 to 250 $^\circ$C confirm that thermionic emission remains the dominant transport mechanism and show significant suppression of reverse leakage current after irradiation. The breakdown voltage increases from 101 to 135 V, consistent with neutron-induced donor compensation. TCAD simulations show a more uniform electric-field distribution and reduced field crowding at the Schottky edge after irradiation. These results provide insight into neutron-induced donor compensation in heteroepitaxial $\beta$-Ga$_2$O$_3$ and demonstrate the ability of LPCVD-grown $\beta$-Ga$_2$O$_3$ Schottky diodes on sapphire to maintain stable operation under high-fluence neutron environments relevant to space and nuclear electronics.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10269v1</guid>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>new</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<dc:creator>Saleh Ahmed Khan, Ahmed Ibreljic, Sourav Sarker, Stephen Margiotta, Anhar Bhuiyan</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Virtual-Array Operational Modal Analysis of Rolling Tires Using a Single Tire Cavity Accelerometer</title>
|
||||
<link>https://arxiv.org/abs/2606.10437</link>
|
||||
<description>arXiv:2606.10437v1 Announce Type: new
|
||||
Abstract: The dynamics of rolling tires significantly influence the low-frequency (0-500 Hz) structure-borne noise within vehicles. Accurately characterizing these dynamics under realistic operating conditions remains challenging. Current state-of-the-art methods, primarily relying on Laser Doppler Vibrometers (LDV), are complex to implement, time-intensive, and generally limited to smooth tires in laboratory environments due to issues with speckle formation on treaded surfaces. This study introduces an innovative strategy for Operational Modal Analysis (OMA) of a rolling tire using a single wireless Tire Cavity Accelerometer (TCA) together with two optical sensors. The methodology leverages the non-integer ratio between the tire and drum diameters in a test rig to create a virtual sensor array. By utilizing optical sensors to time-stamp the cleat impact (on the drum) precisely and the TCA position (on the tire), the vibration responses from multiple revolutions are clustered according to the TCA's circumferential position at the moment of impact. This effectively synthesizes responses from an array of virtual sensors distributed around the tire circumference using data from a single test run. The clustered signals are conditioned using order tracking to remove periodic components arising from contact patch deformation. Both Frequency Domain Decomposition (FDD) and Covariance-based Stochastic Subspace Identification (SSI-Cov) were employed for modal identification. The SSI-Cov method proved more robust, successfully identifying 11 circumferential modes up to 240 Hz. The proposed approach offers a significantly more efficient, cost-effective method for characterizing rolling tire dynamics, which is readily applicable to treaded tires and adaptable for on-road testing.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10437v1</guid>
|
||||
<category>physics.app-ph</category>
|
||||
<category>physics.data-an</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>new</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<dc:creator>Pradosh Pritam Dash, Ricardo Burdisso, Pablo A Tarazaga</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Finite-temperature Fe K-edge X-ray absorption simulations reveal local structural dynamics of an iron(II) photosensitizer in solution and the crystalline phase</title>
|
||||
<link>https://arxiv.org/abs/2606.10221</link>
|
||||
<description>arXiv:2606.10221v1 Announce Type: cross
|
||||
Abstract: Interpreting metal K-edge spectra of flexible photosensitizers requires a structural model that separates electronic signatures from thermal motion, solvent disorder, and crystal-packing effects. We combine Fe K-edge X-ray absorption measurements with second-generation Car--Parrinello ab initio molecular dynamics and all-electron Gaussian and augmented-plane-wave simulations for an iron(II) N-heterocyclic carbene photosensitizer in acetonitrile solution and in the crystalline phase. Ensemble-averaged spectra reproduce the main near-edge features in both environments and preserve the experimentally observed similarity of the first Fe coordination shell upon dissolution. Comparison with radial distributions extracted from extended fine-structure measurements validates the Fe--N and Fe--C coordination shells sampled by the trajectories, while element-resolved pair distributions explain why higher-shell experimental contrast is rapidly lost. The same dynamical ensembles reveal a broad out-of-plane distribution of the terpyridine nitrogen atom and a nearly octahedral distribution of the Fe-centered coordination planes. The results show that finite-temperature X-ray absorption simulations can provide a compact structural-dynamics picture of molecular transition metal photosensitizers by linking local spectra, solvent-phase ligand motion, and medium-range structural disorder within one trajectory-based description.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10221v1</guid>
|
||||
<category>cond-mat.mtrl-sci</category>
|
||||
<category>physics.app-ph</category>
|
||||
<category>physics.chem-ph</category>
|
||||
<category>physics.comp-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>cross</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<dc:creator>Patrick M\"uller, Lorena Fritsch, Matthias Bauer, Thomas D. K\"uhne</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Multi-channel Optical Vision Model</title>
|
||||
<link>https://arxiv.org/abs/2606.10253</link>
|
||||
<description>arXiv:2606.10253v1 Announce Type: cross
|
||||
Abstract: Spatial multiplexing is one of the natural strengths of optics, yet in optical neural networks, it is often used mainly as parallel throughput. Here, we show that spatial multiplexing in an optical neural network can be used not only to process multiple inputs in parallel, but also to define a trainable representational coordinate of the model. In three implemented scenarios, parallel-input processing, class-code readout and channel-mixed feature interaction, spatial channels act as independent learners, structured code dimensions, and interacting feature groups. The programmable free-space optical processor is trained through an online physical-forward/surrogate-backward scheme, where measured optical outputs define the forward pass while a differentiable surrogate estimates gradients and is continually fine-tuned during training from newly acquired optical data. We demonstrate these channel roles in image classification and regression tasks using multi-layer architectures with more than one million trainable optical phase parameters. We further implement a hybrid optical-electronic vision-language model, in which the optical neural network provides visual tokens to a digital transformer decoder for controlled image-captioning tasks. These results establish spatially multiplexed optical channels as a programmable feature and readout space for hybrid optical vision models.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10253v1</guid>
|
||||
<category>physics.optics</category>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>cross</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<dc:creator>Ali Momeni, Guillaume Noetinger, Tim Tuuva, Romain Fleury</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Spontaneous translation of charged droplets during evaporation on dry surfaces</title>
|
||||
<link>https://arxiv.org/abs/2606.10755</link>
|
||||
<description>arXiv:2606.10755v1 Announce Type: cross
|
||||
Abstract: Evaporating sessile droplets are usually treated as capillary objects, but droplets generated by routine handling can carry tens to hundreds of picocoulombs of electric charge. Here we combine Faraday-cup charge measurements with optical imaging to determine how such charge evolves as water droplets evaporate on dry polymer substrates. A zero-time protocol shows that a reproducible initial charge is preserved on poly(methylpentene) (PMP), whereas PDMS, SOCAL-coated surfaces, and polystyrene either exchange, dissipate, or inject charge on contact. On PMP, ensemble-resolved measurements reveal two regimes: the charge remains nearly constant during early evaporation and then decreases abruptly once the droplet reaches a small-volume state. This charge collapse coincides with spontaneous lateral translation rather than jetting or breakup. A Rayleigh-normalized analysis, including a spherical-cap stress correction and measured contact-angle retention scale, shows that motion occurs only after evaporation drives the droplet into a high electro-pinning state. High-speed imaging and kinematic analysis support a picture in which the subsequent motion is governed by repeated contact-line depinning and re-pinning: the total distance traveled is strongly affected by dry-surface pinning, whereas the peak translational velocity serves as a more robust indicator of the discharge strength. These results identify a dry-substrate mode of evaporation-driven electrostatic relaxation, distinct from Coulomb fission on lubricated surfaces, in which substrate electrostatic passivity enables charge retention, droplet geometry selects the instability onset, and whole-droplet translation provides the charge-release pathway.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2606.10755v1</guid>
|
||||
<category>cond-mat.soft</category>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>cross</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by-nc-nd/4.0/</dc:rights>
|
||||
<dc:creator>Riming Xu, Yanbo Li, Jiawen Zhang, Jin Wang, Yikai Li</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Programmable Integrated Magnonic Meshes</title>
|
||||
<link>https://arxiv.org/abs/2605.00290</link>
|
||||
<description>arXiv:2605.00290v2 Announce Type: replace
|
||||
Abstract: Integrated circuits are a cornerstone of modern information technology, and analog wave-based architectures could enable fast and efficient processing beyond conventional charge electronics. In magnonics, spin waves provide a highly tunable, compact and energy-efficient medium for on-chip microwave signal transport and processing. However, progress has been limited to isolated elements or short devices, severely limiting the overall functional complexity and scalability. Here we realize the key elements of universal magnonic circuitry, using a single-step direct laser writing process in yttrium iron garnet, and monolithically cascade them in multi-stage programmable devices and networks. Using magneto-optical Kerr effect microscopy, we show efficient spin-wave propagation and preserved phase coherence in waveguide structures for hundreds of wavelengths. In coupled waveguides, we observe complete and periodic power transfer over several coupling lengths, and in phase shifters we achieve arbitrary, tunable phase delays. By cascading these elements, we realize programmable splitters, frequency demultiplexers, and phase-controlled 2x2 routers, where output power and relative phase can be programmed on demand via external fields. Finally, we realize programmable magnonic interferometric meshes for on-chip radio-frequency signal routing, with up to six magnonic inputs and outputs and seven cascaded stages, without the need for intermediate amplification. These direct-write cascaded networks bridge a long-standing gap in magnonic scalability, offering a viable pathway toward integrated, large-scale architectures for both classical and quantum processing.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2605.00290v2</guid>
|
||||
<category>physics.app-ph</category>
|
||||
<category>cond-mat.mtrl-sci</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
|
||||
<dc:creator>Piero Florio, Matteo Vitali, Valerio Levati, Rasheed M. Ishola, Luca Ciaccarini Mavilla, Nora Lecis, Carsten Dubs, Riccardo Bertacco, Marco Madami, Silvia Tacchi, Daniela Petti, Edoardo Albisetti</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Interpretable deep convolutional model for nonlinear multivariate time series in complex systems</title>
|
||||
<link>https://arxiv.org/abs/2501.04339</link>
|
||||
<description>arXiv:2501.04339v2 Announce Type: replace-cross
|
||||
Abstract: We introduce the Deep Convolutional Interpreter for Time Series (DCIts), a deep-learning architecture for nonlinear multivariate time series that provides sample-specific, locally interpretable descriptions of the underlying interaction structure. Unlike standard black-box forecasters, DCIts learns a time- and lag-dependent transition tensor explicitly factorized into two components: a Focuser, which selects relevant source series and time lags via a sparse masking mechanism, and a Modeler, which assigns signed coefficients to these selected interactions. This decomposition yields a local lag-adjacency structure and signed source-lag contributions for every forecast instance, enabling direct inspection of effective connectivity; when higher-order branches are activated, the same framework yields order-resolved elementwise polynomial contributions. Architecturally, DCIts uses a diverse bank of convolutional filters to capture temporal and cross-variable dependencies, which are mapped through a bottleneck network to the transition tensor. On controlled benchmark datasets with a known interaction structure, we demonstrate that DCIts achieves competitive forecasting error relative to a strong interpretable baseline while recovering stable, signed, lag-resolved interaction patterns. The framework thus prioritizes intrinsic interpretability, using forecasting accuracy as a faithfulness constraint rather than the sole objective.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2501.04339v2</guid>
|
||||
<category>stat.ML</category>
|
||||
<category>cs.LG</category>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace-cross</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<arxiv:DOI>10.1063/5.0325209</arxiv:DOI>
|
||||
<arxiv:journal_reference>Chaos 36, 063116 (2026)</arxiv:journal_reference>
|
||||
<dc:creator>Domjan Baric, Davor Horvatic</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Probing laser-driven surface and subsurface dynamics via grazing-incidence XFEL scattering and diffraction</title>
|
||||
<link>https://arxiv.org/abs/2509.12015</link>
|
||||
<description>arXiv:2509.12015v2 Announce Type: replace-cross
|
||||
Abstract: We demonstrate a grazing-incidence x-ray platform that simultaneously records time-resolved grazing-incidence small-angle x-ray scattering (GISAXS) and grazing-incidence x-ray diffraction (GID) from a femtosecond laser-irradiated gold film above the melting threshold, with picosecond resolution at an x-ray free-electron laser (XFEL). By tuning the x-ray incidence angle, the probe depth is set to tens of nanometers, enabling depth-selective sensitivity to near-surface dynamics. GISAXS resolves ultrafast changes in surface nanomorphology (correlation length, roughness), while GID quantifies subsurface lattice compression, grain orientation, melting, and recrystallization. The approach overcomes photon-flux limitations of synchrotron grazing-incidence geometries and provides stringent, time-resolved benchmarks for complex theoretical models of ultrafast laser-matter interaction and warm dense matter. Looking ahead, the same depth-selective methodology is well suited to inertial confinement fusion (ICF): it can visualize buried-interface perturbations and interfacial thermal resistance on micron to sub-micron scales that affect instability seeding and burn propagation.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2509.12015v2</guid>
|
||||
<category>physics.optics</category>
|
||||
<category>physics.app-ph</category>
|
||||
<category>physics.ins-det</category>
|
||||
<category>physics.plasm-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace-cross</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<arxiv:DOI>10.1107/S2052252526001727</arxiv:DOI>
|
||||
<arxiv:journal_reference>IUCrJ Vol.13, Pages 249-259 (2026)</arxiv:journal_reference>
|
||||
<dc:creator>Lisa Randolph, \"Ozg\"ul \"Ozt\"urk, Dmitriy Ksenzov, Lingen Huang, Thomas Kluge, S. V. Rahul, Victorien Bouffetier, Carsten Baehtz, Mohammadreza Banjafar, Erik Brambrink, Fabien Brieuc, Byoung Ick Cho, Sebastian G\"ode, Tobias Held, Hauke H\"oppner, Gerhard Jakob, Mathias Kl\"aui, Zuzana Kon\^opkov\'a, Changhoo Lee, Gyusang Lee, Mikako Makita, Mikhail Mishchenko, Mianzhen Mo, Pascal D. Ndione, Michael Paulus, Alexander Pelka, Franziska Paschke-Bruehl, Thomas R. Preston, Baerbel Rethfeld, Christian R\"odel, Michal \v{S}m\'id, Ling Wang, Sebastian T. Weber, Lennart Wollenweber, Jan-Patrick Schwinkendorf, Christian Gutt, Motoaki Nakatsutsumi</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Real-space imaging reveals symmetry-selected nonlinear energy routing in a mechanical resonator</title>
|
||||
<link>https://arxiv.org/abs/2605.01469</link>
|
||||
<description>arXiv:2605.01469v2 Announce Type: replace-cross
|
||||
Abstract: Nonlinear energy exchange between vibrational modes underlies phenomena ranging from internal resonance and wave mixing to frequency-comb generation, yet modal interactions are typically inferred from spectra rather than directly observed in space. Here, we image nonlinear modal energy routing in a nearly mirror-symmetric microelectromechanical resonator using phase-locked multi-harmonic stroboscopic interferometry. By reconstructing the spatial eigenmode content of individual harmonics, we show that harmonics generated by a driven mode can be carried by distinct spatial eigenmodes, directly resolving spatial pathways of nonlinear energy transfer. Our measurements further reveal that this modal routing persists away from integer frequency matching: in the off-resonant regime, generated harmonic components are dominated by eigenmodes sharing the driven mode's mirror parity, whereas spectrally closer opposite-parity modes remain strongly suppressed. A nonlinear modal framework based on geometric nonlinearity shows that the relevant cubic coupling coefficients factorize into symmetry-dependent modal-overlap integrals, identifying mirror parity as the selection rule for nonlinear modal interaction. This work identifies spatial symmetry as a design parameter for nonlinear energy routing and provides a route to symmetry-engineered control of energy flow in multimode nonlinear wave systems.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2605.01469v2</guid>
|
||||
<category>physics.optics</category>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace-cross</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by-nc-nd/4.0/</dc:rights>
|
||||
<dc:creator>Ya Zhang, Yuko Terasawa, Qian Liu, Shumpei Takenaka, Hua Li, Yutao Xu, Xueyong Wei, Kazuhiko Hirakawa</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Designing single-layer PDMS devices for micron to millimeter-scale deformations</title>
|
||||
<link>https://arxiv.org/abs/2605.17402</link>
|
||||
<description>arXiv:2605.17402v2 Announce Type: replace-cross
|
||||
Abstract: The elasticity of PDMS has played a central role in advancing important microfluidic technologies, ranging from early valves to sophisticated organ-on-a-chip systems. However, most deformable microfluidic devices are based on geometries that require complex multi-layer PDMS architectures and include thin membranes, leading to difficult microfabrication and poor stability. Recently, Jain, Belkadi et al. (Biofabrication 16.3 (2024): 035010) introduced a single-layer PDMS device in which a wide and long microfluidic channel was deformed by pressurizing two adjacent air chambers. While they demonstrated how the channel ceiling deformation can be leveraged to compress biological materials, it remains unknown how the device geometry influences this deformation. Here, a systematic numerical study is performed on 14,336 variants of this device, through which the height of the PDMS layer is identified as the main feature that determines the ceiling deformation. Three modes of channel deformation are identified as the geometry are varied: a U shape with a central minimum, a W shape with two minima and a central maximum, or an inverse U shape with an upward-bulging single maximum. The numerical results are validated in experiments that reproduce the three modes for the predicted geometries and demonstrate vertical ceiling deformations ranging from a few microns to the millimeter scale. The generality of this approach is demonstrated for two example applications: A fully closing single-layer microfluidic valve and an optical lens of controllable anisotropic magnification. This work leverages the rapid prototyping enabled by 3D printing or micro-milling to open new perspectives in microfluidic actuation.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2605.17402v2</guid>
|
||||
<category>physics.flu-dyn</category>
|
||||
<category>physics.app-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace-cross</arxiv:announce_type>
|
||||
<dc:rights>http://creativecommons.org/licenses/by/4.0/</dc:rights>
|
||||
<dc:creator>Leon V. Gebhard, Alexandre S. Avaro, Gabriel Amselem, Charles N. Baroud</dc:creator>
|
||||
</item>
|
||||
<item>
|
||||
<title>Metasurfaces for neutral-atom trapping</title>
|
||||
<link>https://arxiv.org/abs/2605.30498</link>
|
||||
<description>arXiv:2605.30498v2 Announce Type: replace-cross
|
||||
Abstract: Trapped neutral atoms are one of the leading platforms for quantum information technologies, in particular for quantum computing, but scaling them to array sizes needed for utility-scale quantum computing is a major engineering challenge. Here we review optical metasurfaces as an enabling technology that provides fine control over the phase, amplitude, and polarization of light, with pixel counts far exceeding what is available with spatial light modulators (SLMs) and other active devices. The large pixel counts have recently led to demonstrations of arrays of optical tweezers with hundreds of thousands of sites and arrays of optical bottle-beams with complex three-dimensional trapping profiles. The flexibility and scalability of optical metasurfaces provides a route towards miniaturized, integrated, and highly scalable atomic experiments and instruments.</description>
|
||||
<guid isPermaLink="false">oai:arXiv.org:2605.30498v2</guid>
|
||||
<category>physics.optics</category>
|
||||
<category>physics.app-ph</category>
|
||||
<category>physics.atom-ph</category>
|
||||
<category>quant-ph</category>
|
||||
<pubDate>Wed, 10 Jun 2026 00:00:00 -0400</pubDate>
|
||||
<arxiv:announce_type>replace-cross</arxiv:announce_type>
|
||||
<dc:rights>http://arxiv.org/licenses/nonexclusive-distrib/1.0/</dc:rights>
|
||||
<dc:creator>Chengyu Fang, Minjeong Kim, Mark Saffman, Jennifer T. Choy, Mikhail Kats</dc:creator>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
<?xml version="1.0"?>
|
||||
<rss version="2.0" xmlns:prism="http://purl.org/rss/1.0/modules/prism/">
|
||||
<channel>
|
||||
<title>Journal of Pressure Vessel Technology Open Issues</title>
|
||||
<link>https://asmedigitalcollection.asme.org/pressurevesseltech</link>
|
||||
<description>
|
||||
</description>
|
||||
<language>en-us</language>
|
||||
<pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate>
|
||||
<lastBuildDate>Tue, 12 May 2026 00:00:37 GMT</lastBuildDate>
|
||||
<generator>Silverchair</generator>
|
||||
<managingEditor>ASMEDigitalCollection@asme.org</managingEditor>
|
||||
<webMaster>ASMEDigitalCollection@asme.org</webMaster>
|
||||
<item>
|
||||
<title>Research on Low-Temperature Mechanical Properties and Fracture Behavior of 09MnNiDR Steel Based on Small Punch Test</title>
|
||||
<link>https://asmedigitalcollection.asme.org/pressurevesseltech/article/148/5/051504/1232699/Research-on-Low-Temperature-Mechanical-Properties</link>
|
||||
<pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate>
|
||||
<description><span class="paragraphSection"><div class="boxTitle">Abstract</div>To develop a microdamage evaluation method applicable to in-service equipment under low-temperature conditions, this study systematically investigates the mechanical properties and fracture behavior of 09MnNiDR cryogenic steel over a broad temperature range from room temperature to −196 °C. The small punch test (SPT) technique is employed, supplemented by electron backscatter diffraction (EBSD) and scanning electron microscopy (SEM) for micromechanism analysis. Results indicate that under cryogenic conditions, dislocation slip is suppressed, leading to a more uniform distribution of plastic strain. Concurrently, the deformation process at low temperatures refines the grains within the plastic zone through mechanisms such as mechanical subdivision. As temperature decreases, the material strength increases linearly, exhibiting a significant cryogenic strengthening effect. The fracture mode transitions from ductile to brittle, with a ductile-to-brittle transition zone identified near −150 °C. An empirical formula based on SPT deformation energy is proposed to predict yield and true tensile strength, with prediction errors below 6%. By introducing a normalized energy parameter, an empirical correlation model is established between the SPT ductile-to-brittle transition temperature (DBTT) and the standard Charpy impact transition temperature. This study presents a viable methodology for safety assessment of in-service cryogenic pressure vessels through minimally invasive testing and performance prediction.</span></description>
|
||||
<prism:volume xmlns:prism="prism">148</prism:volume>
|
||||
<prism:number xmlns:prism="prism">5</prism:number>
|
||||
<prism:startingPage xmlns:prism="prism">051504</prism:startingPage>
|
||||
<prism:doi xmlns:prism="prism">10.1115/1.4071740</prism:doi>
|
||||
<guid>https://asmedigitalcollection.asme.org/pressurevesseltech/article/148/5/051504/1232699/Research-on-Low-Temperature-Mechanical-Properties</guid>
|
||||
</item>
|
||||
<item>
|
||||
<title>Improved Oxidation, Carburization Resistance and Creep Strength of Ethylene Pyrolysis Furnace Tubes at 1100 °C Through Aluminum and Tungsten Alloying</title>
|
||||
<link>https://asmedigitalcollection.asme.org/pressurevesseltech/article/148/4/041701/1232556/Improved-Oxidation-Carburization-Resistance-and</link>
|
||||
<pubDate>Mon, 11 May 2026 00:00:00 GMT</pubDate>
|
||||
<description><span class="paragraphSection"><div class="boxTitle">Abstract</div>The oxidation resistance, carburization resistance, and mechanical properties of ethylene pyrolysis furnace tube alloys modified by Al/Al-W alloying were comparatively investigated with conventional alloys using various microstructural characterization techniques and mechanical property testing methods. The Al-alloyed 29Cr44Ni4AlNb+microalloy (MA) exhibits superior oxidation and carburization resistance compared to conventional 25Cr35NiNb+MA and 35Cr45NiNb+MA alloys; however, its creep rupture life was significantly reduced. Further addition of W enhanced the solid solution strengthening effect, thereby improving high-temperature tensile properties and mitigating the detrimental impact of Al on creep performance. The creep rupture life of the Al/W-modified 27Cr44Ni5W3Al+MA alloy reached levels comparable to those of conventional alloys while retaining the beneficial effects of Al in improving oxidation and carburization resistance. Through alloying strategies, this study successfully achieved a balance between corrosion resistance and mechanical properties in ethylene pyrolysis furnace tube alloys, enabling them to withstand their harsh service conditions effectively.</span></description>
|
||||
<prism:volume xmlns:prism="prism">148</prism:volume>
|
||||
<prism:number xmlns:prism="prism">4</prism:number>
|
||||
<prism:startingPage xmlns:prism="prism">041701</prism:startingPage>
|
||||
<prism:doi xmlns:prism="prism">10.1115/1.4071682</prism:doi>
|
||||
<guid>https://asmedigitalcollection.asme.org/pressurevesseltech/article/148/4/041701/1232556/Improved-Oxidation-Carburization-Resistance-and</guid>
|
||||
</item>
|
||||
</channel>
|
||||
</rss>
|
||||
+8
File diff suppressed because one or more lines are too long
@@ -0,0 +1,11 @@
|
||||
<html><head><title>Givaudan Sense Colour Explosion</title></head><body>
|
||||
<!-- 실측 발췌 (2026-06-11, csb.gov givaudan-sense-colour-explosion-) — PDF 앵커 원형 보존:
|
||||
보고서/부록/업데이트 + recommendation 상태요약 혼재 페이지 -->
|
||||
<a href="/assets/1/20/Appendix_C_Reactivity_Testing_Results_Publication.pdf?17347" id="CT_InvestigationDetails_8_rptDocumentsCategory_ctl06_rptDocuments_ctl01_lnkDocument" class="bold" target="_blank">Appendix C – Reactivity Testing Results and Analysis</a>
|
||||
<a href="/assets/1/20/Givaudan_Investigation_Report_Publication.pdf?17346" id="CT_InvestigationDetails_8_rptDocumentsCategory_ctl29_rptDocuments_ctl01_lnkDocument" class="bold" target="_blank">Fatal Runaway Reaction and Explosion at Givaudan Sense Colour / D.D. Williamson</a>
|
||||
<a href="/assets/1/6/Givaudan_Investigation_Update_-_final.pdf?17132" id="CT_InvestigationDetails_8_rptDocumentsCategory_ctl35_rptDocuments_ctl01_lnkDocument" class="bold" target="_blank">Givaudan Explosion Investigation Update</a>
|
||||
<a target="_blank" href="/assets/recommendation/Status_Change_Summary_CRA_(Givaudan_R12).pdf" > Recommendation Status Change Summary</a>
|
||||
<a target="_blank" href="/assets/recommendation/Status_Change_Summary_Givaudan_Corp_(Givaudan_R8).pdf" > Recommendation Status Change Summary</a>
|
||||
<a target="_blank" href="/assets/recommendation/Status_Change_Summary_Givaudan_(Givaudan_R1).pdf" > Recommendation Status Change Summary</a>
|
||||
<a target="_blank" href="/assets/recommendation/Status_Change_Summary_Givaudan_(Givaudan_R2).pdf" > Recommendation Status Change Summary</a>
|
||||
</body></html>
|
||||
+1
@@ -0,0 +1 @@
|
||||
<?xml version="1.0" encoding="utf-8"?><urlset xmlns:xsi="https://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="https://www.sitemaps.org/schemas/sitemap/0.9 https://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" xmlns="https://www.sitemaps.org/schemas/sitemap/0.9"><url><loc>https://www.csb.gov/recommendations/preventive-maintenance/</loc><lastmod>2022-06-02T17:17:27-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/site-map/</loc><lastmod>2017-05-05T23:59:28-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/recommendations/preventive-maintenance-investigations/</loc><lastmod>2018-04-27T14:32:25-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/investigations/data-quality-/</loc><lastmod>2025-07-28T13:37:44-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/recommendations/preventive-maintenances/</loc><lastmod>2022-06-02T17:19:06-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/videos/video-feedback-form/</loc><lastmod>2017-05-04T18:17:43-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/investigations/</loc><lastmod>2017-05-08T16:06:42-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/investigations/completed-investigations/</loc><lastmod>2017-05-30T19:02:58-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/investigations/current-investigations/</loc><lastmod>2020-10-19T15:06:55-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/videos/</loc><lastmod>2017-03-09T13:38:53-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https
://www.csb.gov/videos/take-more-action-to-prevent-dust-explosions/</loc><lastmod>2013-05-17T16:46:08-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url><url><loc>https://www.csb.gov/videos/protect-public-employees-from-workplace-accidents/</loc><lastmod>2013-05-17T16:46:34-06:00</lastmod><changefreq>weekly</changefreq><priority>0.5</priority></url></urlset>
|
||||
+71
@@ -0,0 +1,71 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:atom="http://www.w3.org/2005/Atom">
|
||||
<channel>
|
||||
<title>
|
||||
<![CDATA[Latest Updates]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[The most recent blogs and online articles from The Economist]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/latest</link>
|
||||
<pubDate>Wed, 10 Jun 2026 21:11:56 +0000</pubDate>
|
||||
<lastBuildDate>Wed, 10 Jun 2026 21:11:56 +0000</lastBuildDate>
|
||||
<atom:link href="https://www.economist.com/latest/rss.xml" rel="self" type="application/rss+xml"/>
|
||||
<item>
|
||||
<title>
|
||||
<![CDATA[Syria is an unexpected beneficiary of the Gulf war]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[The revival of an old oil-export route from Iraq to the Mediterranean helps Syria’s new regime]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/middle-east-and-africa/2026/06/10/syria-is-an-unexpected-beneficiary-of-the-gulf-war</link>
|
||||
<guid isPermaLink="false">5737613e-c6cd-4cf0-b7da-fbfb52872f63</guid>
|
||||
<pubDate>Wed, 10 Jun 2026 19:26:42 +0000</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title>
|
||||
<![CDATA[How to win the World Cup]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[Being rich helps, but being open to immigration works best of all]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/international/2026/06/10/how-to-win-the-world-cup</link>
|
||||
<guid isPermaLink="false">1019df1e-5c1e-4784-ae0c-31741c176e41</guid>
|
||||
<pubDate>Wed, 10 Jun 2026 19:07:01 +0000</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title>
|
||||
<![CDATA[American capitalism is run by millionaires, not billionaires]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[They hide in plain sight—and wield enormous power]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/business/2026/06/10/american-capitalism-is-run-by-millionaires-not-billionaires</link>
|
||||
<guid isPermaLink="false">dbbcb101-a7de-472b-a62c-d969ab033b90</guid>
|
||||
<pubDate>Wed, 10 Jun 2026 19:01:31 +0000</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title>
|
||||
<![CDATA[New techniques can predict and prevent lung cancer ]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[A molecular signature can identify those most at risk]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/science-and-technology/2026/06/10/new-techniques-can-predict-and-prevent-lung-cancer</link>
|
||||
<guid isPermaLink="false">dbc7231c-6c7c-42fb-8930-bb099e1d3015</guid>
|
||||
<pubDate>Wed, 10 Jun 2026 18:48:35 +0000</pubDate>
|
||||
</item>
|
||||
<item>
|
||||
<title>
|
||||
<![CDATA[The World Cup has always been beset by scandal and strife]]>
|
||||
</title>
|
||||
<description>
|
||||
<![CDATA[So has FIFA, the outfit that administers it]]>
|
||||
</description>
|
||||
<link>https://www.economist.com/international/2026/06/10/the-world-cup-has-always-been-beset-by-scandal-and-strife</link>
|
||||
<guid isPermaLink="false">f2213e72-3531-4894-a33f-47bce2fea4e9</guid>
|
||||
<pubDate>Wed, 10 Jun 2026 18:25:19 +0000</pubDate>
|
||||
</item>
|
||||
|
||||
</channel>
|
||||
</rss>
|
||||
+4
File diff suppressed because one or more lines are too long
+262
@@ -0,0 +1,262 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
||||
<channel rdf:about="https://asia.nikkei.com/rss/feed/nar" xml:lang="en-GB">
|
||||
<title>Nikkei Asia</title>
|
||||
<link>https://asia.nikkei.com/</link>
|
||||
<description/>
|
||||
<items>
|
||||
<rdf:Seq>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/media-entertainment/tokyo-disneyland-magic-in-doubt-as-operator-s-stock-falls"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/tech/semiconductors/sk-hynix-to-triple-wafer-capacity-by-2034-chairman-chey"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/editor-s-picks/china-up-close/analysis-kim-jong-un-emerges-as-winner-in-summit-with-xi-jinping"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/ntt-sets-sights-on-nvidia-ai-race-with-500m-optical-network-fund"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/politics/japan-to-help-content-industry-sue-over-copyright-infringement-abroad"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/artificial-intelligence/anthropic-plugs-claude-ai-in-japan-for-automated-software-development"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/supply-chain/us-tungsten-scrap-exports-to-japan-soar-on-chinese-curbs"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/business-deals/tdk-to-buy-us-maker-of-ai-data-center-cooling-components-for-up-to-400m"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/insurance/nippon-life-s-private-credit-assets-reach-4.6bn"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/business-trends/us-firms-see-china-as-essential-despite-rising-economic-and-political-risks"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/economy/fires/manslaughter-other-charges-filed-over-hong-kong-s-wang-fuk-court-fire"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/adb-and-peers-need-to-anchor-international-order-president-kanda-says"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/asia-faces-risks-of-economic-spillover-from-iran-and-ai-disinformation"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/companies/swire-dangles-cathay-shares-in-600m-convertible-bond-issuance"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/materials/shin-etsu-to-set-up-rare-earth-smelter-in-japan-to-ease-reliance-on-china"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/editor-s-picks/interview/setting-sea-border-with-japan-vital-philippine-foreign-secretary"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/markets/strong-dollar-rally-weighs-heavier-on-struggling-asian-countries"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/fisheries/thailand-s-shrimp-industry-hit-by-malaysia-s-import-ban"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/applied-materials-opens-500m-manufacturing-campus-in-singapore"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/malaysia-s-anwar-warns-against-global-powers-weaponizing-trade"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/energy/apple-and-nvidia-supplier-foxconn-invests-in-vietnam-solar-wind-power"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/sports/world-cup-tests-asia-s-appetite-for-costly-broadcast-rights"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/finance/brookfield-bets-on-its-japan-business-to-top-hong-kong-and-singapore"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/tencent-raises-4.6bn-in-dual-dollar-yuan-bond-issuances"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/automobiles/electric-vehicles/ferrari-luce-ev-highlights-european-struggle-to-lure-back-china-s-superrich"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/markets/commodities/indian-families-scale-back-on-gold-for-weddings-as-prices-hover-near-highs"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/politics/defense/japan-s-new-defense-document-to-name-china-the-biggest-concern"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/food-beverage/japan-to-bolster-ip-protections-for-prized-new-fruit-vegetable-varieties"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/environment/climate-change/japan-s-jgc-bets-on-carbon-feeding-bacteria-to-create-bioplastics"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/travel-leisure/rural-japan-hopes-to-charm-domestic-travelers-priced-out-of-overseas-trips"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/energy/gm-partners-with-peak-energy-for-sodium-ion-battery-storage"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/tech-asia/japan-seeks-bigger-role-in-asia-s-subsea-cables-as-ai-rewires-demand"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/comment/why-japan-s-takaichi-has-stepped-back-from-boj-rate-hike-debate"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/transportation/japan-flying-car-startup-skydrive-aims-for-the-skies-in-2028"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/hanwha-qcells-kicks-off-first-fully-onshore-us-solar-supply-chain"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/energy/japan-s-mitsubishi-hc-canada-s-brookfield-to-buy-european-wind-solar-farms-in-ai-play2"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/technology/g7-plans-first-joint-statement-for-protecting-minors-on-social-media"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/automobiles/toyota-backs-japan-self-driving-startup-tier-iv-in-development-push"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/softbank/jpmorgan-chase-emerges-as-softbank-group-s-top-lender-surpassing-mizuho"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/energy/malaysia-to-promise-japan-maximum-possible-lng-naphtha"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/photos/in-focus-mindanao-reels-from-another-deadly-earthquake"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/politics/international-relations/us-china-tensions/pentagon-blacklists-alibaba-byd-and-baidu-over-alleged-military-ties"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/economy/bank-of-japan/bank-of-japan-set-to-hike-key-interest-rate-to-1"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/semiconductor-advances-a-must-for-data-centers-says-tokyo-electron-boss"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/politics/international-relations/xi-shores-up-china-s-sway-in-pyongyang-wary-of-north-korea-russia-ties"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/opinion/south-korea-election-yoon-s-legacy-partially-survives-progressive-victory"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/automobiles/electric-vehicles/chinese-entrepreneur-s-e-truck-startup-windrose-faces-unpaid-wage-claims"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/economy/bank-indonesia-raises-rates-0.25-at-emergency-meeting-to-defend-rupiah"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/japan-ex-pm-kishida-calls-for-deeper-energy-ties-with-south-korea"/>
|
||||
<rdf:li resource="https://asia.nikkei.com/business/markets/equities/binance-eyes-asian-stock-trading-as-bitcoin-slumps"/>
|
||||
</rdf:Seq>
|
||||
</items>
|
||||
</channel>
|
||||
<item rdf:about="https://asia.nikkei.com/business/media-entertainment/tokyo-disneyland-magic-in-doubt-as-operator-s-stock-falls">
|
||||
<title><![CDATA[Tokyo Disneyland 'magic' in doubt as operator's stock falls]]></title>
|
||||
<link>https://asia.nikkei.com/business/media-entertainment/tokyo-disneyland-magic-in-doubt-as-operator-s-stock-falls</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/tech/semiconductors/sk-hynix-to-triple-wafer-capacity-by-2034-chairman-chey">
|
||||
<title><![CDATA[SK Hynix to triple wafer capacity by 2034: Chairman Chey]]></title>
|
||||
<link>https://asia.nikkei.com/business/tech/semiconductors/sk-hynix-to-triple-wafer-capacity-by-2034-chairman-chey</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/editor-s-picks/china-up-close/analysis-kim-jong-un-emerges-as-winner-in-summit-with-xi-jinping">
|
||||
<title><![CDATA[Analysis: Kim Jong Un emerges as winner in summit with Xi Jinping]]></title>
|
||||
<link>https://asia.nikkei.com/editor-s-picks/china-up-close/analysis-kim-jong-un-emerges-as-winner-in-summit-with-xi-jinping</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/ntt-sets-sights-on-nvidia-ai-race-with-500m-optical-network-fund">
|
||||
<title><![CDATA[NTT sets sights on Nvidia, AI race with $500m optical network fund]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/ntt-sets-sights-on-nvidia-ai-race-with-500m-optical-network-fund</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/politics/japan-to-help-content-industry-sue-over-copyright-infringement-abroad">
|
||||
<title><![CDATA[Japan to help content industry sue over copyright infringement abroad]]></title>
|
||||
<link>https://asia.nikkei.com/politics/japan-to-help-content-industry-sue-over-copyright-infringement-abroad</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/artificial-intelligence/anthropic-plugs-claude-ai-in-japan-for-automated-software-development">
|
||||
<title><![CDATA[Anthropic plugs Claude AI in Japan for automated software development]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/artificial-intelligence/anthropic-plugs-claude-ai-in-japan-for-automated-software-development</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/supply-chain/us-tungsten-scrap-exports-to-japan-soar-on-chinese-curbs">
|
||||
<title><![CDATA[US tungsten scrap exports to Japan soar on Chinese curbs]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/supply-chain/us-tungsten-scrap-exports-to-japan-soar-on-chinese-curbs</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/business-deals/tdk-to-buy-us-maker-of-ai-data-center-cooling-components-for-up-to-400m">
|
||||
<title><![CDATA[TDK to buy US maker of AI data center cooling components for up to $400m]]></title>
|
||||
<link>https://asia.nikkei.com/business/business-deals/tdk-to-buy-us-maker-of-ai-data-center-cooling-components-for-up-to-400m</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/insurance/nippon-life-s-private-credit-assets-reach-4.6bn">
|
||||
<title><![CDATA[Nippon Life's private credit assets reach $4.6bn]]></title>
|
||||
<link>https://asia.nikkei.com/business/insurance/nippon-life-s-private-credit-assets-reach-4.6bn</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/business-trends/us-firms-see-china-as-essential-despite-rising-economic-and-political-risks">
|
||||
<title><![CDATA[US firms see China as essential despite rising economic and political risks]]></title>
|
||||
<link>https://asia.nikkei.com/business/business-trends/us-firms-see-china-as-essential-despite-rising-economic-and-political-risks</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/economy/fires/manslaughter-other-charges-filed-over-hong-kong-s-wang-fuk-court-fire">
|
||||
<title><![CDATA[Manslaughter, other charges filed over Hong Kong's Wang Fuk Court fire]]></title>
|
||||
<link>https://asia.nikkei.com/economy/fires/manslaughter-other-charges-filed-over-hong-kong-s-wang-fuk-court-fire</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/adb-and-peers-need-to-anchor-international-order-president-kanda-says">
|
||||
<title><![CDATA[ADB and peers need to 'anchor' international order: President Kanda says]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/adb-and-peers-need-to-anchor-international-order-president-kanda-says</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/asia-faces-risks-of-economic-spillover-from-iran-and-ai-disinformation">
|
||||
<title><![CDATA[Asia faces risks of economic spillover from Iran and AI disinformation]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/asia-faces-risks-of-economic-spillover-from-iran-and-ai-disinformation</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/companies/swire-dangles-cathay-shares-in-600m-convertible-bond-issuance">
|
||||
<title><![CDATA[Swire dangles Cathay shares in $600m convertible bond issuance]]></title>
|
||||
<link>https://asia.nikkei.com/business/companies/swire-dangles-cathay-shares-in-600m-convertible-bond-issuance</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/materials/shin-etsu-to-set-up-rare-earth-smelter-in-japan-to-ease-reliance-on-china">
|
||||
<title><![CDATA[Shin-Etsu to set up rare-earth smelter in Japan to ease reliance on China]]></title>
|
||||
<link>https://asia.nikkei.com/business/materials/shin-etsu-to-set-up-rare-earth-smelter-in-japan-to-ease-reliance-on-china</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/editor-s-picks/interview/setting-sea-border-with-japan-vital-philippine-foreign-secretary">
|
||||
<title><![CDATA[Setting sea border with Japan vital: Philippine foreign secretary]]></title>
|
||||
<link>https://asia.nikkei.com/editor-s-picks/interview/setting-sea-border-with-japan-vital-philippine-foreign-secretary</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/markets/strong-dollar-rally-weighs-heavier-on-struggling-asian-countries">
|
||||
<title><![CDATA[Strong dollar rally weighs heavier on struggling Asian countries]]></title>
|
||||
<link>https://asia.nikkei.com/business/markets/strong-dollar-rally-weighs-heavier-on-struggling-asian-countries</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/fisheries/thailand-s-shrimp-industry-hit-by-malaysia-s-import-ban">
|
||||
<title><![CDATA[Thailand's shrimp industry hit by Malaysia's import ban]]></title>
|
||||
<link>https://asia.nikkei.com/business/fisheries/thailand-s-shrimp-industry-hit-by-malaysia-s-import-ban</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/applied-materials-opens-500m-manufacturing-campus-in-singapore">
|
||||
<title><![CDATA[Applied Materials opens $500m manufacturing campus in Singapore]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/applied-materials-opens-500m-manufacturing-campus-in-singapore</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/malaysia-s-anwar-warns-against-global-powers-weaponizing-trade">
|
||||
<title><![CDATA[Malaysia's Anwar warns against global powers weaponizing trade]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/malaysia-s-anwar-warns-against-global-powers-weaponizing-trade</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/energy/apple-and-nvidia-supplier-foxconn-invests-in-vietnam-solar-wind-power">
|
||||
<title><![CDATA[Apple and Nvidia supplier Foxconn invests in Vietnam solar, wind power]]></title>
|
||||
<link>https://asia.nikkei.com/business/energy/apple-and-nvidia-supplier-foxconn-invests-in-vietnam-solar-wind-power</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/sports/world-cup-tests-asia-s-appetite-for-costly-broadcast-rights">
|
||||
<title><![CDATA[World Cup tests Asia's appetite for costly broadcast rights]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/sports/world-cup-tests-asia-s-appetite-for-costly-broadcast-rights</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/finance/brookfield-bets-on-its-japan-business-to-top-hong-kong-and-singapore">
|
||||
<title><![CDATA[Brookfield bets on its Japan business to top Hong Kong and Singapore]]></title>
|
||||
<link>https://asia.nikkei.com/business/finance/brookfield-bets-on-its-japan-business-to-top-hong-kong-and-singapore</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/tencent-raises-4.6bn-in-dual-dollar-yuan-bond-issuances">
|
||||
<title><![CDATA[Tencent raises $4.6bn in dual dollar, yuan bond issuances]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/tencent-raises-4.6bn-in-dual-dollar-yuan-bond-issuances</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/automobiles/electric-vehicles/ferrari-luce-ev-highlights-european-struggle-to-lure-back-china-s-superrich">
|
||||
<title><![CDATA[Ferrari Luce EV highlights European struggle to lure back China's superrich]]></title>
|
||||
<link>https://asia.nikkei.com/business/automobiles/electric-vehicles/ferrari-luce-ev-highlights-european-struggle-to-lure-back-china-s-superrich</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/markets/commodities/indian-families-scale-back-on-gold-for-weddings-as-prices-hover-near-highs">
|
||||
<title><![CDATA[Indian families scale back on gold for weddings as prices hover near highs]]></title>
|
||||
<link>https://asia.nikkei.com/business/markets/commodities/indian-families-scale-back-on-gold-for-weddings-as-prices-hover-near-highs</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/politics/defense/japan-s-new-defense-document-to-name-china-the-biggest-concern">
|
||||
<title><![CDATA[Japan's new defense document to name China the biggest concern]]></title>
|
||||
<link>https://asia.nikkei.com/politics/defense/japan-s-new-defense-document-to-name-china-the-biggest-concern</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/food-beverage/japan-to-bolster-ip-protections-for-prized-new-fruit-vegetable-varieties">
|
||||
<title><![CDATA[Japan to bolster IP protections for prized new fruit, vegetable varieties]]></title>
|
||||
<link>https://asia.nikkei.com/business/food-beverage/japan-to-bolster-ip-protections-for-prized-new-fruit-vegetable-varieties</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/environment/climate-change/japan-s-jgc-bets-on-carbon-feeding-bacteria-to-create-bioplastics">
|
||||
<title><![CDATA[Japan's JGC bets on carbon-feeding bacteria to create bioplastics]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/environment/climate-change/japan-s-jgc-bets-on-carbon-feeding-bacteria-to-create-bioplastics</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/travel-leisure/rural-japan-hopes-to-charm-domestic-travelers-priced-out-of-overseas-trips">
|
||||
<title><![CDATA[Rural Japan hopes to charm domestic travelers priced out of overseas trips]]></title>
|
||||
<link>https://asia.nikkei.com/business/travel-leisure/rural-japan-hopes-to-charm-domestic-travelers-priced-out-of-overseas-trips</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/energy/gm-partners-with-peak-energy-for-sodium-ion-battery-storage">
|
||||
<title><![CDATA[GM partners with Peak Energy for sodium-ion battery storage]]></title>
|
||||
<link>https://asia.nikkei.com/business/energy/gm-partners-with-peak-energy-for-sodium-ion-battery-storage</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/tech-asia/japan-seeks-bigger-role-in-asia-s-subsea-cables-as-ai-rewires-demand">
|
||||
<title><![CDATA[Japan seeks bigger role in Asia's subsea cables as AI rewires demand]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/tech-asia/japan-seeks-bigger-role-in-asia-s-subsea-cables-as-ai-rewires-demand</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/comment/why-japan-s-takaichi-has-stepped-back-from-boj-rate-hike-debate">
|
||||
<title><![CDATA[Why Japan's Takaichi has stepped back from BOJ rate hike debate]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/comment/why-japan-s-takaichi-has-stepped-back-from-boj-rate-hike-debate</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/transportation/japan-flying-car-startup-skydrive-aims-for-the-skies-in-2028">
|
||||
<title><![CDATA[Japan flying car startup SkyDrive aims for the skies in 2028]]></title>
|
||||
<link>https://asia.nikkei.com/business/transportation/japan-flying-car-startup-skydrive-aims-for-the-skies-in-2028</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/hanwha-qcells-kicks-off-first-fully-onshore-us-solar-supply-chain">
|
||||
<title><![CDATA[Hanwha Qcells kicks off first fully onshore US solar supply chain]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/hanwha-qcells-kicks-off-first-fully-onshore-us-solar-supply-chain</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/energy/japan-s-mitsubishi-hc-canada-s-brookfield-to-buy-european-wind-solar-farms-in-ai-play2">
|
||||
<title><![CDATA[Japan's Mitsubishi HC, Canada's Brookfield to buy European wind, solar farms in AI play]]></title>
|
||||
<link>https://asia.nikkei.com/business/energy/japan-s-mitsubishi-hc-canada-s-brookfield-to-buy-european-wind-solar-farms-in-ai-play2</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/technology/g7-plans-first-joint-statement-for-protecting-minors-on-social-media">
|
||||
<title><![CDATA[G7 plans first joint statement for protecting minors on social media]]></title>
|
||||
<link>https://asia.nikkei.com/business/technology/g7-plans-first-joint-statement-for-protecting-minors-on-social-media</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/automobiles/toyota-backs-japan-self-driving-startup-tier-iv-in-development-push">
|
||||
<title><![CDATA[Toyota backs Japan self-driving startup Tier IV in development push]]></title>
|
||||
<link>https://asia.nikkei.com/business/automobiles/toyota-backs-japan-self-driving-startup-tier-iv-in-development-push</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/softbank/jpmorgan-chase-emerges-as-softbank-group-s-top-lender-surpassing-mizuho">
|
||||
<title><![CDATA[JPMorgan Chase emerges as SoftBank Group's top lender, surpassing Mizuho]]></title>
|
||||
<link>https://asia.nikkei.com/business/softbank/jpmorgan-chase-emerges-as-softbank-group-s-top-lender-surpassing-mizuho</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/energy/malaysia-to-promise-japan-maximum-possible-lng-naphtha">
|
||||
<title><![CDATA[Malaysia to promise Japan maximum possible LNG, naphtha]]></title>
|
||||
<link>https://asia.nikkei.com/business/energy/malaysia-to-promise-japan-maximum-possible-lng-naphtha</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/photos/in-focus-mindanao-reels-from-another-deadly-earthquake">
|
||||
<title><![CDATA[In Focus: Mindanao reels from another deadly earthquake]]></title>
|
||||
<link>https://asia.nikkei.com/photos/in-focus-mindanao-reels-from-another-deadly-earthquake</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/politics/international-relations/us-china-tensions/pentagon-blacklists-alibaba-byd-and-baidu-over-alleged-military-ties">
|
||||
<title><![CDATA[Pentagon blacklists Alibaba, BYD and Baidu over alleged military ties]]></title>
|
||||
<link>https://asia.nikkei.com/politics/international-relations/us-china-tensions/pentagon-blacklists-alibaba-byd-and-baidu-over-alleged-military-ties</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/economy/bank-of-japan/bank-of-japan-set-to-hike-key-interest-rate-to-1">
|
||||
<title><![CDATA[Bank of Japan set to hike key interest rate to 1%]]></title>
|
||||
<link>https://asia.nikkei.com/economy/bank-of-japan/bank-of-japan-set-to-hike-key-interest-rate-to-1</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/semiconductor-advances-a-must-for-data-centers-says-tokyo-electron-boss">
|
||||
<title><![CDATA[Semiconductor advances a 'must' for data centers, says Tokyo Electron boss]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/semiconductor-advances-a-must-for-data-centers-says-tokyo-electron-boss</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/politics/international-relations/xi-shores-up-china-s-sway-in-pyongyang-wary-of-north-korea-russia-ties">
|
||||
<title><![CDATA[Xi shores up China's sway in Pyongyang, wary of North Korea-Russia ties]]></title>
|
||||
<link>https://asia.nikkei.com/politics/international-relations/xi-shores-up-china-s-sway-in-pyongyang-wary-of-north-korea-russia-ties</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/opinion/south-korea-election-yoon-s-legacy-partially-survives-progressive-victory">
|
||||
<title><![CDATA[South Korea election: Yoon's legacy partially survives progressive victory]]></title>
|
||||
<link>https://asia.nikkei.com/opinion/south-korea-election-yoon-s-legacy-partially-survives-progressive-victory</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/automobiles/electric-vehicles/chinese-entrepreneur-s-e-truck-startup-windrose-faces-unpaid-wage-claims">
|
||||
<title><![CDATA[Chinese entrepreneur's e-truck startup Windrose faces unpaid wage claims]]></title>
|
||||
<link>https://asia.nikkei.com/business/automobiles/electric-vehicles/chinese-entrepreneur-s-e-truck-startup-windrose-faces-unpaid-wage-claims</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/economy/bank-indonesia-raises-rates-0.25-at-emergency-meeting-to-defend-rupiah">
|
||||
<title><![CDATA[Bank Indonesia raises rates 0.25% at emergency meeting to defend rupiah]]></title>
|
||||
<link>https://asia.nikkei.com/economy/bank-indonesia-raises-rates-0.25-at-emergency-meeting-to-defend-rupiah</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/japan-ex-pm-kishida-calls-for-deeper-energy-ties-with-south-korea">
|
||||
<title><![CDATA[Japan ex-PM Kishida calls for deeper energy ties with South Korea]]></title>
|
||||
<link>https://asia.nikkei.com/spotlight/the-future-of-asia/future-of-asia-2026/japan-ex-pm-kishida-calls-for-deeper-energy-ties-with-south-korea</link>
|
||||
</item>
|
||||
<item rdf:about="https://asia.nikkei.com/business/markets/equities/binance-eyes-asian-stock-trading-as-bitcoin-slumps">
|
||||
<title><![CDATA[Binance eyes Asian stock trading as Bitcoin slumps]]></title>
|
||||
<link>https://asia.nikkei.com/business/markets/equities/binance-eyes-asian-stock-trading-as-bitcoin-slumps</link>
|
||||
</item>
|
||||
</rdf:RDF>
|
||||
@@ -0,0 +1,281 @@
|
||||
"""crawl-24x7 사이클 3 — 순수 함수/형태 회귀 테스트 (DB 불요).
|
||||
|
||||
B-4 signal-only(본문 무절단 + enqueue 가드) + C-4 피드 shape + CSB sitemap diff 파서
|
||||
+ API 공지 목록 파서 + CCPS beacon 링크 파서 + B-5 (Nikkei RDF = feedparser 네이티브,
|
||||
코드 분기 불요 박제).
|
||||
|
||||
fixture = 2026-06-11 live 박제 (tests/fixtures/, [[feedback_external_api_fixture_first]]).
|
||||
economist/ieee 는 repo 크기 사유로 item 수만 trim (헤더/푸터/item 구조 byte-faithful).
|
||||
"""
|
||||
|
||||
import re
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from pathlib import Path
|
||||
from types import SimpleNamespace
|
||||
|
||||
import feedparser
|
||||
import pytest
|
||||
|
||||
from workers import news_collector
|
||||
from workers.api_standards_collector import _parse_listing, _parse_pub_date
|
||||
from workers.ccps_collector import _beacon_pdf_links
|
||||
from workers.csb_collector import _parse_sitemap, _pdf_links, _should_skip
|
||||
from workers.news_collector import _clean_html, _entry_body
|
||||
|
||||
# Directory of recorded fixture files, resolved relative to this test module.
FIXTURES = Path(__file__).parent / "fixtures"
|
||||
|
||||
|
||||
def _feed(name: str):
    """Parse the fixture file ``name`` with feedparser and return the result.

    The file is read as UTF-8 text from the ``FIXTURES`` directory and the
    decoded text (not a path) is handed to ``feedparser.parse``.
    """
    fixture_path = FIXTURES / name
    xml_text = fixture_path.read_text(encoding="utf-8")
    return feedparser.parse(xml_text)
|
||||
|
||||
|
||||
def _source(**kw):
|
||||
return SimpleNamespace(
|
||||
fetch_method=kw.get("fetch_method", "rss"),
|
||||
fulltext_policy=kw.get("fulltext_policy", "none"),
|
||||
source_channel=kw.get("source_channel", "news"),
|
||||
)
|
||||
|
||||
|
||||
# ── B-4: 본문 선택 정책 ───────────────────────────────────────────────────────
|
||||
|
||||
class TestEntryBodyPolicy:
    """Body-selection policy of ``_entry_body`` against recorded feed fixtures."""

    def test_signal_only_preserves_full_abstract(self):
        """The arXiv abstract (~1.6K chars) would lose its tail under the default 1000-char cap."""
        first = _feed("arxiv_appph_rss.xml").entries[0]
        # Default path: the cleaned summary is truncated at 1000 characters.
        capped = _clean_html(first.get("summary", ""))
        source = _source(fetch_method="signal-only")
        text, version = _entry_body(source, first, capped)
        assert version == "rss-signal"
        assert len(body := text) > 1000
        assert 1000 >= len(capped)
        assert "Abstract" in body

    def test_feed_full_promotes_ieee_description(self):
        """A feed-full source yields a body longer than 1000 characters."""
        first = _feed("ieee_spectrum_energy_rss.xml").entries[0]
        capped = _clean_html(first.get("summary", ""))
        source = _source(fulltext_policy="feed-full")
        text, version = _entry_body(source, first, capped)
        assert version == "rss-feed-full"
        assert len(text) > 1000

    def test_default_source_keeps_capped_summary(self):
        """A default source passes the already-cleaned summary through unchanged."""
        first = _feed("arxiv_appph_rss.xml").entries[0]
        capped = _clean_html(first.get("summary", ""))
        text, version = _entry_body(_source(), first, capped)
        assert version == "rss"
        assert text == capped

    def test_signal_only_title_fallback_when_feed_has_no_summary(self):
        """Nikkei RDF has no description, so the summary argument (the title fallback) is used."""
        first = _feed("nikkei_asia_nar_rdf.xml").entries[0]
        title = first.get("title", "")
        source = _source(fetch_method="signal-only")
        text, version = _entry_body(source, first, title)
        assert version == "rss-signal"
        assert text == title
        assert title != ""
|
||||
|
||||
|
||||
# ── B-4: enqueue 가드 (signal-only = fulltext/summarize 절대 금지) ────────────
|
||||
|
||||
class TestSignalOnlyEnqueueGuard:
    """Which stages ``_enqueue_processing`` requests for signal-only sources.

    ``news_collector.enqueue_stage`` is swapped for a recorder, so each test
    asserts on the ordered list of stage names that were requested.
    """

    @staticmethod
    def _patch(monkeypatch):
        """Replace ``news_collector.enqueue_stage`` with a recorder and return its log."""
        recorded = []

        async def fake_enqueue(session, doc_id, stage):
            # Only the stage name is of interest to these tests.
            recorded.append(stage)

        monkeypatch.setattr(news_collector, "enqueue_stage", fake_enqueue)
        return recorded

    @pytest.mark.asyncio
    async def test_signal_only_overrides_misconfigured_page_policy(self, monkeypatch):
        """Even if the registry wrongly sets fulltext_policy='page', no page fetch happens (defensive)."""
        recorded = self._patch(monkeypatch)
        document = SimpleNamespace(id=1, edit_url="https://x/a")
        source = _source(fetch_method="signal-only", fulltext_policy="page")
        published = datetime.now(timezone.utc)
        await news_collector._enqueue_processing(None, document, source, published)
        assert recorded == ["embed", "chunk"]  # no fulltext/summarize stage

    @pytest.mark.asyncio
    async def test_signal_only_news_respects_30day_gate(self, monkeypatch):
        """A 40-day-old item on the default ('news') channel enqueues nothing."""
        recorded = self._patch(monkeypatch)
        document = SimpleNamespace(id=1, edit_url="https://x/a")
        source = _source(fetch_method="signal-only")
        stale = datetime.now(timezone.utc) - timedelta(days=40)
        await news_collector._enqueue_processing(None, document, source, stale)
        assert recorded == []

    @pytest.mark.asyncio
    async def test_signal_only_crawl_channel_indexes_regardless_of_age(self, monkeypatch):
        """A 400-day-old item on the 'crawl' channel is still embedded and chunked."""
        recorded = self._patch(monkeypatch)
        document = SimpleNamespace(id=1, edit_url="https://x/a")
        stale = datetime.now(timezone.utc) - timedelta(days=400)
        source = _source(fetch_method="signal-only", source_channel="crawl")
        await news_collector._enqueue_processing(None, document, source, stale)
        assert recorded == ["embed", "chunk"]
|
||||
|
||||
|
||||
# ── C-4 / B-4 피드 shape (시드 전 live 박제) ─────────────────────────────────
|
||||
|
||||
class TestNikkeiRdfNativeParsing:
    """B-5 'rdf' quirk: measured to need no code branch — feedparser normalizes RSS 1.0."""

    def test_rss10_entries_have_title_and_link(self):
        """The fixture parses cleanly as RSS 1.0 and every entry has a title and a Nikkei link."""
        parsed = _feed("nikkei_asia_nar_rdf.xml")
        assert parsed.version == "rss10"
        assert not parsed.bozo
        assert len(parsed.entries) >= 10
        for item in parsed.entries:
            assert item.get("title", "").strip()
            assert item.get("link", "").startswith("https://asia.nikkei.com/")

    def test_no_summary_no_dates_means_title_signal(self):
        """The first entry carries neither a summary nor parsed published/updated dates."""
        first = _feed("nikkei_asia_nar_rdf.xml").entries[0]
        assert not first.get("summary", "")
        assert not (first.get("published_parsed") or first.get("updated_parsed"))
|
||||
|
||||
|
||||
class TestBloombergFixture:
    """Shape checks on the recorded Bloomberg Markets feed."""

    def test_video_items_mixed_in_feed(self):
        """Video items are observed mixed into the feed — the basis for seed parser_quirk='skip-video'."""
        parsed = _feed("bloomberg_markets_rss.xml")
        links = [item.get("link", "") for item in parsed.entries]
        # Per the original note, the same pattern as news_collector's skip-video.
        video_pat = re.compile(r"/videos?/")
        video_links = [url for url in links if video_pat.search(url)]
        article_links = [
            url
            for url in links
            if "/news/articles/" in url and not video_pat.search(url)
        ]
        assert video_links
        assert article_links

    def test_articles_have_signal_grade_summary(self):
        """At least one entry has a summary of 100 characters or more."""
        parsed = _feed("bloomberg_markets_rss.xml")
        longest = max((len(item.get("summary", "")) for item in parsed.entries), default=0)
        assert longest >= 100
|
||||
|
||||
|
||||
class TestAsmeJpvtFixture:
    """Shape checks on the recorded ASME JPVT open-issues feed."""

    def test_journal_identity_and_abstract(self):
        """The feed title names the journal and every entry has a summary of at least 200 chars."""
        parsed = _feed("asme_jpvt_openissues_rss.xml")
        feed_title = parsed.feed.get("title", "")
        assert "Pressure Vessel Technology" in feed_title
        assert parsed.entries
        # The abstract serves as the body, so it must be substantial on every entry.
        assert all(len(item.get("summary", "")) >= 200 for item in parsed.entries)
|
||||
|
||||
|
||||
class TestArxivFixture:
    """Shape checks on the recorded arXiv feed."""

    def test_abs_links_are_stable_dedup_keys(self):
        """replace/cross re-announcements share the same /abs/ URL — edit_url dedup blocks them naturally."""
        parsed = _feed("arxiv_appph_rss.xml")
        assert parsed.entries
        abs_url = re.compile(r"https://arxiv\.org/abs/\d")
        for item in parsed.entries:
            assert abs_url.match(item.get("link", ""))

    def test_announce_type_in_summary(self):
        """The first entry's summary contains the 'Announce Type:' marker."""
        first = _feed("arxiv_appph_rss.xml").entries[0]
        summary = first.get("summary", "")
        assert "Announce Type:" in summary
|
||||
|
||||
|
||||
class TestEconomistFixture:
    """Shape checks on the recorded Economist 'latest' feed."""

    def test_oneline_signal_summaries(self):
        """Every entry carries a non-blank title and an economist.com link."""
        parsed = _feed("economist_latest_rss.xml")
        assert parsed.entries
        for item in parsed.entries:
            title = item.get("title", "")
            link = item.get("link", "")
            assert title.strip()
            assert link.startswith("https://www.economist.com/")
|
||||
|
||||
|
||||
# ── CSB sitemap diff 파서 ────────────────────────────────────────────────────
|
||||
|
||||
class TestCsbSitemapParsing:
    """Checks for the CSB sitemap helpers ``_parse_sitemap`` and ``_should_skip``."""

    def test_parse_pairs_with_tz_aware_lastmod(self):
        """Every parsed pair is a csb.gov URL with a timezone-aware lastmod."""
        xml = (FIXTURES / "csb_sitemap_sample.xml").read_text(encoding="utf-8")
        pairs = _parse_sitemap(xml)
        assert pairs
        for url, lastmod in pairs:
            assert url.startswith("https://www.csb.gov/")
            assert lastmod.tzinfo is not None

    def test_skip_sections_vs_root_slugs(self):
        """Section/utility URLs are skipped; root-slug and recommendation pages are kept."""
        assert _should_skip("https://www.csb.gov/videos/some-video/")
        assert _should_skip("https://www.csb.gov/investigations/completed-investigations/")
        assert _should_skip("https://www.csb.gov/site-map/")
        assert _should_skip("https://www.csb.gov/")  # home page
        # Investigation reports / news releases live at root slugs — collection targets.
        assert not _should_skip("https://www.csb.gov/givaudan-sense-colour-explosion-/")
        assert not _should_skip("https://www.csb.gov/recommendations/preventive-maintenance/")

    def test_watermark_diff_orders_oldest_first(self):
        """A ``>=`` watermark comparison keeps the boundary entry; results sort by lastmod."""
        xml = (FIXTURES / "csb_sitemap_sample.xml").read_text(encoding="utf-8")
        pairs = [p for p in _parse_sitemap(xml) if not _should_skip(p[0])]
        # Guard first: min() over an empty list raises ValueError, which would
        # surface as an opaque error instead of a clear assertion failure.
        assert pairs, "csb_sitemap_sample.xml yielded no collectable entries"
        watermark = min(lm for _, lm in pairs)
        changed = sorted(
            ((u, lm) for u, lm in pairs if lm >= watermark), key=lambda p: p[1]
        )
        # NOTE(review): this re-sorts a list sorted just above, so it cannot
        # fail; it documents the intended order rather than testing the collector.
        assert changed == sorted(changed, key=lambda p: p[1])
        assert len(changed) == len(pairs)  # >= includes the boundary
|
||||
|
||||
|
||||
class TestCsbPdfLinks:
    """Checks for ``_pdf_links`` against a recorded CSB investigation page excerpt."""

    # Both are evaluated when the class body runs, i.e. at module import.
    HTML = (FIXTURES / "csb_investigation_page_excerpt.html").read_text(encoding="utf-8")
    BASE = "https://www.csb.gov/givaudan-sense-colour-explosion-/"

    def test_report_pdfs_kept_with_cachebuster_query(self):
        """The report PDF is found, a query string survives, and all links are on csb.gov."""
        found = _pdf_links(self.HTML, self.BASE)
        report_hits = [
            url for url in found if "Givaudan_Investigation_Report_Publication.pdf" in url
        ]
        assert report_hits
        # The cache-buster query stays on the download URL (normalization applies
        # only on the filename/dedup axis).
        with_query = [url for url in found if "?" in url]
        assert with_query
        for url in found:
            assert url.startswith("https://www.csb.gov/")

    def test_recommendation_status_summaries_excluded(self):
        """No returned link points under /assets/recommendation/."""
        found = _pdf_links(self.HTML, self.BASE)
        assert found
        recommendation_hits = [url for url in found if "/assets/recommendation/" in url]
        assert not recommendation_hits

    def test_dedup_by_path(self):
        """Two links differing only by query collapse to one; an external host is dropped."""
        anchors = [
            '<a href="/assets/1/6/r.pdf?100">a</a>',
            '<a href="/assets/1/6/r.pdf?200">b</a>',
            '<a href="https://evil.example.com/x.pdf">c</a>',
        ]
        found = _pdf_links("".join(anchors), "https://www.csb.gov/page/")
        assert len(found) == 1  # same path once + external host excluded
        assert found[0].startswith("https://www.csb.gov/assets/1/6/r.pdf")
|
||||
|
||||
|
||||
# ── API 표준 공지 목록 파서 ──────────────────────────────────────────────────
|
||||
|
||||
class TestApiListingParsing:
    """Checks for the API standards announcement helpers ``_parse_listing`` and ``_parse_pub_date``."""

    # Read when the class body runs (module import); undecodable bytes are replaced.
    HTML = (FIXTURES / "api_standards_announcements_listing.html").read_text(
        encoding="utf-8", errors="replace"
    )

    def test_ten_unique_detail_links_per_page(self):
        """The listing yields exactly ten distinct detail URLs, none carrying a query string."""
        detail_urls = _parse_listing(self.HTML)
        assert len(detail_urls) == 10
        assert len(set(detail_urls)) == 10
        expected_prefix = (
            "https://www.api.org/products-and-services/standards/"
            "important-standards-announcements/"
        )
        for url in detail_urls:
            assert url.startswith(expected_prefix)
            assert "?" not in url  # pagination links (?page=) must not be mixed in

    def test_pub_date_parse(self):
        """A valid date parses to midnight UTC; missing or impossible dates give None."""
        parsed = _parse_pub_date("Published June 4, 2026 — API announces ...")
        expected = datetime(2026, 6, 4, tzinfo=timezone.utc)
        assert parsed == expected
        assert _parse_pub_date("no date here") is None
        assert _parse_pub_date("February 31, 2026") is None  # impossible calendar date = None
|
||||
|
||||
|
||||
# ── CCPS beacon 링크 파서 ────────────────────────────────────────────────────
|
||||
|
||||
class TestCcpsBeaconLinks:
    """Checks for the CCPS Beacon link extractor ``_beacon_pdf_links``."""

    def test_beacon_filter_and_relative_resolve(self):
        """Beacon PDFs are kept and resolved to absolute URLs; an unrelated brochure is dropped."""
        page_url = "https://www.aiche.org/ccps/resources/process-safety-beacon"
        anchors = [
            '<a href="/sites/default/files/2026-06/Beacon-June-2026.pdf">June</a>',
            '<a href="/sites/default/files/beacon_korean_2026_06.pdf"><b>Korean</b></a>',
            '<a href="/sites/default/files/other-brochure.pdf">brochure</a>',
            '<a href="/sites/default/files/monthly.pdf">Process Safety Beacon June</a>',
        ]
        found = _beacon_pdf_links("".join(anchors), page_url)
        assert "https://www.aiche.org/sites/default/files/2026-06/Beacon-June-2026.pdf" in found
        korean_hits = [url for url in found if "beacon_korean" in url]
        assert korean_hits
        # This one has no "beacon" in its path; it is matched via the anchor text.
        monthly_hits = [url for url in found if url.endswith("/monthly.pdf")]
        assert monthly_hits
        brochure_hits = [url for url in found if "other-brochure" in url]
        assert not brochure_hits
|
||||
Reference in New Issue
Block a user