feat(worker-pool): Registry-1C cap 1MB + deterministic compaction
사용자 결정 2026-05-19: 100KB cap 이 운영 7d 데이터 1.36MB 대비 부족 → cap 상향만으로 raw 비대화 위험. cap 1MB + payload compaction 병행.

fetch_recap_context() 변경:
- memo payload item field 축소 = id/title/ai_tldr/ai_event_kind/created_at (5 필드)
  (ai_bullets/file_type/source_channel/category/extracted_text 등 제외)
- memo top-N = RECAP_MEMO_TOP_N env (default 200) — 초과분은 aggregate 로
- aggregate = memos_by_day + memos_by_kind + omitted_memos
- payload_compacted flag = aggregate fallback 발현 여부
- events 는 raw (운영 7d 데이터에서 통상 0~소량)

internal_worker.py:
- PAYLOAD_MAX_BYTES → _payload_max_bytes() env override (WORKER_RECAP_PAYLOAD_MAX_BYTES default 1_000_000)
- JobsRecapResponse 에 payload_compacted / omitted_memos 노출
- 413 detail 에 "after compaction" 명시 + RECAP_MEMO_TOP_N 조정 안내

테스트 3 항목 신규 + 기존 endpoint 413 test 업데이트:
- 700 memo → 200 kept + 500 omitted + compacted=true + < 1MB
- 10 memo → compacted=false + omitted=0
- 비정상 큰 title (compaction 후에도 cap 초과) → 413 유지

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,202 @@
|
||||
"""PR-Worker-Pool-Registry-1C compaction — deterministic top-N + aggregate.
|
||||
|
||||
3 항목 (사용자 결정 2026-05-19 spec):
|
||||
1. 700+ memo → /jobs/recap 200 + payload_size < 1MB + payload_compacted=true + omitted > 0
|
||||
2. 작은 fixture (10 memos) → compacted=false + omitted=0
|
||||
3. 거대 title (1MB 초과 시뮬레이션) → 413 유지
|
||||
|
||||
monkeypatch 로 fetch_recap_context 또는 _memo_top_n 조정.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
import pytest_asyncio
|
||||
|
||||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "app"))
|
||||
|
||||
from httpx import ASGITransport, AsyncClient
|
||||
|
||||
from _worker_pool_helpers import (
|
||||
cleanup_worker_jobs,
|
||||
ensure_user,
|
||||
fetch_worker_job,
|
||||
mint_access_token,
|
||||
)
|
||||
|
||||
|
||||
@pytest_asyncio.fixture
async def env_setup(monkeypatch):
    """Set the laptop worker bot username in the environment for one test.

    The value is applied through ``monkeypatch.setenv``, so it is scoped to
    the test that requests this fixture.
    """
    bot_env = {"LAPTOP_WORKER_BOT_USERNAME": "laptop-worker-bot"}
    for name, value in bot_env.items():
        monkeypatch.setenv(name, value)
|
||||
|
||||
|
||||
def _make_fake_context(total_memos: int, top_n: int = 200) -> dict:
|
||||
"""compaction 모사 — _compact_memo 형식 (5 필드) 의 row."""
|
||||
from datetime import datetime, timezone
|
||||
from zoneinfo import ZoneInfo
|
||||
|
||||
kst = ZoneInfo("Asia/Seoul")
|
||||
now = datetime.now(timezone.utc)
|
||||
kept_count = min(total_memos, top_n)
|
||||
omitted = max(0, total_memos - top_n)
|
||||
memos = [
|
||||
{
|
||||
"id": i,
|
||||
"title": f"memo {i}",
|
||||
"ai_tldr": f"tldr {i}",
|
||||
"ai_event_kind": "note",
|
||||
"created_at": now.astimezone(kst).isoformat(),
|
||||
}
|
||||
for i in range(kept_count)
|
||||
]
|
||||
return {
|
||||
"user_id": 1,
|
||||
"days": 7,
|
||||
"period_start": now.astimezone(kst).isoformat(),
|
||||
"period_end": now.astimezone(kst).isoformat(),
|
||||
"timezone": "Asia/Seoul",
|
||||
"memos": memos,
|
||||
"events": [],
|
||||
"memo_count": total_memos,
|
||||
"event_count": 0,
|
||||
"summary_stats": {
|
||||
"total_memos": total_memos,
|
||||
"memos_kept": kept_count,
|
||||
"omitted_memos": omitted,
|
||||
"top_n": top_n,
|
||||
"memos_by_day": {"2026-05-19": omitted} if omitted else {},
|
||||
"memos_by_kind": {"note": omitted} if omitted else {},
|
||||
},
|
||||
"payload_compacted": omitted > 0,
|
||||
}
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_compaction_kicks_in_at_top_n(env_setup, monkeypatch):
    """700 memos -> 200 kept, 500 omitted, payload_compacted=true, payload < 1MB."""
    from api import internal_worker as iw_mod
    from main import app

    username = "test-recap-compact-1c"
    context = _make_fake_context(total_memos=700, top_n=200)

    async def fake_fetch(session, user_id, days=7):
        # Stamp the caller's user id onto the canned context before returning it.
        context["user_id"] = user_id
        return context

    monkeypatch.setattr(iw_mod, "fetch_recap_context", fake_fetch)
    await ensure_user(username)
    auth_headers = {"Authorization": f"Bearer {mint_access_token(username)}"}

    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            response = await client.post(
                "/internal/worker/jobs/recap",
                json={"days": 7},
                headers=auth_headers,
            )
        assert response.status_code == 200, response.text
        body = response.json()
        assert body["memo_count"] == 700
        assert body["payload_compacted"] is True
        assert body["omitted_memos"] == 500
        assert body["payload_bytes"] < 1_000_000
        # DB check: the job referenced by the response must be retrievable.
        stored_job = await fetch_worker_job(body["job_id"])
        assert stored_job is not None
    finally:
        # Always remove recap jobs so later tests start clean.
        await cleanup_worker_jobs("recap")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_no_compaction_when_under_top_n(env_setup, monkeypatch):
    """10 memos -> all 10 kept, 0 omitted, payload_compacted=false."""
    from api import internal_worker as iw_mod
    from main import app

    username = "test-recap-uncompact-1c"
    context = _make_fake_context(total_memos=10, top_n=200)

    async def fake_fetch(session, user_id, days=7):
        # Stamp the caller's user id onto the canned context before returning it.
        context["user_id"] = user_id
        return context

    monkeypatch.setattr(iw_mod, "fetch_recap_context", fake_fetch)
    await ensure_user(username)
    auth_headers = {"Authorization": f"Bearer {mint_access_token(username)}"}

    try:
        transport = ASGITransport(app=app)
        async with AsyncClient(transport=transport, base_url="http://test") as client:
            response = await client.post(
                "/internal/worker/jobs/recap",
                json={"days": 7},
                headers=auth_headers,
            )
        assert response.status_code == 200, response.text
        body = response.json()
        assert body["memo_count"] == 10
        assert body["payload_compacted"] is False
        assert body["omitted_memos"] == 0
    finally:
        # Always remove recap jobs so later tests start clean.
        await cleanup_worker_jobs("recap")
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_413_when_compacted_still_over_cap(env_setup, monkeypatch):
    """Abnormally large titles (over the cap even after compaction) -> still 413.

    ``fetch_recap_context`` is replaced by a fake that returns 200 memos
    with 6000-character titles (about 1.2MB of title text), which the
    endpoint is expected to reject with a 413 whose detail mentions
    "after compaction".
    """
    from api import internal_worker as iw_mod
    from main import app

    async def fake_fetch(session, user_id, days=7):
        huge_title = "x" * 6000  # per memo
        return {
            "user_id": user_id,
            "days": days,
            "period_start": "2026-05-12T00:00:00+09:00",
            "period_end": "2026-05-19T00:00:00+09:00",
            "timezone": "Asia/Seoul",
            "memos": [
                {
                    "id": i,
                    "title": huge_title,
                    "ai_tldr": None,
                    "ai_event_kind": None,
                    "created_at": "2026-05-19T00:00:00+09:00",
                }
                for i in range(200)
            ],  # ~1.2MB
            "events": [],
            "memo_count": 200,
            "event_count": 0,
            "summary_stats": {
                "total_memos": 200,
                "memos_kept": 200,
                "omitted_memos": 0,
                "top_n": 200,
                "memos_by_day": {},
                "memos_by_kind": {},
            },
            "payload_compacted": False,
        }

    monkeypatch.setattr(iw_mod, "fetch_recap_context", fake_fetch)
    await ensure_user("test-recap-413-large-1c")
    token = mint_access_token("test-recap-413-large-1c")

    try:
        async with AsyncClient(
            transport=ASGITransport(app=app), base_url="http://test"
        ) as c:
            r = await c.post(
                "/internal/worker/jobs/recap",
                json={"days": 7},
                headers={"Authorization": f"Bearer {token}"},
            )
        assert r.status_code == 413, r.text
        assert "after compaction" in r.json()["detail"]
    finally:
        # Same cleanup as the sibling tests: if the endpoint ever regresses
        # and accepts this payload, the job it created must not leak into
        # later tests.
        await cleanup_worker_jobs("recap")
|
||||
@@ -83,6 +83,8 @@ async def test_recap_endpoint_creates_worker_job(env_setup):
|
||||
assert js["memo_count"] >= 0
|
||||
assert js["event_count"] >= 0
|
||||
assert js["payload_bytes"] > 0
|
||||
assert "payload_compacted" in js
|
||||
assert "omitted_memos" in js
|
||||
# DB verify
|
||||
job = await fetch_worker_job(js["job_id"])
|
||||
assert job is not None
|
||||
@@ -93,7 +95,7 @@ async def test_recap_endpoint_creates_worker_job(env_setup):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_recap_payload_413_when_oversize(env_setup, monkeypatch):
|
||||
"""payload 100KB 초과 시 413."""
|
||||
"""payload 1MB 초과 시 413 (사용자 결정 2026-05-19 cap 1MB)."""
|
||||
from api import internal_worker as iw_mod
|
||||
from main import app
|
||||
|
||||
@@ -107,10 +109,20 @@ async def test_recap_payload_413_when_oversize(env_setup, monkeypatch):
|
||||
"period_start": "2026-05-12T00:00:00+09:00",
|
||||
"period_end": "2026-05-19T00:00:00+09:00",
|
||||
"timezone": "Asia/Seoul",
|
||||
"memos": [{"id": i, "title": "x" * 1000} for i in range(120)], # ~120KB
|
||||
# ~1.2MB raw payload (compaction 후에도 cap 초과 가정)
|
||||
"memos": [{"id": i, "title": "x" * 6000} for i in range(200)],
|
||||
"events": [],
|
||||
"memo_count": 120,
|
||||
"memo_count": 200,
|
||||
"event_count": 0,
|
||||
"summary_stats": {
|
||||
"total_memos": 200,
|
||||
"memos_kept": 200,
|
||||
"omitted_memos": 0,
|
||||
"top_n": 200,
|
||||
"memos_by_day": {},
|
||||
"memos_by_kind": {},
|
||||
},
|
||||
"payload_compacted": False,
|
||||
}
|
||||
|
||||
monkeypatch.setattr(iw_mod, "fetch_recap_context", fake_fetch)
|
||||
@@ -125,3 +137,4 @@ async def test_recap_payload_413_when_oversize(env_setup, monkeypatch):
|
||||
)
|
||||
assert r.status_code == 413, r.text
|
||||
assert "bytes" in r.json()["detail"]
|
||||
assert "after compaction" in r.json()["detail"]
|
||||
|
||||
Reference in New Issue
Block a user