# hyungi_document_server/app/workers/upload_cleanup.py

"""업로드 임시파일 cleanup 워커.

업로드 엔드포인트는 `<name>.uploading` 임시명으로 NAS Inbox 에 쓴 뒤
완료 시 atomic rename 한다. 정상 abort 는 endpoint 의 except 절이 정리하지만
프로세스 크래시 / 강제 종료 / 비정상 종료 시 `*.uploading` 잔존물이 남는다.

이 워커는 10분 주기로 Inbox 하위를 스캔해서
  - mtime 이 `orphan_max_age_sec` (기본 1시간) 보다 오래된 `*.uploading` 삭제
  - 최근 3회 (≈30분) 누적 삭제 수가 `cleanup_warn_threshold` (기본 10) 이상이면 WARNING

카운터는 in-memory deque (프로세스 재시작 시 리셋). 집요한 이슈만 잡는 것이 목적.
"""

from __future__ import annotations

import asyncio
import stat
import time
from collections import deque
from pathlib import Path

from core.config import settings
from core.utils import setup_logger

logger = setup_logger("upload_cleanup")

# Deletion counts of the 3 most recent runs: a ~30-minute window
# (10-minute schedule × 3). `maxlen=3` drops the oldest count automatically.
_recent_deletes: deque[int] = deque(maxlen=3)


def _sweep_orphan_uploads(
    inbox_path: Path, max_age: float, now: float
) -> tuple[int, int] | None:
    """Delete expired `*.uploading` files under *inbox_path* (blocking I/O).

    Intended to run in a worker thread so the filesystem calls (directory
    walk, stat, unlink) do not stall the asyncio event loop.

    Args:
        inbox_path: Root directory that is scanned recursively.
        max_age: Minimum age in seconds (based on mtime) before a file is deleted.
        now: Reference timestamp (seconds since the epoch) used to compute age.

    Returns:
        ``(deleted_count, deleted_bytes)``, or ``None`` when *inbox_path*
        does not exist.
    """
    if not inbox_path.exists():
        return None

    deleted = 0
    total_bytes = 0
    for f in inbox_path.rglob("*.uploading"):
        try:
            # A single stat() supplies file type, mtime and size, so all three
            # come from the same snapshot and each candidate costs one
            # filesystem round-trip instead of three.
            st = f.stat()
            if not stat.S_ISREG(st.st_mode):
                continue
            age = now - st.st_mtime
            if age < max_age:
                continue
            f.unlink()
        except FileNotFoundError:
            # The file vanished between listing and stat/unlink (the upload
            # completed and was renamed, or something else removed it).
            # There is nothing left to clean up, so this is not worth a warning.
            continue
        except OSError as e:
            # Another process is cleaning up or there is a permission problem;
            # the file is retried on the next scheduled run.
            logger.warning("orphan upload cleanup skipped %s: %s", f, e)
            continue

        deleted += 1
        total_bytes += st.st_size
        logger.info(
            "orphan upload deleted: %s (age=%ds, size=%d)", f.name, int(age), st.st_size
        )

    return deleted, total_bytes


async def cleanup_orphan_uploads() -> int:
    """Collect orphaned `*.uploading` files and return how many were deleted.

    Triggered by APScheduler on a 10-minute schedule. The filesystem scan runs
    in the default executor so the event loop stays responsive while the NAS
    is being walked. After the scan, the per-run count is pushed into the
    rolling window `_recent_deletes`; a WARNING is logged when the window sum
    reaches `settings.upload.cleanup_warn_threshold`, otherwise an INFO summary
    is logged if anything was deleted in this run.

    Returns:
        Number of files deleted in this run (0 when the Inbox directory does
        not exist; in that case the rolling window is left untouched).
    """
    inbox_path = Path(settings.nas_mount_path) / "PKM" / "Inbox"
    max_age = settings.upload.orphan_max_age_sec

    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(
        None, _sweep_orphan_uploads, inbox_path, max_age, time.time()
    )
    if result is None:
        return 0
    deleted, total_bytes = result

    threshold = settings.upload.cleanup_warn_threshold
    _recent_deletes.append(deleted)
    window_total = sum(_recent_deletes)
    if window_total >= threshold:
        logger.warning(
            "upload orphan cleanup high — window=%d (last %d runs), threshold=%d. "
            "abort 가 구조적으로 많거나 대용량 업로드 실패 반복 의심.",
            window_total,
            len(_recent_deletes),
            threshold,
        )
    elif deleted > 0:
        logger.info(
            "upload orphan cleanup: deleted=%d bytes=%d window_sum=%d",
            deleted,
            total_bytes,
            window_total,
        )

    return deleted