# TK-BOM-Project/backend/app/services/support_classifier.py

"""
SUPPORT 분류 시스템
배관 지지재, 우레탄 블록, 클램프 등 지지 부품 분류
"""

import re
from typing import Dict, List, Optional
from .material_classifier import classify_material

# ========== Support type classification table ==========
# Maps each support type to the substrings used to recognise it and to the
# labels reported for it:
#   dat_file_patterns    - substrings searched in the upper-cased DAT file name
#   description_keywords - substrings searched in the upper-cased description
#   characteristics      - label returned as the support subtype
#   applications         - typical-use label returned with the type
#   material_type        - material label (not referenced by
#                          classify_support_type)
#
# ORDER MATTERS: classify_support_type walks this dict in insertion order and
# returns the first type with any hit, so specific types must precede generic
# ones. SPRING_HANGER is listed before HANGER because descriptions such as
# "SPRING HANGER" / "스프링 행거" also contain HANGER's keywords ("HANGER",
# "행거") and would otherwise never be classified as spring hangers.
SUPPORT_TYPES = {
    # Urethane block shoe
    "URETHANE_BLOCK": {
        "dat_file_patterns": ["URETHANE", "BLOCK", "SHOE"],
        "description_keywords": ["URETHANE BLOCK", "BLOCK SHOE", "우레탄 블록", "우레탄", "URETHANE"],
        "characteristics": "우레탄 블록 슈",
        "applications": "배관 지지, 진동 흡수",
        "material_type": "URETHANE"
    },

    # Pipe clamp
    "CLAMP": {
        "dat_file_patterns": ["CLAMP", "CL-"],
        "description_keywords": ["CLAMP", "클램프", "CL-1", "CL-2", "CL-3"],
        "characteristics": "배관 클램프",
        "applications": "배관 고정, 지지",
        "material_type": "STEEL"
    },

    # Spring hanger (must stay ahead of the generic HANGER entry)
    "SPRING_HANGER": {
        "dat_file_patterns": ["SPRING", "SPR_"],
        "description_keywords": ["SPRING HANGER", "SPRING", "스프링", "스프링 행거"],
        "characteristics": "스프링 행거",
        "applications": "가변 하중 지지",
        "material_type": "STEEL"
    },

    # Generic pipe hanger / support
    "HANGER": {
        "dat_file_patterns": ["HANGER", "HANG", "SUPP"],
        "description_keywords": ["HANGER", "SUPPORT", "행거", "서포트", "PIPE HANGER"],
        "characteristics": "배관 행거",
        "applications": "배관 매달기, 지지",
        "material_type": "STEEL"
    },

    # Pipe guide
    "GUIDE": {
        "dat_file_patterns": ["GUIDE", "GD_"],
        "description_keywords": ["GUIDE", "가이드", "PIPE GUIDE"],
        "characteristics": "배관 가이드",
        "applications": "배관 방향 제어",
        "material_type": "STEEL"
    },

    # Pipe anchor
    "ANCHOR": {
        "dat_file_patterns": ["ANCHOR", "ANCH"],
        "description_keywords": ["ANCHOR", "앵커", "PIPE ANCHOR"],
        "characteristics": "배관 앵커",
        "applications": "배관 고정점",
        "material_type": "STEEL"
    }
}

# ========== Load rating classification table ==========
# Each rating carries:
#   patterns    - regexes whose first capture group is the tonnage number
#   range       - (min, max) tonnage for the rating
#   description - label returned with the rating
# classify_load_rating tests `min <= capacity <= max` and walks the ratings in
# the order written here, so a value sitting on a shared boundary (5 or 20)
# resolves to the earlier rating.
LOAD_RATINGS = {
    "LIGHT": {
        "patterns": [r"(\d+)T", r"(\d+)TON"],
        "range": (0, 5),  # up to 5 tons
        "description": "경하중용"
    },
    "MEDIUM": {
        "patterns": [r"(\d+)T", r"(\d+)TON"],
        "range": (5, 20),  # 5-20 tons
        "description": "중하중용"
    },
    "HEAVY": {
        "patterns": [r"(\d+)T", r"(\d+)TON"],
        "range": (20, 100),  # 20-100 tons
        # NOTE(review): this string is identical to MEDIUM's description —
        # confirm that is intended.
        "description": "중하중용"
    }
}

def classify_support(dat_file: str, description: str, main_nom: str,
                    length: Optional[float] = None) -> Dict:
    """
    Classify a SUPPORT item and assemble the combined result.

    Runs the type, material, load-rating, size and user-requirement
    extractors on the inputs and merges their outputs into one dictionary.

    Args:
        dat_file: DAT file name. ``None`` is treated as an empty string.
        description: Material description. ``None`` is treated as an empty
            string.
        main_nom: Main nominal size, forwarded to ``extract_support_size``.
        length: Optional length. Currently not used by this function; kept
            for interface compatibility.

    Returns:
        Dict with the keys ``category`` (always ``"SUPPORT"``),
        ``support_type``, ``support_subtype``, ``load_rating``,
        ``load_capacity``, ``material`` (``standard``, ``grade``,
        ``material_type``, ``confidence``), ``size_info``,
        ``user_requirements``, ``overall_confidence`` and ``evidence``.
    """
    # Missing cells arrive as None; normalise once so every extractor below
    # receives a string instead of failing on ``.upper()``.
    dat_file = dat_file or ""
    description = description or ""

    # 1. Support type
    support_type_result = classify_support_type(dat_file, description)

    # 2. Material (shared module) — assumed to return a dict; TODO confirm
    material_result = classify_material(description)

    # 3. Load rating
    load_result = classify_load_rating(description)

    # 4. Size information
    size_result = extract_support_size(description, main_nom)

    # 5. User requirements
    user_requirements = extract_support_user_requirements(description)

    # 6. Urethane block shoes: fold the thickness (e.g. 40T, 27T) into the
    #    material grade.
    enhanced_material_grade = material_result.get('grade', 'UNKNOWN')
    if support_type_result.get("support_type") == "URETHANE_BLOCK":
        # Digits followed by T, but not by TO (so "40TON" is not a thickness).
        # The text is upper-cased first, so only upper-case letters can occur.
        # NOTE(review): the same "<digits>T" token also matches the load
        # patterns in LOAD_RATINGS — confirm whether a urethane thickness
        # should also be reported as a load rating.
        thickness_match = re.search(r'(\d+)\s*T(?!O)', description.upper())
        if thickness_match:
            thickness = f"{thickness_match.group(1)}t"
            if enhanced_material_grade == 'UNKNOWN' or not enhanced_material_grade:
                enhanced_material_grade = thickness
            elif thickness not in enhanced_material_grade:
                enhanced_material_grade = f"{enhanced_material_grade} {thickness}"

    # 7. Final result
    return {
        "category": "SUPPORT",

        # Support-specific information
        "support_type": support_type_result.get("support_type", "UNKNOWN"),
        "support_subtype": support_type_result.get("subtype", ""),
        "load_rating": load_result.get("load_rating", ""),
        "load_capacity": load_result.get("capacity", ""),

        # Material information (shared module); grade may carry the urethane
        # block thickness added above
        "material": {
            "standard": material_result.get('standard', 'UNKNOWN'),
            "grade": enhanced_material_grade,
            "material_type": material_result.get('material_type', 'UNKNOWN'),
            "confidence": material_result.get('confidence', 0.0)
        },

        # Size information
        "size_info": size_result,

        # User requirements
        "user_requirements": user_requirements,

        # Overall confidence (weighted over the four partial confidences)
        "overall_confidence": calculate_support_confidence({
            "type": support_type_result.get('confidence', 0),
            "material": material_result.get('confidence', 0),
            "load": load_result.get('confidence', 0),
            "size": size_result.get('confidence', 0)
        }),

        # Evidence
        "evidence": [
            f"SUPPORT_TYPE: {support_type_result.get('support_type', 'UNKNOWN')}",
            f"MATERIAL: {material_result.get('standard', 'UNKNOWN')}",
            f"LOAD: {load_result.get('load_rating', 'UNKNOWN')}"
        ]
    }

def classify_support_type(dat_file: str, description: str) -> Dict:
    """
    Determine the support type from the DAT file name and the description.

    Walks ``SUPPORT_TYPES`` in its defined order. For each type the DAT file
    patterns are tried first, then the description keywords; the first type
    with any hit wins. Matching is a case-insensitive substring test.

    Args:
        dat_file: DAT file name. ``None`` is treated as an empty string.
        description: Material description. ``None`` is treated as an empty
            string.

    Returns:
        Dict with ``support_type``, ``subtype``, ``applications``,
        ``confidence`` (0.95 for a DAT pattern hit, 0.9 for a description
        keyword hit, 0.0 when nothing matched) and ``evidence``.
    """
    dat_upper = (dat_file or "").upper()
    desc_upper = (description or "").upper()

    for support_type, type_data in SUPPORT_TYPES.items():
        # DAT file pattern takes precedence within a type.
        dat_hit = next(
            (p for p in type_data["dat_file_patterns"] if p in dat_upper),
            None,
        )
        if dat_hit is not None:
            confidence, evidence = 0.95, f"DAT_PATTERN: {dat_hit}"
        else:
            keyword_hit = next(
                (k for k in type_data["description_keywords"] if k in desc_upper),
                None,
            )
            if keyword_hit is None:
                continue  # nothing for this type; try the next one
            confidence, evidence = 0.9, f"DESC_KEYWORD: {keyword_hit}"

        return {
            "support_type": support_type,
            "subtype": type_data["characteristics"],
            "applications": type_data["applications"],
            "confidence": confidence,
            "evidence": [evidence]
        }

    return {
        "support_type": "UNKNOWN",
        "subtype": "",
        "applications": "",
        "confidence": 0.0,
        "evidence": ["NO_SUPPORT_TYPE_FOUND"]
    }

def extract_support_user_requirements(description: str) -> List[str]:
    """
    Extract user-requirement tags from a support description.

    Matching is case-insensitive. Tags are appended in a fixed order:
    surface treatment, then material, then special requirements.

    Args:
        description: Material description. ``None`` is treated as an empty
            string.

    Returns:
        List of tags drawn from ``GALVANIZED``, ``HOT DIP GALVANIZED``,
        ``PAINTED``, ``STAINLESS STEEL``, ``CARBON STEEL``, ``FIRE SAFE`` and
        ``SEISMIC``; empty when nothing is recognised.
    """
    desc_upper = (description or "").upper()
    requirements = []

    # Surface treatment ("GALV" also covers "GALVANIZED", "PAINT" covers
    # "PAINTED", so one substring test each is enough).
    if 'GALV' in desc_upper:
        requirements.append('GALVANIZED')
    if 'HDG' in desc_upper or 'HOT DIP' in desc_upper:
        requirements.append('HOT DIP GALVANIZED')
    if 'PAINT' in desc_upper:
        requirements.append('PAINTED')

    # Material. "SS" counts only when it is not embedded in a longer
    # alphabetic word; a plain substring test would also fire on PRESSURE,
    # CLASS, ASSY, BRASS and similar. Digits may still follow (e.g. SS304).
    if re.search(r'(?<![A-Z])SS(?![A-Z])', desc_upper) or 'STAINLESS' in desc_upper:
        requirements.append('STAINLESS STEEL')
    if 'CARBON' in desc_upper:
        requirements.append('CARBON STEEL')

    # Special requirements
    if 'FIRE SAFE' in desc_upper:
        requirements.append('FIRE SAFE')
    if 'SEISMIC' in desc_upper or '내진' in desc_upper:
        requirements.append('SEISMIC')

    return requirements

def classify_load_rating(description: str) -> Dict:
    """
    Classify the load rating stated in a description.

    Two passes are made over the upper-cased description:

    1. Each rating's own patterns from ``LOAD_RATINGS`` are searched; the
       first rating whose pattern yields a tonnage inside its inclusive range
       is returned with confidence 0.9.
    2. Otherwise a looser pattern (digits, optional whitespace, then ``T`` or
       ``톤``) is tried. If the tonnage falls inside a defined range, that
       rating is returned, so loads written as "10 TON" or "10톤" get the
       same rating as "10T". Otherwise the rating is ``CUSTOM``. Both
       outcomes carry confidence 0.7, since the token came from the looser
       pattern.

    Args:
        description: Material description. ``None`` is treated as an empty
            string.

    Returns:
        Dict with ``load_rating``, ``capacity`` (e.g. ``"40T"``),
        ``description``, ``confidence`` and ``evidence``. When no load is
        found, ``load_rating`` is ``"UNKNOWN"`` and ``confidence`` is 0.0.
    """
    desc_upper = (description or "").upper()

    # Pass 1: rating-specific patterns (40T, 50TON, ...)
    for rating, rating_data in LOAD_RATINGS.items():
        min_load, max_load = rating_data["range"]
        for pattern in rating_data["patterns"]:
            match = re.search(pattern, desc_upper)
            if not match:
                continue
            capacity = int(match.group(1))
            if min_load <= capacity <= max_load:
                return {
                    "load_rating": rating,
                    "capacity": f"{capacity}T",
                    "description": rating_data["description"],
                    "confidence": 0.9,
                    "evidence": [f"LOAD_PATTERN: {match.group(0)}"]
                }

    # Pass 2: looser token (allows whitespace and the Korean unit)
    load_match = re.search(r'(\d+)\s*[T톤]', desc_upper)
    if load_match:
        capacity = int(load_match.group(1))

        # Prefer a defined rating when the value falls inside one.
        for rating, rating_data in LOAD_RATINGS.items():
            min_load, max_load = rating_data["range"]
            if min_load <= capacity <= max_load:
                return {
                    "load_rating": rating,
                    "capacity": f"{capacity}T",
                    "description": rating_data["description"],
                    "confidence": 0.7,
                    "evidence": [f"LOAD_PATTERN: {load_match.group(0)}"]
                }

        # A load value is present but lies outside every defined range.
        return {
            "load_rating": "CUSTOM",
            "capacity": f"{capacity}T",
            "description": f"{capacity}톤 하중",
            "confidence": 0.7,
            "evidence": [f"CUSTOM_LOAD: {load_match.group(0)}"]
        }

    return {
        "load_rating": "UNKNOWN",
        "capacity": "",
        "description": "",
        "confidence": 0.0,
        "evidence": ["NO_LOAD_RATING_FOUND"]
    }

def extract_support_size(description: str, main_nom: str) -> Dict:
    """
    Extract size information for a support.

    The support's own dimensions are read from the upper-cased description,
    in this order of preference:

    1. ``A x B x C`` -> length, width, height
    2. ``A x B``     -> length, width
    3. Individual markers ``L<n>``, ``W<n>``, ``H<n>`` -> length, width,
       height (whichever are present)

    Values are reported with an ``mm`` suffix.

    Args:
        description: Material description. ``None`` is treated as an empty
            string.
        main_nom: Main nominal size (the pipe the support carries); a falsy
            value is reported as an empty string.

    Returns:
        Dict with ``pipe_size``, ``dimensions`` (possibly empty) and
        ``confidence`` (0.8 when any dimension was found, otherwise 0.3).
    """
    desc_upper = (description or "").upper()

    # Size of the pipe the support carries
    pipe_size = main_nom if main_nom else ""

    dimensions = {}

    triple = re.search(r'(\d+)\s*[X×]\s*(\d+)\s*[X×]\s*(\d+)', desc_upper)  # 100x50x20
    if triple:
        dimensions = {
            "length": f"{triple.group(1)}mm",
            "width": f"{triple.group(2)}mm",
            "height": f"{triple.group(3)}mm"
        }
    else:
        pair = re.search(r'(\d+)\s*[X×]\s*(\d+)', desc_upper)  # 100x50
        if pair:
            dimensions = {
                "length": f"{pair.group(1)}mm",
                "width": f"{pair.group(2)}mm"
            }
        else:
            # L100 / W50 / H20 markers. The lookbehind keeps a letter that
            # merely ends a word or code (e.g. the L in "STEEL 304", the H in
            # "SCH 40") from being read as a dimension marker.
            for key, marker in (("length", "L"), ("width", "W"), ("height", "H")):
                single = re.search(rf'(?<![A-Z0-9]){marker}\s*(\d+)', desc_upper)
                if single:
                    dimensions[key] = f"{single.group(1)}mm"

    return {
        "pipe_size": pipe_size,
        "dimensions": dimensions,
        "confidence": 0.8 if dimensions else 0.3
    }

def calculate_support_confidence(confidence_scores: Dict) -> float:
    """
    Combine the partial confidences into one overall score.

    Each component is multiplied by its fixed weight and the products are
    summed; a component missing from ``confidence_scores`` contributes 0.
    The total is rounded to two decimals.

    Args:
        confidence_scores: Mapping with any of the keys ``type``,
            ``material``, ``load`` and ``size``; other keys are ignored.

    Returns:
        The weighted sum, rounded to two decimal places.
    """
    # (component, weight) — the type carries the most weight; the other
    # three share the remainder equally.
    weighted_terms = (
        ("type", 0.4),
        ("material", 0.2),
        ("load", 0.2),
        ("size", 0.2),
    )

    total = sum(
        confidence_scores.get(component, 0) * share
        for component, share in weighted_terms
    )
    return round(total, 2)