TK-BOM-Project/backend/app/services/material_classifier.py

"""
재질 분류를 위한 공통 함수
materials_schema.py의 데이터를 사용하여 재질을 분류
"""

import re
from typing import Dict, List, Optional, Tuple
from .materials_schema import (
    MATERIAL_STANDARDS,
    SPECIAL_MATERIALS,
    MANUFACTURING_MATERIAL_MAP,
    GENERIC_MATERIAL_KEYWORDS
)

def classify_material(description: str) -> Dict:
    """
    Classify a material from its free-text description.

    The description is upper-cased and stripped, then passed through a fixed
    sequence of checkers. The first checker whose confidence exceeds its
    threshold wins; if none does, the generic keyword result is returned
    unconditionally.

    Args:
        description: Material description (the DESCRIPTION field). ``None``
            is treated as an empty string; other values are passed through
            ``str()``.

    Returns:
        The classification result dictionary produced by the winning checker.
    """
    if description is None:
        normalized = ""
    else:
        normalized = str(description).upper().strip()

    # Each entry: (checker, confidence the result must exceed to be accepted).
    staged_checkers = (
        (check_special_materials, 0.9),  # stage 1: special materials (most specific)
        (check_astm_materials, 0.8),     # stage 2: ASTM/ASME standards
        (check_ks_materials, 0.8),       # stage 3: KS standards
        (check_jis_materials, 0.8),      # stage 4: JIS standards
    )
    for checker, threshold in staged_checkers:
        candidate = checker(normalized)
        if candidate['confidence'] > threshold:
            return candidate

    # Stage 5: generic keywords; returned whatever its confidence is.
    return check_generic_materials(normalized)

def check_special_materials(description: str) -> Dict:
    """Check the description against the special-material tables.

    Super alloys are tried first, then titanium. Patterns are applied with
    ``re.search`` to ``description`` exactly as given.

    Args:
        description: Description text to search.

    Returns:
        A result dictionary with confidence 0.95 for the first pattern that
        matches, or ``{"confidence": 0.0}`` when nothing matches.
    """

    # Super alloys
    for alloy_family, alloy_data in SPECIAL_MATERIALS["SUPER_ALLOYS"].items():
        for pattern in alloy_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue

            # group(1) is None when the capture group is optional and did not
            # take part in the match (and may be empty); use the default grade
            # then instead of producing a grade such as "<family> None".
            grade = (match.group(1) if match.groups() else None) or "STANDARD"
            grade_info = alloy_data["grades"].get(grade, {})

            return {
                "standard": f"{alloy_family}",
                "grade": f"{alloy_family} {grade}",
                "material_type": "SUPER_ALLOY",
                "manufacturing": alloy_data.get("manufacturing", "SPECIAL"),
                "composition": grade_info.get("composition", ""),
                "applications": grade_info.get("applications", ""),
                "confidence": 0.95,
                "evidence": [f"SPECIAL_MATERIAL: {alloy_family} {grade}"]
            }

    # Titanium
    titanium_data = SPECIAL_MATERIALS["TITANIUM"]
    for pattern in titanium_data["patterns"]:
        match = re.search(pattern, description)
        if not match:
            continue

        # Same guard as above; "2" is the default grade when none is captured.
        grade = (match.group(1) if match.groups() else None) or "2"
        grade_info = titanium_data["grades"].get(grade, {})

        return {
            "standard": "TITANIUM",
            "grade": f"Titanium Grade {grade}",
            "material_type": "TITANIUM",
            "manufacturing": "FORGED_OR_SEAMLESS",
            "composition": grade_info.get("composition", f"Ti Grade {grade}"),
            "confidence": 0.95,
            "evidence": [f"TITANIUM: Grade {grade}"]
        }

    return {"confidence": 0.0}

def check_astm_materials(description: str) -> Dict:
    """Check the description against the ASTM/ASME standard tables.

    The grade categories are scanned in a fixed order (forged, welded, cast,
    pipe) and the first standard whose result has confidence above 0.8 is
    returned.

    Args:
        description: Description text to search.

    Returns:
        The matching standard's result dictionary, or
        ``{"confidence": 0.0}`` when no standard matches.
    """
    astm_tables = MATERIAL_STANDARDS["ASTM_ASME"]

    # Order matters: an earlier category wins over a later one.
    category_order = ("FORGED_GRADES", "WELDED_GRADES", "CAST_GRADES", "PIPE_GRADES")
    for category in category_order:
        for standard, standard_data in astm_tables[category].items():
            outcome = check_astm_standard(description, standard, standard_data)
            if outcome["confidence"] > 0.8:
                return outcome

    return {"confidence": 0.0}

def check_astm_standard(description: str, standard: str, standard_data: Dict) -> Dict:
    """Check the description against a single ASTM standard entry.

    ``standard_data`` comes in two shapes:

    * it has a ``"patterns"`` key directly (e.g. A105), or
    * it maps subtype names to dictionaries that each carry ``"patterns"``
      and ``"grades"`` (e.g. A182, A234).

    Args:
        description: Description text to search with ``re.search``.
        standard: Standard code without the "ASTM" prefix, e.g. ``"A312"``.
        standard_data: Schema entry for that standard.

    Returns:
        A result dictionary with confidence 0.9 for the first matching
        pattern, or ``{"confidence": 0.0}`` when nothing matches.
    """

    # Standards with patterns defined directly on the entry (A105 etc.)
    if "patterns" in standard_data:
        for pattern in standard_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue

            # group(1) is None when an optional capture group did not take
            # part in the match; treat that as "no grade captured".
            grade_code = (match.group(1) if match.groups() else "") or ""
            full_grade = f"ASTM {standard}" + (f" {grade_code}" if grade_code else "")

            return {
                "standard": f"ASTM {standard}",
                "grade": full_grade,
                "material_type": determine_material_type(standard, grade_code),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "confidence": 0.9,
                "evidence": [f"ASTM_{standard}: {grade_code if grade_code else 'Direct Match'}"]
            }

    # Standards split into subtypes (A182, A234 etc.)
    else:
        # (standard, grade prefix) rules, evaluated in this order. A grade
        # that already starts with a rule's prefix is shown unchanged; a bare
        # grade under the rule's own standard gets the prefix added
        # (A312 "304" -> "TP304", A403 "304" -> "WP304", A420 "3" -> "WPL3").
        prefix_rules = (("A312", "TP"), ("A403", "WP"), ("A420", "WPL"))

        for subtype, subtype_data in standard_data.items():
            for pattern in subtype_data["patterns"]:
                match = re.search(pattern, description)
                if not match:
                    continue

                # Normalizing None to "" keeps the startswith() calls below
                # from raising AttributeError on an unmatched optional group.
                grade_code = (match.group(1) if match.groups() else "") or ""
                grade_info = subtype_data["grades"].get(grade_code, {})

                full_grade = f"ASTM {standard} {grade_code}" if grade_code else f"ASTM {standard}"
                for rule_standard, prefix in prefix_rules:
                    if grade_code.startswith(prefix):
                        # Already prefixed: keep the grade as captured.
                        break
                    if standard == rule_standard and grade_code:
                        full_grade = f"ASTM {standard} {prefix}{grade_code}"
                        break

                return {
                    "standard": f"ASTM {standard}",
                    "grade": full_grade,
                    "material_type": determine_material_type(standard, grade_code),
                    "manufacturing": subtype_data.get("manufacturing", "UNKNOWN"),
                    "composition": grade_info.get("composition", ""),
                    "applications": grade_info.get("applications", ""),
                    "confidence": 0.9,
                    "evidence": [f"ASTM_{standard}: {grade_code}"]
                }

    return {"confidence": 0.0}

def check_ks_materials(description: str) -> Dict:
    """Check the description against the KS standard tables.

    Args:
        description: Description text to search with ``re.search``.

    Returns:
        A result dictionary with confidence 0.85 for the first KS standard
        that has a matching pattern, or ``{"confidence": 0.0}`` otherwise.
    """
    for standards in MATERIAL_STANDARDS["KS"].values():
        for standard, standard_data in standards.items():
            matched = any(
                re.search(pattern, description)
                for pattern in standard_data["patterns"]
            )
            if not matched:
                continue

            label = f"KS {standard}"
            return {
                "standard": label,
                "grade": label,
                # KS entries carry no grade, so the type is guessed from the text.
                "material_type": determine_material_type_from_description(description),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "description": standard_data["description"],
                "confidence": 0.85,
                "evidence": [f"KS_{standard}"]
            }

    return {"confidence": 0.0}

def check_jis_materials(description: str) -> Dict:
    """Check the description against the JIS standard tables.

    Args:
        description: Description text to search with ``re.search``.

    Returns:
        A result dictionary with confidence 0.85 for the first JIS standard
        that has a matching pattern, or ``{"confidence": 0.0}`` otherwise.
    """
    for standards in MATERIAL_STANDARDS["JIS"].values():
        for standard, standard_data in standards.items():
            matched = any(
                re.search(pattern, description)
                for pattern in standard_data["patterns"]
            )
            if not matched:
                continue

            label = f"JIS {standard}"
            return {
                "standard": label,
                "grade": label,
                # JIS entries carry no grade, so the type is guessed from the text.
                "material_type": determine_material_type_from_description(description),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "description": standard_data["description"],
                "confidence": 0.85,
                "evidence": [f"JIS_{standard}"]
            }

    return {"confidence": 0.0}

def check_generic_materials(description: str) -> Dict:
    """Look for generic material keywords in the description.

    Keywords are tested as plain substrings, in the iteration order of
    ``GENERIC_MATERIAL_KEYWORDS``; the first hit wins.

    Args:
        description: Description text to search.

    Returns:
        A "GENERIC" result with confidence 0.6 for the first keyword found,
        or an all-"UNKNOWN" result with confidence 0.0 when none is found.
    """
    for material_type, keywords in GENERIC_MATERIAL_KEYWORDS.items():
        hit = next((word for word in keywords if word in description), None)
        if hit is None:
            continue
        return {
            "standard": "GENERIC",
            "grade": hit,
            "material_type": material_type,
            "manufacturing": "UNKNOWN",
            "confidence": 0.6,
            "evidence": [f"GENERIC: {hit}"]
        }

    # Nothing recognised at all.
    not_found = {
        "standard": "UNKNOWN",
        "grade": "UNKNOWN",
        "material_type": "UNKNOWN",
        "manufacturing": "UNKNOWN",
        "confidence": 0.0,
        "evidence": ["NO_MATERIAL_FOUND"]
    }
    return not_found

def determine_material_type(standard: str, grade: str) -> str:
    """Derive the material type from a standard code and a grade code.

    Checks run in priority order: stainless grade markers, alloy grade
    markers, cast standards, and finally carbon steel as the default.
    Markers are matched as substrings of ``grade``.

    Args:
        standard: Standard code, e.g. ``"A216"``.
        grade: Grade code; a falsy value (``None``, ``""``) is treated as
            an empty grade.

    Returns:
        One of ``"STAINLESS_STEEL"``, ``"ALLOY_STEEL"``, ``"CAST_STEEL"``
        or ``"CARBON_STEEL"``.
    """
    grade_text = grade if grade else ""

    # Stainless grades
    stainless_markers = ("304", "316", "321", "347", "F304", "F316", "WP304", "CF8")
    for marker in stainless_markers:
        if marker in grade_text:
            return "STAINLESS_STEEL"

    # Alloy-steel grades
    alloy_markers = ("F1", "F5", "F11", "F22", "F91", "WP1", "WP5", "WP11", "WP22", "WP91")
    for marker in alloy_markers:
        if marker in grade_text:
            return "ALLOY_STEEL"

    # Cast standards
    if standard in ("A216", "A351"):
        return "CAST_STEEL"

    # Everything else defaults to carbon steel
    return "CARBON_STEEL"

def determine_material_type_from_description(description: str) -> str:
    """Guess the material type from keywords in a description.

    Checks run in priority order: stainless, alloy, cast, then carbon steel
    as the default. Matching is case-insensitive (the text is upper-cased).

    Long keywords are matched as substrings. The two-letter codes ``SS``,
    ``CR`` and ``MO`` only count when they are not embedded in a longer
    alphabetic word, so that ordinary words such as "SEAMLESS", "CLASS",
    "SCREW" or "MODEL" do not trigger a stainless/alloy classification.
    Codes attached to digits or punctuation ("SS304", "1.25CR-0.5MO")
    still match.

    Args:
        description: Description text; ``None`` is treated as empty.

    Returns:
        One of ``"STAINLESS_STEEL"``, ``"ALLOY_STEEL"``, ``"CAST_STEEL"``
        or ``"CARBON_STEEL"``.
    """
    desc_upper = str(description).upper() if description is not None else ""

    def has_code(alternatives: str) -> bool:
        # A code matches only when no ASCII letter touches it on either side.
        return re.search(rf"(?<![A-Z])(?:{alternatives})(?![A-Z])", desc_upper) is not None

    def has_keyword(*keywords: str) -> bool:
        return any(keyword in desc_upper for keyword in keywords)

    if has_code("SS") or has_keyword("STS", "STAINLESS", "304", "316"):
        return "STAINLESS_STEEL"

    # "CRMO" is listed explicitly because CR and MO written together would
    # otherwise block each other; "MOLY" keeps "CHROME MOLY"/"MOLYBDENUM"
    # classified as alloy now that "MO" no longer matches inside words.
    if has_code("CRMO|CR|MO") or has_keyword("ALLOY", "합금", "MOLY"):
        return "ALLOY_STEEL"

    if has_keyword("CAST", "주조"):
        return "CAST_STEEL"

    return "CARBON_STEEL"

def get_manufacturing_method_from_material(material_result: Dict) -> str:
    """Infer the manufacturing method from a material classification result.

    Results with confidence below 0.5 yield ``"UNKNOWN"``. Otherwise the
    ``"standard"`` value is searched for known standard codes (substring
    match, first hit in table order wins); if none is found, the result's
    own ``"manufacturing"`` value is used.

    Args:
        material_result: Classification result dictionary; missing keys
            fall back to confidence 0, an empty standard and
            ``"UNKNOWN"`` manufacturing.

    Returns:
        The manufacturing method string, or ``"UNKNOWN"``.
    """
    if material_result.get("confidence", 0) < 0.5:
        return "UNKNOWN"

    standard_name = material_result.get('standard', '')

    # Direct standard-code mapping; row order is the lookup priority.
    direct_mapping = (
        (("A182", "A105"), 'FORGED'),
        (("A234", "A403", "A420"), 'WELDED_FABRICATED'),
        (("A216", "A351"), 'CAST'),
        (("A106", "A312"), 'SEAMLESS'),
        (("A53",), 'WELDED_OR_SEAMLESS'),
    )
    for codes, method in direct_mapping:
        for code in codes:
            if code in standard_name:
                return method

    # No known code: fall back to the manufacturing field when it is set.
    declared = material_result.get("manufacturing", "UNKNOWN")
    return declared if declared != "UNKNOWN" else "UNKNOWN"

def get_material_confidence_factors(material_result: Dict) -> List[str]:
    """List the factors that describe a classification result's reliability.

    The first entry is always the confidence band (>= 0.9 high, >= 0.7
    medium, otherwise low; a missing confidence counts as 0). Further
    entries are added when the standard or the manufacturing method is
    ``"UNKNOWN"``.

    Args:
        material_result: Classification result dictionary.

    Returns:
        Factor labels, confidence band first.
    """
    score = material_result.get("confidence", 0)

    if score >= 0.9:
        band = "HIGH_CONFIDENCE"
    elif score >= 0.7:
        band = "MEDIUM_CONFIDENCE"
    else:
        band = "LOW_CONFIDENCE"
    factors = [band]

    # (result key, label added when that key's value is "UNKNOWN")
    unknown_flags = (
        ("standard", "NO_STANDARD_FOUND"),
        ("manufacturing", "MANUFACTURING_UNCLEAR"),
    )
    for key, label in unknown_flags:
        if material_result.get(key) == "UNKNOWN":
            factors.append(label)

    return factors