# TK-BOM-Project/backend/app/services/fitting_classifier.py

"""
FITTING 분류 시스템 V2
재질 분류 + 피팅 특화 분류 + 스풀 시스템 통합
"""

import re
from typing import Dict, List, Optional
from .material_classifier import classify_material, get_manufacturing_method_from_material

# ========== Fitting types (based on real BOM data) ==========
# Lookup table keyed by fitting type. The classifier functions in this module
# walk the entries in insertion order, so the order of the entries matters.
# Keys of each entry:
#   dat_file_patterns    - tokens searched for in the upper-cased DAT_FILE value
#   description_keywords - tokens searched for in the upper-cased DESCRIPTION value
#   subtypes             - subtype name -> tokens searched for in the description
#   default_subtype      - (optional) subtype reported when no subtype rule applies;
#                          entries without it fall back to "GENERAL"
#   size_analysis        - (optional) decide EQUAL / REDUCING from MAIN_NOM vs RED_NOM
#   requires_two_sizes   - (optional) the fitting is specified by two nominal sizes
#   common_connections,
#   size_range           - informational; no function shown in this module reads them
FITTING_TYPES = {
    "ELBOW": {
        "dat_file_patterns": ["90L_", "45L_", "ELL_", "ELBOW_"],
        "description_keywords": ["ELBOW", "ELL", "엘보"],
        "subtypes": {
            "90DEG": ["90", "90°", "90DEG", "90도"],
            "45DEG": ["45", "45°", "45DEG", "45도"],
            "LONG_RADIUS": ["LR", "LONG RADIUS", "장반경"],
            "SHORT_RADIUS": ["SR", "SHORT RADIUS", "단반경"]
        },
        "default_subtype": "90DEG",
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 48\""
    },

    "TEE": {
        "dat_file_patterns": ["TEE_", "T_"],
        "description_keywords": ["TEE", "티"],
        "subtypes": {
            "EQUAL": ["EQUAL TEE", "등경티", "EQUAL"],
            "REDUCING": ["REDUCING TEE", "RED TEE", "축소티", "REDUCING", "RD"]
        },
        "size_analysis": True,  # RED_NOM decides whether the tee is REDUCING
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 48\""
    },

    "REDUCER": {
        "dat_file_patterns": ["CNC_", "ECC_", "RED_", "REDUCER_"],
        "description_keywords": ["REDUCER", "RED", "리듀서"],
        "subtypes": {
            "CONCENTRIC": ["CONCENTRIC", "CONC", "CNC", "동심", "CON"],
            "ECCENTRIC": ["ECCENTRIC", "ECC", "편심"]
        },
        "requires_two_sizes": True,
        "common_connections": ["BUTT_WELD"],
        "size_range": "1/2\" ~ 48\""
    },

    "CAP": {
        "dat_file_patterns": ["CAP_"],
        "description_keywords": ["CAP", "캡", "막음"],
        "subtypes": {
            "BUTT_WELD": ["BW", "BUTT WELD"],
            "SOCKET_WELD": ["SW", "SOCKET WELD"],
            "THREADED": ["THD", "THREADED", "나사", "NPT"]
        },
        "common_connections": ["BUTT_WELD", "SOCKET_WELD", "THREADED"],
        "size_range": "1/4\" ~ 24\""
    },

    "PLUG": {
        "dat_file_patterns": ["PLUG_", "HEX_PLUG"],
        "description_keywords": ["PLUG", "플러그", "HEX.PLUG", "HEX PLUG", "HEXAGON PLUG"],
        "subtypes": {
            "HEX": ["HEX", "HEXAGON", "육각"],
            "SQUARE": ["SQUARE", "사각"],
            "THREADED": ["THD", "THREADED", "나사", "NPT"]
        },
        "common_connections": ["THREADED", "NPT"],
        "size_range": "1/8\" ~ 4\""
    },

    "NIPPLE": {
        "dat_file_patterns": ["NIP_", "NIPPLE_"],
        "description_keywords": ["NIPPLE", "니플"],
        "subtypes": {
            "THREADED": ["THREADED", "THD", "NPT", "나사"],
            "SOCKET_WELD": ["SOCKET WELD", "SW", "소켓웰드"],
            "CLOSE": ["CLOSE NIPPLE", "CLOSE"],
            "SHORT": ["SHORT NIPPLE", "SHORT"],
            "LONG": ["LONG NIPPLE", "LONG"]
        },
        "common_connections": ["THREADED", "SOCKET_WELD"],
        "size_range": "1/8\" ~ 4\""
    },

    "SWAGE": {
        "dat_file_patterns": ["SWG_"],
        "description_keywords": ["SWAGE", "스웨지"],
        "subtypes": {
            "CONCENTRIC": ["CONCENTRIC", "CONC", "CN", "CON", "동심"],
            "ECCENTRIC": ["ECCENTRIC", "ECC", "EC", "편심"]
        },
        "requires_two_sizes": True,
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 12\""
    },

    "OLET": {
        "dat_file_patterns": ["SOL_", "WOL_", "TOL_", "OLET_", "SOCK-O-LET", "WELD-O-LET"],
        "description_keywords": ["OLET", "올렛", "O-LET", "SOCK-O-LET", "WELD-O-LET", "SOCKOLET", "WELDOLET", "THREAD-O-LET", "THREADOLET", "SOCKLET", "SOCKET"],
        "subtypes": {
            "SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"],
            "WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"],
            "THREADOLET": ["THREAD-O-LET", "THREADOLET", "TOL", "THREADED-O-LET"],
            "ELBOLET": ["ELB-O-LET", "ELBOLET", "EOL", "ELBOW-O-LET"],
            "NIPOLET": ["NIP-O-LET", "NIPOLET", "NOL", "NIPPLE-O-LET"],
            "COUPOLET": ["COUP-O-LET", "COUPOLET", "COL", "COUPLING-O-LET"]
        },
        "requires_two_sizes": True,  # run (main) pipe size x branch pipe size
        "common_connections": ["SOCKET_WELD", "THREADED", "BUTT_WELD"],
        "size_range": "1/8\" ~ 4\""
    },

    "COUPLING": {
        "dat_file_patterns": ["CPL_", "COUPLING_"],
        "description_keywords": ["COUPLING", "커플링"],
        "subtypes": {
            "FULL": ["FULL COUPLING", "FULL"],
            "HALF": ["HALF COUPLING", "HALF"],
            "REDUCING": ["REDUCING COUPLING", "RED"]
        },
        "common_connections": ["SOCKET_WELD", "THREADED"],
        "size_range": "1/8\" ~ 4\""
    }
}

# ========== Connection (end) methods ==========
# Lookup table keyed by connection method; entries are walked in insertion order.
# Keys of each entry:
#   codes                 - tokens searched for in the upper-cased "DAT_FILE DESCRIPTION" text
#   dat_patterns          - tokens searched for in the upper-cased DAT_FILE value (checked first)
#   size_range,
#   pressure_range,
#   typical_manufacturing - copied verbatim into the classification result
#   confidence            - confidence reported when the match came from `codes`
CONNECTION_METHODS = {
    "BUTT_WELD": {
        "codes": ["BW", "BUTT WELD", "맞대기용접", "BUTT-WELD"],
        "dat_patterns": ["_BW"],
        "size_range": "1/2\" ~ 48\"",
        "pressure_range": "150LB ~ 2500LB",
        "typical_manufacturing": "WELDED_FABRICATED",
        "confidence": 0.95
    },
    "SOCKET_WELD": {
        "codes": ["SW", "SOCKET WELD", "소켓웰드", "SOCKET-WELD"],
        "dat_patterns": ["_SW_"],
        "size_range": "1/8\" ~ 4\"",
        "pressure_range": "150LB ~ 9000LB",
        "typical_manufacturing": "FORGED",
        "confidence": 0.95
    },
    "THREADED": {
        "codes": ["THD", "THRD", "NPT", "THREADED", "나사", "TR"],
        "dat_patterns": ["_TR", "_THD"],
        "size_range": "1/8\" ~ 4\"",
        "pressure_range": "150LB ~ 6000LB",
        "typical_manufacturing": "FORGED",
        "confidence": 0.95
    },
    "FLANGED": {
        "codes": ["FL", "FLG", "FLANGED", "플랜지"],
        "dat_patterns": ["_FL_"],
        "size_range": "1/2\" ~ 48\"",
        "pressure_range": "150LB ~ 2500LB",
        "typical_manufacturing": "FORGED_OR_CAST",
        "confidence": 0.9
    }
}

# ========== Pressure ratings ==========
# patterns         - regular expressions tried in list order against the upper-cased
#                    "DAT_FILE DESCRIPTION" text; capture group 1 is the numeric rating
# standard_ratings - details for the known ratings, keyed by "<number>LB"; a rating
#                    that is matched but not listed here is reported as non-standard
PRESSURE_RATINGS = {
    "patterns": [
        r"(\d+)LB",
        r"CLASS\s*(\d+)",
        r"CL\s*(\d+)",
        r"(\d+)#",
        r"(\d+)\s*LB"
    ],
    "standard_ratings": {
        "150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"},
        "300LB": {"max_pressure": "740 PSI", "common_use": "중압용"},
        "600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"},
        "900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"},
        "1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"},
        "2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"},
        "3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"},
        "6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"},
        "9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"}
    }
}

def classify_fitting(dat_file: str, description: str, main_nom: str,
                    red_nom: Optional[str] = None, length: Optional[float] = None) -> Dict:
    """
    Classify one BOM row as a pipe fitting and collect all sub-classifications.

    Args:
        dat_file: DAT_FILE field. ``None`` is treated as an empty string and
            any other non-string value is converted with ``str()``.
        description: DESCRIPTION field. Normalized the same way as ``dat_file``.
        main_nom: MAIN_NOM field (main nominal size).
        red_nom: RED_NOM field (reduced size), optional.
        length: Accepted for call compatibility; not used by this function.

    Returns:
        ``{"category": "UNKNOWN", "overall_confidence": 0.0, "reason": ...}``
        when neither text field contains a fitting keyword. Otherwise a dict
        with ``category == "FITTING"`` and the keys ``material``,
        ``fitting_type``, ``connection_method``, ``pressure_rating``,
        ``manufacturing``, ``size_info`` and ``overall_confidence``.
    """

    # BOM cells are often empty; normalize up front so the text handling below
    # (and in the helpers this function calls) never sees None.
    dat_file = "" if dat_file is None else str(dat_file)
    description = "" if description is None else str(description)

    desc_upper = description.upper()
    dat_upper = dat_file.upper()

    # 1. Name gate: the row is only treated as a fitting when a fitting keyword
    #    appears in the description or the DAT file name.
    #    NOTE: this is a plain substring test, so short keywords also match
    #    inside longer words (e.g. 'TEE' inside 'STEEL').
    fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'OLET', 'COUPLING', 'PLUG', 'SOCKLET', 'SOCKET', '엘보', '티', '리듀서', '캡', '니플', '스웨지', '올렛', '커플링', 'SOCK-O-LET', 'WELD-O-LET', 'SOCKOLET', 'WELDOLET']
    is_fitting = any(keyword in desc_upper or keyword in dat_upper for keyword in fitting_keywords)

    if not is_fitting:
        return {
            "category": "UNKNOWN",
            "overall_confidence": 0.0,
            "reason": "피팅 키워드 없음"
        }

    # 2. Material (shared material classifier)
    material_result = classify_material(description)

    # 3. Fitting type and subtype
    fitting_type_result = classify_fitting_type(dat_file, description, main_nom, red_nom)

    # 4. Connection method
    connection_result = classify_connection_method(dat_file, description)

    # 5. Pressure rating
    pressure_result = classify_pressure_rating(dat_file, description)

    # 6. Manufacturing method estimate
    manufacturing_result = determine_fitting_manufacturing(
        material_result, connection_result, pressure_result, main_nom
    )

    # 7. Assemble the final result
    return {
        "category": "FITTING",

        # Material information (shared module)
        "material": {
            "standard": material_result.get('standard', 'UNKNOWN'),
            "grade": material_result.get('grade', 'UNKNOWN'),
            "material_type": material_result.get('material_type', 'UNKNOWN'),
            "confidence": material_result.get('confidence', 0.0)
        },

        # Fitting-specific information
        "fitting_type": {
            "type": fitting_type_result.get('type', 'UNKNOWN'),
            "subtype": fitting_type_result.get('subtype', 'UNKNOWN'),
            "confidence": fitting_type_result.get('confidence', 0.0),
            "evidence": fitting_type_result.get('evidence', [])
        },

        "connection_method": {
            "method": connection_result.get('method', 'UNKNOWN'),
            "confidence": connection_result.get('confidence', 0.0),
            "matched_code": connection_result.get('matched_code', ''),
            "size_range": connection_result.get('size_range', ''),
            "pressure_range": connection_result.get('pressure_range', '')
        },

        "pressure_rating": {
            "rating": pressure_result.get('rating', 'UNKNOWN'),
            "confidence": pressure_result.get('confidence', 0.0),
            "max_pressure": pressure_result.get('max_pressure', ''),
            "common_use": pressure_result.get('common_use', '')
        },

        "manufacturing": {
            "method": manufacturing_result.get('method', 'UNKNOWN'),
            "confidence": manufacturing_result.get('confidence', 0.0),
            "evidence": manufacturing_result.get('evidence', []),
            "characteristics": manufacturing_result.get('characteristics', '')
        },

        "size_info": {
            "main_size": main_nom,
            "reduced_size": red_nom,
            "size_description": format_fitting_size(main_nom, red_nom),
            "requires_two_sizes": fitting_type_result.get('requires_two_sizes', False)
        },

        # Overall confidence (weighted over the four classifications)
        "overall_confidence": calculate_fitting_confidence({
            "material": material_result.get('confidence', 0),
            "fitting_type": fitting_type_result.get('confidence', 0),
            "connection": connection_result.get('confidence', 0),
            "pressure": pressure_result.get('confidence', 0)
        })
    }

def _standalone_pattern(token: str) -> str:
    """Build a regex matching *token* only where it is not glued into a longer word or number.

    A token edge that is an ASCII letter must not touch another ASCII letter
    and a token edge that is an ASCII digit must not touch another digit.
    Edges made of anything else (underscore, punctuation, Hangul, ...) are
    left unconstrained, so those tokens keep plain substring semantics.
    """
    first, last = token[:1], token[-1:]
    head = tail = ""
    if first.isascii() and first.isalpha():
        head = r"(?<![A-Za-z])"
    elif first.isascii() and first.isdigit():
        head = r"(?<![0-9])"
    if last.isascii() and last.isalpha():
        # A trailing plural "S" ("TEES", "CAPS") still counts as the same word.
        tail = r"S?(?![A-Za-z])"
    elif last.isascii() and last.isdigit():
        tail = r"(?![0-9])"
    return head + re.escape(token) + tail


def _pick_token(candidates, text: str) -> Optional[tuple]:
    """Pick the best ``(label, token)`` pair whose token occurs in *text*.

    *candidates* is an iterable of ``(label, token)`` pairs in priority order.
    The first pair whose token occurs as a standalone token wins. If no token
    is standalone, the first pair whose token occurs at all is returned, which
    is the plain first-substring-match rule. Returns ``None`` when no token
    occurs in *text*.
    """
    embedded_hit = None
    for label, token in candidates:
        if token not in text:
            continue
        if re.search(_standalone_pattern(token), text):
            return label, token
        if embedded_hit is None:
            embedded_hit = (label, token)
    return embedded_hit


def classify_fitting_type(dat_file: str, description: str,
                         main_nom: str, red_nom: Optional[str] = None) -> Dict:
    """Determine the fitting type (and subtype) of a BOM row.

    DAT_FILE patterns are consulted first (confidence 0.95), then DESCRIPTION
    keywords (confidence 0.85). Within each stage a whole-token match is
    preferred over a match buried inside a longer word, so that e.g. 'TEE'
    inside 'STEEL' or 'T_' inside 'OLET_' no longer outranks a real match.

    Args:
        dat_file: DAT_FILE field; ``None`` is treated as an empty string.
        description: DESCRIPTION field; ``None`` is treated as an empty string.
        main_nom: Main nominal size, forwarded to the subtype classifier.
        red_nom: Reduced nominal size, forwarded to the subtype classifier.

    Returns:
        Dict with ``type``, ``subtype``, ``confidence``, ``evidence`` and
        ``requires_two_sizes``; successful matches also carry
        ``subtype_confidence``. ``type`` is ``"UNKNOWN"`` when nothing matches.
    """

    dat_upper = ("" if dat_file is None else str(dat_file)).upper()
    desc_upper = ("" if description is None else str(description)).upper()

    # (table key, text searched, confidence, evidence tag) in priority order.
    stages = (
        ("dat_file_patterns", dat_upper, 0.95, "DAT_FILE_PATTERN"),
        ("description_keywords", desc_upper, 0.85, "DESCRIPTION_KEYWORD"),
    )

    for table_key, text, confidence, evidence_tag in stages:
        hit = _pick_token(
            (
                (fitting_type, token)
                for fitting_type, type_data in FITTING_TYPES.items()
                for token in type_data[table_key]
            ),
            text,
        )
        if hit is None:
            continue

        fitting_type, token = hit
        type_data = FITTING_TYPES[fitting_type]
        subtype_result = classify_fitting_subtype(
            fitting_type, desc_upper, main_nom, red_nom, type_data
        )

        return {
            "type": fitting_type,
            "subtype": subtype_result["subtype"],
            "confidence": confidence,
            "evidence": [f"{evidence_tag}: {token}"],
            "subtype_confidence": subtype_result["confidence"],
            "requires_two_sizes": type_data.get("requires_two_sizes", False)
        }

    # Nothing matched in either stage.
    return {
        "type": "UNKNOWN",
        "subtype": "UNKNOWN",
        "confidence": 0.0,
        "evidence": ["NO_FITTING_TYPE_IDENTIFIED"],
        "requires_two_sizes": False
    }

def classify_fitting_subtype(fitting_type: str, description: str,
                           main_nom: str, red_nom: str, type_data: Dict) -> Dict:
    """Determine the subtype of an already-identified fitting type.

    Rules, in order:
      1. Subtype keywords found in *description*. Whole-token matches are
         preferred over matches buried inside a longer word or number
         (e.g. '90' inside 'A790').
      2. If ``type_data["size_analysis"]`` is set: REDUCING when a reduced
         size is given and differs from the main size, otherwise EQUAL.
         Sizes are compared as stripped strings, so ``2`` and ``"2 "`` are
         the same size.
      3. Otherwise ``type_data["default_subtype"]`` (or ``"GENERAL"``), with a
         confidence that depends on whether the expected sizes were supplied.

    Args:
        fitting_type: Name of the fitting type (not used by the rules).
        description: Description text. Keywords are matched case-sensitively,
            so callers are expected to pass it upper-cased.
        main_nom: Main nominal size.
        red_nom: Reduced nominal size, or ``None`` / empty when absent.
        type_data: The ``FITTING_TYPES``-style entry for ``fitting_type``.

    Returns:
        Dict with ``subtype``, ``confidence`` and ``evidence``.
    """

    description = "" if description is None else str(description)
    subtypes = type_data.get("subtypes", {})

    # 1. Keyword-based subtype (takes precedence)
    hit = _pick_token(
        ((subtype, keyword) for subtype, keywords in subtypes.items() for keyword in keywords),
        description,
    )
    if hit is not None:
        subtype, keyword = hit
        return {
            "subtype": subtype,
            "confidence": 0.9,
            "evidence": [f"SUBTYPE_KEYWORD: {keyword}"]
        }

    main_text = "" if main_nom is None else str(main_nom).strip()
    red_text = "" if red_nom is None else str(red_nom).strip()
    # A falsy red_nom (None, "", 0) counts as "no reduced size given".
    has_reduced_size = bool(red_nom) and bool(red_text)

    # 2. Size analysis (e.g. TEE): a differing second size means REDUCING
    if type_data.get("size_analysis"):
        if has_reduced_size and red_text != main_text:
            return {
                "subtype": "REDUCING",
                "confidence": 0.85,
                "evidence": [f"SIZE_ANALYSIS_REDUCING: {main_nom} x {red_nom}"]
            }
        return {
            "subtype": "EQUAL",
            "confidence": 0.8,
            "evidence": [f"SIZE_ANALYSIS_EQUAL: {main_nom}"]
        }

    # 3. Confidence of the default depends on whether two sizes were expected
    if type_data.get("requires_two_sizes"):
        if has_reduced_size:
            confidence = 0.8
            evidence = [f"TWO_SIZES_PROVIDED: {main_nom} x {red_nom}"]
        else:
            confidence = 0.6
            evidence = ["TWO_SIZES_EXPECTED_BUT_MISSING"]
    else:
        confidence = 0.7
        evidence = ["SINGLE_SIZE_FITTING"]

    # 4. Fall back to the default subtype
    return {
        "subtype": type_data.get("default_subtype", "GENERAL"),
        "confidence": confidence,
        "evidence": evidence
    }

def classify_connection_method(dat_file: str, description: str) -> Dict:
    """Determine the connection (end) method of a fitting.

    DAT_FILE patterns are consulted first (confidence 0.95), then the method
    codes against the combined ``"DAT_FILE DESCRIPTION"`` text (confidence
    taken from the method's table entry). Within each stage a whole-token
    match is preferred over a match buried inside a longer word, so that
    e.g. 'SW' inside 'SWAGE' no longer outranks a standalone 'THD'.

    Args:
        dat_file: DAT_FILE field; ``None`` is treated as an empty string.
        description: DESCRIPTION field; ``None`` is treated as an empty string.

    Returns:
        Dict with ``method``, ``confidence``, ``matched_code`` and ``source``;
        successful matches also carry ``size_range``, ``pressure_range`` and
        ``typical_manufacturing``. ``method`` is ``"UNKNOWN"`` when nothing
        matches.
    """

    dat_upper = ("" if dat_file is None else str(dat_file)).upper()
    desc_upper = ("" if description is None else str(description)).upper()
    combined_text = f"{dat_upper} {desc_upper}"

    # (table key, text searched, source tag) in priority order.
    stages = (
        ("dat_patterns", dat_upper, "DAT_FILE_PATTERN"),
        ("codes", combined_text, "KEYWORD_MATCH"),
    )

    for table_key, text, source in stages:
        hit = _pick_token(
            (
                (method, token)
                for method, method_data in CONNECTION_METHODS.items()
                for token in method_data[table_key]
            ),
            text,
        )
        if hit is None:
            continue

        method, token = hit
        method_data = CONNECTION_METHODS[method]
        # DAT_FILE patterns are the most reliable signal and get a fixed score.
        confidence = 0.95 if source == "DAT_FILE_PATTERN" else method_data["confidence"]

        return {
            "method": method,
            "confidence": confidence,
            "matched_code": token,
            "source": source,
            "size_range": method_data["size_range"],
            "pressure_range": method_data["pressure_range"],
            "typical_manufacturing": method_data["typical_manufacturing"]
        }

    return {
        "method": "UNKNOWN",
        "confidence": 0.0,
        "matched_code": "",
        "source": "NO_CONNECTION_METHOD_FOUND"
    }

def classify_pressure_rating(dat_file: str, description: str) -> Dict:
    """Extract the pressure rating from the DAT_FILE and DESCRIPTION text.

    The patterns in ``PRESSURE_RATINGS["patterns"]`` are tried in list order
    against the upper-cased ``"DAT_FILE DESCRIPTION"`` text; the first pattern
    that matches supplies the numeric rating, reported as ``"<number>LB"``.
    Ratings listed in ``PRESSURE_RATINGS["standard_ratings"]`` get confidence
    0.95; any other matched rating gets 0.8 and placeholder details.

    Returns:
        Dict with ``rating``, ``confidence``, ``matched_pattern``,
        ``max_pressure`` and ``common_use`` (plus ``matched_value`` on a
        match). ``rating`` is ``"UNKNOWN"`` when no pattern matches.
    """

    haystack = "{} {}".format(dat_file, description).upper()

    # Lazily pair every pattern with its search result and take the first hit.
    attempts = (
        (candidate, re.search(candidate, haystack))
        for candidate in PRESSURE_RATINGS["patterns"]
    )
    first_hit = next(((candidate, found) for candidate, found in attempts if found), None)

    if first_hit is None:
        return {
            "rating": "UNKNOWN",
            "confidence": 0.0,
            "matched_pattern": "",
            "max_pressure": "",
            "common_use": ""
        }

    hit_pattern, hit = first_hit
    number = hit.group(1)
    label = f"{number}LB"

    # Known ratings carry their own details; unknown ones get placeholders.
    details = PRESSURE_RATINGS["standard_ratings"].get(label, {})
    if details:
        score = 0.95
    else:
        score = 0.8
        details = {"max_pressure": "확인 필요", "common_use": "비표준 등급"}

    return {
        "rating": label,
        "confidence": score,
        "matched_pattern": hit_pattern,
        "matched_value": number,
        "max_pressure": details.get("max_pressure", ""),
        "common_use": details.get("common_use", "")
    }

def determine_fitting_manufacturing(material_result: Dict, connection_result: Dict,
                                  pressure_result: Dict, main_nom: str) -> Dict:
    """Estimate how the fitting is manufactured.

    Rules, in order of decreasing confidence:
      1. The material result maps to FORGED or CAST (confidence 0.9).
      2. A 3000LB / 6000LB / 9000LB rating combined with a socket-weld or
         threaded connection means FORGED (confidence 0.85).
      3. The connection method's ``typical_manufacturing`` value (confidence 0.7).
      4. Otherwise UNKNOWN (confidence 0.0).

    ``main_nom`` is accepted but not used by these rules.

    Returns:
        Dict with ``method``, ``confidence``, ``evidence`` and ``characteristics``.
    """

    # Rule 1: the material standard is the strongest signal.
    from_material = get_manufacturing_method_from_material(material_result)
    if from_material in ["FORGED", "CAST"]:
        material_traits = {
            "FORGED": "고강도, 고압용, 소구경",
            "CAST": "복잡형상, 중저압용"
        }
        return {
            "method": from_material,
            "confidence": 0.9,
            "evidence": [f"MATERIAL_STANDARD: {material_result.get('standard')}"],
            "characteristics": material_traits.get(from_material, "")
        }

    joint = connection_result.get("method", "")
    rating = pressure_result.get("rating", "")

    # Rule 2: high pressure class + socket-weld / threaded ends => forged.
    is_high_class = any(label in rating for label in ["3000LB", "6000LB", "9000LB"])
    if is_high_class and joint in ["SOCKET_WELD", "THREADED"]:
        return {
            "method": "FORGED",
            "confidence": 0.85,
            "evidence": [f"HIGH_PRESSURE: {rating}", f"FORGED_CONNECTION: {joint}"],
            "characteristics": "고압용 단조품"
        }

    # Rule 3: what is typical for this connection method.
    typical = connection_result.get("typical_manufacturing", "")
    if not typical:
        # Rule 4: nothing to go on.
        return {
            "method": "UNKNOWN",
            "confidence": 0.0,
            "evidence": ["INSUFFICIENT_MANUFACTURING_INFO"],
            "characteristics": ""
        }

    typical_traits = {
        "FORGED": "단조품, 고강도",
        "WELDED_FABRICATED": "용접제작품, 대구경",
        "FORGED_OR_CAST": "단조 또는 주조"
    }
    return {
        "method": typical,
        "confidence": 0.7,
        "evidence": [f"CONNECTION_TYPICAL: {joint}"],
        "characteristics": typical_traits.get(typical, "")
    }

def format_fitting_size(main_nom: str, red_nom: str = None) -> str:
    """Render the fitting size as ``"<main>"`` or ``"<main> x <reduced>"``.

    ``None`` is rendered as an empty string. The reduced size is appended only
    when it is not blank and its text differs from the main size text.
    """
    primary, secondary = (
        "" if value is None else str(value) for value in (main_nom, red_nom)
    )
    if not secondary.strip() or secondary == primary:
        return primary
    return " x ".join((primary, secondary))

def calculate_fitting_confidence(confidence_scores: Dict) -> float:
    """Combine the per-aspect confidences into one overall score.

    Returns 0.0 when no supplied score is positive. Otherwise returns the
    weighted sum (material 0.25, fitting_type 0.4, connection 0.25,
    pressure 0.1) rounded to two decimals; a missing key contributes 0.
    """

    positive_count = sum(1 for value in confidence_scores.values() if value > 0)
    if positive_count == 0:
        return 0.0

    # The fitting type carries the most weight.
    weighting = (
        ("material", 0.25),
        ("fitting_type", 0.4),
        ("connection", 0.25),
        ("pressure", 0.1),
    )

    total = 0
    for aspect, weight in weighting:
        total += confidence_scores.get(aspect, 0) * weight

    return round(total, 2)

# ========== 특수 분류 함수들 ==========

def is_high_pressure_fitting(pressure_rating: str) -> bool:
    """Return True when the rating is exactly 3000LB, 6000LB or 9000LB."""
    for high_class in ("3000LB", "6000LB", "9000LB"):
        if pressure_rating == high_class:
            return True
    return False

def is_small_bore_fitting(main_nom: str) -> bool:
    """Return True when the nominal size is 2 inches or smaller.

    The first size found in ``main_nom`` is parsed as inches. Supported forms
    are plain numbers (``"2"``, ``"1.5"``), fractions (``"3/4"``) and mixed
    numbers written with a space or a hyphen (``"1-1/2"``, ``"2 1/2"``).
    Non-string values are converted with ``str()`` first.

    Returns:
        False when ``main_nom`` is ``None``, contains no number, or contains a
        fraction with a zero denominator.
    """
    if main_nom is None:
        return False

    # Alternative 1: optional whole part + fraction; alternative 2: plain number.
    # The fraction form is tried first so "3/4" is read as 0.75 rather than 3.
    size_match = re.search(
        r'(?:(\d+)[\s-]+)?(\d+)\s*/\s*(\d+)|(\d+(?:\.\d+)?)',
        str(main_nom),
    )
    if size_match is None:
        return False

    whole, numerator, denominator, plain = size_match.groups()
    if plain is not None:
        size_inches = float(plain)
    else:
        if int(denominator) == 0:
            return False
        size_inches = int(whole or 0) + int(numerator) / int(denominator)

    return size_inches <= 2.0

def get_fitting_purchase_info(fitting_result: Dict) -> Dict:
    """Derive purchasing hints from a fitting classification result.

    Reads ``fitting_type.type``, ``connection_method.method``,
    ``pressure_rating.rating`` and ``manufacturing.method`` /
    ``manufacturing.characteristics`` from ``fitting_result``.

    Returns:
        Dict with ``supplier_type``, ``lead_time_estimate``,
        ``purchase_category`` and ``manufacturing_note``.
    """

    type_name = fitting_result["fitting_type"]["type"]
    joint = fitting_result["connection_method"]["method"]
    rating = fitting_result["pressure_rating"]["rating"]
    process = fitting_result["manufacturing"]["method"]

    is_forged = process == "FORGED"

    # Supplier category follows the manufacturing process.
    supplier_type = (
        "단조 피팅 전문업체" if is_forged
        else "주조 피팅 전문업체" if process == "CAST"
        else "일반 피팅 업체"
    )

    # Lead time: the high-pressure check outranks the forged check.
    if is_high_pressure_fitting(rating):
        lead_time = "6-10주 (고압용)"
    else:
        lead_time = "4-8주 (단조품)" if is_forged else "2-6주 (일반품)"

    return {
        "supplier_type": supplier_type,
        "lead_time_estimate": lead_time,
        "purchase_category": " ".join(f"{part}" for part in (type_name, joint, rating)),
        "manufacturing_note": fitting_result["manufacturing"]["characteristics"]
    }