Files
TK-BOM-Project/backend/app/services/fitting_classifier.py

891 lines
34 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
FITTING 분류 시스템 V2
재질 분류 + 피팅 특화 분류 + 스풀 시스템 통합
"""
import re
from typing import Dict, List, Optional
from .material_classifier import classify_material, get_manufacturing_method_from_material
# ========== Classification table per FITTING type (based on real BOM data) ==========
# Each entry maps a fitting type to:
#   dat_file_patterns    - substrings looked up in the DAT_FILE field
#   description_keywords - substrings looked up in the DESCRIPTION field
#   subtypes             - subtype name -> keywords (insertion order = match priority)
# Keywords are matched with the ``in`` operator, so an empty string would match
# every description; no entry may be empty.
FITTING_TYPES = {
    "ELBOW": {
        "dat_file_patterns": ["90L_", "45L_", "ELL_", "ELBOW_"],
        "description_keywords": ["ELBOW", "ELL", "엘보", "90 ELBOW", "45 ELBOW", "LR ELBOW", "SR ELBOW", "90 ELL", "45 ELL"],
        "subtypes": {
            "90DEG_LONG_RADIUS": ["90 LR", "90° LR", "90DEG LR", "90도 장반경", "90 LONG RADIUS", "LR 90"],
            "90DEG_SHORT_RADIUS": ["90 SR", "90° SR", "90DEG SR", "90도 단반경", "90 SHORT RADIUS", "SR 90"],
            "45DEG_LONG_RADIUS": ["45 LR", "45° LR", "45DEG LR", "45도 장반경", "45 LONG RADIUS", "LR 45"],
            "45DEG_SHORT_RADIUS": ["45 SR", "45° SR", "45DEG SR", "45도 단반경", "45 SHORT RADIUS", "SR 45"],
            "90DEG": ["90", "90°", "90DEG", "90도"],
            "45DEG": ["45", "45°", "45DEG", "45도"],
            "LONG_RADIUS": ["LR", "LONG RADIUS", "장반경"],
            "SHORT_RADIUS": ["SR", "SHORT RADIUS", "단반경"]
        },
        "default_subtype": "90DEG",
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 48\""
    },
    "TEE": {
        "dat_file_patterns": ["TEE_", "T_"],
        # NOTE(review): an empty-string keyword used to follow "TEE" here. It matched
        # every description and has been removed; it was possibly a Korean keyword
        # lost to an encoding problem - restore the real keyword if it is known.
        "description_keywords": ["TEE"],
        "subtypes": {
            "EQUAL": ["EQUAL TEE", "등경티", "EQUAL"],
            "REDUCING": ["REDUCING TEE", "RED TEE", "축소티", "REDUCING", "RD"]
        },
        "size_analysis": True,  # RED_NOM decides whether the tee is REDUCING
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 48\""
    },
    "REDUCER": {
        "dat_file_patterns": ["CNC_", "ECC_", "RED_", "REDUCER_"],
        "description_keywords": ["REDUCER", "RED", "리듀서"],
        "subtypes": {
            "CONCENTRIC": ["CONCENTRIC", "CONC", "CNC", "동심", "CON"],
            "ECCENTRIC": ["ECCENTRIC", "ECC", "편심"]
        },
        "requires_two_sizes": True,
        "common_connections": ["BUTT_WELD"],
        "size_range": "1/2\" ~ 48\""
    },
    "CAP": {
        "dat_file_patterns": ["CAP_"],
        # NOTE(review): an empty-string keyword used to sit between "CAP" and "막음";
        # removed for the same reason as in the TEE entry.
        "description_keywords": ["CAP", "막음"],
        "subtypes": {
            "BUTT_WELD": ["BW", "BUTT WELD"],
            "SOCKET_WELD": ["SW", "SOCKET WELD"],
            "THREADED": ["THD", "THREADED", "나사", "NPT"]
        },
        "common_connections": ["BUTT_WELD", "SOCKET_WELD", "THREADED"],
        "size_range": "1/4\" ~ 24\""
    },
    "PLUG": {
        "dat_file_patterns": ["PLUG_", "HEX_PLUG"],
        "description_keywords": ["PLUG", "플러그", "HEX.PLUG", "HEX PLUG", "HEXAGON PLUG"],
        "subtypes": {
            "HEX": ["HEX", "HEXAGON", "육각"],
            "SQUARE": ["SQUARE", "사각"],
            "THREADED": ["THD", "THREADED", "나사", "NPT"]
        },
        "common_connections": ["THREADED", "NPT"],
        "size_range": "1/8\" ~ 4\""
    },
    "NIPPLE": {
        "dat_file_patterns": ["NIP_", "NIPPLE_"],
        "description_keywords": ["NIPPLE", "니플"],
        "subtypes": {
            "THREADED": ["THREADED", "THD", "NPT", "나사"],
            "SOCKET_WELD": ["SOCKET WELD", "SW", "소켓웰드"],
            "CLOSE": ["CLOSE NIPPLE", "CLOSE"],
            "SHORT": ["SHORT NIPPLE", "SHORT"],
            "LONG": ["LONG NIPPLE", "LONG"]
        },
        "common_connections": ["THREADED", "SOCKET_WELD"],
        "size_range": "1/8\" ~ 4\""
    },
    "SWAGE": {
        "dat_file_patterns": ["SWG_"],
        "description_keywords": ["SWAGE", "스웨지"],
        "subtypes": {
            "CONCENTRIC": ["CONCENTRIC", "CONC", "CN", "CON", "동심"],
            "ECCENTRIC": ["ECCENTRIC", "ECC", "EC", "편심"]
        },
        "requires_two_sizes": True,
        "common_connections": ["BUTT_WELD", "SOCKET_WELD"],
        "size_range": "1/2\" ~ 12\""
    },
    "OLET": {
        "dat_file_patterns": ["SOL_", "WOL_", "TOL_", "EOL_", "NOL_", "COL_", "OLET_", "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET"],
        "description_keywords": ["SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET", "SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", "OLET", "올렛", "O-LET", "SOCKLET"],
        "subtypes": {
            "SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"],
            "WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"],
            "ELLOLET": ["ELL-O-LET", "ELLOLET", "EOL", "ELL O-LET", "ELBOW-O-LET"],
            "THREADOLET": ["THREAD-O-LET", "THREADOLET", "TOL", "THREADED-O-LET"],
            "ELBOLET": ["ELB-O-LET", "ELBOLET", "EOL", "ELBOW-O-LET"],
            "NIPOLET": ["NIP-O-LET", "NIPOLET", "NOL", "NIPPLE-O-LET"],
            "COUPOLET": ["COUP-O-LET", "COUPOLET", "COL", "COUPLING-O-LET"]
        },
        "requires_two_sizes": True,  # main (run) pipe x branch pipe
        "common_connections": ["SOCKET_WELD", "THREADED", "BUTT_WELD"],
        "size_range": "1/8\" ~ 4\""
    },
    "COUPLING": {
        "dat_file_patterns": ["CPL_", "COUPLING_"],
        "description_keywords": ["COUPLING", "커플링"],
        "subtypes": {
            "FULL": ["FULL COUPLING", "FULL"],
            "HALF": ["HALF COUPLING", "HALF"],
            "REDUCING": ["REDUCING COUPLING", "RED"]
        },
        "common_connections": ["SOCKET_WELD", "THREADED"],
        "size_range": "1/8\" ~ 4\""
    }
}
# ========== Classification by connection method ==========
# For every connection method:
#   codes        - keywords searched in the DAT_FILE + DESCRIPTION text
#   dat_patterns - substrings searched in the DAT_FILE field only
#   confidence   - score reported when a keyword (not a DAT pattern) matches
CONNECTION_METHODS = {
    "BUTT_WELD": {
        "codes": ["BW", "BUTT WELD", "맞대기용접", "BUTT-WELD"],
        "dat_patterns": ["_BW"],
        "size_range": "1/2\" ~ 48\"",
        "pressure_range": "150LB ~ 2500LB",
        "typical_manufacturing": "WELDED_FABRICATED",
        "confidence": 0.95,
    },
    "SOCKET_WELD": {
        "codes": ["SW", "SOCKET WELD", "소켓웰드", "SOCKET-WELD"],
        "dat_patterns": ["_SW_"],
        "size_range": "1/8\" ~ 4\"",
        "pressure_range": "150LB ~ 9000LB",
        "typical_manufacturing": "FORGED",
        "confidence": 0.95,
    },
    "THREADED": {
        "codes": ["THD", "THRD", "NPT", "THREADED", "나사", "TR"],
        "dat_patterns": ["_TR", "_THD"],
        "size_range": "1/8\" ~ 4\"",
        "pressure_range": "150LB ~ 6000LB",
        "typical_manufacturing": "FORGED",
        "confidence": 0.95,
    },
    "FLANGED": {
        "codes": ["FL", "FLG", "FLANGED", "플랜지"],
        "dat_patterns": ["_FL_"],
        "size_range": "1/2\" ~ 48\"",
        "pressure_range": "150LB ~ 2500LB",
        "typical_manufacturing": "FORGED_OR_CAST",
        "confidence": 0.9,
    },
}
# ========== Classification by pressure rating ==========
# "patterns" are regular expressions tried in list order; group 1 captures the
# numeric class. "standard_ratings" is keyed by "<number>LB".
PRESSURE_RATINGS = {
    "patterns": [
        r"(\d+)LB",
        r"CLASS\s*(\d+)",
        r"CL\s*(\d+)",
        r"(\d+)#",
        r"(\d+)\s*LB",
    ],
    "standard_ratings": {
        "150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"},
        "300LB": {"max_pressure": "740 PSI", "common_use": "중압용"},
        "600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"},
        "900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"},
        "1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"},
        "2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"},
        "3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"},
        "6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"},
        "9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"},
    },
}
def classify_fitting(dat_file: str, description: str, main_nom: str,
red_nom: str = None, length: float = None) -> Dict:
"""
Run the complete FITTING classification.
Args:
dat_file: DAT_FILE field
description: DESCRIPTION field
main_nom: MAIN_NOM field (main size)
red_nom: RED_NOM field (reduced size, optional)
length: not referenced in the visible part of this function
Returns:
The complete fitting classification result; when neither a fitting keyword
nor a fitting material is found, a dict with category "UNKNOWN" and
overall_confidence 0.0.
"""
# NOTE(review): this definition is cut off in the reviewed text (the final return
# dict is never closed) and the code has lost its indentation; only comments were
# changed here.
desc_upper = description.upper()
dat_upper = dat_file.upper()
# 1. Check fitting keywords (the unified classifier has already labelled the item
# as a fitting, so classification proceeds even when only a material is present)
# OLET keywords are checked first for accurate classification
olet_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'OLET', 'O-LET', 'SOCKLET']
# NOTE(review): has_olet_keyword is not read anywhere in the visible part of this function.
has_olet_keyword = any(keyword in desc_upper or keyword in dat_upper for keyword in olet_keywords)
# NOTE(review): the list below contains two empty strings; '' is a substring of
# every string, so has_fitting_keyword is always True and the UNKNOWN early return
# below can never trigger. The blanks are possibly keywords lost to an encoding
# problem - confirm and restore.
fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'COUPLING', 'PLUG', '엘보', '', '리듀서', '', '니플', '스웨지', '올렛', '커플링', '플러그'] + olet_keywords
has_fitting_keyword = any(keyword in desc_upper or keyword in dat_upper for keyword in fitting_keywords)
# Check for fitting materials (A234, A403, A420)
fitting_materials = ['A234', 'A403', 'A420']
has_fitting_material = any(material in desc_upper for material in fitting_materials)
# Neither a fitting keyword nor a fitting material -> UNKNOWN
if not has_fitting_keyword and not has_fitting_material:
return {
"category": "UNKNOWN",
"overall_confidence": 0.0,
"reason": "피팅 키워드 및 재질 없음"
}
# 2. Material classification (shared module)
material_result = classify_material(description)
# 2. Fitting type classification
fitting_type_result = classify_fitting_type(dat_file, description, main_nom, red_nom)
# 3. Connection method classification
connection_result = classify_connection_method(dat_file, description)
# 4. Pressure rating classification
pressure_result = classify_pressure_rating(dat_file, description)
# 4.5. Schedule classification (important for nipples etc.) - supports separate schedules
schedule_result = classify_fitting_schedule_with_reducing(description, main_nom, red_nom)
# 5. Manufacturing method estimation
manufacturing_result = determine_fitting_manufacturing(
material_result, connection_result, pressure_result, main_nom
)
# 6. Assemble the final result
# --- Added detection logic for instrument (Swagelok-style) fittings ---
instrument_keywords = ["SWAGELOK", "DK-LOK", "TUBE FITTING", "UNION", "FERRULE", "MALE CONNECTOR", "FEMALE CONNECTOR"]
is_instrument = any(kw in desc_upper for kw in instrument_keywords)
# NOTE(review): `fitting_type` is never assigned in the visible code (the type result
# is stored in `fitting_type_result`), so the lines below would raise NameError as
# written - confirm against the full file.
if is_instrument:
fitting_type["category"] = "INSTRUMENT_FITTING"
if "SWAGELOK" in desc_upper: fitting_type["brand"] = "SWAGELOK"
# Extract the tube OD (e.g. 1/4", 6MM, 12MM)
tube_match = re.search(r'(\d+(?:/\d+)?)\s*(?:\"|INCH|MM)\s*(?:OD|TUBE)', desc_upper)
if tube_match:
fitting_type["tube_od"] = tube_match.group(0)
return {
"category": "FITTING",
"fitting_type": fitting_type,
# NOTE(review): the return dict is truncated at this point in the reviewed text.
def analyze_size_pattern_for_fitting_type(description: str, main_nom: str, red_nom: str = None) -> Dict:
    """
    Tell TEE from REDUCER using real BOM description patterns.

    Keyword phrases are checked first and win with confidence 0.95:
    - "TEE RED" / "TEE REDUCING"  -> TEE / REDUCING
    - "RED CONC" / "REDUCER CONC" -> REDUCER / CONCENTRIC
    - "RED ECC" / "REDUCER ECC"   -> REDUCER / ECCENTRIC

    Otherwise the first "A x B" or "A x B x C" size expression in the
    description is analysed:
    - three sizes  -> TEE / REDUCING (0.85 when B == C, else 0.80)
    - two sizes    -> REDUCER (0.80 with a CONC/ECC hint, 0.60 by default)
    - all sizes identical, or no size expression -> no decision

    Args:
        description: DESCRIPTION text.
        main_nom: main nominal size (accepted for interface compatibility, unused).
        red_nom: reduced nominal size (accepted for interface compatibility, unused).

    Returns:
        A classification dict (type, subtype, confidence, evidence,
        subtype_confidence, requires_two_sizes), or {"confidence": 0.0} when
        nothing can be inferred.
    """
    desc_upper = description.upper()

    def _result(fitting_type, subtype, confidence, evidence):
        # Every decision shares this shape; only REDUCER results require two sizes.
        return {
            "type": fitting_type,
            "subtype": subtype,
            "confidence": confidence,
            "evidence": [evidence],
            "subtype_confidence": confidence,
            "requires_two_sizes": fitting_type == "REDUCER",
        }

    # 1. Keyword-based classification (highest priority) - real BOM phrases
    keyword_rules = (
        (("TEE RED", "TEE REDUCING"), "TEE", "REDUCING", "KEYWORD_TEE_RED"),
        (("RED CONC", "REDUCER CONC"), "REDUCER", "CONCENTRIC", "KEYWORD_RED_CONC"),
        (("RED ECC", "REDUCER ECC"), "REDUCER", "ECCENTRIC", "KEYWORD_RED_ECC"),
    )
    for phrases, fitting_type, subtype, evidence in keyword_rules:
        if any(phrase in desc_upper for phrase in phrases):
            return _result(fitting_type, subtype, 0.95, evidence)

    # 2. Size pattern analysis (secondary): sizes joined by x / X / ×
    size = r'(\d+(?:\s+\d+/\d+)?(?:\.\d+)?)"?'
    joiner = r'\s*[xX×]\s*'
    match = re.search(size + joiner + size + r'(?:' + joiner + size + r')?', description)
    if match is None:
        return {"confidence": 0.0}

    # Keep the sizes positionally. De-duplicating individual values (as the
    # previous implementation did) collapsed "A x B x B" to two sizes, so a
    # reducing tee was reported as a reducer.
    sizes = [group for group in match.groups() if group]
    if len(set(sizes)) == 1:
        # All sizes identical: nothing indicates a reduction.
        return {"confidence": 0.0}

    joined = " x ".join(sizes)
    if len(sizes) == 3:
        if sizes[1] == sizes[2]:
            # A x B x B pattern -> TEE REDUCING
            return _result("TEE", "REDUCING", 0.85, f"SIZE_PATTERN_TEE_REDUCING: {joined}")
        # Any other three-size pattern -> TEE REDUCING with lower confidence
        return _result("TEE", "REDUCING", 0.80, f"SIZE_PATTERN_TEE_REDUCING_UNEQUAL: {joined}")

    # A x B pattern -> REDUCER; CONC/ECC hints pick the subtype
    if "CONC" in desc_upper:
        return _result("REDUCER", "CONCENTRIC", 0.80, f"SIZE_PATTERN_REDUCER_CONC: {joined}")
    if "ECC" in desc_upper:
        return _result("REDUCER", "ECCENTRIC", 0.80, f"SIZE_PATTERN_REDUCER_ECC: {joined}")
    # No hint: low-confidence REDUCER, CONCENTRIC as the default subtype
    return _result("REDUCER", "CONCENTRIC", 0.60, f"SIZE_PATTERN_REDUCER_DEFAULT: {joined}")
def classify_fitting_type(dat_file: str, description: str,
                          main_nom: str, red_nom: str = None) -> Dict:
    """
    Classify the fitting type (ELBOW, TEE, REDUCER, OLET, ...).

    Checks run in this order and the first hit wins:
      0. OLET-specific keywords in the description or DAT file (confidence 0.95),
         checked first to avoid confusion with ELL.
      1. Result of analyze_size_pattern_for_fitting_type, accepted only when its
         confidence is strictly above 0.85.
      2. DAT_FILE patterns from FITTING_TYPES (confidence 0.95).
      3. DESCRIPTION keywords from FITTING_TYPES (confidence 0.85).
      4. Otherwise type "UNKNOWN" with confidence 0.0.

    Args:
        dat_file: DAT_FILE field.
        description: DESCRIPTION field.
        main_nom: main nominal size, forwarded to the subtype classifier.
        red_nom: reduced nominal size, forwarded to the subtype classifier.

    Returns:
        Dict with type, subtype, confidence, evidence and requires_two_sizes
        (plus subtype_confidence whenever a type was identified).
    """
    dat_upper = dat_file.upper()
    desc_upper = description.upper()

    def _typed_result(fitting_type, type_data, confidence, evidence):
        # Resolve the subtype and assemble the common result shape.
        subtype_result = classify_fitting_subtype(
            fitting_type, desc_upper, main_nom, red_nom, type_data
        )
        return {
            "type": fitting_type,
            "subtype": subtype_result["subtype"],
            "confidence": confidence,
            "evidence": [evidence],
            "subtype_confidence": subtype_result["confidence"],
            "requires_two_sizes": type_data.get("requires_two_sizes", False),
        }

    # 0. OLET first (prevents confusion with ELL)
    olet_specific_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'O-LET', 'SOCKLET']
    for keyword in olet_specific_keywords:
        if keyword in desc_upper or keyword in dat_upper:
            return _typed_result(
                "OLET", FITTING_TYPES["OLET"], 0.95, f"OLET_PRIORITY_KEYWORD: {keyword}"
            )

    # 1. Size/keyword pattern analysis to separate TEE from REDUCER
    size_pattern_result = analyze_size_pattern_for_fitting_type(desc_upper, main_nom, red_nom)
    if size_pattern_result.get("confidence", 0) > 0.85:
        return size_pattern_result

    # 2. DAT_FILE patterns (most reliable). Empty patterns are skipped because
    #    "" is a substring of every string and would match any DAT file.
    for fitting_type, type_data in FITTING_TYPES.items():
        for pattern in type_data["dat_file_patterns"]:
            if pattern and pattern in dat_upper:
                return _typed_result(
                    fitting_type, type_data, 0.95, f"DAT_FILE_PATTERN: {pattern}"
                )

    # 3. DESCRIPTION keywords. Empty keywords are skipped for the same reason;
    #    otherwise a blank table entry classifies every description as that type.
    for fitting_type, type_data in FITTING_TYPES.items():
        for keyword in type_data["description_keywords"]:
            if keyword and keyword in desc_upper:
                return _typed_result(
                    fitting_type, type_data, 0.85, f"DESCRIPTION_KEYWORD: {keyword}"
                )

    # 4. Classification failed
    return {
        "type": "UNKNOWN",
        "subtype": "UNKNOWN",
        "confidence": 0.0,
        "evidence": ["NO_FITTING_TYPE_IDENTIFIED"],
        "requires_two_sizes": False
    }
def classify_fitting_subtype(fitting_type: str, description: str,
                             main_nom: str, red_nom: str, type_data: Dict) -> Dict:
    """
    Classify the subtype of an already identified fitting type.

    Order of checks (first hit wins):
      0. ELBOW only: angle (90/45) AND radius (LR/SR) both present anywhere in
         the description -> combined subtype, confidence 0.95.
      1. Subtype keywords from type_data["subtypes"], in table order (0.9).
      2. ELBOW only: angle alone (0.85) or radius alone (assumes 90 degrees, 0.8).
      3. type_data["size_analysis"]: REDUCING when red_nom is set and differs
         from main_nom (0.85), otherwise EQUAL (0.8).
      4. type_data["default_subtype"] (or "GENERAL"), with a confidence that
         depends on whether the expected second size was provided.

    The combined ELBOW check runs before the generic keyword loop on purpose:
    the keyword tables contain the bare tokens "90", "45", "LR" and "SR", so the
    loop would otherwise return an angle-only or radius-only subtype before the
    combination could ever be examined.

    Args:
        fitting_type: fitting type name, e.g. "ELBOW".
        description: DESCRIPTION text (upper-cased internally).
        main_nom: main nominal size.
        red_nom: reduced nominal size, may be None or blank.
        type_data: the table entry for this fitting type.

    Returns:
        Dict with subtype, confidence and evidence.
    """
    desc_upper = description.upper()
    subtypes = type_data.get("subtypes", {})

    # 0. ELBOW: detect angle and radius independently, prefer the combination.
    angle = radius = None
    if fitting_type == "ELBOW":
        # "90°" and "90DEG" both contain "90" (same for 45), so one test suffices.
        if "90" in desc_upper:
            angle = "90DEG"
        elif "45" in desc_upper:
            angle = "45DEG"
        if any(token in desc_upper for token in ("LR", "LONG RADIUS", "장반경")):
            radius = "LONG_RADIUS"
        elif any(token in desc_upper for token in ("SR", "SHORT RADIUS", "단반경")):
            radius = "SHORT_RADIUS"
        if angle and radius:
            return {
                "subtype": f"{angle}_{radius}",
                "confidence": 0.95,
                "evidence": [f"{angle} + {radius}"]
            }

    # 1. Keyword-based subtype classification (case-insensitive)
    for subtype, keywords in subtypes.items():
        for keyword in keywords:
            if keyword.upper() in desc_upper:
                return {
                    "subtype": subtype,
                    "confidence": 0.9,
                    "evidence": [f"SUBTYPE_KEYWORD: {keyword}"]
                }

    # 2. ELBOW fallbacks when the keyword table did not match
    if angle:
        return {
            "subtype": angle,
            "confidence": 0.85,
            "evidence": [f"{angle}_DETECTED"]
        }
    if radius:
        # Radius only: assume a 90 degree elbow.
        return {
            "subtype": f"90DEG_{radius}",
            "confidence": 0.8,
            "evidence": [f"{radius}_DEFAULT_90DEG"]
        }

    # 3. Types that are decided by comparing sizes (e.g. TEE)
    if type_data.get("size_analysis"):
        if red_nom and str(red_nom).strip() and red_nom != main_nom:
            return {
                "subtype": "REDUCING",
                "confidence": 0.85,
                "evidence": [f"SIZE_ANALYSIS_REDUCING: {main_nom} x {red_nom}"]
            }
        return {
            "subtype": "EQUAL",
            "confidence": 0.8,
            "evidence": [f"SIZE_ANALYSIS_EQUAL: {main_nom}"]
        }

    # 4. Default subtype; confidence reflects whether the second size is present
    if type_data.get("requires_two_sizes"):
        if red_nom and str(red_nom).strip():
            confidence = 0.8
            evidence = [f"TWO_SIZES_PROVIDED: {main_nom} x {red_nom}"]
        else:
            confidence = 0.6
            evidence = ["TWO_SIZES_EXPECTED_BUT_MISSING"]
    else:
        confidence = 0.7
        evidence = ["SINGLE_SIZE_FITTING"]
    return {
        "subtype": type_data.get("default_subtype", "GENERAL"),
        "confidence": confidence,
        "evidence": evidence
    }
def classify_connection_method(dat_file: str, description: str) -> Dict:
    """
    Classify the connection method (BUTT_WELD, SOCKET_WELD, THREADED, FLANGED).

    DAT_FILE patterns are checked first (confidence 0.95); otherwise the
    connection codes are searched in the combined DAT_FILE + DESCRIPTION text.
    Codes must appear as standalone tokens - not directly preceded or followed
    by an ASCII letter or digit - so that short codes no longer fire inside
    unrelated words ("SW" in "SWAGE", "TR" in "CONCENTRIC", "FL" in "TEFLON").
    Underscores, spaces and punctuation count as separators, so DAT-style names
    such as "TEE_BW" still match. A code glued to other letters (e.g. "SWXSW")
    is intentionally not matched.

    Args:
        dat_file: DAT_FILE field.
        description: DESCRIPTION field.

    Returns:
        Dict with method, confidence, matched_code and source; on a match also
        size_range, pressure_range and typical_manufacturing copied from
        CONNECTION_METHODS. method is "UNKNOWN" when nothing matches.
    """
    dat_upper = dat_file.upper()
    desc_upper = description.upper()
    combined_text = f"{dat_upper} {desc_upper}"

    def _hit(method, method_data, confidence, matched_code, source):
        # Common result shape for both matching strategies.
        return {
            "method": method,
            "confidence": confidence,
            "matched_code": matched_code,
            "source": source,
            "size_range": method_data["size_range"],
            "pressure_range": method_data["pressure_range"],
            "typical_manufacturing": method_data["typical_manufacturing"]
        }

    # 1. DAT_FILE patterns first (most reliable); these already carry their
    #    own "_" delimiters, so plain substring matching is kept.
    for method, method_data in CONNECTION_METHODS.items():
        for pattern in method_data["dat_patterns"]:
            if pattern in dat_upper:
                return _hit(method, method_data, 0.95, pattern, "DAT_FILE_PATTERN")

    # 2. Keyword codes, matched as whole tokens
    for method, method_data in CONNECTION_METHODS.items():
        for code in method_data["codes"]:
            token = rf"(?<![A-Z0-9]){re.escape(code)}(?![A-Z0-9])"
            if re.search(token, combined_text):
                return _hit(method, method_data, method_data["confidence"], code, "KEYWORD_MATCH")

    return {
        "method": "UNKNOWN",
        "confidence": 0.0,
        "matched_code": "",
        "source": "NO_CONNECTION_METHOD_FOUND"
    }
def classify_pressure_rating(dat_file: str, description: str) -> Dict:
    """
    Extract the pressure rating from the DAT file name and description.

    The patterns in PRESSURE_RATINGS["patterns"] are tried in order against the
    upper-cased "<dat_file> <description>" text; the first match decides. The
    rating is reported as "<number>LB". Ratings listed in
    PRESSURE_RATINGS["standard_ratings"] get confidence 0.95, others 0.8.

    Returns:
        Dict with rating, confidence, matched_pattern, max_pressure and
        common_use (plus matched_value on a match); rating is "UNKNOWN" and
        confidence 0.0 when no pattern matches.
    """
    haystack = f"{dat_file} {description}".upper()

    for regex in PRESSURE_RATINGS["patterns"]:
        found = re.search(regex, haystack)
        if found is None:
            continue

        number = found.group(1)
        label = f"{number}LB"
        info = PRESSURE_RATINGS["standard_ratings"].get(label, {})
        if info:
            score = 0.95
        else:
            # Not a standard class: keep the value but flag it for verification.
            score = 0.8
            info = {"max_pressure": "확인 필요", "common_use": "비표준 등급"}

        return {
            "rating": label,
            "confidence": score,
            "matched_pattern": regex,
            "matched_value": number,
            "max_pressure": info.get("max_pressure", ""),
            "common_use": info.get("common_use", "")
        }

    return {
        "rating": "UNKNOWN",
        "confidence": 0.0,
        "matched_pattern": "",
        "max_pressure": "",
        "common_use": ""
    }
def determine_fitting_manufacturing(material_result: Dict, connection_result: Dict,
                                    pressure_result: Dict, main_nom: str) -> Dict:
    """
    Decide how the fitting is manufactured.

    Evidence is evaluated in decreasing order of reliability:
      1. Method derived from the material (FORGED or CAST)   -> confidence 0.9
      2. High pressure class with socket-weld/threaded ends  -> FORGED, 0.85
      3. Typical method of the connection type               -> confidence 0.7
      4. Otherwise UNKNOWN with confidence 0.0

    Args:
        material_result: result of the material classifier.
        connection_result: result of classify_connection_method.
        pressure_result: result of classify_pressure_rating.
        main_nom: main nominal size (not used by the current rules).

    Returns:
        Dict with method, confidence, evidence and characteristics.
    """
    # 1. Material-based method (most certain)
    by_material = get_manufacturing_method_from_material(material_result)
    if by_material in ("FORGED", "CAST"):
        traits = "고강도, 고압용, 소구경" if by_material == "FORGED" else "복잡형상, 중저압용"
        return {
            "method": by_material,
            "confidence": 0.9,
            "evidence": [f"MATERIAL_STANDARD: {material_result.get('standard')}"],
            "characteristics": traits
        }

    # 2. High pressure + socket weld / threaded = forged
    link = connection_result.get("method", "")
    rating = pressure_result.get("rating", "")
    is_high_pressure = any(level in rating for level in ("3000LB", "6000LB", "9000LB"))
    if is_high_pressure and link in ("SOCKET_WELD", "THREADED"):
        return {
            "method": "FORGED",
            "confidence": 0.85,
            "evidence": [f"HIGH_PRESSURE: {rating}", f"FORGED_CONNECTION: {link}"],
            "characteristics": "고압용 단조품"
        }

    # 3. Typical manufacturing method of the connection type
    typical = connection_result.get("typical_manufacturing", "")
    if typical:
        descriptions = {
            "FORGED": "단조품, 고강도",
            "WELDED_FABRICATED": "용접제작품, 대구경",
            "FORGED_OR_CAST": "단조 또는 주조"
        }
        return {
            "method": typical,
            "confidence": 0.7,
            "evidence": [f"CONNECTION_TYPICAL: {link}"],
            "characteristics": descriptions.get(typical, "")
        }

    # 4. Not enough information
    return {
        "method": "UNKNOWN",
        "confidence": 0.0,
        "evidence": ["INSUFFICIENT_MANUFACTURING_INFO"],
        "characteristics": ""
    }
def format_fitting_size(main_nom: str, red_nom: str = None) -> str:
    """
    Format the size label of a fitting.

    Returns "<main> x <red>" when a non-blank reduced size differs from the
    main size, otherwise just the main size. None is rendered as "".
    """
    main_text = "" if main_nom is None else str(main_nom)
    red_text = "" if red_nom is None else str(red_nom)
    # A blank or identical second size adds no information.
    if not red_text.strip() or red_text == main_text:
        return main_text
    return f"{main_text} x {red_text}"
def calculate_fitting_confidence(confidence_scores: Dict) -> float:
    """
    Combine the per-aspect confidences into one overall score.

    Returns 0.0 when no score is positive. Otherwise returns the weighted sum
    (fitting type weighs most) rounded to two decimals; aspects missing from
    confidence_scores contribute 0.
    """
    positive_count = sum(1 for value in confidence_scores.values() if value > 0)
    if positive_count == 0:
        return 0.0

    # Weighted sum - the fitting type is the most important aspect.
    weighting = (
        ("material", 0.25),
        ("fitting_type", 0.4),
        ("connection", 0.25),
        ("pressure", 0.1),
    )
    total = sum(confidence_scores.get(name, 0) * weight for name, weight in weighting)
    return round(total, 2)
# ========== Special-purpose classification helpers ==========
def is_high_pressure_fitting(pressure_rating: str) -> bool:
    """Return True when the rating is one of the 3000LB / 6000LB / 9000LB classes."""
    return pressure_rating in ("3000LB", "6000LB", "9000LB")
def is_small_bore_fitting(main_nom: str) -> bool:
    """
    Return True when the nominal size is 2 inches or smaller.

    The first size found in main_nom is parsed, including fractional notations:
    "3/4" -> 0.75, "1 1/2" or "1-1/2" -> 1.5, "2.5" -> 2.5. Previously only the
    first integer was read, so 3/4" counted as 3 (not small bore) and 2 1/2" as
    2 (small bore).

    Args:
        main_nom: nominal size text in inches; non-string values are converted
            with str().

    Returns:
        False when main_nom is None, contains no number, or has a zero
        denominator; otherwise whether the parsed size is <= 2.0.
    """
    if main_nom is None:
        return False

    # Either "[whole] numerator/denominator" or a plain (decimal) number.
    match = re.search(
        r'(?:(\d+(?:\.\d+)?)[\s-]+)?(\d+)\s*/\s*(\d+)|(\d+(?:\.\d+)?)',
        str(main_nom),
    )
    if match is None:
        return False

    whole, numerator, denominator, plain = match.groups()
    if plain is not None:
        size = float(plain)
    else:
        if int(denominator) == 0:
            return False
        size = float(whole or 0) + int(numerator) / int(denominator)
    return size <= 2.0
def get_fitting_purchase_info(fitting_result: Dict) -> Dict:
    """
    Derive purchasing hints from a fitting classification result.

    Reads the type, connection method, pressure rating and manufacturing method
    from fitting_result and returns the supplier type, an estimated lead time,
    a purchase category label and the manufacturing note.
    """
    kind = fitting_result["fitting_type"]["type"]
    link = fitting_result["connection_method"]["method"]
    rating = fitting_result["pressure_rating"]["rating"]
    made_by = fitting_result["manufacturing"]["method"]

    # Supplier type follows the manufacturing method.
    if made_by == "FORGED":
        supplier = "단조 피팅 전문업체"
    else:
        supplier = "주조 피팅 전문업체" if made_by == "CAST" else "일반 피팅 업체"

    # Lead time: high-pressure parts take longest, then forged parts.
    if is_high_pressure_fitting(rating):
        eta = "6-10주 (고압용)"
    else:
        eta = "4-8주 (단조품)" if made_by == "FORGED" else "2-6주 (일반품)"

    return {
        "supplier_type": supplier,
        "lead_time_estimate": eta,
        "purchase_category": f"{kind} {link} {rating}",
        "manufacturing_note": fitting_result["manufacturing"]["characteristics"]
    }
def classify_fitting_schedule(description: str) -> Dict:
    """
    Extract the pipe schedule from a fitting description (mainly for nipples).

    The upper-cased description is searched for "SCH <n>", "SCHEDULE <n>" or
    the Korean equivalent, in that order. Known schedule numbers are annotated
    with wall-thickness and pressure-class labels; others are marked
    non-standard.

    Returns:
        Dict with schedule ("SCH <n>" or "UNKNOWN"), schedule_number,
        wall_thickness, pressure_class, confidence (0.95 on a match, else 0.0)
        and matched_pattern.
    """
    text = description.upper()

    # Labels for the commonly used schedules.
    wall_table = {
        "10": {"wall": "얇음", "pressure": "저압"},
        "20": {"wall": "얇음", "pressure": "저압"},
        "40": {"wall": "표준", "pressure": "중압"},
        "80": {"wall": "두꺼움", "pressure": "고압"},
        "120": {"wall": "매우 두꺼움", "pressure": "고압"},
        "160": {"wall": "매우 두꺼움", "pressure": "초고압"}
    }
    non_standard = {"wall": "비표준", "pressure": "확인 필요"}

    for regex in (r'SCH\s*(\d+)', r'SCHEDULE\s*(\d+)', r'스케줄\s*(\d+)'):
        hit = re.search(regex, text)
        if hit is None:
            continue
        number = hit.group(1)
        labels = wall_table.get(number, non_standard)
        return {
            "schedule": f"SCH {number}",
            "schedule_number": number,
            "wall_thickness": labels["wall"],
            "pressure_class": labels["pressure"],
            "confidence": 0.95,
            "matched_pattern": regex
        }

    return {
        "schedule": "UNKNOWN",
        "schedule_number": "",
        "wall_thickness": "",
        "pressure_class": "",
        "confidence": 0.0,
        "matched_pattern": ""
    }
def classify_fitting_schedule_with_reducing(description: str, main_nom: str, red_nom: str = None) -> Dict:
    """
    Extract main-side and reduced-side schedules from real BOM descriptions.

    Examples of the handled patterns:
    - "TEE RED, SMLS, SCH 40 x SCH 80"    -> main: SCH 40,  red: SCH 80
    - "RED CONC, SMLS, SCH 40S x SCH 40S" -> main: SCH 40S, red: SCH 40S
    - "RED CONC, SMLS, SCH 80 x SCH 80"   -> main: SCH 80,  red: SCH 80

    When no "SCH a x [SCH] b" pair is present, the single-schedule classifier
    is used and its schedule is applied to the main side (and to the reduced
    side when red_nom is truthy).

    Returns:
        Dict with schedule (same as main_schedule, kept for compatibility),
        main_schedule, red_schedule, has_different_schedules, confidence,
        matched_pattern and schedule_type ("SEPARATED" or "UNIFIED").
    """
    text = description.upper()

    # 1. Separated schedules (SCH XX x SCH YY)
    pair_patterns = (
        r'SCH\s*(\d+S?)\s*[xX×]\s*SCH\s*(\d+S?)',  # SCH 40 x SCH 80
        r'SCH\s*(\d+S?)\s*X\s*(\d+S?)',  # SCH 40S X 40S (second SCH omitted)
    )
    for regex in pair_patterns:
        pair = re.search(regex, text)
        if not pair:
            continue
        main_side = f"SCH {pair.group(1)}"
        red_side = f"SCH {pair.group(2)}"
        return {
            "schedule": main_side,  # default schedule (compatibility)
            "main_schedule": main_side,
            "red_schedule": red_side,
            "has_different_schedules": main_side != red_side,
            "confidence": 0.95,
            "matched_pattern": pair.group(0),
            "schedule_type": "SEPARATED"
        }

    # 2. Single schedule: reuse the basic classifier and apply it to both sides
    single = classify_fitting_schedule(description)
    found = single.get("schedule", "UNKNOWN")
    return {
        "schedule": found,  # default schedule (compatibility)
        "main_schedule": found,
        "red_schedule": found if red_nom else None,
        "has_different_schedules": False,
        "confidence": single.get("confidence", 0.0),
        "matched_pattern": single.get("matched_pattern", ""),
        "schedule_type": "UNIFIED"
    }