Files
TK-BOM-Project/backend/app/services/fitting_classifier.py
Hyungi Ahn 25ce3590ee feat: 자재 분류 시스템 대폭 개선
🔧 주요 개선사항:
- EXCLUDE 분류기 추가 (WELD GAP 등 제외 대상 처리)
- FITTING 분류기 키워드 확장 (ELL, RED 추가)
- PIPE 재질 중복 문제 해결 (material_grade 파싱 개선)
- NIPPLE 특별 처리 추가 (스케줄 + 길이 정보 포함)
- OLET 타입 중복 표시 제거

📊 분류 정확도:
- UNKNOWN: 0개 (100% 분류 성공)
- EXCLUDE: 1,014개 (제외 대상)
- 실제 자재: 1,823개 정확 분류

🎯 해결된 문제:
- PIPE 재질 'ASTM A106 ASTM A106' → 'ASTM A106 GR B'
- WELD GAP 오분류 → EXCLUDE 카테고리
- FITTING 키워드 인식 실패 → ELL, RED 키워드 추가
- 프론트엔드 중복 표시 제거
2025-07-18 10:28:02 +09:00

618 lines
23 KiB
Python

"""
FITTING 분류 시스템 V2
재질 분류 + 피팅 특화 분류 + 스풀 시스템 통합
"""
import re
from typing import Dict, List, Optional
from .material_classifier import classify_material, get_manufacturing_method_from_material
# ========== FITTING 타입별 분류 (실제 BOM 기반) ==========
FITTING_TYPES = {
"ELBOW": {
"dat_file_patterns": ["90L_", "45L_", "ELL_", "ELBOW_"],
"description_keywords": ["ELBOW", "ELL", "엘보"],
"subtypes": {
"90DEG": ["90", "90°", "90DEG", "90도"],
"45DEG": ["45", "45°", "45DEG", "45도"],
"LONG_RADIUS": ["LR", "LONG RADIUS", "장반경"],
"SHORT_RADIUS": ["SR", "SHORT RADIUS", "단반경"]
},
"default_subtype": "90DEG",
"common_connections": ["BUTT_WELD", "SOCKET_WELD"],
"size_range": "1/2\" ~ 48\""
},
"TEE": {
"dat_file_patterns": ["TEE_", "T_"],
"description_keywords": ["TEE", ""],
"subtypes": {
"EQUAL": ["EQUAL TEE", "등경티", "EQUAL"],
"REDUCING": ["REDUCING TEE", "RED TEE", "축소티", "REDUCING", "RD"]
},
"size_analysis": True, # RED_NOM으로 REDUCING 여부 판단
"common_connections": ["BUTT_WELD", "SOCKET_WELD"],
"size_range": "1/2\" ~ 48\""
},
"REDUCER": {
"dat_file_patterns": ["CNC_", "ECC_", "RED_", "REDUCER_"],
"description_keywords": ["REDUCER", "RED", "리듀서"],
"subtypes": {
"CONCENTRIC": ["CONCENTRIC", "CONC", "CNC", "동심", "CON"],
"ECCENTRIC": ["ECCENTRIC", "ECC", "편심"]
},
"requires_two_sizes": True,
"common_connections": ["BUTT_WELD"],
"size_range": "1/2\" ~ 48\""
},
"CAP": {
"dat_file_patterns": ["CAP_"],
"description_keywords": ["CAP", "", "막음"],
"subtypes": {
"BUTT_WELD": ["BW", "BUTT WELD"],
"SOCKET_WELD": ["SW", "SOCKET WELD"],
"THREADED": ["THD", "THREADED", "나사", "NPT"]
},
"common_connections": ["BUTT_WELD", "SOCKET_WELD", "THREADED"],
"size_range": "1/4\" ~ 24\""
},
"PLUG": {
"dat_file_patterns": ["PLUG_", "HEX_PLUG"],
"description_keywords": ["PLUG", "플러그", "HEX.PLUG", "HEX PLUG", "HEXAGON PLUG"],
"subtypes": {
"HEX": ["HEX", "HEXAGON", "육각"],
"SQUARE": ["SQUARE", "사각"],
"THREADED": ["THD", "THREADED", "나사", "NPT"]
},
"common_connections": ["THREADED", "NPT"],
"size_range": "1/8\" ~ 4\""
},
"NIPPLE": {
"dat_file_patterns": ["NIP_", "NIPPLE_"],
"description_keywords": ["NIPPLE", "니플"],
"subtypes": {
"THREADED": ["THREADED", "THD", "NPT", "나사"],
"SOCKET_WELD": ["SOCKET WELD", "SW", "소켓웰드"],
"CLOSE": ["CLOSE NIPPLE", "CLOSE"],
"SHORT": ["SHORT NIPPLE", "SHORT"],
"LONG": ["LONG NIPPLE", "LONG"]
},
"common_connections": ["THREADED", "SOCKET_WELD"],
"size_range": "1/8\" ~ 4\""
},
"SWAGE": {
"dat_file_patterns": ["SWG_"],
"description_keywords": ["SWAGE", "스웨지"],
"subtypes": {
"CONCENTRIC": ["CONCENTRIC", "CONC", "CN", "CON", "동심"],
"ECCENTRIC": ["ECCENTRIC", "ECC", "EC", "편심"]
},
"requires_two_sizes": True,
"common_connections": ["BUTT_WELD", "SOCKET_WELD"],
"size_range": "1/2\" ~ 12\""
},
"OLET": {
"dat_file_patterns": ["SOL_", "WOL_", "TOL_", "OLET_", "SOCK-O-LET", "WELD-O-LET"],
"description_keywords": ["OLET", "올렛", "O-LET", "SOCK-O-LET", "WELD-O-LET", "SOCKOLET", "WELDOLET", "THREAD-O-LET", "THREADOLET", "SOCKLET", "SOCKET"],
"subtypes": {
"SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"],
"WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"],
"THREADOLET": ["THREAD-O-LET", "THREADOLET", "TOL", "THREADED-O-LET"],
"ELBOLET": ["ELB-O-LET", "ELBOLET", "EOL", "ELBOW-O-LET"],
"NIPOLET": ["NIP-O-LET", "NIPOLET", "NOL", "NIPPLE-O-LET"],
"COUPOLET": ["COUP-O-LET", "COUPOLET", "COL", "COUPLING-O-LET"]
},
"requires_two_sizes": True, # 주배관 x 분기관
"common_connections": ["SOCKET_WELD", "THREADED", "BUTT_WELD"],
"size_range": "1/8\" ~ 4\""
},
"COUPLING": {
"dat_file_patterns": ["CPL_", "COUPLING_"],
"description_keywords": ["COUPLING", "커플링"],
"subtypes": {
"FULL": ["FULL COUPLING", "FULL"],
"HALF": ["HALF COUPLING", "HALF"],
"REDUCING": ["REDUCING COUPLING", "RED"]
},
"common_connections": ["SOCKET_WELD", "THREADED"],
"size_range": "1/8\" ~ 4\""
}
}
# ========== 연결 방식별 분류 ==========
CONNECTION_METHODS = {
"BUTT_WELD": {
"codes": ["BW", "BUTT WELD", "맞대기용접", "BUTT-WELD"],
"dat_patterns": ["_BW"],
"size_range": "1/2\" ~ 48\"",
"pressure_range": "150LB ~ 2500LB",
"typical_manufacturing": "WELDED_FABRICATED",
"confidence": 0.95
},
"SOCKET_WELD": {
"codes": ["SW", "SOCKET WELD", "소켓웰드", "SOCKET-WELD"],
"dat_patterns": ["_SW_"],
"size_range": "1/8\" ~ 4\"",
"pressure_range": "150LB ~ 9000LB",
"typical_manufacturing": "FORGED",
"confidence": 0.95
},
"THREADED": {
"codes": ["THD", "THRD", "NPT", "THREADED", "나사", "TR"],
"dat_patterns": ["_TR", "_THD"],
"size_range": "1/8\" ~ 4\"",
"pressure_range": "150LB ~ 6000LB",
"typical_manufacturing": "FORGED",
"confidence": 0.95
},
"FLANGED": {
"codes": ["FL", "FLG", "FLANGED", "플랜지"],
"dat_patterns": ["_FL_"],
"size_range": "1/2\" ~ 48\"",
"pressure_range": "150LB ~ 2500LB",
"typical_manufacturing": "FORGED_OR_CAST",
"confidence": 0.9
}
}
# ========== 압력 등급별 분류 ==========
PRESSURE_RATINGS = {
"patterns": [
r"(\d+)LB",
r"CLASS\s*(\d+)",
r"CL\s*(\d+)",
r"(\d+)#",
r"(\d+)\s*LB"
],
"standard_ratings": {
"150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"},
"300LB": {"max_pressure": "740 PSI", "common_use": "중압용"},
"600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"},
"900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"},
"1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"},
"2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"},
"3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"},
"6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"},
"9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"}
}
}
def classify_fitting(dat_file: str, description: str, main_nom: str,
red_nom: str = None, length: float = None) -> Dict:
"""
완전한 FITTING 분류
Args:
dat_file: DAT_FILE 필드
description: DESCRIPTION 필드
main_nom: MAIN_NOM 필드 (주 사이즈)
red_nom: RED_NOM 필드 (축소 사이즈, 선택사항)
Returns:
완전한 피팅 분류 결과
"""
desc_upper = description.upper()
dat_upper = dat_file.upper()
# 1. 명칭 우선 확인 (피팅 키워드가 있으면 피팅)
fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'OLET', 'COUPLING', 'PLUG', 'SOCKLET', 'SOCKET', '엘보', '', '리듀서', '', '니플', '스웨지', '올렛', '커플링', 'SOCK-O-LET', 'WELD-O-LET', 'SOCKOLET', 'WELDOLET']
is_fitting = any(keyword in desc_upper or keyword in dat_upper for keyword in fitting_keywords)
if not is_fitting:
return {
"category": "UNKNOWN",
"overall_confidence": 0.0,
"reason": "피팅 키워드 없음"
}
# 2. 재질 분류 (공통 모듈 사용)
material_result = classify_material(description)
# 2. 피팅 타입 분류
fitting_type_result = classify_fitting_type(dat_file, description, main_nom, red_nom)
# 3. 연결 방식 분류
connection_result = classify_connection_method(dat_file, description)
# 4. 압력 등급 분류
pressure_result = classify_pressure_rating(dat_file, description)
# 5. 제작 방법 추정
manufacturing_result = determine_fitting_manufacturing(
material_result, connection_result, pressure_result, main_nom
)
# 6. 최종 결과 조합
return {
"category": "FITTING",
# 재질 정보 (공통 모듈)
"material": {
"standard": material_result.get('standard', 'UNKNOWN'),
"grade": material_result.get('grade', 'UNKNOWN'),
"material_type": material_result.get('material_type', 'UNKNOWN'),
"confidence": material_result.get('confidence', 0.0)
},
# 피팅 특화 정보
"fitting_type": {
"type": fitting_type_result.get('type', 'UNKNOWN'),
"subtype": fitting_type_result.get('subtype', 'UNKNOWN'),
"confidence": fitting_type_result.get('confidence', 0.0),
"evidence": fitting_type_result.get('evidence', [])
},
"connection_method": {
"method": connection_result.get('method', 'UNKNOWN'),
"confidence": connection_result.get('confidence', 0.0),
"matched_code": connection_result.get('matched_code', ''),
"size_range": connection_result.get('size_range', ''),
"pressure_range": connection_result.get('pressure_range', '')
},
"pressure_rating": {
"rating": pressure_result.get('rating', 'UNKNOWN'),
"confidence": pressure_result.get('confidence', 0.0),
"max_pressure": pressure_result.get('max_pressure', ''),
"common_use": pressure_result.get('common_use', '')
},
"manufacturing": {
"method": manufacturing_result.get('method', 'UNKNOWN'),
"confidence": manufacturing_result.get('confidence', 0.0),
"evidence": manufacturing_result.get('evidence', []),
"characteristics": manufacturing_result.get('characteristics', '')
},
"size_info": {
"main_size": main_nom,
"reduced_size": red_nom,
"size_description": format_fitting_size(main_nom, red_nom),
"requires_two_sizes": fitting_type_result.get('requires_two_sizes', False)
},
# 전체 신뢰도
"overall_confidence": calculate_fitting_confidence({
"material": material_result.get('confidence', 0),
"fitting_type": fitting_type_result.get('confidence', 0),
"connection": connection_result.get('confidence', 0),
"pressure": pressure_result.get('confidence', 0)
})
}
def classify_fitting_type(dat_file: str, description: str,
main_nom: str, red_nom: str = None) -> Dict:
"""피팅 타입 분류"""
dat_upper = dat_file.upper()
desc_upper = description.upper()
# 1. DAT_FILE 패턴으로 1차 분류 (가장 신뢰도 높음)
for fitting_type, type_data in FITTING_TYPES.items():
for pattern in type_data["dat_file_patterns"]:
if pattern in dat_upper:
subtype_result = classify_fitting_subtype(
fitting_type, desc_upper, main_nom, red_nom, type_data
)
return {
"type": fitting_type,
"subtype": subtype_result["subtype"],
"confidence": 0.95,
"evidence": [f"DAT_FILE_PATTERN: {pattern}"],
"subtype_confidence": subtype_result["confidence"],
"requires_two_sizes": type_data.get("requires_two_sizes", False)
}
# 2. DESCRIPTION 키워드로 2차 분류
for fitting_type, type_data in FITTING_TYPES.items():
for keyword in type_data["description_keywords"]:
if keyword in desc_upper:
subtype_result = classify_fitting_subtype(
fitting_type, desc_upper, main_nom, red_nom, type_data
)
return {
"type": fitting_type,
"subtype": subtype_result["subtype"],
"confidence": 0.85,
"evidence": [f"DESCRIPTION_KEYWORD: {keyword}"],
"subtype_confidence": subtype_result["confidence"],
"requires_two_sizes": type_data.get("requires_two_sizes", False)
}
# 3. 분류 실패
return {
"type": "UNKNOWN",
"subtype": "UNKNOWN",
"confidence": 0.0,
"evidence": ["NO_FITTING_TYPE_IDENTIFIED"],
"requires_two_sizes": False
}
def classify_fitting_subtype(fitting_type: str, description: str,
main_nom: str, red_nom: str, type_data: Dict) -> Dict:
"""피팅 서브타입 분류"""
subtypes = type_data.get("subtypes", {})
# 1. 키워드 기반 서브타입 분류 (우선)
for subtype, keywords in subtypes.items():
for keyword in keywords:
if keyword in description:
return {
"subtype": subtype,
"confidence": 0.9,
"evidence": [f"SUBTYPE_KEYWORD: {keyword}"]
}
# 2. 사이즈 분석이 필요한 경우 (TEE, REDUCER 등)
if type_data.get("size_analysis"):
if red_nom and str(red_nom).strip() and red_nom != main_nom:
return {
"subtype": "REDUCING",
"confidence": 0.85,
"evidence": [f"SIZE_ANALYSIS_REDUCING: {main_nom} x {red_nom}"]
}
else:
return {
"subtype": "EQUAL",
"confidence": 0.8,
"evidence": [f"SIZE_ANALYSIS_EQUAL: {main_nom}"]
}
# 3. 두 사이즈가 필요한 경우 확인
if type_data.get("requires_two_sizes"):
if red_nom and str(red_nom).strip():
confidence = 0.8
evidence = [f"TWO_SIZES_PROVIDED: {main_nom} x {red_nom}"]
else:
confidence = 0.6
evidence = [f"TWO_SIZES_EXPECTED_BUT_MISSING"]
else:
confidence = 0.7
evidence = ["SINGLE_SIZE_FITTING"]
# 4. 기본값
default_subtype = type_data.get("default_subtype", "GENERAL")
return {
"subtype": default_subtype,
"confidence": confidence,
"evidence": evidence
}
def classify_connection_method(dat_file: str, description: str) -> Dict:
"""연결 방식 분류"""
dat_upper = dat_file.upper()
desc_upper = description.upper()
combined_text = f"{dat_upper} {desc_upper}"
# 1. DAT_FILE 패턴 우선 확인 (가장 신뢰도 높음)
for method, method_data in CONNECTION_METHODS.items():
for pattern in method_data["dat_patterns"]:
if pattern in dat_upper:
return {
"method": method,
"confidence": 0.95,
"matched_code": pattern,
"source": "DAT_FILE_PATTERN",
"size_range": method_data["size_range"],
"pressure_range": method_data["pressure_range"],
"typical_manufacturing": method_data["typical_manufacturing"]
}
# 2. 키워드 확인
for method, method_data in CONNECTION_METHODS.items():
for code in method_data["codes"]:
if code in combined_text:
return {
"method": method,
"confidence": method_data["confidence"],
"matched_code": code,
"source": "KEYWORD_MATCH",
"size_range": method_data["size_range"],
"pressure_range": method_data["pressure_range"],
"typical_manufacturing": method_data["typical_manufacturing"]
}
return {
"method": "UNKNOWN",
"confidence": 0.0,
"matched_code": "",
"source": "NO_CONNECTION_METHOD_FOUND"
}
def classify_pressure_rating(dat_file: str, description: str) -> Dict:
"""압력 등급 분류"""
combined_text = f"{dat_file} {description}".upper()
# 패턴 매칭으로 압력 등급 추출
for pattern in PRESSURE_RATINGS["patterns"]:
match = re.search(pattern, combined_text)
if match:
rating_num = match.group(1)
rating = f"{rating_num}LB"
# 표준 등급 정보 확인
rating_info = PRESSURE_RATINGS["standard_ratings"].get(rating, {})
if rating_info:
confidence = 0.95
else:
confidence = 0.8
rating_info = {"max_pressure": "확인 필요", "common_use": "비표준 등급"}
return {
"rating": rating,
"confidence": confidence,
"matched_pattern": pattern,
"matched_value": rating_num,
"max_pressure": rating_info.get("max_pressure", ""),
"common_use": rating_info.get("common_use", "")
}
return {
"rating": "UNKNOWN",
"confidence": 0.0,
"matched_pattern": "",
"max_pressure": "",
"common_use": ""
}
def determine_fitting_manufacturing(material_result: Dict, connection_result: Dict,
pressure_result: Dict, main_nom: str) -> Dict:
"""피팅 제작 방법 결정"""
evidence = []
# 1. 재질 기반 제작방법 (가장 확실)
material_manufacturing = get_manufacturing_method_from_material(material_result)
if material_manufacturing in ["FORGED", "CAST"]:
evidence.append(f"MATERIAL_STANDARD: {material_result.get('standard')}")
characteristics = {
"FORGED": "고강도, 고압용, 소구경",
"CAST": "복잡형상, 중저압용"
}.get(material_manufacturing, "")
return {
"method": material_manufacturing,
"confidence": 0.9,
"evidence": evidence,
"characteristics": characteristics
}
# 2. 연결방식 + 압력등급 조합 추정
connection_method = connection_result.get("method", "")
pressure_rating = pressure_result.get("rating", "")
# 고압 + 소켓웰드/나사 = 단조
high_pressure = ["3000LB", "6000LB", "9000LB"]
forged_connections = ["SOCKET_WELD", "THREADED"]
if (any(pressure in pressure_rating for pressure in high_pressure) and
connection_method in forged_connections):
evidence.append(f"HIGH_PRESSURE: {pressure_rating}")
evidence.append(f"FORGED_CONNECTION: {connection_method}")
return {
"method": "FORGED",
"confidence": 0.85,
"evidence": evidence,
"characteristics": "고압용 단조품"
}
# 3. 연결방식별 일반적 제작방법
connection_manufacturing = connection_result.get("typical_manufacturing", "")
if connection_manufacturing:
evidence.append(f"CONNECTION_TYPICAL: {connection_method}")
characteristics_map = {
"FORGED": "단조품, 고강도",
"WELDED_FABRICATED": "용접제작품, 대구경",
"FORGED_OR_CAST": "단조 또는 주조"
}
return {
"method": connection_manufacturing,
"confidence": 0.7,
"evidence": evidence,
"characteristics": characteristics_map.get(connection_manufacturing, "")
}
# 4. 기본 추정
return {
"method": "UNKNOWN",
"confidence": 0.0,
"evidence": ["INSUFFICIENT_MANUFACTURING_INFO"],
"characteristics": ""
}
def format_fitting_size(main_nom: str, red_nom: str = None) -> str:
"""피팅 사이즈 표기 포맷팅"""
main_nom_str = str(main_nom) if main_nom is not None else ""
red_nom_str = str(red_nom) if red_nom is not None else ""
if red_nom_str.strip() and red_nom_str != main_nom_str:
return f"{main_nom_str} x {red_nom_str}"
else:
return main_nom_str
def calculate_fitting_confidence(confidence_scores: Dict) -> float:
"""피팅 분류 전체 신뢰도 계산"""
scores = [score for score in confidence_scores.values() if score > 0]
if not scores:
return 0.0
# 가중 평균 (피팅 타입이 가장 중요)
weights = {
"material": 0.25,
"fitting_type": 0.4,
"connection": 0.25,
"pressure": 0.1
}
weighted_sum = sum(
confidence_scores.get(key, 0) * weight
for key, weight in weights.items()
)
return round(weighted_sum, 2)
# ========== 특수 분류 함수들 ==========
def is_high_pressure_fitting(pressure_rating: str) -> bool:
"""고압 피팅 여부 판단"""
high_pressure_ratings = ["3000LB", "6000LB", "9000LB"]
return pressure_rating in high_pressure_ratings
def is_small_bore_fitting(main_nom: str) -> bool:
"""소구경 피팅 여부 판단"""
try:
# 간단한 사이즈 파싱 (인치 기준)
size_num = float(re.findall(r'(\d+(?:\.\d+)?)', main_nom)[0])
return size_num <= 2.0
except:
return False
def get_fitting_purchase_info(fitting_result: Dict) -> Dict:
"""피팅 구매 정보 생성"""
fitting_type = fitting_result["fitting_type"]["type"]
connection = fitting_result["connection_method"]["method"]
pressure = fitting_result["pressure_rating"]["rating"]
manufacturing = fitting_result["manufacturing"]["method"]
# 공급업체 타입 결정
if manufacturing == "FORGED":
supplier_type = "단조 피팅 전문업체"
elif manufacturing == "CAST":
supplier_type = "주조 피팅 전문업체"
else:
supplier_type = "일반 피팅 업체"
# 납기 추정
if is_high_pressure_fitting(pressure):
lead_time = "6-10주 (고압용)"
elif manufacturing == "FORGED":
lead_time = "4-8주 (단조품)"
else:
lead_time = "2-6주 (일반품)"
return {
"supplier_type": supplier_type,
"lead_time_estimate": lead_time,
"purchase_category": f"{fitting_type} {connection} {pressure}",
"manufacturing_note": fitting_result["manufacturing"]["characteristics"]
}