- 통합 분류기 구현으로 키워드 우선순위 체계 적용 - HEX.PLUG → FITTING 분류 수정 (기존 VALVE 오분류 해결) - 플랜지/밸브가 볼트로 오분류되는 문제 해결 (A193, A194 재질 키워드 우선순위 적용) - 피팅 재질(A234, A403, A420) 기반 분류 추가 - 니플 길이 정보 보존 로직 개선 - 파이프 끝단 가공 정보를 구매 단계에서 제외 - PostgreSQL 사용으로 RULES.md 업데이트 - 상호 배타적 키워드 시스템 구현 (Level 1 키워드 우선)
339 lines
13 KiB
Python
339 lines
13 KiB
Python
"""
|
|
재질 분류를 위한 공통 함수
|
|
materials_schema.py의 데이터를 사용하여 재질을 분류
|
|
"""
|
|
|
|
import re
|
|
from typing import Dict, List, Optional, Tuple
|
|
from .materials_schema import (
|
|
MATERIAL_STANDARDS,
|
|
SPECIAL_MATERIALS,
|
|
MANUFACTURING_MATERIAL_MAP,
|
|
GENERIC_MATERIAL_KEYWORDS
|
|
)
|
|
|
|
def classify_material(description: str) -> Dict:
    """Classify the material mentioned in a free-text item description.

    The description is upper-cased and stripped, then handed to a fixed
    sequence of checkers ordered from most to least specific. The first
    checker whose result exceeds its confidence threshold wins.

    Args:
        description: Material description (the DESCRIPTION field). ``None``
            is treated as an empty string; other values are passed through
            ``str()``.

    Returns:
        The result dictionary of the first stage that clears its threshold
        (0.9 for special materials; 0.8 for ASTM/ASME, KS and JIS). If no
        stage does, the result of ``check_generic_materials`` is returned
        unconditionally.
    """
    normalized = "" if description is None else str(description).upper().strip()

    # (checker, minimum confidence that must be exceeded), most specific first.
    staged_checks = (
        (check_special_materials, 0.9),
        (check_astm_materials, 0.8),
        (check_ks_materials, 0.8),
        (check_jis_materials, 0.8),
    )

    for checker, threshold in staged_checks:
        candidate = checker(normalized)
        if candidate['confidence'] > threshold:
            return candidate

    # Last resort: generic keyword lookup, returned whatever its confidence.
    return check_generic_materials(normalized)
|
|
|
|
def check_special_materials(description: str) -> Dict:
    """Look for super-alloy and titanium designations in ``description``.

    Patterns and grade tables are read from ``SPECIAL_MATERIALS``. Super
    alloy families are tried first, then titanium; the first pattern that
    matches determines the result.

    Args:
        description: Description text to search. ``classify_material``
            passes it already upper-cased.

    Returns:
        A result dictionary with ``confidence`` 0.95 when a pattern matches,
        otherwise ``{"confidence": 0.0}``.
    """
    # Super alloys: every family carries its own patterns and grade table.
    for alloy_family, alloy_data in SPECIAL_MATERIALS["SUPER_ALLOYS"].items():
        for pattern in alloy_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue

            # group(1) is None when the pattern has a capture group that did
            # not participate in the match (e.g. an optional group), so the
            # fallback has to cover that case as well as "no groups at all".
            grade = match.group(1) if match.groups() else None
            if grade is None:
                grade = "STANDARD"
            grade_info = alloy_data["grades"].get(grade, {})

            return {
                "standard": f"{alloy_family}",
                "grade": f"{alloy_family} {grade}",
                "material_type": "SUPER_ALLOY",
                "manufacturing": alloy_data.get("manufacturing", "SPECIAL"),
                "composition": grade_info.get("composition", ""),
                "applications": grade_info.get("applications", ""),
                "confidence": 0.95,
                "evidence": [f"SPECIAL_MATERIAL: {alloy_family} {grade}"]
            }

    # Titanium: a single pattern list; grade "2" is the fallback when the
    # pattern does not capture a grade.
    titanium_data = SPECIAL_MATERIALS["TITANIUM"]
    for pattern in titanium_data["patterns"]:
        match = re.search(pattern, description)
        if not match:
            continue

        grade = match.group(1) if match.groups() else None
        if grade is None:
            grade = "2"
        grade_info = titanium_data["grades"].get(grade, {})

        return {
            "standard": "TITANIUM",
            "grade": f"Titanium Grade {grade}",
            "material_type": "TITANIUM",
            "manufacturing": "FORGED_OR_SEAMLESS",
            "composition": grade_info.get("composition", f"Ti Grade {grade}"),
            "confidence": 0.95,
            "evidence": [f"TITANIUM: Grade {grade}"]
        }

    return {"confidence": 0.0}
|
|
|
|
def check_astm_materials(description: str) -> Dict:
    """Search ``description`` for an ASTM/ASME designation.

    The grade tables under ``MATERIAL_STANDARDS["ASTM_ASME"]`` are scanned
    category by category (forged, welded, cast, pipe); each standard is
    delegated to ``check_astm_standard``.

    Args:
        description: Description text to search.

    Returns:
        The first per-standard result whose confidence exceeds 0.8, or
        ``{"confidence": 0.0}`` when nothing qualifies.
    """
    astm_tables = MATERIAL_STANDARDS["ASTM_ASME"]

    # Order matters: the first category that produces a hit wins.
    category_order = ("FORGED_GRADES", "WELDED_GRADES", "CAST_GRADES", "PIPE_GRADES")

    for category in category_order:
        for standard, standard_data in astm_tables[category].items():
            candidate = check_astm_standard(description, standard, standard_data)
            if candidate["confidence"] > 0.8:
                return candidate

    return {"confidence": 0.0}
|
|
|
|
def check_astm_standard(description: str, standard: str, standard_data: Dict) -> Dict:
    """Match ``description`` against one ASTM standard's patterns.

    Two layouts of ``standard_data`` are handled:

    * a ``"patterns"`` key directly on the standard, or
    * a mapping of subtypes, each with its own ``"patterns"`` and
      ``"grades"`` entries.

    Args:
        description: Description text to search.
        standard: Standard code without the "ASTM" prefix (e.g. ``"A312"``).
        standard_data: Schema entry for that standard.

    Returns:
        A result dictionary with ``confidence`` 0.9 for the first matching
        pattern, otherwise ``{"confidence": 0.0}``.
    """
    # Standards that list their patterns directly.
    if "patterns" in standard_data:
        for pattern in standard_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue

            # group(1) is None when a capture group exists but did not
            # participate in the match; treat that as "no grade captured".
            grade_code = (match.group(1) if match.groups() else "") or ""
            full_grade = f"ASTM {standard}" + (f" {grade_code}" if grade_code else "")

            return {
                "standard": f"ASTM {standard}",
                "grade": full_grade,
                "material_type": determine_material_type(standard, grade_code),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "confidence": 0.9,
                "evidence": [f"ASTM_{standard}: {grade_code if grade_code else 'Direct Match'}"]
            }

    # Standards that are split into subtypes.
    else:
        for subtype_data in standard_data.values():
            for pattern in subtype_data["patterns"]:
                match = re.search(pattern, description)
                if not match:
                    continue

                # Normalising None to "" keeps the startswith() calls below
                # from raising AttributeError on an unmatched optional group.
                grade_code = (match.group(1) if match.groups() else "") or ""
                grade_info = subtype_data["grades"].get(grade_code, {})

                if not grade_code:
                    full_grade = f"ASTM {standard}"
                elif standard == "A312" and not grade_code.startswith("TP"):
                    # A312 grades are shown in the TP304 form.
                    full_grade = f"ASTM {standard} TP{grade_code}"
                elif grade_code.startswith(("TP", "WP")):
                    # Already prefixed ("WP" also covers "WPL"): keep as captured.
                    full_grade = f"ASTM {standard} {grade_code}"
                elif standard == "A403":
                    # A403 grades are shown in the WP304 form.
                    full_grade = f"ASTM {standard} WP{grade_code}"
                elif standard == "A420":
                    # A420 grades are shown in the WPL3 form.
                    full_grade = f"ASTM {standard} WPL{grade_code}"
                else:
                    full_grade = f"ASTM {standard} {grade_code}"

                return {
                    "standard": f"ASTM {standard}",
                    "grade": full_grade,
                    "material_type": determine_material_type(standard, grade_code),
                    "manufacturing": subtype_data.get("manufacturing", "UNKNOWN"),
                    "composition": grade_info.get("composition", ""),
                    "applications": grade_info.get("applications", ""),
                    "confidence": 0.9,
                    "evidence": [f"ASTM_{standard}: {grade_code}"]
                }

    return {"confidence": 0.0}
|
|
|
|
def check_ks_materials(description: str) -> Dict:
    """Search ``description`` for a KS standard designation.

    Every standard in every category under ``MATERIAL_STANDARDS["KS"]`` is
    tried in iteration order; the first standard with a matching pattern
    produces the result.

    Args:
        description: Description text to search.

    Returns:
        A result dictionary with ``confidence`` 0.85 on a match, otherwise
        ``{"confidence": 0.0}``.
    """
    for standards in MATERIAL_STANDARDS["KS"].values():
        for standard, standard_data in standards.items():
            matched = any(
                re.search(pattern, description)
                for pattern in standard_data["patterns"]
            )
            if not matched:
                continue

            label = f"KS {standard}"
            return {
                "standard": label,
                "grade": label,
                # The KS entry carries no grade, so the type is guessed from the text.
                "material_type": determine_material_type_from_description(description),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "description": standard_data["description"],
                "confidence": 0.85,
                "evidence": [f"KS_{standard}"]
            }

    return {"confidence": 0.0}
|
|
|
|
def check_jis_materials(description: str) -> Dict:
    """Search ``description`` for a JIS standard designation.

    Every standard in every category under ``MATERIAL_STANDARDS["JIS"]`` is
    tried in iteration order; the first standard with a matching pattern
    produces the result.

    Args:
        description: Description text to search.

    Returns:
        A result dictionary with ``confidence`` 0.85 on a match, otherwise
        ``{"confidence": 0.0}``.
    """
    jis_tables = MATERIAL_STANDARDS["JIS"]

    for standards in jis_tables.values():
        for standard, standard_data in standards.items():
            for pattern in standard_data["patterns"]:
                if re.search(pattern, description) is None:
                    continue

                jis_label = f"JIS {standard}"
                return {
                    "standard": jis_label,
                    "grade": jis_label,
                    # The JIS entry carries no grade, so the type is guessed from the text.
                    "material_type": determine_material_type_from_description(description),
                    "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                    "description": standard_data["description"],
                    "confidence": 0.85,
                    "evidence": [f"JIS_{standard}"]
                }

    return {"confidence": 0.0}
|
|
|
|
def check_generic_materials(description: str) -> Dict:
    """Fall back to plain keyword lookup in ``GENERIC_MATERIAL_KEYWORDS``.

    Keywords are tested by substring containment, in the iteration order of
    the keyword table; the first keyword found decides the material type.

    Args:
        description: Description text to search.

    Returns:
        A ``"GENERIC"`` result with ``confidence`` 0.6 for the first keyword
        found, or an all-``"UNKNOWN"`` result with ``confidence`` 0.0.
    """
    # Lazily enumerate (material_type, keyword) hits; only the first is consumed.
    hits = (
        (material_type, keyword)
        for material_type, keywords in GENERIC_MATERIAL_KEYWORDS.items()
        for keyword in keywords
        if keyword in description
    )
    first_hit = next(hits, None)

    if first_hit is None:
        return {
            "standard": "UNKNOWN",
            "grade": "UNKNOWN",
            "material_type": "UNKNOWN",
            "manufacturing": "UNKNOWN",
            "confidence": 0.0,
            "evidence": ["NO_MATERIAL_FOUND"]
        }

    found_type, found_keyword = first_hit
    return {
        "standard": "GENERIC",
        "grade": found_keyword,
        "material_type": found_type,
        "manufacturing": "UNKNOWN",
        "confidence": 0.6,
        "evidence": [f"GENERIC: {found_keyword}"]
    }
|
|
|
|
def determine_material_type(standard: str, grade: str) -> str:
    """Derive a material type from a standard code and a grade string.

    Grade markers are tested by substring containment: stainless markers
    first, then alloy markers. If neither applies, standards A216 and A351
    are reported as cast steel and everything else as carbon steel.

    Args:
        standard: Standard code such as ``"A182"``.
        grade: Grade string; any falsy value (including ``None``) is treated
            as an empty string.

    Returns:
        One of ``"STAINLESS_STEEL"``, ``"ALLOY_STEEL"``, ``"CAST_STEEL"`` or
        ``"CARBON_STEEL"``.
    """
    grade_text = grade or ""

    stainless_markers = ("304", "316", "321", "347", "F304", "F316", "WP304", "CF8")
    alloy_markers = ("F1", "F5", "F11", "F22", "F91", "WP1", "WP5", "WP11", "WP22", "WP91")

    # Stainless is checked before alloy, so a grade carrying both kinds of
    # marker is reported as stainless.
    for marker in stainless_markers:
        if marker in grade_text:
            return "STAINLESS_STEEL"

    for marker in alloy_markers:
        if marker in grade_text:
            return "ALLOY_STEEL"

    # Cast standards; anything left over defaults to carbon steel.
    return "CAST_STEEL" if standard in ("A216", "A351") else "CARBON_STEEL"
|
|
|
|
def determine_material_type_from_description(description: str) -> str:
    """Guess a material type from keywords in a free-text description.

    Short abbreviations (SS, STS, CR, MO, CRMO) only count when they are not
    embedded in a longer alphabetic word. A plain substring test would fire
    on ordinary words such as SEAMLESS, CLASS, PRESSURE, SCREWED or MODEL
    and mislabel carbon-steel items as stainless or alloy. Digits and
    punctuation next to the abbreviation are still accepted, so forms like
    "SS304" or "1.25CR-0.5MO" keep matching. Longer keywords are still
    matched as substrings.

    Args:
        description: Description text; ``None`` is treated as empty.

    Returns:
        One of ``"STAINLESS_STEEL"``, ``"ALLOY_STEEL"``, ``"CAST_STEEL"`` or
        ``"CARBON_STEEL"`` (the default when nothing matches).
    """
    desc_upper = (description or "").upper()

    # Stainless: abbreviations must stand apart from surrounding letters.
    stainless_abbrev = re.search(r"(?<![A-Z])(?:SS|STS)(?![A-Z])", desc_upper)
    if stainless_abbrev or any(
        keyword in desc_upper for keyword in ("STAINLESS", "304", "316")
    ):
        return "STAINLESS_STEEL"

    # Alloy: "CRMO" is listed explicitly because the letter-boundary rule
    # would otherwise reject the fused form; "MOLY" keeps "CHROME MOLY" and
    # "MOLYBDENUM" recognised now that bare "MO" no longer matches inside words.
    alloy_abbrev = re.search(r"(?<![A-Z])(?:CRMO|CR|MO)(?![A-Z])", desc_upper)
    if alloy_abbrev or any(
        keyword in desc_upper for keyword in ("ALLOY", "합금", "MOLY")
    ):
        return "ALLOY_STEEL"

    if any(keyword in desc_upper for keyword in ("CAST", "주조")):
        return "CAST_STEEL"

    return "CARBON_STEEL"
|
|
|
|
def get_manufacturing_method_from_material(material_result: Dict) -> str:
    """Infer a manufacturing method from a material classification result.

    Args:
        material_result: Result dictionary as produced by the material
            checkers; the ``confidence``, ``standard`` and ``manufacturing``
            keys are read.

    Returns:
        ``"UNKNOWN"`` when confidence is below 0.5. Otherwise the method
        mapped from a known ASTM code found in ``standard``; failing that,
        the result's own ``manufacturing`` value (``"UNKNOWN"`` if absent).
    """
    if material_result.get("confidence", 0) < 0.5:
        return "UNKNOWN"

    standard_name = material_result.get('standard', '')

    # Direct mapping from ASTM code (substring of the standard) to method.
    # Rows are tried in order and take precedence over the manufacturing field.
    code_to_method = (
        (('A182', 'A105'), 'FORGED'),
        (('A234', 'A403', 'A420'), 'WELDED_FABRICATED'),
        (('A216', 'A351'), 'CAST'),
        (('A106', 'A312'), 'SEAMLESS'),
        (('A53',), 'WELDED_OR_SEAMLESS'),
    )
    for codes, method in code_to_method:
        if any(code in standard_name for code in codes):
            return method

    # No known code: fall back to whatever the classifier recorded.
    recorded = material_result.get("manufacturing", "UNKNOWN")
    if recorded == "UNKNOWN":
        return "UNKNOWN"
    return recorded
|
|
|
|
def get_material_confidence_factors(material_result: Dict) -> List[str]:
    """List the factors that describe how reliable a classification is.

    Args:
        material_result: Result dictionary; the ``confidence``, ``standard``
            and ``manufacturing`` keys are read.

    Returns:
        A list that starts with exactly one confidence band
        (``HIGH_CONFIDENCE`` at >= 0.9, ``MEDIUM_CONFIDENCE`` at >= 0.7,
        else ``LOW_CONFIDENCE``), followed by ``NO_STANDARD_FOUND`` and/or
        ``MANUFACTURING_UNCLEAR`` when the respective field equals
        ``"UNKNOWN"``.
    """
    score = material_result.get("confidence", 0)

    if score >= 0.9:
        band = "HIGH_CONFIDENCE"
    elif score >= 0.7:
        band = "MEDIUM_CONFIDENCE"
    else:
        band = "LOW_CONFIDENCE"

    # Optional flags, in the order they are reported.
    optional_flags = (
        ("NO_STANDARD_FOUND", material_result.get("standard") == "UNKNOWN"),
        ("MANUFACTURING_UNCLEAR", material_result.get("manufacturing") == "UNKNOWN"),
    )

    return [band] + [flag for flag, applies in optional_flags if applies]
|