Files
TK-BOM-Project/backend/app/services/material_classifier.py
Hyungi Ahn 9e5250a8f9 자재 분류 시스템 개선 및 통합 분류기 구현
- 통합 분류기 구현으로 키워드 우선순위 체계 적용
- HEX.PLUG → FITTING 분류 수정 (기존 VALVE 오분류 해결)
- 플랜지/밸브가 볼트로 오분류되는 문제 해결 (A193, A194 재질 키워드 우선순위 적용)
- 피팅 재질(A234, A403, A420) 기반 분류 추가
- 니플 길이 정보 보존 로직 개선
- 파이프 끝단 가공 정보를 구매 단계에서 제외
- PostgreSQL 사용으로 RULES.md 업데이트
- 상호 배타적 키워드 시스템 구현 (Level 1 키워드 우선)
2025-07-23 14:38:49 +09:00

339 lines
13 KiB
Python

"""
재질 분류를 위한 공통 함수
materials_schema.py의 데이터를 사용하여 재질을 분류
"""
import re
from typing import Dict, List, Optional, Tuple
from .materials_schema import (
MATERIAL_STANDARDS,
SPECIAL_MATERIALS,
MANUFACTURING_MATERIAL_MAP,
GENERIC_MATERIAL_KEYWORDS
)
def classify_material(description: str) -> Dict:
    """
    Classify the material named in a BOM description string.

    The description is upper-cased and stripped (``None`` becomes an empty
    string) and then run through the checkers from most to least specific:
    special materials, ASTM/ASME, KS, JIS. The first checker whose
    confidence exceeds its threshold wins; if none does, the result of the
    generic keyword check is returned unconditionally.

    Args:
        description: Material description (the DESCRIPTION field).

    Returns:
        Material classification result dictionary.
    """
    normalized = "" if description is None else str(description).upper().strip()

    # (checker, confidence that must be exceeded), in priority order.
    staged_checks = (
        (check_special_materials, 0.9),
        (check_astm_materials, 0.8),
        (check_ks_materials, 0.8),
        (check_jis_materials, 0.8),
    )
    for checker, threshold in staged_checks:
        candidate = checker(normalized)
        if candidate['confidence'] > threshold:
            return candidate

    # Last stage: generic keywords, returned whatever its confidence is.
    return check_generic_materials(normalized)
def check_special_materials(description: str) -> Dict:
    """
    Look for special materials (super alloys, titanium) in a description.

    Every regex pattern registered under ``SPECIAL_MATERIALS`` is searched
    in ``description``; the first hit is returned. Super alloys are checked
    before titanium.

    Args:
        description: Material description text to search.

    Returns:
        A result dictionary with confidence 0.95 for the first matching
        pattern, or ``{"confidence": 0.0}`` when nothing matches.
    """
    # Super alloys
    for alloy_family, alloy_data in SPECIAL_MATERIALS["SUPER_ALLOYS"].items():
        for pattern in alloy_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue
            # group(1) is None when the capture group is optional and did not
            # take part in the match; fall back to the default instead of
            # emitting a grade such as "<family> None".
            grade = (match.group(1) if match.groups() else None) or "STANDARD"
            grade_info = alloy_data["grades"].get(grade, {})
            return {
                "standard": f"{alloy_family}",
                "grade": f"{alloy_family} {grade}",
                "material_type": "SUPER_ALLOY",
                "manufacturing": alloy_data.get("manufacturing", "SPECIAL"),
                "composition": grade_info.get("composition", ""),
                "applications": grade_info.get("applications", ""),
                "confidence": 0.95,
                "evidence": [f"SPECIAL_MATERIAL: {alloy_family} {grade}"]
            }

    # Titanium
    titanium_data = SPECIAL_MATERIALS["TITANIUM"]
    for pattern in titanium_data["patterns"]:
        match = re.search(pattern, description)
        if not match:
            continue
        # Same None guard as above; "2" is the default grade when the
        # pattern captured no grade.
        grade = (match.group(1) if match.groups() else None) or "2"
        grade_info = titanium_data["grades"].get(grade, {})
        return {
            "standard": "TITANIUM",
            "grade": f"Titanium Grade {grade}",
            "material_type": "TITANIUM",
            "manufacturing": "FORGED_OR_SEAMLESS",
            "composition": grade_info.get("composition", f"Ti Grade {grade}"),
            "confidence": 0.95,
            "evidence": [f"TITANIUM: Grade {grade}"]
        }

    return {"confidence": 0.0}
def check_astm_materials(description: str) -> Dict:
    """
    Search the ASTM/ASME standards for a match in the description.

    The grade categories are scanned in a fixed order (forged, welded,
    cast, pipe) and the first standard whose result has a confidence above
    0.8 is returned.

    Args:
        description: Material description text to search.

    Returns:
        The matching result from ``check_astm_standard``, or
        ``{"confidence": 0.0}`` when no standard matches.
    """
    astm_data = MATERIAL_STANDARDS["ASTM_ASME"]
    category_order = ("FORGED_GRADES", "WELDED_GRADES", "CAST_GRADES", "PIPE_GRADES")

    for category in category_order:
        for standard, standard_data in astm_data[category].items():
            hit = check_astm_standard(description, standard, standard_data)
            if hit["confidence"] > 0.8:
                return hit

    return {"confidence": 0.0}
def _format_astm_grade(standard: str, grade_code: str) -> str:
    """Build the display grade, adding the TP/WP/WPL prefix where it is missing."""
    if not grade_code:
        return f"ASTM {standard}"

    if standard == "A312" and not grade_code.startswith("TP"):
        # A312 grades are shown as TP304 etc.
        prefix = "TP"
    elif grade_code.startswith("TP"):
        prefix = ""
    elif standard == "A403" and not grade_code.startswith("WP"):
        # A403 grades are shown as WP304 etc.
        prefix = "WP"
    elif grade_code.startswith("WP"):
        # Also covers grades that already start with "WPL".
        prefix = ""
    elif standard == "A420":
        # A420 grades are shown as WPL3 etc.; the grade cannot start with
        # "WP" (hence not "WPL") at this point.
        prefix = "WPL"
    else:
        prefix = ""
    return f"ASTM {standard} {prefix}{grade_code}"


def check_astm_standard(description: str, standard: str, standard_data: Dict) -> Dict:
    """
    Check a single ASTM standard against the description.

    Two schema layouts are handled: standards that carry ``"patterns"``
    directly (e.g. A105), and standards whose values are sub-type entries,
    each with its own ``"patterns"`` and ``"grades"`` (e.g. A182, A234).

    Args:
        description: Material description text to search.
        standard: Standard code without the "ASTM " prefix (e.g. "A312").
        standard_data: Schema entry for this standard.

    Returns:
        A result dictionary with confidence 0.9 for the first matching
        pattern, or ``{"confidence": 0.0}`` when nothing matches.
    """
    # Standards with patterns defined directly on them (A105 etc.)
    if "patterns" in standard_data:
        for pattern in standard_data["patterns"]:
            match = re.search(pattern, description)
            if not match:
                continue
            # An optional capture group that did not participate yields
            # None; normalize it to "" so later string handling is safe.
            grade_code = (match.group(1) if match.groups() else "") or ""
            full_grade = f"ASTM {standard}" + (f" {grade_code}" if grade_code else "")
            return {
                "standard": f"ASTM {standard}",
                "grade": full_grade,
                "material_type": determine_material_type(standard, grade_code),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "confidence": 0.9,
                "evidence": [f"ASTM_{standard}: {grade_code if grade_code else 'Direct Match'}"]
            }
    # Standards split into sub-types (A182, A234 etc.)
    else:
        for subtype_data in standard_data.values():
            for pattern in subtype_data["patterns"]:
                match = re.search(pattern, description)
                if not match:
                    continue
                # Without this normalization a None group would raise
                # AttributeError on the startswith() checks.
                grade_code = (match.group(1) if match.groups() else "") or ""
                grade_info = subtype_data["grades"].get(grade_code, {})
                return {
                    "standard": f"ASTM {standard}",
                    "grade": _format_astm_grade(standard, grade_code),
                    "material_type": determine_material_type(standard, grade_code),
                    "manufacturing": subtype_data.get("manufacturing", "UNKNOWN"),
                    "composition": grade_info.get("composition", ""),
                    "applications": grade_info.get("applications", ""),
                    "confidence": 0.9,
                    "evidence": [f"ASTM_{standard}: {grade_code}"]
                }

    return {"confidence": 0.0}
def check_ks_materials(description: str) -> Dict:
    """
    Search the KS standards for a match in the description.

    Args:
        description: Material description text to search.

    Returns:
        A result dictionary with confidence 0.85 for the first KS standard
        with a matching pattern, or ``{"confidence": 0.0}`` otherwise.
    """
    for standards in MATERIAL_STANDARDS["KS"].values():
        for standard, standard_data in standards.items():
            matched = any(
                re.search(pattern, description)
                for pattern in standard_data["patterns"]
            )
            if not matched:
                continue
            label = f"KS {standard}"
            return {
                "standard": label,
                "grade": label,
                # KS entries carry no grade here, so the type is estimated
                # from the description text.
                "material_type": determine_material_type_from_description(description),
                "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                "description": standard_data["description"],
                "confidence": 0.85,
                "evidence": [f"KS_{standard}"]
            }

    return {"confidence": 0.0}
def check_jis_materials(description: str) -> Dict:
    """
    Search the JIS standards for a match in the description.

    Args:
        description: Material description text to search.

    Returns:
        A result dictionary with confidence 0.85 for the first JIS standard
        with a matching pattern, or ``{"confidence": 0.0}`` otherwise.
    """
    jis_categories = MATERIAL_STANDARDS["JIS"]

    for standards in jis_categories.values():
        for standard, standard_data in standards.items():
            for pattern in standard_data["patterns"]:
                if re.search(pattern, description) is None:
                    continue
                jis_name = f"JIS {standard}"
                return {
                    "standard": jis_name,
                    "grade": jis_name,
                    # The material type is estimated from the description
                    # text rather than from a grade.
                    "material_type": determine_material_type_from_description(description),
                    "manufacturing": standard_data.get("manufacturing", "UNKNOWN"),
                    "description": standard_data["description"],
                    "confidence": 0.85,
                    "evidence": [f"JIS_{standard}"]
                }

    return {"confidence": 0.0}
def check_generic_materials(description: str) -> Dict:
    """
    Fall back to generic material keywords.

    ``GENERIC_MATERIAL_KEYWORDS`` is scanned in its own order and the first
    keyword contained in ``description`` decides the material type.

    Args:
        description: Material description text to search.

    Returns:
        A "GENERIC" result with confidence 0.6 for the first keyword found,
        or an all-"UNKNOWN" result with confidence 0.0 when none is found.
    """
    first_hit = next(
        (
            (material_type, keyword)
            for material_type, keywords in GENERIC_MATERIAL_KEYWORDS.items()
            for keyword in keywords
            if keyword in description
        ),
        None,
    )

    if first_hit is None:
        return {
            "standard": "UNKNOWN",
            "grade": "UNKNOWN",
            "material_type": "UNKNOWN",
            "manufacturing": "UNKNOWN",
            "confidence": 0.0,
            "evidence": ["NO_MATERIAL_FOUND"]
        }

    material_type, keyword = first_hit
    return {
        "standard": "GENERIC",
        "grade": keyword,
        "material_type": material_type,
        "manufacturing": "UNKNOWN",
        "confidence": 0.6,
        "evidence": [f"GENERIC: {keyword}"]
    }
def determine_material_type(standard: str, grade: str) -> str:
    """
    Derive the material type from a standard code and a grade code.

    Precedence: stainless grade markers, then alloy grade markers, then the
    cast standards (A216, A351); anything else is carbon steel. Markers are
    matched as substrings of the grade.

    Args:
        standard: Standard code such as "A182".
        grade: Grade code; a falsy value (e.g. ``None``) is treated as "".

    Returns:
        One of "STAINLESS_STEEL", "ALLOY_STEEL", "CAST_STEEL", "CARBON_STEEL".
    """
    grade_text = grade or ""

    # Checked in order; the first family with a marker in the grade wins.
    marker_families = (
        ("STAINLESS_STEEL",
         ("304", "316", "321", "347", "F304", "F316", "WP304", "CF8")),
        ("ALLOY_STEEL",
         ("F1", "F5", "F11", "F22", "F91", "WP1", "WP5", "WP11", "WP22", "WP91")),
    )
    for material_type, markers in marker_families:
        for marker in markers:
            if marker in grade_text:
                return material_type

    # Cast standards; otherwise default to carbon steel.
    return "CAST_STEEL" if standard in ("A216", "A351") else "CARBON_STEEL"
# Short codes (SS, STS, CR, MO, CRMO) count only when they are not embedded
# in a longer alphabetic word. A plain substring test would flag "SEAMLESS",
# "CLASS" and "PRESSURE" as stainless and "SCREW" or "MONEL" as alloy steel.
# Digits and punctuation may still adjoin the code ("SS304", "1.25CR-0.5MO").
_STAINLESS_HINT_RE = re.compile(r"(?<![A-Z])(?:SS|STS)(?![A-Z])|STAINLESS|304|316")
_ALLOY_HINT_RE = re.compile(r"ALLOY|합금|(?<![A-Z])(?:CRMO|CR|MO)(?![A-Z])")
_CAST_HINT_RE = re.compile(r"CAST|주조")


def determine_material_type_from_description(description: str) -> str:
    """
    Estimate the material type from free-form description text.

    The text is upper-cased and tested for stainless, alloy and cast hints,
    in that order; the first family that matches wins. Longer keywords
    (STAINLESS, 304, 316, ALLOY, CAST and the Korean equivalents) match as
    substrings, while the short codes SS, STS, CR, MO and CRMO must not be
    part of a longer alphabetic word.

    Args:
        description: Material description text; ``None`` is treated as "".

    Returns:
        One of "STAINLESS_STEEL", "ALLOY_STEEL", "CAST_STEEL", "CARBON_STEEL".
    """
    desc_upper = str(description).upper() if description is not None else ""

    if _STAINLESS_HINT_RE.search(desc_upper):
        return "STAINLESS_STEEL"
    if _ALLOY_HINT_RE.search(desc_upper):
        return "ALLOY_STEEL"
    if _CAST_HINT_RE.search(desc_upper):
        return "CAST_STEEL"
    # Default when no hint is present.
    return "CARBON_STEEL"
def get_manufacturing_method_from_material(material_result: Dict) -> str:
    """
    Estimate the manufacturing method from a material classification result.

    Results with confidence below 0.5 yield "UNKNOWN". Otherwise the
    standard name is checked for known ASTM codes (substring match, in a
    fixed order); if none applies, the result's own "manufacturing" field
    is returned, which defaults to "UNKNOWN".

    Args:
        material_result: Dictionary produced by the material classifiers.

    Returns:
        The manufacturing method name, or "UNKNOWN".
    """
    if material_result.get("confidence", 0) < 0.5:
        return "UNKNOWN"

    standard_name = material_result.get('standard', '')

    # Direct mapping from standard codes; order matters, first hit wins.
    code_to_method = (
        (('A182', 'A105'), 'FORGED'),
        (('A234', 'A403', 'A420'), 'WELDED_FABRICATED'),
        (('A216', 'A351'), 'CAST'),
        (('A106', 'A312'), 'SEAMLESS'),
        (('A53',), 'WELDED_OR_SEAMLESS'),
    )
    for codes, method in code_to_method:
        if any(code in standard_name for code in codes):
            return method

    # No code matched: use the manufacturing field when present.
    return material_result.get("manufacturing", "UNKNOWN")
def get_material_confidence_factors(material_result: Dict) -> List[str]:
    """
    List the factors that describe how reliable a classification is.

    The first entry is always a confidence band: HIGH (>= 0.9), MEDIUM
    (>= 0.7) or LOW. Flags are appended when the "standard" or
    "manufacturing" field equals "UNKNOWN".

    Args:
        material_result: Dictionary produced by the material classifiers.

    Returns:
        List of factor names, confidence band first.
    """
    score = material_result.get("confidence", 0)
    if score >= 0.9:
        band = "HIGH_CONFIDENCE"
    elif score >= 0.7:
        band = "MEDIUM_CONFIDENCE"
    else:
        band = "LOW_CONFIDENCE"

    # (field, flag added when that field is "UNKNOWN"), in output order.
    unknown_flags = (
        ("standard", "NO_STANDARD_FOUND"),
        ("manufacturing", "MANUFACTURING_UNCLEAR"),
    )
    factors = [band]
    factors.extend(
        flag for field, flag in unknown_flags
        if material_result.get(field) == "UNKNOWN"
    )
    return factors