refactor: centralize classifier constants and simplify logic

This commit is contained in:
Hyungi Ahn
2026-01-09 14:07:45 +09:00
parent f16bc662ad
commit ee99586a2f
4 changed files with 237 additions and 95 deletions

View File

@@ -0,0 +1,157 @@
"""
자재 분류 시스템용 상수 및 키워드 정의
중복 로직 제거 및 유지보수성 향상을 위해 중앙 집중화됨
"""
from typing import Dict, List
# ==============================================================================
# 1. 압력 등급 (Pressure Ratings)
# ==============================================================================
# 단순 키워드 목록 (Integrated Classifier용)
LEVEL3_PRESSURE_KEYWORDS = [
"150LB", "300LB", "600LB", "900LB", "1500LB",
"2500LB", "3000LB", "6000LB", "9000LB"
]
# 상세 스펙 및 메타데이터 (Fitting Classifier용)
PRESSURE_RATINGS_SPECS = {
"150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"},
"300LB": {"max_pressure": "740 PSI", "common_use": "중압용"},
"600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"},
"900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"},
"1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"},
"2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"},
"3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"},
"6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"},
"9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"}
}
# 정규식 패턴 (Fitting Classifier용)
PRESSURE_PATTERNS = [
r"(\d+)LB",
r"CLASS\s*(\d+)",
r"CL\s*(\d+)",
r"(\d+)#",
r"(\d+)\s*LB"
]
# ==============================================================================
# 2. OLET 키워드 (OLET Keywords)
# ==============================================================================
# Fitting Classifier와 Integrated Classifier에서 공통 사용
OLET_KEYWORDS = [
# Full Names
"SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET",
"NIP-O-LET", "COUP-O-LET",
# Variations
"SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET",
"OLET", "올렛", "O-LET", "SOCKLET", "SOCKET-O-LET", "WELD O-LET", "ELL O-LET",
"THREADED-O-LET", "ELBOW-O-LET", "NIPPLE-O-LET", "COUPLING-O-LET",
# Abbreviations (Caution: specific context needed sometimes)
"SOL", "WOL", "EOL", "TOL", "NOL", "COL"
]
# ==============================================================================
# 3. 연결 방식 (Connection Methods)
# ==============================================================================
LEVEL3_CONNECTION_KEYWORDS = {
"SW": ["SW", "SOCKET WELD", "소켓웰드", "SOCKET-WELD", "_SW_"],
"THD": ["THD", "THREADED", "NPT", "나사", "THRD", "TR", "_TR", "_THD"],
"BW": ["BW", "BUTT WELD", "맞대기용접", "BUTT-WELD", "_BW"],
"FL": ["FL", "FLANGED", "플랜지", "FLG", "_FL_"]
}
# ==============================================================================
# 4. 재질 키워드 (Material Keywords)
# ==============================================================================
LEVEL4_MATERIAL_KEYWORDS = {
"PIPE": ["A106", "A333", "A312", "A53"],
"FITTING": ["A234", "A403", "A420"],
"FLANGE": ["A182", "A350"],
"VALVE": ["A216", "A217", "A351", "A352"],
"BOLT": ["A193", "A194", "A320", "A325", "A490"]
}
GENERIC_MATERIALS = {
"A105": ["VALVE", "FLANGE", "FITTING"],
"316": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"],
"304": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"]
}
# ==============================================================================
# 5. 메인 분류 키워드 (Level 1 Type Keywords)
# ==============================================================================
LEVEL1_TYPE_KEYWORDS = {
"BOLT": [
"FLANGE BOLT", "U-BOLT", "U BOLT", "BOLT", "STUD", "NUT", "SCREW",
"WASHER", "볼트", "너트", "스터드", "나사", "와셔", "유볼트"
],
"VALVE": [
"VALVE", "GATE", "BALL", "GLOBE", "CHECK", "BUTTERFLY", "NEEDLE",
"RELIEF", "SIGHT GLASS", "STRAINER", "밸브", "게이트", "", "글로브",
"체크", "버터플라이", "니들", "릴리프", "사이트글라스", "스트레이너"
],
"FLANGE": [
"FLG", "FLANGE", "플랜지", "프랜지", "ORIFICE", "SPECTACLE", "PADDLE",
"SPACER", "BLIND", "REDUCING FLANGE", "RED FLANGE"
],
"PIPE": [
"PIPE", "TUBE", "파이프", "배관", "SMLS", "SEAMLESS"
],
"FITTING": [
# Standard Fittings
"ELBOW", "ELL", "TEE", "REDUCER", "CAP", "COUPLING", "NIPPLE", "SWAGE", "PLUG",
"엘보", "", "리듀서", "", "니플", "커플링", "플러그", "CONC", "ECC",
# Instrument Fittings
"SWAGELOK", "DK-LOK", "HY-LOK", "SUPERLOK", "TUBE FITTING", "COMPRESSION",
"UNION", "FERRULE", "NUT & FERRULE", "MALE CONNECTOR", "FEMALE CONNECTOR",
"TUBE ADAPTER", "PORT CONNECTOR", "CONNECTOR"
] + OLET_KEYWORDS, # OLET Keywords 병합
"GASKET": [
"GASKET", "GASK", "가스켓", "SWG", "SPIRAL"
],
"INSTRUMENT": [
"GAUGE", "TRANSMITTER", "SENSOR", "THERMOMETER", "계기", "게이지", "트랜스미터", "센서"
],
"SUPPORT": [
"URETHANE BLOCK", "URETHANE", "BLOCK SHOE", "CLAMP", "SUPPORT", "HANGER",
"SPRING", "우레탄", "블록", "클램프", "서포트", "행거", "스프링", "PIPE CLAMP"
],
"PLATE": [
"PLATE", "PL", "CHECKER PLATE", "판재", "철판"
],
"STRUCTURAL": [
"H-BEAM", "BEAM", "ANGLE", "CHANNEL", "H-SECTION", "I-BEAM", "형강", "앵글", "채널"
]
}
# ==============================================================================
# 6. 서브타입 키워드 (Level 2 Subtype Keywords)
# ==============================================================================
LEVEL2_SUBTYPE_KEYWORDS = {
"VALVE": {
"GATE": ["GATE VALVE", "GATE", "게이트 밸브"],
"BALL": ["BALL VALVE", "BALL", "볼 밸브"],
"GLOBE": ["GLOBE VALVE", "GLOBE", "글로브 밸브"],
"CHECK": ["CHECK VALVE", "CHECK", "체크 밸브", "역지 밸브"]
},
"FLANGE": {
"WELD_NECK": ["WELD NECK", "WN", "웰드넥"],
"SLIP_ON": ["SLIP ON", "SO", "슬립온"],
"BLIND": ["BLIND", "BL", "막음", "차단"],
"SOCKET_WELD": ["SOCKET WELD", "소켓웰드"]
},
"BOLT": {
"HEX_BOLT": ["HEX BOLT", "HEXAGON", "육각 볼트"],
"STUD_BOLT": ["STUD BOLT", "STUD", "스터드 볼트"],
"U_BOLT": ["U-BOLT", "U BOLT", "유볼트"]
},
"SUPPORT": {
"URETHANE_BLOCK": ["URETHANE BLOCK", "BLOCK SHOE", "우레탄 블록"],
"CLAMP": ["CLAMP", "클램프"],
"HANGER": ["HANGER", "SUPPORT", "행거", "서포트"],
"SPRING": ["SPRING", "스프링"]
}
}

View File

@@ -6,6 +6,7 @@ FITTING 분류 시스템 V2
import re import re
from typing import Dict, List, Optional from typing import Dict, List, Optional
from .material_classifier import classify_material, get_manufacturing_method_from_material from .material_classifier import classify_material, get_manufacturing_method_from_material
from .classifier_constants import PRESSURE_PATTERNS, PRESSURE_RATINGS_SPECS, OLET_KEYWORDS
# ========== FITTING 타입별 분류 (실제 BOM 기반) ========== # ========== FITTING 타입별 분류 (실제 BOM 기반) ==========
FITTING_TYPES = { FITTING_TYPES = {
@@ -103,7 +104,7 @@ FITTING_TYPES = {
"OLET": { "OLET": {
"dat_file_patterns": ["SOL_", "WOL_", "TOL_", "EOL_", "NOL_", "COL_", "OLET_", "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET"], "dat_file_patterns": ["SOL_", "WOL_", "TOL_", "EOL_", "NOL_", "COL_", "OLET_", "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET"],
"description_keywords": ["SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET", "SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", "OLET", "올렛", "O-LET", "SOCKLET"], "description_keywords": OLET_KEYWORDS,
"subtypes": { "subtypes": {
"SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"], "SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"],
"WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"], "WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"],
@@ -169,24 +170,8 @@ CONNECTION_METHODS = {
# ========== 압력 등급별 분류 ========== # ========== 압력 등급별 분류 ==========
PRESSURE_RATINGS = { PRESSURE_RATINGS = {
"patterns": [ "patterns": PRESSURE_PATTERNS,
r"(\d+)LB", "standard_ratings": PRESSURE_RATINGS_SPECS
r"CLASS\s*(\d+)",
r"CL\s*(\d+)",
r"(\d+)#",
r"(\d+)\s*LB"
],
"standard_ratings": {
"150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"},
"300LB": {"max_pressure": "740 PSI", "common_use": "중압용"},
"600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"},
"900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"},
"1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"},
"2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"},
"3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"},
"6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"},
"9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"}
}
} }
def classify_fitting(dat_file: str, description: str, main_nom: str, def classify_fitting(dat_file: str, description: str, main_nom: str,
@@ -209,7 +194,7 @@ def classify_fitting(dat_file: str, description: str, main_nom: str,
# 1. 피팅 키워드 확인 (재질만 있어도 통합 분류기가 이미 피팅으로 분류했으므로 진행) # 1. 피팅 키워드 확인 (재질만 있어도 통합 분류기가 이미 피팅으로 분류했으므로 진행)
# OLET 키워드를 우선 확인하여 정확한 분류 수행 # OLET 키워드를 우선 확인하여 정확한 분류 수행
olet_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'OLET', 'O-LET', 'SOCKLET'] olet_keywords = OLET_KEYWORDS
has_olet_keyword = any(keyword in desc_upper or keyword in dat_upper for keyword in olet_keywords) has_olet_keyword = any(keyword in desc_upper or keyword in dat_upper for keyword in olet_keywords)
fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'COUPLING', 'PLUG', '엘보', '', '리듀서', '', '니플', '스웨지', '올렛', '커플링', '플러그'] + olet_keywords fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'COUPLING', 'PLUG', '엘보', '', '리듀서', '', '니플', '스웨지', '올렛', '커플링', '플러그'] + olet_keywords
@@ -402,7 +387,7 @@ def classify_fitting_type(dat_file: str, description: str,
desc_upper = description.upper() desc_upper = description.upper()
# 0. OLET 우선 확인 (ELL과의 혼동 방지) # 0. OLET 우선 확인 (ELL과의 혼동 방지)
olet_specific_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'O-LET', 'SOCKLET'] olet_specific_keywords = OLET_KEYWORDS
for keyword in olet_specific_keywords: for keyword in olet_specific_keywords:
if keyword in desc_upper or keyword in dat_upper: if keyword in desc_upper or keyword in dat_upper:
subtype_result = classify_fitting_subtype( subtype_result = classify_fitting_subtype(

View File

@@ -6,79 +6,14 @@
import re import re
from typing import Dict, List, Optional, Tuple from typing import Dict, List, Optional, Tuple
from .fitting_classifier import classify_fitting from .fitting_classifier import classify_fitting
from .classifier_constants import (
# Level 1: 명확한 타입 키워드 (최우선) LEVEL1_TYPE_KEYWORDS,
LEVEL1_TYPE_KEYWORDS = { LEVEL2_SUBTYPE_KEYWORDS,
"BOLT": ["FLANGE BOLT", "U-BOLT", "U BOLT", "BOLT", "STUD", "NUT", "SCREW", "WASHER", "볼트", "너트", "스터드", "나사", "와셔", "유볼트"], LEVEL3_CONNECTION_KEYWORDS,
"VALVE": ["VALVE", "GATE", "BALL", "GLOBE", "CHECK", "BUTTERFLY", "NEEDLE", "RELIEF", "SIGHT GLASS", "STRAINER", "밸브", "게이트", "", "글로브", "체크", "버터플라이", "니들", "릴리프", "사이트글라스", "스트레이너"], LEVEL3_PRESSURE_KEYWORDS,
"FLANGE": ["FLG", "FLANGE", "플랜지", "프랜지", "ORIFICE", "SPECTACLE", "PADDLE", "SPACER", "BLIND", "REDUCING FLANGE", "RED FLANGE"], LEVEL4_MATERIAL_KEYWORDS,
"PIPE": ["PIPE", "TUBE", "파이프", "배관", "SMLS", "SEAMLESS"], GENERIC_MATERIALS
"FITTING": [ )
"SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET",
"SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", "OLET",
"ELBOW", "ELL", "TEE", "REDUCER", "CAP", "COUPLING", "NIPPLE", "SWAGE", "PLUG",
"엘보", "", "리듀서", "", "니플", "커플링", "플러그", "CONC", "ECC",
"SWAGELOK", "UNION", "CONNECTOR", "FERRULE", "NUT & FERRULE", "MALE CONNECTOR", "FEMALE CONNECTOR"
],
"GASKET": ["GASKET", "GASK", "가스켓", "SWG", "SPIRAL"],
"INSTRUMENT": ["GAUGE", "TRANSMITTER", "SENSOR", "THERMOMETER", "계기", "게이지", "트랜스미터", "센서"],
"SUPPORT": ["URETHANE BLOCK", "URETHANE", "BLOCK SHOE", "CLAMP", "SUPPORT", "HANGER", "SPRING", "우레탄", "블록", "클램프", "서포트", "행거", "스프링"],
"PLATE": ["PLATE", "PL", "CHECKER PLATE", "판재", "철판"],
"STRUCTURAL": ["H-BEAM", "BEAM", "ANGLE", "CHANNEL", "H-SECTION", "I-BEAM", "형강", "앵글", "채널"]
}
# Level 2: 서브타입 키워드 (구체화)
LEVEL2_SUBTYPE_KEYWORDS = {
"VALVE": {
"GATE": ["GATE VALVE", "GATE", "게이트 밸브"],
"BALL": ["BALL VALVE", "BALL", "볼 밸브"],
"GLOBE": ["GLOBE VALVE", "GLOBE", "글로브 밸브"],
"CHECK": ["CHECK VALVE", "CHECK", "체크 밸브", "역지 밸브"]
},
"FLANGE": {
"WELD_NECK": ["WELD NECK", "WN", "웰드넥"],
"SLIP_ON": ["SLIP ON", "SO", "슬립온"],
"BLIND": ["BLIND", "BL", "막음", "차단"],
"SOCKET_WELD": ["SOCKET WELD", "소켓웰드"]
},
"BOLT": {
"HEX_BOLT": ["HEX BOLT", "HEXAGON", "육각 볼트"],
"STUD_BOLT": ["STUD BOLT", "STUD", "스터드 볼트"],
"U_BOLT": ["U-BOLT", "U BOLT", "유볼트"]
},
"SUPPORT": {
"URETHANE_BLOCK": ["URETHANE BLOCK", "BLOCK SHOE", "우레탄 블록"],
"CLAMP": ["CLAMP", "클램프"],
"HANGER": ["HANGER", "SUPPORT", "행거", "서포트"],
"SPRING": ["SPRING", "스프링"]
}
}
# Level 3: 연결/압력 키워드 (공용)
LEVEL3_CONNECTION_KEYWORDS = {
"SW": ["SW", "SOCKET WELD", "소켓웰드"],
"THD": ["THD", "THREADED", "NPT", "나사"],
"FL": ["FL", "FLANGED", "플랜지형"],
"BW": ["BW", "BUTT WELD", "맞대기용접"]
}
LEVEL3_PRESSURE_KEYWORDS = ["150LB", "300LB", "600LB", "900LB", "1500LB", "2500LB", "3000LB", "6000LB"]
# Level 4: 재질 키워드 (최후 판단)
LEVEL4_MATERIAL_KEYWORDS = {
"PIPE": ["A106", "A333", "A312", "A53"],
"FITTING": ["A234", "A403", "A420"],
"FLANGE": ["A182", "A350"], # A105 제거 (범용 재질로 이동)
"VALVE": ["A216", "A217", "A351", "A352"],
"BOLT": ["A193", "A194", "A320", "A325", "A490"]
}
# 범용 재질 (여러 타입에 사용 가능)
GENERIC_MATERIALS = {
"A105": ["VALVE", "FLANGE", "FITTING"], # 우선순위 순서
"316": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"],
"304": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"]
}
def classify_material_integrated(description: str, main_nom: str = "", def classify_material_integrated(description: str, main_nom: str = "",
red_nom: str = "", length: float = None) -> Dict: red_nom: str = "", length: float = None) -> Dict:
@@ -142,6 +77,18 @@ def classify_material_integrated(description: str, main_nom: str = "",
"classification_level": "LEVEL0_SUPPORT", "classification_level": "LEVEL0_SUPPORT",
"reason": "SUPPORT 시스템 키워드 발견" "reason": "SUPPORT 시스템 키워드 발견"
} }
# [신규] Swagelok 스타일 파트 넘버 패턴 확인
# 예: SS-400-1-4, SS-810-6, B-400-9, SS-1610-P
swagelok_pattern = r'\b(SS|S|B|A|M)-([0-9]{3,4}|[0-9]+M[0-9]*)-([0-9A-Z])'
if re.search(swagelok_pattern, desc_upper):
return {
"category": "TUBE_FITTING",
"confidence": 0.98,
"evidence": ["SWAGELOK_PART_NO"],
"classification_level": "LEVEL0_PARTNO",
"reason": "Swagelok 스타일 파트넘버 감지"
}
# 쉼표로 구분된 각 부분을 별도로 체크 (예: "NIPPLE, SMLS, SCH 80") # 쉼표로 구분된 각 부분을 별도로 체크 (예: "NIPPLE, SMLS, SCH 80")
desc_parts = [part.strip() for part in desc_upper.split(',')] desc_parts = [part.strip() for part in desc_upper.split(',')]
@@ -351,4 +298,4 @@ def should_exclude_material(description: str) -> bool:
] ]
desc_upper = description.upper() desc_upper = description.upper()
return any(keyword in desc_upper for keyword in exclude_keywords) return any(keyword in desc_upper for keyword in exclude_keywords)

View File

@@ -0,0 +1,53 @@
import pytest
from app.services.integrated_classifier import classify_material_integrated
from app.services.fitting_classifier import classify_fitting
from app.services.classifier_constants import LEVEL1_TYPE_KEYWORDS
def test_classify_simple_pipe():
result = classify_material_integrated("PIPE, A106 Gr.B, 2 INCH")
# LEVEL1_TYPE_KEYWORDS["PIPE"] contains "PIPE"
assert result["category"] == "PIPE"
def test_classify_fitting_elbow():
result = classify_material_integrated("ELBOW 90DEG, BW")
# Should route to FITTING and then call fitting_classifier
assert result["category"] == "FITTING"
# detail check
if "fitting_type" in result:
assert result["fitting_type"]["type"] == "ELBOW"
def test_classify_swagelok_partno():
# Regex check in integrated_classifier
result = classify_material_integrated("SS-400-1-4 CONNECTOR")
# Should be detected by swagelok_pattern as TUBE_FITTING (Level 0)
assert result["category"] == "TUBE_FITTING"
def test_classify_swagelok_keyword():
# Keyword check
result = classify_material_integrated("SWAGELOK UNION 1/4 INCH")
# 'SWAGELOK' is in FITTING list in constants.
# So it should be FITTING?
# BUT integrated_classifier has logic: if detected_type == FITTING -> call classify_fitting
# classify_fitting checks 'SWAGELOK' -> sets category 'INSTRUMENT_FITTING'
# Let's see what meaningful category it returns.
# The return from classify_fitting overrides integrated result if present.
assert result["category"] in ["FITTING", "INSTRUMENT_FITTING"]
def test_classify_u_bolt():
# Priority check: U-BOLT is in BOLT keywords but integrated_classifier has early check for SUPPORT
result = classify_material_integrated("U-BOLT, 2 INCH")
assert result["category"] == "SUPPORT"
def test_classify_pressure_constants_usage():
# fitting_classifier uses imported constants
# Test if it recognizes 3000LB (from constants)
result = classify_fitting("P_DAT", "COUPLING, 3000LB, SW", "2")
assert result["pressure_rating"]["rating"] == "3000LB"
assert result["pressure_rating"]["confidence"] > 0.9
def test_classify_olet_constants_usage():
# Detect OLET
result = classify_fitting("P_DAT", "WELDOLET, 3000LB", "2", "1")
assert result["fitting_type"]["type"] == "OLET"