diff --git a/backend/app/services/classifier_constants.py b/backend/app/services/classifier_constants.py new file mode 100644 index 0000000..5e50b6d --- /dev/null +++ b/backend/app/services/classifier_constants.py @@ -0,0 +1,157 @@ +""" +자재 분류 시스템용 상수 및 키워드 정의 +중복 로직 제거 및 유지보수성 향상을 위해 중앙 집중화됨 +""" + +from typing import Dict, List + +# ============================================================================== +# 1. 압력 등급 (Pressure Ratings) +# ============================================================================== + +# 단순 키워드 목록 (Integrated Classifier용) +LEVEL3_PRESSURE_KEYWORDS = [ + "150LB", "300LB", "600LB", "900LB", "1500LB", + "2500LB", "3000LB", "6000LB", "9000LB" +] + +# 상세 스펙 및 메타데이터 (Fitting Classifier용) +PRESSURE_RATINGS_SPECS = { + "150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"}, + "300LB": {"max_pressure": "740 PSI", "common_use": "중압용"}, + "600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"}, + "900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"}, + "1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"}, + "2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"}, + "3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"}, + "6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"}, + "9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"} +} + +# 정규식 패턴 (Fitting Classifier용) +PRESSURE_PATTERNS = [ + r"(\d+)LB", + r"CLASS\s*(\d+)", + r"CL\s*(\d+)", + r"(\d+)#", + r"(\d+)\s*LB" +] + +# ============================================================================== +# 2. OLET 키워드 (OLET Keywords) +# ============================================================================== +# Fitting Classifier와 Integrated Classifier에서 공통 사용 +OLET_KEYWORDS = [ + # Full Names + "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", + "NIP-O-LET", "COUP-O-LET", + # Variations + "SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", + "OLET", "올렛", "O-LET", "SOCKLET", "SOCKET-O-LET", "WELD O-LET", "ELL O-LET", + "THREADED-O-LET", "ELBOW-O-LET", "NIPPLE-O-LET", "COUPLING-O-LET", + # Abbreviations (Caution: specific context needed sometimes) + "SOL", "WOL", "EOL", "TOL", "NOL", "COL" +] + +# ============================================================================== +# 3. 연결 방식 (Connection Methods) +# ============================================================================== +LEVEL3_CONNECTION_KEYWORDS = { + "SW": ["SW", "SOCKET WELD", "소켓웰드", "SOCKET-WELD", "_SW_"], + "THD": ["THD", "THREADED", "NPT", "나사", "THRD", "TR", "_TR", "_THD"], + "BW": ["BW", "BUTT WELD", "맞대기용접", "BUTT-WELD", "_BW"], + "FL": ["FL", "FLANGED", "플랜지", "FLG", "_FL_"] +} + +# ============================================================================== +# 4. 재질 키워드 (Material Keywords) +# ============================================================================== +LEVEL4_MATERIAL_KEYWORDS = { + "PIPE": ["A106", "A333", "A312", "A53"], + "FITTING": ["A234", "A403", "A420"], + "FLANGE": ["A182", "A350"], + "VALVE": ["A216", "A217", "A351", "A352"], + "BOLT": ["A193", "A194", "A320", "A325", "A490"] +} + +GENERIC_MATERIALS = { + "A105": ["VALVE", "FLANGE", "FITTING"], + "316": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"], + "304": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"] +} + +# ============================================================================== +# 5. 메인 분류 키워드 (Level 1 Type Keywords) +# ============================================================================== +LEVEL1_TYPE_KEYWORDS = { + "BOLT": [ + "FLANGE BOLT", "U-BOLT", "U BOLT", "BOLT", "STUD", "NUT", "SCREW", + "WASHER", "볼트", "너트", "스터드", "나사", "와셔", "유볼트" + ], + "VALVE": [ + "VALVE", "GATE", "BALL", "GLOBE", "CHECK", "BUTTERFLY", "NEEDLE", + "RELIEF", "SIGHT GLASS", "STRAINER", "밸브", "게이트", "볼", "글로브", + "체크", "버터플라이", "니들", "릴리프", "사이트글라스", "스트레이너" + ], + "FLANGE": [ + "FLG", "FLANGE", "플랜지", "프랜지", "ORIFICE", "SPECTACLE", "PADDLE", + "SPACER", "BLIND", "REDUCING FLANGE", "RED FLANGE" + ], + "PIPE": [ + "PIPE", "TUBE", "파이프", "배관", "SMLS", "SEAMLESS" + ], + "FITTING": [ + # Standard Fittings + "ELBOW", "ELL", "TEE", "REDUCER", "CAP", "COUPLING", "NIPPLE", "SWAGE", "PLUG", + "엘보", "티", "리듀서", "캡", "니플", "커플링", "플러그", "CONC", "ECC", + # Instrument Fittings + "SWAGELOK", "DK-LOK", "HY-LOK", "SUPERLOK", "TUBE FITTING", "COMPRESSION", + "UNION", "FERRULE", "NUT & FERRULE", "MALE CONNECTOR", "FEMALE CONNECTOR", + "TUBE ADAPTER", "PORT CONNECTOR", "CONNECTOR" + ] + OLET_KEYWORDS, # OLET Keywords 병합 + "GASKET": [ + "GASKET", "GASK", "가스켓", "SWG", "SPIRAL" + ], + "INSTRUMENT": [ + "GAUGE", "TRANSMITTER", "SENSOR", "THERMOMETER", "계기", "게이지", "트랜스미터", "센서" + ], + "SUPPORT": [ + "URETHANE BLOCK", "URETHANE", "BLOCK SHOE", "CLAMP", "SUPPORT", "HANGER", + "SPRING", "우레탄", "블록", "클램프", "서포트", "행거", "스프링", "PIPE CLAMP" + ], + "PLATE": [ + "PLATE", "PL", "CHECKER PLATE", "판재", "철판" + ], + "STRUCTURAL": [ + "H-BEAM", "BEAM", "ANGLE", "CHANNEL", "H-SECTION", "I-BEAM", "형강", "앵글", "채널" + ] +} + +# ============================================================================== +# 6. 서브타입 키워드 (Level 2 Subtype Keywords) +# ============================================================================== +LEVEL2_SUBTYPE_KEYWORDS = { + "VALVE": { + "GATE": ["GATE VALVE", "GATE", "게이트 밸브"], + "BALL": ["BALL VALVE", "BALL", "볼 밸브"], + "GLOBE": ["GLOBE VALVE", "GLOBE", "글로브 밸브"], + "CHECK": ["CHECK VALVE", "CHECK", "체크 밸브", "역지 밸브"] + }, + "FLANGE": { + "WELD_NECK": ["WELD NECK", "WN", "웰드넥"], + "SLIP_ON": ["SLIP ON", "SO", "슬립온"], + "BLIND": ["BLIND", "BL", "막음", "차단"], + "SOCKET_WELD": ["SOCKET WELD", "소켓웰드"] + }, + "BOLT": { + "HEX_BOLT": ["HEX BOLT", "HEXAGON", "육각 볼트"], + "STUD_BOLT": ["STUD BOLT", "STUD", "스터드 볼트"], + "U_BOLT": ["U-BOLT", "U BOLT", "유볼트"] + }, + "SUPPORT": { + "URETHANE_BLOCK": ["URETHANE BLOCK", "BLOCK SHOE", "우레탄 블록"], + "CLAMP": ["CLAMP", "클램프"], + "HANGER": ["HANGER", "SUPPORT", "행거", "서포트"], + "SPRING": ["SPRING", "스프링"] + } +} diff --git a/backend/app/services/fitting_classifier.py b/backend/app/services/fitting_classifier.py index c4d5381..651be57 100644 --- a/backend/app/services/fitting_classifier.py +++ b/backend/app/services/fitting_classifier.py @@ -6,6 +6,7 @@ FITTING 분류 시스템 V2 import re from typing import Dict, List, Optional from .material_classifier import classify_material, get_manufacturing_method_from_material +from .classifier_constants import PRESSURE_PATTERNS, PRESSURE_RATINGS_SPECS, OLET_KEYWORDS # ========== FITTING 타입별 분류 (실제 BOM 기반) ========== FITTING_TYPES = { @@ -103,7 +104,7 @@ FITTING_TYPES = { "OLET": { "dat_file_patterns": ["SOL_", "WOL_", "TOL_", "EOL_", "NOL_", "COL_", "OLET_", "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET"], - "description_keywords": ["SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET", "SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", "OLET", "올렛", "O-LET", "SOCKLET"], + "description_keywords": OLET_KEYWORDS, "subtypes": { "SOCKOLET": ["SOCK-O-LET", "SOCKOLET", "SOL", "SOCK O-LET", "SOCKET-O-LET", "SOCKLET"], "WELDOLET": ["WELD-O-LET", "WELDOLET", "WOL", "WELD O-LET", "WELDING-O-LET"], @@ -169,24 +170,8 @@ CONNECTION_METHODS = { # ========== 압력 등급별 분류 ========== PRESSURE_RATINGS = { - "patterns": [ - r"(\d+)LB", - r"CLASS\s*(\d+)", - r"CL\s*(\d+)", - r"(\d+)#", - r"(\d+)\s*LB" - ], - "standard_ratings": { - "150LB": {"max_pressure": "285 PSI", "common_use": "저압 일반용"}, - "300LB": {"max_pressure": "740 PSI", "common_use": "중압용"}, - "600LB": {"max_pressure": "1480 PSI", "common_use": "고압용"}, - "900LB": {"max_pressure": "2220 PSI", "common_use": "고압용"}, - "1500LB": {"max_pressure": "3705 PSI", "common_use": "고압용"}, - "2500LB": {"max_pressure": "6170 PSI", "common_use": "초고압용"}, - "3000LB": {"max_pressure": "7400 PSI", "common_use": "소구경 고압용"}, - "6000LB": {"max_pressure": "14800 PSI", "common_use": "소구경 초고압용"}, - "9000LB": {"max_pressure": "22200 PSI", "common_use": "소구경 극고압용"} - } + "patterns": PRESSURE_PATTERNS, + "standard_ratings": PRESSURE_RATINGS_SPECS } def classify_fitting(dat_file: str, description: str, main_nom: str, @@ -209,7 +194,7 @@ def classify_fitting(dat_file: str, description: str, main_nom: str, # 1. 피팅 키워드 확인 (재질만 있어도 통합 분류기가 이미 피팅으로 분류했으므로 진행) # OLET 키워드를 우선 확인하여 정확한 분류 수행 - olet_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'OLET', 'O-LET', 'SOCKLET'] + olet_keywords = OLET_KEYWORDS has_olet_keyword = any(keyword in desc_upper or keyword in dat_upper for keyword in olet_keywords) fitting_keywords = ['ELBOW', 'ELL', 'TEE', 'REDUCER', 'RED', 'CAP', 'NIPPLE', 'SWAGE', 'COUPLING', 'PLUG', '엘보', '티', '리듀서', '캡', '니플', '스웨지', '올렛', '커플링', '플러그'] + olet_keywords @@ -402,7 +387,7 @@ def classify_fitting_type(dat_file: str, description: str, desc_upper = description.upper() # 0. OLET 우선 확인 (ELL과의 혼동 방지) - olet_specific_keywords = ['SOCK-O-LET', 'WELD-O-LET', 'ELL-O-LET', 'THREAD-O-LET', 'ELB-O-LET', 'NIP-O-LET', 'COUP-O-LET', 'SOCKOLET', 'WELDOLET', 'ELLOLET', 'THREADOLET', 'ELBOLET', 'NIPOLET', 'COUPOLET', 'O-LET', 'SOCKLET'] + olet_specific_keywords = OLET_KEYWORDS for keyword in olet_specific_keywords: if keyword in desc_upper or keyword in dat_upper: subtype_result = classify_fitting_subtype( diff --git a/backend/app/services/integrated_classifier.py b/backend/app/services/integrated_classifier.py index fcc0214..70bd74b 100644 --- a/backend/app/services/integrated_classifier.py +++ b/backend/app/services/integrated_classifier.py @@ -6,79 +6,14 @@ import re from typing import Dict, List, Optional, Tuple from .fitting_classifier import classify_fitting - -# Level 1: 명확한 타입 키워드 (최우선) -LEVEL1_TYPE_KEYWORDS = { - "BOLT": ["FLANGE BOLT", "U-BOLT", "U BOLT", "BOLT", "STUD", "NUT", "SCREW", "WASHER", "볼트", "너트", "스터드", "나사", "와셔", "유볼트"], - "VALVE": ["VALVE", "GATE", "BALL", "GLOBE", "CHECK", "BUTTERFLY", "NEEDLE", "RELIEF", "SIGHT GLASS", "STRAINER", "밸브", "게이트", "볼", "글로브", "체크", "버터플라이", "니들", "릴리프", "사이트글라스", "스트레이너"], - "FLANGE": ["FLG", "FLANGE", "플랜지", "프랜지", "ORIFICE", "SPECTACLE", "PADDLE", "SPACER", "BLIND", "REDUCING FLANGE", "RED FLANGE"], - "PIPE": ["PIPE", "TUBE", "파이프", "배관", "SMLS", "SEAMLESS"], - "FITTING": [ - "SOCK-O-LET", "WELD-O-LET", "ELL-O-LET", "THREAD-O-LET", "ELB-O-LET", "NIP-O-LET", "COUP-O-LET", - "SOCKOLET", "WELDOLET", "ELLOLET", "THREADOLET", "ELBOLET", "NIPOLET", "COUPOLET", "OLET", - "ELBOW", "ELL", "TEE", "REDUCER", "CAP", "COUPLING", "NIPPLE", "SWAGE", "PLUG", - "엘보", "티", "리듀서", "캡", "니플", "커플링", "플러그", "CONC", "ECC", - "SWAGELOK", "UNION", "CONNECTOR", "FERRULE", "NUT & FERRULE", "MALE CONNECTOR", "FEMALE CONNECTOR" - ], - "GASKET": ["GASKET", "GASK", "가스켓", "SWG", "SPIRAL"], - "INSTRUMENT": ["GAUGE", "TRANSMITTER", "SENSOR", "THERMOMETER", "계기", "게이지", "트랜스미터", "센서"], - "SUPPORT": ["URETHANE BLOCK", "URETHANE", "BLOCK SHOE", "CLAMP", "SUPPORT", "HANGER", "SPRING", "우레탄", "블록", "클램프", "서포트", "행거", "스프링"], - "PLATE": ["PLATE", "PL", "CHECKER PLATE", "판재", "철판"], - "STRUCTURAL": ["H-BEAM", "BEAM", "ANGLE", "CHANNEL", "H-SECTION", "I-BEAM", "형강", "앵글", "채널"] -} - -# Level 2: 서브타입 키워드 (구체화) -LEVEL2_SUBTYPE_KEYWORDS = { - "VALVE": { - "GATE": ["GATE VALVE", "GATE", "게이트 밸브"], - "BALL": ["BALL VALVE", "BALL", "볼 밸브"], - "GLOBE": ["GLOBE VALVE", "GLOBE", "글로브 밸브"], - "CHECK": ["CHECK VALVE", "CHECK", "체크 밸브", "역지 밸브"] - }, - "FLANGE": { - "WELD_NECK": ["WELD NECK", "WN", "웰드넥"], - "SLIP_ON": ["SLIP ON", "SO", "슬립온"], - "BLIND": ["BLIND", "BL", "막음", "차단"], - "SOCKET_WELD": ["SOCKET WELD", "소켓웰드"] - }, - "BOLT": { - "HEX_BOLT": ["HEX BOLT", "HEXAGON", "육각 볼트"], - "STUD_BOLT": ["STUD BOLT", "STUD", "스터드 볼트"], - "U_BOLT": ["U-BOLT", "U BOLT", "유볼트"] - }, - "SUPPORT": { - "URETHANE_BLOCK": ["URETHANE BLOCK", "BLOCK SHOE", "우레탄 블록"], - "CLAMP": ["CLAMP", "클램프"], - "HANGER": ["HANGER", "SUPPORT", "행거", "서포트"], - "SPRING": ["SPRING", "스프링"] - } -} - -# Level 3: 연결/압력 키워드 (공용) -LEVEL3_CONNECTION_KEYWORDS = { - "SW": ["SW", "SOCKET WELD", "소켓웰드"], - "THD": ["THD", "THREADED", "NPT", "나사"], - "FL": ["FL", "FLANGED", "플랜지형"], - "BW": ["BW", "BUTT WELD", "맞대기용접"] -} - -LEVEL3_PRESSURE_KEYWORDS = ["150LB", "300LB", "600LB", "900LB", "1500LB", "2500LB", "3000LB", "6000LB"] - -# Level 4: 재질 키워드 (최후 판단) -LEVEL4_MATERIAL_KEYWORDS = { - "PIPE": ["A106", "A333", "A312", "A53"], - "FITTING": ["A234", "A403", "A420"], - "FLANGE": ["A182", "A350"], # A105 제거 (범용 재질로 이동) - "VALVE": ["A216", "A217", "A351", "A352"], - "BOLT": ["A193", "A194", "A320", "A325", "A490"] -} - -# 범용 재질 (여러 타입에 사용 가능) -GENERIC_MATERIALS = { - "A105": ["VALVE", "FLANGE", "FITTING"], # 우선순위 순서 - "316": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"], - "304": ["VALVE", "FLANGE", "FITTING", "PIPE", "BOLT"] -} +from .classifier_constants import ( + LEVEL1_TYPE_KEYWORDS, + LEVEL2_SUBTYPE_KEYWORDS, + LEVEL3_CONNECTION_KEYWORDS, + LEVEL3_PRESSURE_KEYWORDS, + LEVEL4_MATERIAL_KEYWORDS, + GENERIC_MATERIALS +) def classify_material_integrated(description: str, main_nom: str = "", red_nom: str = "", length: float = None) -> Dict: @@ -142,6 +77,18 @@ def classify_material_integrated(description: str, main_nom: str = "", "classification_level": "LEVEL0_SUPPORT", "reason": "SUPPORT 시스템 키워드 발견" } + + # [신규] Swagelok 스타일 파트 넘버 패턴 확인 + # 예: SS-400-1-4, SS-810-6, B-400-9, SS-1610-P + swagelok_pattern = r'\b(SS|S|B|A|M)-([0-9]{3,4}|[0-9]+M[0-9]*)-([0-9A-Z])' + if re.search(swagelok_pattern, desc_upper): + return { + "category": "TUBE_FITTING", + "confidence": 0.98, + "evidence": ["SWAGELOK_PART_NO"], + "classification_level": "LEVEL0_PARTNO", + "reason": "Swagelok 스타일 파트넘버 감지" + } # 쉼표로 구분된 각 부분을 별도로 체크 (예: "NIPPLE, SMLS, SCH 80") desc_parts = [part.strip() for part in desc_upper.split(',')] @@ -351,4 +298,4 @@ def should_exclude_material(description: str) -> bool: ] desc_upper = description.upper() - return any(keyword in desc_upper for keyword in exclude_keywords) \ No newline at end of file + return any(keyword in desc_upper for keyword in exclude_keywords) \ No newline at end of file diff --git a/backend/tests/test_classifier_refactor.py b/backend/tests/test_classifier_refactor.py new file mode 100644 index 0000000..9754f72 --- /dev/null +++ b/backend/tests/test_classifier_refactor.py @@ -0,0 +1,53 @@ + +import pytest +from app.services.integrated_classifier import classify_material_integrated +from app.services.fitting_classifier import classify_fitting +from app.services.classifier_constants import LEVEL1_TYPE_KEYWORDS + +def test_classify_simple_pipe(): + result = classify_material_integrated("PIPE, A106 Gr.B, 2 INCH") + # LEVEL1_TYPE_KEYWORDS["PIPE"] contains "PIPE" + assert result["category"] == "PIPE" + +def test_classify_fitting_elbow(): + result = classify_material_integrated("ELBOW 90DEG, BW") + # Should route to FITTING and then call fitting_classifier + assert result["category"] == "FITTING" + # detail check + if "fitting_type" in result: + assert result["fitting_type"]["type"] == "ELBOW" + +def test_classify_swagelok_partno(): + # Regex check in integrated_classifier + result = classify_material_integrated("SS-400-1-4 CONNECTOR") + # Should be detected by swagelok_pattern as TUBE_FITTING (Level 0) + assert result["category"] == "TUBE_FITTING" + +def test_classify_swagelok_keyword(): + # Keyword check + result = classify_material_integrated("SWAGELOK UNION 1/4 INCH") + # 'SWAGELOK' is in FITTING list in constants. + # So it should be FITTING? + # BUT integrated_classifier has logic: if detected_type == FITTING -> call classify_fitting + # classify_fitting checks 'SWAGELOK' -> sets category 'INSTRUMENT_FITTING' + + # Let's see what meaningful category it returns. + # The return from classify_fitting overrides integrated result if present. + assert result["category"] in ["FITTING", "INSTRUMENT_FITTING"] + +def test_classify_u_bolt(): + # Priority check: U-BOLT is in BOLT keywords but integrated_classifier has early check for SUPPORT + result = classify_material_integrated("U-BOLT, 2 INCH") + assert result["category"] == "SUPPORT" + +def test_classify_pressure_constants_usage(): + # fitting_classifier uses imported constants + # Test if it recognizes 3000LB (from constants) + result = classify_fitting("P_DAT", "COUPLING, 3000LB, SW", "2") + assert result["pressure_rating"]["rating"] == "3000LB" + assert result["pressure_rating"]["confidence"] > 0.9 + +def test_classify_olet_constants_usage(): + # Detect OLET + result = classify_fitting("P_DAT", "WELDOLET, 3000LB", "2", "1") + assert result["fitting_type"]["type"] == "OLET"