""" PIPE 데이터 추출 서비스 BOM 파일에서 PIPE 자재의 도면-라인번호-길이 정보를 추출하고 처리 """ import logging import re from typing import Dict, List, Optional, Any, Tuple from sqlalchemy.orm import Session from sqlalchemy import text from ..database import get_db from ..models import Material, File from ..utils.pipe_utils import ( PipeConstants, PipeDataExtractor, PipeValidator, PipeFormatter, PipeLogger ) logger = logging.getLogger(__name__) class PipeDataExtractionService: """PIPE 데이터 추출 및 처리 서비스""" def __init__(self, db: Session): self.db = db def extract_pipe_data_from_file(self, file_id: int) -> Dict[str, Any]: """ 파일에서 PIPE 데이터 추출 Args: file_id: 파일 ID Returns: 추출된 PIPE 데이터 정보 """ try: # 1. 파일 정보 확인 file_info = self.db.query(File).filter(File.id == file_id).first() if not file_info: return { "success": False, "message": "파일을 찾을 수 없습니다." } # 2. PIPE 자재 조회 pipe_materials = self._get_pipe_materials_from_file(file_id) if not pipe_materials: return { "success": False, "message": "PIPE 자재가 없습니다." } # 3. 데이터 추출 및 정제 extracted_data = [] extraction_stats = { "total_materials": len(pipe_materials), "successful_extractions": 0, "failed_extractions": 0, "unique_drawings": set(), "unique_line_numbers": set(), "total_length": 0 } for material in pipe_materials: extracted_item = self._extract_pipe_item_data(material) if extracted_item["success"]: extracted_data.append(extracted_item["data"]) extraction_stats["successful_extractions"] += 1 extraction_stats["unique_drawings"].add(extracted_item["data"]["drawing_name"]) if extracted_item["data"]["line_no"]: extraction_stats["unique_line_numbers"].add(extracted_item["data"]["line_no"]) extraction_stats["total_length"] += extracted_item["data"]["length_mm"] else: extraction_stats["failed_extractions"] += 1 logger.warning(f"Failed to extract data from material {material.id}: {extracted_item['message']}") # 4. 통계 정리 extraction_stats["unique_drawings"] = len(extraction_stats["unique_drawings"]) extraction_stats["unique_line_numbers"] = len(extraction_stats["unique_line_numbers"]) return { "success": True, "file_id": file_id, "file_name": file_info.original_filename, "job_no": file_info.job_no, "extracted_data": extracted_data, "extraction_stats": extraction_stats, "message": f"PIPE 데이터 추출 완료: {extraction_stats['successful_extractions']}개 성공, {extraction_stats['failed_extractions']}개 실패" } except Exception as e: logger.error(f"Failed to extract pipe data from file {file_id}: {e}") return { "success": False, "message": f"PIPE 데이터 추출 실패: {str(e)}" } def _get_pipe_materials_from_file(self, file_id: int) -> List[Material]: """파일에서 PIPE 자재 조회""" return self.db.query(Material).filter( Material.file_id == file_id, Material.classified_category == 'PIPE', Material.is_active == True ).all() def _extract_pipe_item_data(self, material: Material) -> Dict[str, Any]: """개별 PIPE 자재에서 데이터 추출""" try: # 기본 정보 data = { "material_id": material.id, "drawing_name": self._extract_drawing_name(material), "line_no": self._extract_line_number(material), "material_grade": self._extract_material_grade(material), "schedule_spec": self._extract_schedule_spec(material), "nominal_size": self._extract_nominal_size(material), "length_mm": self._extract_length(material), "end_preparation": self._extract_end_preparation(material), "quantity": int(material.quantity or 1), "description": material.description or "", "original_description": material.description or "" } # 데이터 검증 validation_result = self._validate_extracted_data(data) if not validation_result["valid"]: return { "success": False, "message": validation_result["message"], "data": data } return { "success": True, "data": data, "message": "데이터 추출 성공" } except Exception as e: logger.error(f"Failed to extract data from material {material.id}: {e}") return { "success": False, "message": f"데이터 추출 실패: {str(e)}", "data": {} } def _extract_drawing_name(self, material: Material) -> str: """도면명 추출""" # 1. drawing_name 필드 우선 if material.drawing_name: return material.drawing_name.strip() # 2. description에서 추출 시도 if material.description: # 일반적인 도면명 패턴 (P&ID-001, DWG-A-001 등) drawing_patterns = [ r'(P&ID[-_]\w+)', r'(DWG[-_]\w+[-_]\w+)', r'(DRAWING[-_]\w+)', r'([A-Z]+[-_]\d+[-_]\w+)', r'([A-Z]+\d+[A-Z]*)' ] for pattern in drawing_patterns: match = re.search(pattern, material.description.upper()) if match: return match.group(1) return "UNKNOWN_DRAWING" def _extract_line_number(self, material: Material) -> str: """라인번호 추출""" # 1. line_no 필드 우선 if material.line_no: return material.line_no.strip() # 2. description에서 추출 시도 if material.description: # 라인번호 패턴 (LINE-001, L-001, 1001 등) line_patterns = [ r'LINE[-_]?(\w+)', r'L[-_]?(\d+[A-Z]*)', r'(\d{3,4}[A-Z]*)', # 3-4자리 숫자 + 선택적 문자 r'([A-Z]\d+[A-Z]*)' # 문자+숫자+선택적문자 ] for pattern in line_patterns: match = re.search(pattern, material.description.upper()) if match: return f"LINE-{match.group(1)}" return "" # 라인번호는 필수가 아님 def _extract_material_grade(self, material: Material) -> str: """재질 추출""" # 1. full_material_grade 필드 우선 if material.full_material_grade: return material.full_material_grade.strip() # 2. description에서 추출 시도 if material.description: # 일반적인 재질 패턴 material_patterns = [ r'(A\d+\s*GR\.?\s*[A-Z])', # A106 GR.B r'(A\d+)', # A106 r'(SS\d+[A-Z]*)', # SS316L r'(CS|CARBON\s*STEEL)', # Carbon Steel r'(SS|STAINLESS\s*STEEL)' # Stainless Steel ] for pattern in material_patterns: match = re.search(pattern, material.description.upper()) if match: return match.group(1).strip() return "UNKNOWN" def _extract_schedule_spec(self, material: Material) -> str: """스케줄/규격 추출""" if material.description: # 스케줄 패턴 (SCH40, SCH80, STD, XS 등) schedule_patterns = [ r'(SCH\s*\d+[A-Z]*)', r'(STD|STANDARD)', r'(XS|EXTRA\s*STRONG)', r'(XXS|DOUBLE\s*EXTRA\s*STRONG)', r'(\d+\.?\d*\s*MM)', # 두께 (mm) r'(\d+\.?\d*"?\s*THK)' # 두께 (THK) ] for pattern in schedule_patterns: match = re.search(pattern, material.description.upper()) if match: return match.group(1).strip() return "" def _extract_nominal_size(self, material: Material) -> str: """호칭 크기 추출""" # 1. main_nom 필드 우선 if material.main_nom: return material.main_nom.strip() # 2. description에서 추출 시도 if material.description: # 호칭 크기 패턴 (4", 6", 100A 등) size_patterns = [ r'(\d+\.?\d*")', # 4", 6.5" r'(\d+\.?\d*\s*INCH)', # 4 INCH r'(\d+A)', # 100A r'(DN\s*\d+)', # DN100 r'(\d+\.?\d*\s*MM)' # 100MM (직경) ] for pattern in size_patterns: match = re.search(pattern, material.description.upper()) if match: return match.group(1).strip() return "" def _extract_length(self, material: Material) -> float: """길이 추출 (mm 단위)""" # 1. length 필드 우선 if material.length and material.length > 0: return float(material.length) # 2. total_length 필드 if material.total_length and material.total_length > 0: return float(material.total_length) # 3. description에서 추출 시도 if material.description: # 길이 패턴 length_patterns = [ r'(\d+\.?\d*)\s*MM', # 1500MM r'(\d+\.?\d*)\s*M(?!\w)', # 1.5M (단, MM이 아닌) r'(\d+\.?\d*)\s*METER', # 1.5 METER r'L\s*=?\s*(\d+\.?\d*)', # L=1500 r'LENGTH\s*:?\s*(\d+\.?\d*)' # LENGTH: 1500 ] for pattern in length_patterns: match = re.search(pattern, material.description.upper()) if match: length_value = float(match.group(1)) # 단위 변환 (M -> MM) if 'M' in pattern and 'MM' not in pattern: length_value *= 1000 return length_value # 기본값: 6000mm (6m) return 6000.0 def _extract_end_preparation(self, material: Material) -> str: """끝단 가공 정보 추출""" if material.description: desc_upper = material.description.upper() # 끝단 가공 패턴 if any(keyword in desc_upper for keyword in ['DOUBLE BEVEL', '양개선', 'DBE']): return '양개선' elif any(keyword in desc_upper for keyword in ['SINGLE BEVEL', '한개선', 'SBE']): return '한개선' elif any(keyword in desc_upper for keyword in ['PLAIN', '무개선', 'PE']): return '무개선' return '무개선' # 기본값 def _validate_extracted_data(self, data: Dict[str, Any]) -> Dict[str, Any]: """추출된 데이터 검증""" errors = [] # 필수 필드 검증 if not data.get("drawing_name") or data["drawing_name"] == "UNKNOWN_DRAWING": errors.append("도면명을 추출할 수 없습니다") if data.get("length_mm", 0) <= 0: errors.append("유효한 길이 정보가 없습니다") if not data.get("material_grade") or data["material_grade"] == "UNKNOWN": errors.append("재질 정보를 추출할 수 없습니다") # 경고 (오류는 아님) warnings = [] if not data.get("line_no"): warnings.append("라인번호가 없습니다") if not data.get("nominal_size"): warnings.append("호칭 크기가 없습니다") return { "valid": len(errors) == 0, "errors": errors, "warnings": warnings, "message": "; ".join(errors) if errors else "검증 통과" } def get_extraction_summary(self, file_id: int) -> Dict[str, Any]: """파일의 PIPE 데이터 추출 요약 정보""" try: extraction_result = self.extract_pipe_data_from_file(file_id) if not extraction_result["success"]: return extraction_result # 요약 통계 생성 extracted_data = extraction_result["extracted_data"] # 도면별 통계 drawing_stats = {} for item in extracted_data: drawing = item["drawing_name"] if drawing not in drawing_stats: drawing_stats[drawing] = { "count": 0, "total_length": 0, "line_numbers": set(), "materials": set() } drawing_stats[drawing]["count"] += 1 drawing_stats[drawing]["total_length"] += item["length_mm"] if item["line_no"]: drawing_stats[drawing]["line_numbers"].add(item["line_no"]) drawing_stats[drawing]["materials"].add(item["material_grade"]) # set을 list로 변환 for drawing in drawing_stats: drawing_stats[drawing]["line_numbers"] = list(drawing_stats[drawing]["line_numbers"]) drawing_stats[drawing]["materials"] = list(drawing_stats[drawing]["materials"]) return { "success": True, "file_id": file_id, "extraction_stats": extraction_result["extraction_stats"], "drawing_stats": drawing_stats, "ready_for_cutting_plan": extraction_result["extraction_stats"]["successful_extractions"] > 0 } except Exception as e: logger.error(f"Failed to get extraction summary: {e}") return { "success": False, "message": f"추출 요약 생성 실패: {str(e)}" } def get_pipe_data_extraction_service(db: Session = None) -> PipeDataExtractionService: """PipeDataExtractionService 인스턴스 생성""" if db is None: db = next(get_db()) return PipeDataExtractionService(db)