볼트 분류 개선 및 업로드 성능 최적화

- 볼트 길이 추출 로직 개선: '70.0000 LG' 형태 인식 추가
- 재질 중복 표시 수정: 'ASTM A193 ASTM A193 B7' → 'B7'
- A193/A194 등급 추출 로직 개선: 'GR B7/2H' 형태 지원
- bolt_details 테이블에 pressure_rating 컬럼 추가
- 볼트 분류기 오분류 방지: 플랜지/피팅이 볼트로 분류되지 않도록 수정
- 업로드 성능 개선: 키워드 기반 빠른 분류기 선택 로직 추가
- 분류 키워드 대폭 확장: 피팅/파이프/플랜지 키워드 추가
This commit is contained in:
Hyungi Ahn
2025-07-18 12:48:24 +09:00
parent 25ce3590ee
commit 3dd301cb57
13 changed files with 1184 additions and 106 deletions

View File

@@ -9,6 +9,7 @@ import uuid
import pandas as pd
import re
from pathlib import Path
import json
from ..database import get_db
from app.services.material_classifier import classify_material
@@ -220,7 +221,14 @@ async def upload_file(
file_id = file_result.fetchone()[0]
print(f"파일 저장 완료: file_id = {file_id}")
# 자재 데이터 저장 (분류 포함)
# 자재 데이터 저장 (분류 포함) - 배치 처리로 성능 개선
materials_to_insert = []
pipe_details_to_insert = []
fitting_details_to_insert = []
bolt_details_to_insert = []
gasket_details_to_insert = []
flange_details_to_insert = []
materials_inserted = 0
for material_data in materials_data:
# 자재 타입 분류기 적용 (PIPE, FITTING, VALVE 등)
@@ -242,47 +250,101 @@ async def upload_file(
main_nom = material_data.get("main_nom")
red_nom = material_data.get("red_nom")
classification_result = None
classification_results = []
try:
# EXCLUDE 분류기 우선 호출 (제외 대상 먼저 걸러냄)
from app.services.exclude_classifier import classify_exclude
classification_result = classify_exclude("", description, main_nom or "")
print(f"EXCLUDE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
exclude_result = classify_exclude("", description, main_nom or "")
print(f"EXCLUDE 분류 결과: {exclude_result.get('category', 'UNKNOWN')} (신뢰도: {exclude_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 파이프 분류기 호출
classification_result = classify_pipe("", description, main_nom or "", length_value)
print(f"PIPE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 피팅 분류기 호출 (main_nom, red_nom 개별 전달)
classification_result = classify_fitting("", description, main_nom or "", red_nom)
print(f"FITTING 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 플랜지 분류기 호출 (main_nom, red_nom 개별 전달)
classification_result = classify_flange("", description, main_nom or "", red_nom)
print(f"FLANGE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 밸브 분류기 호출
classification_result = classify_valve("", description, main_nom or "")
print(f"VALVE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 볼트 분류기 호출
classification_result = classify_bolt("", description, main_nom or "")
print(f"BOLT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 가스켓 분류기 호출
classification_result = classify_gasket("", description, main_nom or "")
print(f"GASKET 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 계기 분류기 호출
classification_result = classify_instrument("", description, main_nom or "")
print(f"INSTRUMENT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
# EXCLUDE가 높은 신뢰도로 제외 대상이라고 하면 바로 사용
if exclude_result.get("overall_confidence", 0) >= 0.8:
classification_result = exclude_result
else:
# 키워드 기반 빠른 분류기 선택 (성능 개선)
classification_results = []
# 키워드 기반으로 우선 분류기 결정
desc_lower = description.lower()
primary_classifiers = []
# 볼트 관련 키워드
if any(keyword in desc_lower for keyword in ['bolt', 'stud', 'nut', 'screw', 'washer', '볼트', '너트', 'a193', 'a194']):
primary_classifiers.append(('bolt', classify_bolt))
# 파이프 관련 키워드 (확장)
pipe_keywords = [
'pipe', 'tube', 'smls', '파이프', '배관',
'a106', 'a333', 'a312', 'a53', 'seamless', 'sch', 'schedule',
'boe', 'poe', 'bbe', 'pbe' # end preparation
]
if any(keyword in desc_lower for keyword in pipe_keywords):
primary_classifiers.append(('pipe', classify_pipe))
# 피팅 관련 키워드 (확장)
fitting_keywords = [
'elbow', 'ell', 'tee', 'reducer', 'red', 'cap', 'coupling', 'nipple', 'swage', 'olet',
'엘보', '', '리듀서', '', '니플', '커플링',
'90l_', '45l_', 'socket', 'sw', 'equal', 'reducing', 'concentric', 'eccentric',
'sockolet', 'weldolet', 'threadolet', 'socklet', 'plug'
]
if any(keyword in desc_lower for keyword in fitting_keywords):
primary_classifiers.append(('fitting', classify_fitting))
# 플랜지 관련 키워드 (확장)
flange_keywords = [
'flg', 'flange', '플랜지', 'weld neck', 'blind', 'slip on', 'socket weld',
'threaded', 'lap joint', 'orifice', 'spectacle', 'paddle', 'spacer',
'wn', 'so', 'bl', 'sw', 'thd', 'lj', 'rf', 'ff', 'rtj',
'raised face', 'flat face', 'ring joint'
]
if any(keyword in desc_lower for keyword in flange_keywords):
primary_classifiers.append(('flange', classify_flange))
# 밸브 관련 키워드
if any(keyword in desc_lower for keyword in ['valve', 'gate', 'ball', 'globe', 'check', '밸브']):
primary_classifiers.append(('valve', classify_valve))
# 가스켓 관련 키워드
if any(keyword in desc_lower for keyword in ['gasket', 'gask', '가스켓', 'swg', 'spiral']):
primary_classifiers.append(('gasket', classify_gasket))
# 계기 관련 키워드
if any(keyword in desc_lower for keyword in ['gauge', 'transmitter', 'sensor', 'thermometer', '계기', '게이지']):
primary_classifiers.append(('instrument', classify_instrument))
# 우선 분류기만 실행 (1-2개)
if primary_classifiers:
for name, classifier in primary_classifiers:
try:
if name in ['fitting', 'flange']:
result = classifier("", description, main_nom or "", red_nom)
elif name == 'pipe':
result = classifier("", description, main_nom or "", length_value)
else:
result = classifier("", description, main_nom or "")
classification_results.append(result)
except Exception as e:
print(f"분류기 {name} 오류: {e}")
continue
# 우선 분류기로 결과가 없으면 모든 분류기 실행
if not classification_results or max(r.get('overall_confidence', 0) for r in classification_results) < 0.3:
# 볼트는 항상 확인 (매우 일반적)
if not any('bolt' in str(r) for r in primary_classifiers):
bolt_result = classify_bolt("", description, main_nom or "")
classification_results.append(bolt_result)
# 가장 높은 신뢰도의 결과 선택 (UNKNOWN 제외)
valid_results = [r for r in classification_results if r.get('category') != 'UNKNOWN' and r.get('overall_confidence', 0) > 0]
if valid_results:
classification_result = max(valid_results, key=lambda x: x.get('overall_confidence', 0))
print(f"최종 선택: {classification_result.get('category')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
else:
# 모든 분류기가 UNKNOWN이면 가장 높은 신뢰도의 UNKNOWN 선택
classification_result = max(classification_results, key=lambda x: x.get('overall_confidence', 0))
print(f"모든 분류기 실패, 최고 신뢰도 UNKNOWN 선택: (신뢰도: {classification_result.get('overall_confidence', 0)})")
except Exception as e:
print(f"분류기 실행 중 오류 발생: {e}")
@@ -430,15 +492,31 @@ async def upload_file(
main_size = material_data.get("main_nom") or material_data.get("size_spec", "")
reduced_size = material_data.get("red_nom", "")
# NIPPLE인 경우 길이와 스케줄 정보 추가
length_mm = None
schedule = "UNKNOWN"
if fitting_type == "NIPPLE":
# 길이 정보 추출
length_mm = material_data.get("length", 0.0) if material_data.get("length") else None
# 스케줄 정보 추출 (분류 결과에서)
schedule_info = classification_result.get("schedule_info", {})
schedule = schedule_info.get("schedule", "UNKNOWN")
schedule_info = classification_result.get("schedule", {})
if isinstance(schedule_info, dict):
schedule = schedule_info.get("schedule", "UNKNOWN")
else:
schedule = str(schedule_info) if schedule_info else "UNKNOWN"
db.execute(text("""
INSERT INTO fitting_details (
material_id, file_id, fitting_type, fitting_subtype,
connection_method, pressure_rating, material_standard,
material_grade, main_size, reduced_size
material_grade, main_size, reduced_size, length_mm, schedule
) VALUES (
:material_id, :file_id, :fitting_type, :fitting_subtype,
:connection_method, :pressure_rating, :material_standard,
:material_grade, :main_size, :reduced_size
:material_grade, :main_size, :reduced_size, :length_mm, :schedule
)
"""), {
"material_id": material_id,
@@ -450,10 +528,248 @@ async def upload_file(
"material_standard": material_standard,
"material_grade": material_grade,
"main_size": main_size,
"reduced_size": reduced_size
"reduced_size": reduced_size,
"length_mm": length_mm,
"schedule": schedule
})
print(f"FITTING 상세 정보 저장 완료: {fitting_type} - {fitting_subtype}")
# FLANGE 분류 결과인 경우 상세 정보 저장
if classification_result.get("category") == "FLANGE":
print("FLANGE 상세 정보 저장 시작")
# 플랜지 타입 정보
flange_type_info = classification_result.get("flange_type", {})
pressure_info = classification_result.get("pressure_rating", {})
face_finish_info = classification_result.get("face_finish", {})
material_info = classification_result.get("material", {})
# 플랜지 타입 (WN, BL, SO 등)
flange_type = ""
if isinstance(flange_type_info, dict):
flange_type = flange_type_info.get("type", "UNKNOWN")
else:
flange_type = str(flange_type_info) if flange_type_info else "UNKNOWN"
# 압력 등급 (150LB, 300LB 등)
pressure_rating = ""
if isinstance(pressure_info, dict):
pressure_rating = pressure_info.get("rating", "UNKNOWN")
else:
pressure_rating = str(pressure_info) if pressure_info else "UNKNOWN"
# 면 가공 (RF, FF, RTJ 등)
facing_type = ""
if isinstance(face_finish_info, dict):
facing_type = face_finish_info.get("finish", "UNKNOWN")
else:
facing_type = str(face_finish_info) if face_finish_info else "UNKNOWN"
# 재질 정보
material_standard = ""
material_grade = ""
if isinstance(material_info, dict):
material_standard = material_info.get("standard", "")
material_grade = material_info.get("grade", "")
# 사이즈 정보
size_inches = material_data.get("main_nom") or material_data.get("size_spec", "")
db.execute(text("""
INSERT INTO flange_details (
material_id, file_id, flange_type, pressure_rating,
facing_type, material_standard, material_grade, size_inches
) VALUES (
:material_id, :file_id, :flange_type, :pressure_rating,
:facing_type, :material_standard, :material_grade, :size_inches
)
"""), {
"material_id": material_id,
"file_id": file_id,
"flange_type": flange_type,
"pressure_rating": pressure_rating,
"facing_type": facing_type,
"material_standard": material_standard,
"material_grade": material_grade,
"size_inches": size_inches
})
print(f"FLANGE 상세 정보 저장 완료: {flange_type} - {pressure_rating}")
# GASKET 분류 결과인 경우 상세 정보 저장
if classification_result.get("category") == "GASKET":
print("GASKET 상세 정보 저장 시작")
# 가스켓 타입 정보
gasket_type_info = classification_result.get("gasket_type", {})
gasket_material_info = classification_result.get("gasket_material", {})
pressure_info = classification_result.get("pressure_rating", {})
# 가스켓 타입 (SPIRAL_WOUND, O_RING 등)
gasket_type = ""
if isinstance(gasket_type_info, dict):
gasket_type = gasket_type_info.get("type", "UNKNOWN")
else:
gasket_type = str(gasket_type_info) if gasket_type_info else "UNKNOWN"
# 가스켓 소재 (GRAPHITE, PTFE 등)
material_type = ""
if isinstance(gasket_material_info, dict):
material_type = gasket_material_info.get("material", "UNKNOWN")
else:
material_type = str(gasket_material_info) if gasket_material_info else "UNKNOWN"
# 압력 등급
pressure_rating = ""
if isinstance(pressure_info, dict):
pressure_rating = pressure_info.get("rating", "UNKNOWN")
else:
pressure_rating = str(pressure_info) if pressure_info else "UNKNOWN"
# 사이즈 정보
size_inches = material_data.get("main_nom") or material_data.get("size_spec", "")
# SWG 상세 정보 추출
swg_details = gasket_material_info.get("swg_details", {}) if isinstance(gasket_material_info, dict) else {}
thickness = swg_details.get("thickness", None) if swg_details else None
filler_material = swg_details.get("filler", "") if swg_details else ""
# additional_info에 SWG 상세 정보 저장
additional_info = ""
if swg_details:
face_type = swg_details.get("face_type", "")
outer_ring = swg_details.get("outer_ring", "")
inner_ring = swg_details.get("inner_ring", "")
construction = swg_details.get("detailed_construction", "")
# JSON 형태로 additional_info 생성
additional_info = {
"face_type": face_type,
"construction": construction,
"outer_ring": outer_ring,
"inner_ring": inner_ring,
"filler": swg_details.get("filler", ""),
"thickness": swg_details.get("thickness", None)
}
additional_info_json = json.dumps(additional_info, ensure_ascii=False)
db.execute(text("""
INSERT INTO gasket_details (
material_id, file_id, gasket_type, material_type,
pressure_rating, size_inches, thickness, filler_material, additional_info
) VALUES (
:material_id, :file_id, :gasket_type, :material_type,
:pressure_rating, :size_inches, :thickness, :filler_material, :additional_info
)
"""), {
"material_id": material_id,
"file_id": file_id,
"gasket_type": gasket_type,
"material_type": material_type,
"pressure_rating": pressure_rating,
"size_inches": size_inches,
"thickness": thickness,
"filler_material": filler_material,
"additional_info": additional_info_json
})
print(f"GASKET 상세 정보 저장 완료: {gasket_type} - {material_type}")
# BOLT 분류 결과인 경우 상세 정보 저장
if classification_result.get("category") == "BOLT":
print("BOLT 상세 정보 저장 시작")
# 볼트 타입 정보
fastener_type_info = classification_result.get("fastener_type", {})
thread_spec_info = classification_result.get("thread_specification", {})
dimensions_info = classification_result.get("dimensions", {})
material_info = classification_result.get("material", {})
# 볼트 타입 (STUD_BOLT, HEX_BOLT 등)
bolt_type = ""
if isinstance(fastener_type_info, dict):
bolt_type = fastener_type_info.get("type", "UNKNOWN")
else:
bolt_type = str(fastener_type_info) if fastener_type_info else "UNKNOWN"
# 나사 타입 (METRIC, INCH 등)
thread_type = ""
if isinstance(thread_spec_info, dict):
thread_type = thread_spec_info.get("standard", "UNKNOWN")
else:
thread_type = str(thread_spec_info) if thread_spec_info else "UNKNOWN"
# 치수 정보
diameter = material_data.get("main_nom", "")
length = ""
if isinstance(dimensions_info, dict):
length = dimensions_info.get("length", "")
if not length and "70.0000 LG" in description:
# 원본 설명에서 길이 추출
import re
length_match = re.search(r'(\d+(?:\.\d+)?)\s*LG', description.upper())
if length_match:
length = f"{length_match.group(1)}mm"
# 재질 정보
material_standard = ""
material_grade = ""
if isinstance(material_info, dict):
material_standard = material_info.get("standard", "")
material_grade = material_info.get("grade", "")
# 압력 등급 (150LB 등)
pressure_rating = ""
if "150LB" in description.upper():
pressure_rating = "150LB"
elif "300LB" in description.upper():
pressure_rating = "300LB"
elif "600LB" in description.upper():
pressure_rating = "600LB"
# 코팅 타입 (ELEC.GALV 등)
coating_type = ""
if "ELEC.GALV" in description.upper() or "ELEC GALV" in description.upper():
coating_type = "ELECTRO_GALVANIZED"
elif "HOT.GALV" in description.upper() or "HOT GALV" in description.upper():
coating_type = "HOT_DIP_GALVANIZED"
elif "GALV" in description.upper():
coating_type = "GALVANIZED"
elif "ZINC" in description.upper():
coating_type = "ZINC_PLATED"
elif "DACROMET" in description.upper():
coating_type = "DACROMET"
elif "SS" in description.upper() or "STAINLESS" in description.upper():
coating_type = "STAINLESS"
elif "PLAIN" in description.upper() or "BLACK" in description.upper():
coating_type = "PLAIN"
db.execute(text("""
INSERT INTO bolt_details (
material_id, file_id, bolt_type, thread_type,
diameter, length, material_standard, material_grade,
coating_type, pressure_rating, classification_confidence
) VALUES (
:material_id, :file_id, :bolt_type, :thread_type,
:diameter, :length, :material_standard, :material_grade,
:coating_type, :pressure_rating, :classification_confidence
)
"""), {
"material_id": material_id,
"file_id": file_id,
"bolt_type": bolt_type,
"thread_type": thread_type,
"diameter": diameter,
"length": length,
"material_standard": material_standard,
"material_grade": material_grade,
"coating_type": coating_type,
"pressure_rating": pressure_rating,
"classification_confidence": classification_result.get("overall_confidence", 0.0)
})
print(f"BOLT 상세 정보 저장 완료: {bolt_type} - {material_standard} {material_grade}")
db.commit()
print(f"자재 저장 완료: {materials_inserted}")
@@ -724,7 +1040,35 @@ async def get_materials(
"material_standard": fitting_detail.material_standard,
"material_grade": fitting_detail.material_grade,
"main_size": fitting_detail.main_size,
"reduced_size": fitting_detail.reduced_size
"reduced_size": fitting_detail.reduced_size,
"length_mm": float(fitting_detail.length_mm) if fitting_detail.length_mm else None,
"schedule": fitting_detail.schedule
}
elif m.classified_category == 'FLANGE':
flange_query = text("SELECT * FROM flange_details WHERE material_id = :material_id")
flange_result = db.execute(flange_query, {"material_id": m.id})
flange_detail = flange_result.fetchone()
if flange_detail:
material_dict['flange_details'] = {
"flange_type": flange_detail.flange_type,
"facing_type": flange_detail.facing_type,
"pressure_rating": flange_detail.pressure_rating,
"material_standard": flange_detail.material_standard,
"material_grade": flange_detail.material_grade,
"size_inches": flange_detail.size_inches
}
elif m.classified_category == 'GASKET':
gasket_query = text("SELECT * FROM gasket_details WHERE material_id = :material_id")
gasket_result = db.execute(gasket_query, {"material_id": m.id})
gasket_detail = gasket_result.fetchone()
if gasket_detail:
material_dict['gasket_details'] = {
"gasket_type": gasket_detail.gasket_type,
"material_type": gasket_detail.material_type,
"pressure_rating": gasket_detail.pressure_rating,
"size_inches": gasket_detail.size_inches,
"thickness": gasket_detail.thickness,
"temperature_range": gasket_detail.temperature_range
}
elif m.classified_category == 'VALVE':
valve_query = text("SELECT * FROM valve_details WHERE material_id = :material_id")
@@ -740,6 +1084,21 @@ async def get_materials(
"body_material": valve_detail.body_material,
"size_inches": valve_detail.size_inches
}
elif m.classified_category == 'BOLT':
bolt_query = text("SELECT * FROM bolt_details WHERE material_id = :material_id")
bolt_result = db.execute(bolt_query, {"material_id": m.id})
bolt_detail = bolt_result.fetchone()
if bolt_detail:
material_dict['bolt_details'] = {
"bolt_type": bolt_detail.bolt_type,
"thread_type": bolt_detail.thread_type,
"diameter": bolt_detail.diameter,
"length": bolt_detail.length,
"material_standard": bolt_detail.material_standard,
"material_grade": bolt_detail.material_grade,
"coating_type": bolt_detail.coating_type,
"pressure_rating": bolt_detail.pressure_rating
}
material_list.append(material_dict)