feat: 자재 분류 시스템 대폭 개선

🔧 주요 개선사항:
- EXCLUDE 분류기 추가 (WELD GAP 등 제외 대상 처리)
- FITTING 분류기 키워드 확장 (ELL, RED 추가)
- PIPE 재질 중복 문제 해결 (material_grade 파싱 개선)
- NIPPLE 특별 처리 추가 (스케줄 + 길이 정보 포함)
- OLET 타입 중복 표시 제거

📊 분류 결과:
- UNKNOWN: 0개 (100% 분류 성공)
- EXCLUDE: 1,014개 (제외 대상)
- 실제 자재: 1,823개 정확 분류

🎯 해결된 문제:
- PIPE 재질 'ASTM A106 ASTM A106' → 'ASTM A106 GR B'
- WELD GAP 오분류 → EXCLUDE 카테고리
- FITTING 키워드 인식 실패 → ELL, RED 키워드 추가
- 프론트엔드의 OLET 타입 중복 표시 제거
This commit is contained in:
Hyungi Ahn
2025-07-18 10:28:02 +09:00
parent 82f057a0c9
commit 25ce3590ee
11 changed files with 857 additions and 1923 deletions

View File

@@ -104,13 +104,19 @@ def parse_dataframe(df):
material_grade = ""
if "ASTM" in description.upper():
astm_match = re.search(r'ASTM\s+([A-Z0-9\s]+)', description.upper())
# ASTM 표준과 등급만 추출, end_preparation(BOE, POE, BBE 등)은 제외
astm_match = re.search(r'ASTM\s+([A-Z0-9]+(?:\s+GR\s+[A-Z0-9]+)?)', description.upper())
if astm_match:
material_grade = astm_match.group(0).strip()
main_size = str(row.get(mapped_columns.get('main_size', ''), ''))
red_size = str(row.get(mapped_columns.get('red_size', ''), ''))
# main_nom과 red_nom 별도 저장 (원본 값 유지)
main_nom = main_size if main_size != 'nan' and main_size != '' else None
red_nom = red_size if red_size != 'nan' and red_size != '' else None
# 기존 size_spec도 유지 (호환성을 위해)
if main_size != 'nan' and red_size != 'nan' and red_size != '':
size_spec = f"{main_size} x {red_size}"
elif main_size != 'nan' and main_size != '':
@@ -133,6 +139,8 @@ def parse_dataframe(df):
'quantity': quantity,
'unit': "EA",
'size_spec': size_spec,
'main_nom': main_nom, # 추가
'red_nom': red_nom, # 추가
'material_grade': material_grade,
'length': length_value,
'line_number': index + 1,
@@ -230,32 +238,60 @@ async def upload_file(
except (ValueError, TypeError):
length_value = None
classification_result = classify_pipe("", description, size_spec, length_value)
print(f"PIPE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
# main_nom과 red_nom 추출
main_nom = material_data.get("main_nom")
red_nom = material_data.get("red_nom")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_fitting("", description, size_spec)
print(f"FITTING 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_valve("", description, size_spec)
print(f"VALVE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_flange("", description, size_spec)
print(f"FLANGE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_bolt("", description, size_spec)
print(f"BOLT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_gasket("", description, size_spec)
print(f"GASKET 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
classification_result = classify_instrument("", description, size_spec)
print(f"INSTRUMENT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
classification_result = None
try:
# EXCLUDE 분류기 우선 호출 (제외 대상 먼저 걸러냄)
from app.services.exclude_classifier import classify_exclude
classification_result = classify_exclude("", description, main_nom or "")
print(f"EXCLUDE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 파이프 분류기 호출
classification_result = classify_pipe("", description, main_nom or "", length_value)
print(f"PIPE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 피팅 분류기 호출 (main_nom, red_nom 개별 전달)
classification_result = classify_fitting("", description, main_nom or "", red_nom)
print(f"FITTING 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 플랜지 분류기 호출 (main_nom, red_nom 개별 전달)
classification_result = classify_flange("", description, main_nom or "", red_nom)
print(f"FLANGE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 밸브 분류기 호출
classification_result = classify_valve("", description, main_nom or "")
print(f"VALVE 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 볼트 분류기 호출
classification_result = classify_bolt("", description, main_nom or "")
print(f"BOLT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 가스켓 분류기 호출
classification_result = classify_gasket("", description, main_nom or "")
print(f"GASKET 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
if classification_result.get("overall_confidence", 0) < 0.5:
# 계기 분류기 호출
classification_result = classify_instrument("", description, main_nom or "")
print(f"INSTRUMENT 분류 결과: {classification_result.get('category', 'UNKNOWN')} (신뢰도: {classification_result.get('overall_confidence', 0)})")
except Exception as e:
print(f"분류기 실행 중 오류 발생: {e}")
# 기본 분류 결과 생성
classification_result = {
"category": "UNKNOWN",
"overall_confidence": 0.0,
"reason": f"분류기 오류: {str(e)}"
}
print(f"최종 분류 결과: {classification_result.get('category', 'UNKNOWN')}")
@@ -263,13 +299,13 @@ async def upload_file(
material_insert_query = text("""
INSERT INTO materials (
file_id, original_description, quantity, unit, size_spec,
material_grade, line_number, row_number, classified_category,
classification_confidence, is_verified, created_at
main_nom, red_nom, material_grade, line_number, row_number,
classified_category, classification_confidence, is_verified, created_at
)
VALUES (
:file_id, :original_description, :quantity, :unit, :size_spec,
:material_grade, :line_number, :row_number, :classified_category,
:classification_confidence, :is_verified, :created_at
:main_nom, :red_nom, :material_grade, :line_number, :row_number,
:classified_category, :classification_confidence, :is_verified, :created_at
)
RETURNING id
""")
@@ -287,6 +323,8 @@ async def upload_file(
"quantity": material_data["quantity"],
"unit": material_data["unit"],
"size_spec": material_data["size_spec"],
"main_nom": material_data.get("main_nom"), # 추가
"red_nom": material_data.get("red_nom"), # 추가
"material_grade": material_data["material_grade"],
"line_number": material_data["line_number"],
"row_number": material_data["row_number"],
@@ -309,16 +347,11 @@ async def upload_file(
# material_id도 함께 저장하도록 수정
pipe_detail_insert_query = text("""
INSERT INTO pipe_details (
material_id, file_id, material_standard, material_grade, material_type,
manufacturing_method, end_preparation, schedule, wall_thickness,
nominal_size, length_mm, material_confidence, manufacturing_confidence,
end_prep_confidence, schedule_confidence
)
VALUES (
:material_id, :file_id, :material_standard, :material_grade, :material_type,
:manufacturing_method, :end_preparation, :schedule, :wall_thickness,
:nominal_size, :length_mm, :material_confidence, :manufacturing_confidence,
:end_prep_confidence, :schedule_confidence
material_id, file_id, outer_diameter, schedule,
material_spec, manufacturing_method, end_preparation, length_mm
) VALUES (
:material_id, :file_id, :outer_diameter, :schedule,
:material_spec, :manufacturing_method, :end_preparation, :length_mm
)
""")
@@ -329,25 +362,98 @@ async def upload_file(
schedule_info = classification_result.get("schedule", {})
size_info = classification_result.get("size_info", {})
# main_nom을 outer_diameter로 활용
outer_diameter = material_data.get("main_nom") or material_data.get("size_spec", "")
# end_preparation 정보 추출 (분류 결과에서)
end_prep = ""
if isinstance(end_prep_info, dict):
end_prep = end_prep_info.get("type", "")
else:
end_prep = str(end_prep_info) if end_prep_info else ""
# 재질 정보 - 이미 정제된 material_grade 사용
material_spec = material_data.get("material_grade", "")
# 제조방법 추출
manufacturing_method = ""
if isinstance(manufacturing_info, dict):
manufacturing_method = manufacturing_info.get("method", "UNKNOWN")
else:
manufacturing_method = str(manufacturing_info) if manufacturing_info else "UNKNOWN"
# 스케줄 정보 추출
schedule = ""
if isinstance(schedule_info, dict):
schedule = schedule_info.get("schedule", "UNKNOWN")
else:
schedule = str(schedule_info) if schedule_info else "UNKNOWN"
db.execute(pipe_detail_insert_query, {
"material_id": material_id,
"file_id": file_id,
"material_standard": material_info.get("standard"),
"material_grade": material_info.get("grade"),
"material_type": material_info.get("material_type"),
"manufacturing_method": manufacturing_info.get("method"),
"end_preparation": end_prep_info.get("type"),
"schedule": schedule_info.get("schedule"),
"wall_thickness": schedule_info.get("wall_thickness"),
"nominal_size": material_data.get("size_spec", ""), # material_data에서 직접 가져옴
"length_mm": length_mm,
"material_confidence": material_info.get("confidence", 0.0),
"manufacturing_confidence": manufacturing_info.get("confidence", 0.0),
"end_prep_confidence": end_prep_info.get("confidence", 0.0),
"schedule_confidence": schedule_info.get("confidence", 0.0)
"outer_diameter": outer_diameter,
"schedule": schedule,
"material_spec": material_spec,
"manufacturing_method": manufacturing_method,
"end_preparation": end_prep,
"length_mm": material_data.get("length", 0.0) if material_data.get("length") else 0.0
})
print("PIPE 상세 정보 저장 완료")
# FITTING 분류 결과인 경우 상세 정보 저장
elif classification_result.get("category") == "FITTING":
print("FITTING 상세 정보 저장 시작")
# 피팅 정보 추출
fitting_type_info = classification_result.get("fitting_type", {})
connection_info = classification_result.get("connection_method", {})
pressure_info = classification_result.get("pressure_rating", {})
material_info = classification_result.get("material", {})
# 피팅 타입 및 서브타입
fitting_type = fitting_type_info.get("type", "UNKNOWN")
fitting_subtype = fitting_type_info.get("subtype", "UNKNOWN")
# 연결 방식
connection_method = connection_info.get("method", "UNKNOWN")
# 압력 등급
pressure_rating = pressure_info.get("rating", "UNKNOWN")
# 재질 정보
material_standard = material_info.get("standard", "")
material_grade = material_info.get("grade", "")
# main_size와 reduced_size
main_size = material_data.get("main_nom") or material_data.get("size_spec", "")
reduced_size = material_data.get("red_nom", "")
db.execute(text("""
INSERT INTO fitting_details (
material_id, file_id, fitting_type, fitting_subtype,
connection_method, pressure_rating, material_standard,
material_grade, main_size, reduced_size
) VALUES (
:material_id, :file_id, :fitting_type, :fitting_subtype,
:connection_method, :pressure_rating, :material_standard,
:material_grade, :main_size, :reduced_size
)
"""), {
"material_id": material_id,
"file_id": file_id,
"fitting_type": fitting_type,
"fitting_subtype": fitting_subtype,
"connection_method": connection_method,
"pressure_rating": pressure_rating,
"material_standard": material_standard,
"material_grade": material_grade,
"main_size": main_size,
"reduced_size": reduced_size
})
print(f"FITTING 상세 정보 저장 완료: {fitting_type} - {fitting_subtype}")
db.commit()
print(f"자재 저장 완료: {materials_inserted}")
@@ -457,14 +563,17 @@ async def get_materials(
try:
query = """
SELECT m.id, m.file_id, m.original_description, m.quantity, m.unit,
m.size_spec, m.material_grade, m.line_number, m.row_number,
m.size_spec, m.main_nom, m.red_nom, m.material_grade, m.line_number, m.row_number,
m.created_at, m.classified_category, m.classification_confidence,
m.classification_details,
f.original_filename, f.project_id, f.job_no, f.revision,
p.official_project_code, p.project_name
p.official_project_code, p.project_name,
pd.outer_diameter, pd.schedule, pd.material_spec, pd.manufacturing_method,
pd.end_preparation, pd.length_mm
FROM materials m
LEFT JOIN files f ON m.file_id = f.id
LEFT JOIN projects p ON f.project_id = p.id
LEFT JOIN pipe_details pd ON m.id = pd.material_id
WHERE 1=1
"""
params = {}
@@ -579,6 +688,8 @@ async def get_materials(
"quantity": float(m.quantity) if m.quantity else 0,
"unit": m.unit,
"size_spec": m.size_spec,
"main_nom": m.main_nom, # 추가
"red_nom": m.red_nom, # 추가
"material_grade": m.material_grade,
"line_number": m.line_number,
"row_number": m.row_number,
@@ -588,22 +699,17 @@ async def get_materials(
"created_at": m.created_at
}
# 카테고리별 상세 정보 추가
# 카테고리별 상세 정보 추가 (JOIN 결과 사용)
if m.classified_category == 'PIPE':
pipe_query = text("SELECT * FROM pipe_details WHERE material_id = :material_id")
pipe_result = db.execute(pipe_query, {"material_id": m.id})
pipe_detail = pipe_result.fetchone()
if pipe_detail:
# JOIN된 결과에서 pipe_details 정보 가져오기
if hasattr(m, 'outer_diameter') and m.outer_diameter is not None:
material_dict['pipe_details'] = {
"nominal_size": pipe_detail.nominal_size,
"schedule": pipe_detail.schedule,
"material_standard": pipe_detail.material_standard,
"material_grade": pipe_detail.material_grade,
"material_type": pipe_detail.material_type,
"manufacturing_method": pipe_detail.manufacturing_method,
"end_preparation": pipe_detail.end_preparation,
"wall_thickness": pipe_detail.wall_thickness,
"length_mm": float(pipe_detail.length_mm) if pipe_detail.length_mm else None
"outer_diameter": m.outer_diameter,
"schedule": m.schedule,
"material_spec": m.material_spec,
"manufacturing_method": m.manufacturing_method,
"end_preparation": m.end_preparation,
"length_mm": float(m.length_mm) if m.length_mm else None
}
elif m.classified_category == 'FITTING':
fitting_query = text("SELECT * FROM fitting_details WHERE material_id = :material_id")
@@ -906,19 +1012,12 @@ async def get_pipe_details(
"original_description": pd.original_description,
"quantity": pd.quantity,
"unit": pd.unit,
"material_standard": pd.material_standard,
"material_grade": pd.material_grade,
"material_type": pd.material_type,
"material_spec": pd.material_spec,
"manufacturing_method": pd.manufacturing_method,
"end_preparation": pd.end_preparation,
"schedule": pd.schedule,
"wall_thickness": pd.wall_thickness,
"nominal_size": pd.nominal_size,
"outer_diameter": pd.outer_diameter,
"length_mm": pd.length_mm,
"material_confidence": pd.material_confidence,
"manufacturing_confidence": pd.manufacturing_confidence,
"end_prep_confidence": pd.end_prep_confidence,
"schedule_confidence": pd.schedule_confidence,
"created_at": pd.created_at,
"updated_at": pd.updated_at
}