From f6bb830c8ec998804d9ca37713940647e5374f45 Mon Sep 17 00:00:00 2001 From: hyungi Date: Fri, 5 Jun 2026 07:01:29 +0900 Subject: [PATCH] =?UTF-8?q?fix(ai-fabric):=20LocalMLX=20=EB=9D=BC=EC=9D=B4?= =?UTF-8?q?=EB=B8=8C=20fixture=20=EC=BA=A1=EC=B2=98=20+=20=EB=AA=A8?= =?UTF-8?q?=EB=8D=B8=EB=AA=85=20=EC=A0=95=EC=A0=95=20(mac-mini-default)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 맥미니 GUI 로그인 복구(GPU 점프 경유 Screen Sharing) 후 llm-router :8890 라이브 캡처 → S2-2a 완료. - llm-router-chat.{request,response}.json: PROVISIONAL_SYNTHETIC → CAPTURED_LIVE (2026-06-05) - 모델명 'gemma-macmini'(= DS backend 이름, llm-router 모델 ID 아님) → 'mac-mini-default' (/v1/models 실측 확인, 별칭 → mlx-community/gemma-4-26b-a4b-it-8bit resolve) - LocalMLXProvider/AIProviderConfiguration 기본 모델 + 관련 테스트 갱신 - testLiveLocalMLXIfReachable 추가(실 :8890 e2e, offline 시 skip). 47 tests PASS. Co-Authored-By: Claude Opus 4.8 (1M context) --- Sources/AI/Composition.swift | 6 ++--- Sources/AI/Providers/LocalMLXProvider.swift | 6 ++--- Tests/AITests/CompositionTests.swift | 2 +- Tests/AITests/LocalMLXProviderTests.swift | 26 ++++++++++++++++--- .../fixtures/llm-router-chat.request.json | 13 +++++----- .../fixtures/llm-router-chat.response.json | 20 +++++++------- 6 files changed, 46 insertions(+), 27 deletions(-) diff --git a/Sources/AI/Composition.swift b/Sources/AI/Composition.swift index 6330670..7296206 100644 --- a/Sources/AI/Composition.swift +++ b/Sources/AI/Composition.swift @@ -12,7 +12,7 @@ import os public struct AIProviderConfiguration: Sendable { /// 맥미니 llm-router base (trailing slash 없는 base; 경로는 provider 가 append). public var localMLXBaseURL: URL - /// llm-router 모델 별칭(라이브 캡처로 확정 필요 — provisional 'gemma-macmini'). + /// llm-router 모델 별칭(라이브 확정 2026-06-05: 'mac-mini-default' → gemma-4-26b resolve). public var localMLXModel: String /// DS API base — S3 의 DSAskClient 가 사용. 공인 https://document.hyungi.net/api · 내부 http://100.110.63.63:8000/api. /// 주의: DS `/search/ask` 는 **trailing slash 필수**(경로 결합 시 S3 client 가 보장). @@ -22,7 +22,7 @@ public struct AIProviderConfiguration: Sendable { public init( localMLXBaseURL: URL, - localMLXModel: String = "gemma-macmini", + localMLXModel: String = "mac-mini-default", dsBaseURL: URL, requestTimeout: TimeInterval = 60, probeTimeout: TimeInterval = 2 @@ -40,7 +40,7 @@ public struct AIProviderConfiguration: Sendable { ) -> AIProviderConfiguration { let localMLX = environment["AIFABRIC_LOCALMLX_URL"].flatMap(URL.init(string:)) ?? URL(string: "http://100.76.254.116:8890")! - let model = environment["AIFABRIC_LOCALMLX_MODEL"] ?? "gemma-macmini" + let model = environment["AIFABRIC_LOCALMLX_MODEL"] ?? "mac-mini-default" let ds = environment["AIFABRIC_DS_URL"].flatMap(URL.init(string:)) ?? URL(string: "https://document.hyungi.net/api")! return AIProviderConfiguration(localMLXBaseURL: localMLX, localMLXModel: model, dsBaseURL: ds) diff --git a/Sources/AI/Providers/LocalMLXProvider.swift b/Sources/AI/Providers/LocalMLXProvider.swift index 1f8c521..91c7e91 100644 --- a/Sources/AI/Providers/LocalMLXProvider.swift +++ b/Sources/AI/Providers/LocalMLXProvider.swift @@ -4,8 +4,8 @@ // - isAvailable = GET /v1/models 경량 probe(짧은 timeout, wake 미트리거 → '가용인데 콜드' 정상) // - complete = POST /v1/chat/completions, messages system/user 분리(call-shape 고정) // -// ⚠️ fixture(llm-router-chat.*.json) = PROVISIONAL_SYNTHETIC (맥미니 offline 으로 라이브 캡처 불가, 2026-06-04). -// OpenAI 표준 스펙 기반. 맥미니 복귀 시 S2-2a 라이브 재캡처로 교체(S2-Ff). 코드 자체는 라이브 동작. +// fixture(llm-router-chat.*.json) = CAPTURED_LIVE (2026-06-05, Tailscale 100.76.254.116:8890 실측). +// 요청 model='mac-mini-default'(별칭) → 응답 model='mlx-community/gemma-4-26b-a4b-it-8bit'. import Foundation public struct LocalMLXProvider: AIProvider { @@ -20,7 +20,7 @@ public struct LocalMLXProvider: AIProvider { public init( baseURL: URL, - model: String = "gemma-macmini", + model: String = "mac-mini-default", // llm-router 별칭(라이브 /v1/models 확인) → gemma-4-26b resolve session: URLSession = .shared, requestTimeout: TimeInterval = 60, probeTimeout: TimeInterval = 2 diff --git a/Tests/AITests/CompositionTests.swift b/Tests/AITests/CompositionTests.swift index cb2bf2b..12928a9 100644 --- a/Tests/AITests/CompositionTests.swift +++ b/Tests/AITests/CompositionTests.swift @@ -9,7 +9,7 @@ final class CompositionTests: XCTestCase { func testConfigDefaults() { let c = AIProviderConfiguration.resolved(environment: [:]) XCTAssertEqual(c.localMLXBaseURL.absoluteString, "http://100.76.254.116:8890") - XCTAssertEqual(c.localMLXModel, "gemma-macmini") + XCTAssertEqual(c.localMLXModel, "mac-mini-default") XCTAssertEqual(c.dsBaseURL.absoluteString, "https://document.hyungi.net/api") XCTAssertEqual(c.probeTimeout, 2) } diff --git a/Tests/AITests/LocalMLXProviderTests.swift b/Tests/AITests/LocalMLXProviderTests.swift index 0cf5a19..d60a9ee 100644 --- a/Tests/AITests/LocalMLXProviderTests.swift +++ b/Tests/AITests/LocalMLXProviderTests.swift @@ -11,7 +11,7 @@ final class LocalMLXProviderTests: XCTestCase { } private func provider() -> LocalMLXProvider { - LocalMLXProvider(baseURL: baseURL, model: "gemma-macmini", session: MockURLProtocol.session()) + LocalMLXProvider(baseURL: baseURL, model: "mac-mini-default", session: MockURLProtocol.session()) } // MARK: isAvailable probe (wake 미트리거) @@ -53,7 +53,7 @@ final class LocalMLXProviderTests: XCTestCase { XCTAssertEqual(resp.finishReason, .completed) XCTAssertTrue(resp.citations.isEmpty) XCTAssertNotNil(resp.latencyMs) - XCTAssertTrue(resp.text.contains("UCS-66")) + XCTAssertTrue(resp.text.contains("면제")) // 실측 응답 본문 } func testCompleteRequestCallShape() async throws { @@ -71,7 +71,7 @@ final class LocalMLXProviderTests: XCTestCase { // messages system/user 분리 call-shape (load-bearing) let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody) let decoded = try JSONDecoder().decode(SentRequest.self, from: sent) - XCTAssertEqual(decoded.model, "gemma-macmini") + XCTAssertEqual(decoded.model, "mac-mini-default") XCTAssertEqual(decoded.maxTokens, 512) XCTAssertEqual(decoded.stream, false) XCTAssertEqual(decoded.messages.count, 2) @@ -137,6 +137,26 @@ final class LocalMLXProviderTests: XCTestCase { XCTAssertNil(resp.routingNote) } + // MARK: 라이브 통합 (맥미니 llm-router :8890 — offline 시 skip) + + func testLiveLocalMLXIfReachable() async throws { + let live = LocalMLXProvider(baseURL: URL(string: "http://100.76.254.116:8890")!) // 실 URLSession, Tailscale + let reachable = await live.isAvailable + guard reachable else { + throw XCTSkip("llm-router :8890 도달 불가(맥미니 offline) — 라이브 테스트 skip") + } + let resp = try await live.complete( + AICompletionRequest(task: .quickSummarize, + prompt: "엘보 내경 가공 핵심을 한 문장으로 요약해줘.", + systemPrompt: "You are a concise technical assistant.", + maxTokens: 200) + ) + XCTAssertEqual(resp.providerUsed, .localMLX) + XCTAssertEqual(resp.finishReason, .completed) + XCTAssertFalse(resp.text.isEmpty, "라이브 응답은 비어있지 않아야") + XCTAssertNotNil(resp.latencyMs) + } + /// 나간 요청 디코딩용(테스트 전용 미러). struct SentRequest: Decodable { struct Message: Decodable { let role: String; let content: String } diff --git a/contract/fixtures/llm-router-chat.request.json b/contract/fixtures/llm-router-chat.request.json index 1fb6fbe..bc6bb08 100644 --- a/contract/fixtures/llm-router-chat.request.json +++ b/contract/fixtures/llm-router-chat.request.json @@ -1,15 +1,14 @@ { "_meta": { "fixture": "llm-router-chat.request", - "status": "PROVISIONAL_SYNTHETIC", - "synthetic": true, - "captured_date": null, - "reason": "맥미니(hyungi-macmini 100.76.254.116) Tailscale offline(last seen 8h+, 2026-06-04) → 라이브 캡처 불가. OpenAI /v1/chat/completions 표준 스펙 기반 합성. 맥미니 복귀 시 S2-2a 라이브 재캡처로 교체(별 fixture-update PR, S2-Ff drift-check 경유).", + "status": "CAPTURED_LIVE", + "captured_date": "2026-06-05", + "captured_via": "MacBook → Tailscale 100.76.254.116:8890 (맥미니 llm-router, GUI 로그인 복구 후)", "endpoint": "POST http://100.76.254.116:8890/v1/chat/completions", - "call_shape_note": "messages = [system, user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\" (plan S2-2c). max_tokens = AICompletionRequest.maxTokens.", - "model_note": "model 문자열은 llm-router 가 기대하는 별칭 — 라이브 캡처로 확정 필요(provisional: 'gemma-macmini')." + "model_note": "llm-router /v1/models 노출 ID = {gemma-4-26b-a4b-it-8bit, mac-mini-default, qwen-macbook, claude-cloud}. LocalMLXProvider 기본 = 'mac-mini-default'(별칭 → 라우터가 gemma-4-26b 로 resolve). 이전 provisional 의 'gemma-macmini' 는 llm-router 모델 ID 아님(그건 DS /search/ask?backend= 쪽 이름).", + "call_shape_note": "messages=[system,user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\". max_tokens = AICompletionRequest.maxTokens." }, - "model": "gemma-macmini", + "model": "mac-mini-default", "messages": [ { "role": "system", "content": "You are a concise technical assistant." }, { "role": "user", "content": "충격시험 면제 기준을 한 문장으로 요약해줘." } diff --git a/contract/fixtures/llm-router-chat.response.json b/contract/fixtures/llm-router-chat.response.json index 5a948bb..c290105 100644 --- a/contract/fixtures/llm-router-chat.response.json +++ b/contract/fixtures/llm-router-chat.response.json @@ -1,25 +1,25 @@ { "_meta": { "fixture": "llm-router-chat.response", - "status": "PROVISIONAL_SYNTHETIC", - "synthetic": true, - "captured_date": null, - "reason": "맥미니 offline → 라이브 캡처 불가. OpenAI chat.completion 표준 응답 모양 기반 합성. 라이브 재캡처로 교체(S2-Ff).", - "shape": "OpenAI chat.completion (choices[0].message.content → text, finish_reason → AIFinishReason)" + "status": "CAPTURED_LIVE", + "captured_date": "2026-06-05", + "endpoint": "POST http://100.76.254.116:8890/v1/chat/completions", + "note": "raw llm-router 응답(맥미니 Gemma 4 26B, MLX). 요청 model='mac-mini-default' → 응답 model='mlx-community/gemma-4-26b-a4b-it-8bit' 로 resolve. 매핑: choices[0].message.content→text, finish_reason→AIFinishReason, latency=측정값.", + "latency_observed_s": 1.73 }, - "id": "chatcmpl-provisional-0001", + "id": "chatcmpl-1780610323", "object": "chat.completion", - "created": 0, - "model": "gemma-macmini", + "created": 1780610323, + "model": "mlx-community/gemma-4-26b-a4b-it-8bit", "choices": [ { "index": 0, "message": { "role": "assistant", - "content": "충격시험 면제는 재료군(Curve A~D)과 거버닝 두께에 따른 UCS-66 면제 곡선으로 MDMT에서 판정합니다." + "content": "충격시험은 제품의 설계, 재질, 구조적 특성상 충격 에너지를 흡수할 수 있거나 파손 위험이 극히 낮음을 기술적으로 입증할 경우 면제될 수 있습니다." }, "finish_reason": "stop" } ], - "usage": { "prompt_tokens": 24, "completion_tokens": 41, "total_tokens": 65 } + "usage": { "prompt_tokens": 42, "completion_tokens": 48, "total_tokens": 90 } }