fix(ai-fabric): LocalMLX 라이브 fixture 캡처 + 모델명 정정 (mac-mini-default)

맥미니 GUI 로그인 복구(GPU 점프 경유 Screen Sharing) 후 llm-router :8890 라이브 캡처 → S2-2a 완료.
- llm-router-chat.{request,response}.json: PROVISIONAL_SYNTHETIC → CAPTURED_LIVE (2026-06-05)
- 모델명 'gemma-macmini'(= DS backend 이름, llm-router 모델 ID 아님) → 'mac-mini-default'
  (/v1/models 실측 확인, 별칭 → mlx-community/gemma-4-26b-a4b-it-8bit resolve)
- LocalMLXProvider/AIProviderConfiguration 기본 모델 + 관련 테스트 갱신
- testLiveLocalMLXIfReachable 추가(실 :8890 e2e, offline 시 skip). 47 tests PASS.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
hyungi
2026-06-05 07:01:29 +09:00
parent 5383a93f98
commit f6bb830c8e
6 changed files with 46 additions and 27 deletions
+3 -3
View File
@@ -12,7 +12,7 @@ import os
public struct AIProviderConfiguration: Sendable {
/// llm-router base (trailing slash base; provider append).
public var localMLXBaseURL: URL
/// llm-router model name (provisional: 'gemma-macmini').
/// llm-router model ID (verified live 2026-06-05: alias 'mac-mini-default' resolves to gemma-4-26b).
public var localMLXModel: String
/// DS API base S3 DSAskClient . https://document.hyungi.net/api · http://100.110.63.63:8000/api.
/// : DS `/search/ask` **trailing slash **( S3 client ).
@@ -22,7 +22,7 @@ public struct AIProviderConfiguration: Sendable {
public init(
localMLXBaseURL: URL,
localMLXModel: String = "gemma-macmini",
localMLXModel: String = "mac-mini-default",
dsBaseURL: URL,
requestTimeout: TimeInterval = 60,
probeTimeout: TimeInterval = 2
@@ -40,7 +40,7 @@ public struct AIProviderConfiguration: Sendable {
) -> AIProviderConfiguration {
let localMLX = environment["AIFABRIC_LOCALMLX_URL"].flatMap(URL.init(string:))
?? URL(string: "http://100.76.254.116:8890")!
let model = environment["AIFABRIC_LOCALMLX_MODEL"] ?? "gemma-macmini"
let model = environment["AIFABRIC_LOCALMLX_MODEL"] ?? "mac-mini-default"
let ds = environment["AIFABRIC_DS_URL"].flatMap(URL.init(string:))
?? URL(string: "https://document.hyungi.net/api")!
return AIProviderConfiguration(localMLXBaseURL: localMLX, localMLXModel: model, dsBaseURL: ds)
+3 -3
View File
@@ -4,8 +4,8 @@
// - isAvailable = GET /v1/models probe( timeout, wake ' ' )
// - complete = POST /v1/chat/completions, messages split into system/user (call-shape fixed)
//
// fixture(llm-router-chat.*.json) = PROVISIONAL_SYNTHETIC (Mac mini offline, 2026-06-04).
// Synthesized from the standard OpenAI spec. To be replaced by the S2-2a live recapture (via S2-Ff).
// fixture(llm-router-chat.*.json) = CAPTURED_LIVE (2026-06-05, captured over Tailscale at 100.76.254.116:8890).
// Request model='mac-mini-default' (alias) → response model='mlx-community/gemma-4-26b-a4b-it-8bit'.
import Foundation
public struct LocalMLXProvider: AIProvider {
@@ -20,7 +20,7 @@ public struct LocalMLXProvider: AIProvider {
public init(
baseURL: URL,
model: String = "gemma-macmini",
model: String = "mac-mini-default", // llm-router ( /v1/models ) gemma-4-26b resolve
session: URLSession = .shared,
requestTimeout: TimeInterval = 60,
probeTimeout: TimeInterval = 2
+1 -1
View File
@@ -9,7 +9,7 @@ final class CompositionTests: XCTestCase {
func testConfigDefaults() {
let c = AIProviderConfiguration.resolved(environment: [:])
XCTAssertEqual(c.localMLXBaseURL.absoluteString, "http://100.76.254.116:8890")
XCTAssertEqual(c.localMLXModel, "gemma-macmini")
XCTAssertEqual(c.localMLXModel, "mac-mini-default")
XCTAssertEqual(c.dsBaseURL.absoluteString, "https://document.hyungi.net/api")
XCTAssertEqual(c.probeTimeout, 2)
}
+23 -3
View File
@@ -11,7 +11,7 @@ final class LocalMLXProviderTests: XCTestCase {
}
private func provider() -> LocalMLXProvider {
LocalMLXProvider(baseURL: baseURL, model: "gemma-macmini", session: MockURLProtocol.session())
LocalMLXProvider(baseURL: baseURL, model: "mac-mini-default", session: MockURLProtocol.session())
}
// MARK: isAvailable probe (wake )
@@ -53,7 +53,7 @@ final class LocalMLXProviderTests: XCTestCase {
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertTrue(resp.citations.isEmpty)
XCTAssertNotNil(resp.latencyMs)
XCTAssertTrue(resp.text.contains("UCS-66"))
XCTAssertTrue(resp.text.contains("면제")) //
}
func testCompleteRequestCallShape() async throws {
@@ -71,7 +71,7 @@ final class LocalMLXProviderTests: XCTestCase {
// messages system/user call-shape (load-bearing)
let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody)
let decoded = try JSONDecoder().decode(SentRequest.self, from: sent)
XCTAssertEqual(decoded.model, "gemma-macmini")
XCTAssertEqual(decoded.model, "mac-mini-default")
XCTAssertEqual(decoded.maxTokens, 512)
XCTAssertEqual(decoded.stream, false)
XCTAssertEqual(decoded.messages.count, 2)
@@ -137,6 +137,26 @@ final class LocalMLXProviderTests: XCTestCase {
XCTAssertNil(resp.routingNote)
}
// MARK: Live e2e (real llm-router :8890; skipped when offline)
/// Live end-to-end check against the real llm-router at 100.76.254.116:8890.
///
/// Probes `isAvailable` first and throws `XCTSkip` when the router cannot be reached,
/// so an offline host skips this test instead of failing the suite.
/// - Throws: `XCTSkip` when the probe reports unavailable; any error thrown by `complete`.
func testLiveLocalMLXIfReachable() async throws {
let live = LocalMLXProvider(baseURL: URL(string: "http://100.76.254.116:8890")!) // no session argument: uses the default URLSession (not the mock), reached over Tailscale
let reachable = await live.isAvailable
guard reachable else {
throw XCTSkip("llm-router :8890 도달 불가(맥미니 offline) — 라이브 테스트 skip")
}
let resp = try await live.complete(
AICompletionRequest(task: .quickSummarize,
prompt: "엘보 내경 가공 핵심을 한 문장으로 요약해줘.",
systemPrompt: "You are a concise technical assistant.",
maxTokens: 200)
)
// Only provider, finish reason, non-empty text and latency presence are asserted;
// the live reply text is not compared against any fixed string.
XCTAssertEqual(resp.providerUsed, .localMLX)
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertFalse(resp.text.isEmpty, "라이브 응답은 비어있지 않아야")
XCTAssertNotNil(resp.latencyMs)
}
/// ( ).
struct SentRequest: Decodable {
struct Message: Decodable { let role: String; let content: String }
@@ -1,15 +1,14 @@
{
"_meta": {
"fixture": "llm-router-chat.request",
"status": "PROVISIONAL_SYNTHETIC",
"synthetic": true,
"captured_date": null,
"reason": "맥미니(hyungi-macmini 100.76.254.116) Tailscale offline(last seen 8h+, 2026-06-04) → 라이브 캡처 불가. OpenAI /v1/chat/completions 표준 스펙 기반 합성. 맥미니 복귀 시 S2-2a 라이브 재캡처로 교체(별 fixture-update PR, S2-Ff drift-check 경유).",
"status": "CAPTURED_LIVE",
"captured_date": "2026-06-05",
"captured_via": "MacBook → Tailscale 100.76.254.116:8890 (맥미니 llm-router, GUI 로그인 복구 후)",
"endpoint": "POST http://100.76.254.116:8890/v1/chat/completions",
"call_shape_note": "messages = [system, user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\" (plan S2-2c). max_tokens = AICompletionRequest.maxTokens.",
"model_note": "model 문자열은 llm-router 가 기대하는 별칭 — 라이브 캡처로 확정 필요(provisional: 'gemma-macmini')."
"model_note": "llm-router /v1/models 노출 ID = {gemma-4-26b-a4b-it-8bit, mac-mini-default, qwen-macbook, claude-cloud}. LocalMLXProvider 기본 = 'mac-mini-default'(별칭 → 라우터가 gemma-4-26b 로 resolve). 이전 provisional 의 'gemma-macmini' 는 llm-router 모델 ID 아님(그건 DS /search/ask?backend= 쪽 이름).",
"call_shape_note": "messages=[system,user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\". max_tokens = AICompletionRequest.maxTokens."
},
"model": "gemma-macmini",
"model": "mac-mini-default",
"messages": [
{ "role": "system", "content": "You are a concise technical assistant." },
{ "role": "user", "content": "충격시험 면제 기준을 한 문장으로 요약해줘." }
+10 -10
View File
@@ -1,25 +1,25 @@
{
"_meta": {
"fixture": "llm-router-chat.response",
"status": "PROVISIONAL_SYNTHETIC",
"synthetic": true,
"captured_date": null,
"reason": "맥미니 offline → 라이브 캡처 불가. OpenAI chat.completion 표준 응답 모양 기반 합성. 라이브 재캡처로 교체(S2-Ff).",
"shape": "OpenAI chat.completion (choices[0].message.content → text, finish_reason → AIFinishReason)"
"status": "CAPTURED_LIVE",
"captured_date": "2026-06-05",
"endpoint": "POST http://100.76.254.116:8890/v1/chat/completions",
"note": "raw llm-router 응답(맥미니 Gemma 4 26B, MLX). 요청 model='mac-mini-default' → 응답 model='mlx-community/gemma-4-26b-a4b-it-8bit' 로 resolve. 매핑: choices[0].message.content→text, finish_reason→AIFinishReason, latency=측정값.",
"latency_observed_s": 1.73
},
"id": "chatcmpl-provisional-0001",
"id": "chatcmpl-1780610323",
"object": "chat.completion",
"created": 0,
"model": "gemma-macmini",
"created": 1780610323,
"model": "mlx-community/gemma-4-26b-a4b-it-8bit",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "충격시험 면제는 재료군(Curve A~D)과 거버닝 두께에 따른 UCS-66 면제 곡선으로 MDMT에서 판정합니다."
"content": "충격시험은 제품의 설계, 재질, 구조적 특성상 충격 에너지를 흡수할 수 있거나 파손 위험이 극히 낮음을 기술적으로 입증할 경우 면제될 수 있습니다."
},
"finish_reason": "stop"
}
],
"usage": { "prompt_tokens": 24, "completion_tokens": 41, "total_tokens": 65 }
"usage": { "prompt_tokens": 42, "completion_tokens": 48, "total_tokens": 90 }
}