Files
hyungi_document_server/Tests/AITests/LocalMLXProviderTests.swift
T
hyungi 5383a93f98 feat(ai-fabric): S2 LLM 패브릭 4 provider 결선 + 컴포지션 루트
risk-first 채움(RemoteDS→LocalMLX→OnDevice→Specialized) + makeDefaultRouter 컴포지션 루트.
동결 인터페이스(AIProvider/AIRouter/MockAIProvider) 무변경. SPM AIFabric 단독 빌드·테스트(46 PASS).

- RemoteDS: DSAskClient seam + AskResponse(ask.json) 매핑 + backend exhaustive switch(qwen/cloud TODO)
- LocalMLX: GET /v1/models probe + OpenAI /v1/chat/completions system/user call-shape + non-200 backendError
- OnDevice: FoundationModels 라이브(M5 Max) availability + respond() + GenerationError 9-case 매핑 + stateless/prewarm
- Specialized: scaffold-only(명시 unavailable, vision 폴백 가시화), cloud='claude-cloud' 503
- config 단일소스(env override) + 타임아웃/취소(URLSession 자동 honor, OnDevice 협조적)

실측 동결(S2-3a, M5 Max): availability=available · 취소=COOPERATIVE(~33ms) · 오버플로=exceededContextWindowSize
  · GenerationError 9-case(refusal·concurrentRequests 추가 발견, plan 정정).
한계: LocalMLX fixture=PROVISIONAL_SYNTHETIC(맥미니 offline → 라이브 재캡처 S2-Ff 대기).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 17:20:10 +09:00

150 lines
6.8 KiB
Swift

import XCTest
@testable import AIFabric
/// Tests for `LocalMLXProvider`: the availability probe, the chat-completion
/// request/response mapping, non-200 error mapping, and router fallback.
///
/// Every provider is built with `MockURLProtocol.session()`, so requests are
/// expected to be answered by `MockURLProtocol.handler` rather than the network.
final class LocalMLXProviderTests: XCTestCase {
    /// Base URL handed to the provider under test.
    private let baseURL = URL(string: "http://100.76.254.116:8890")!

    /// Clears the stubbed handler/recorder so state cannot leak between tests.
    override func tearDown() {
        MockURLProtocol.reset()
        super.tearDown()
    }

    // MARK: - Helpers

    /// Builds a provider wired to the mock URL session.
    private func provider() -> LocalMLXProvider {
        LocalMLXProvider(baseURL: baseURL, model: "gemma-macmini", session: MockURLProtocol.session())
    }

    /// Stubs every request with a 200 response whose body is the
    /// `llm-router-chat.response.json` fixture.
    /// - Throws: If the fixture cannot be loaded.
    private func stubChatResponseFixture() throws {
        let body = try Fixture.data("llm-router-chat.response.json")
        MockURLProtocol.handler = { request in
            // Throw instead of force-unwrapping so a URL-less request fails the
            // test rather than crashing the test process.
            try MockURLProtocol.ok(XCTUnwrap(request.url), json: body)
        }
    }

    /// Decodes the body of the most recently recorded request.
    /// - Throws: If no body was recorded or it does not decode as `SentRequest`.
    private func decodeLastSentRequest() throws -> SentRequest {
        let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody)
        return try JSONDecoder().decode(SentRequest.self, from: sent)
    }

    // MARK: - isAvailable probe

    /// A 200 from the probe reports available; the probe is `GET /v1/models`.
    func testProbeAvailable() async throws {
        MockURLProtocol.handler = { request in
            try MockURLProtocol.ok(
                XCTUnwrap(request.url),
                json: Data(#"{"data":[{"id":"gemma-macmini"}]}"#.utf8)
            )
        }
        let available = await provider().isAvailable
        XCTAssertTrue(available)
        // The probe must be GET /v1/models.
        XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/models")
        XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "GET")
    }

    /// A transport error yields `false`; `isAvailable` is read without `try`.
    func testProbeUnavailableOnError() async throws {
        MockURLProtocol.handler = { _ in throw URLError(.cannotConnectToHost) }
        let available = await provider().isAvailable
        XCTAssertFalse(available)
    }

    /// A 500 response from the probe yields `false`.
    func testProbeUnavailableOn500() async throws {
        MockURLProtocol.handler = { request in
            try MockURLProtocol.status(XCTUnwrap(request.url), 500)
        }
        let available = await provider().isAvailable
        XCTAssertFalse(available)
    }

    // MARK: - complete + call-shape

    /// The response fixture maps onto the completion response fields.
    func testCompleteMapsResponseFixture() async throws {
        try stubChatResponseFixture()
        let request = AICompletionRequest(
            task: .quickSummarize,
            prompt: "충격시험 면제 기준을 한 문장으로 요약해줘.",
            systemPrompt: "You are a concise technical assistant.",
            maxTokens: 512
        )
        let resp = try await provider().complete(request)
        XCTAssertEqual(resp.providerUsed, .localMLX)
        XCTAssertEqual(resp.finishReason, .completed)
        XCTAssertTrue(resp.citations.isEmpty)
        XCTAssertNotNil(resp.latencyMs)
        XCTAssertTrue(resp.text.contains("UCS-66"))
    }

    /// The outgoing request is `POST /v1/chat/completions` with a
    /// system message followed by a user message.
    func testCompleteRequestCallShape() async throws {
        try stubChatResponseFixture()
        let request = AICompletionRequest(
            task: .quickSummarize,
            prompt: "PROMPT_X",
            systemPrompt: "SYS_Y",
            maxTokens: 512
        )
        _ = try await provider().complete(request)

        // Endpoint: POST /v1/chat/completions.
        XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/chat/completions")
        XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "POST")

        // The system/user message call-shape is load-bearing.
        let decoded = try decodeLastSentRequest()
        XCTAssertEqual(decoded.model, "gemma-macmini")
        XCTAssertEqual(decoded.maxTokens, 512)
        XCTAssertFalse(decoded.stream)
        XCTAssertEqual(decoded.messages.count, 2)
        // Compare whole arrays rather than subscripting: a wrong message count
        // then fails the test instead of trapping on an out-of-range index.
        XCTAssertEqual(decoded.messages.map(\.role), ["system", "user"])
        XCTAssertEqual(decoded.messages.map(\.content), ["SYS_Y", "PROMPT_X"])
    }

    /// With no system prompt, the first message is still a system message
    /// with empty content.
    func testNilSystemPromptSendsEmptySystemMessage() async throws {
        try stubChatResponseFixture()
        _ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P"))
        let decoded = try decodeLastSentRequest()
        // Unwrap instead of `messages[0]` so an empty array is a test failure, not a crash.
        let systemMessage = try XCTUnwrap(decoded.messages.first)
        XCTAssertEqual(systemMessage.role, "system")
        XCTAssertEqual(systemMessage.content, "") // plan S2-2c: systemPrompt ?? ""
    }

    /// A non-200 response surfaces as `AIProviderError.backendError` carrying
    /// the provider id, status code and response body.
    func testNon200BackendError() async throws {
        MockURLProtocol.handler = { request in
            try MockURLProtocol.status(XCTUnwrap(request.url), 503, body: "model loading")
        }
        do {
            _ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P"))
            XCTFail("non-200 must throw backendError, not silent empty text")
        } catch let AIProviderError.backendError(id, status, reason) {
            // Any other error is not caught here; it propagates and fails the test.
            XCTAssertEqual(id, .localMLX)
            XCTAssertEqual(status, 503)
            XCTAssertEqual(reason, "model loading")
        }
    }

    /// The request fixture decodes into the call-shape these tests expect.
    ///
    /// Only the fixture's shape is checked; the provider's encoder is not
    /// invoked, and message contents are not compared.
    func testRequestFixtureMatchesEncoder() throws {
        let fixtureData = try Fixture.data("llm-router-chat.request.json")
        let fixture = try JSONDecoder().decode(SentRequest.self, from: fixtureData)
        XCTAssertEqual(fixture.messages.count, 2)
        // Array comparison avoids an out-of-range trap on a malformed fixture.
        XCTAssertEqual(fixture.messages.map(\.role), ["system", "user"])
        XCTAssertFalse(fixture.stream)
    }

    // MARK: - rule-fallback (S2-2d): onDevice → localMLX

    /// With onDevice unavailable, the router uses localMLX and records a fallback note.
    func testFallbackFromOnDeviceToLocalMLX() async throws {
        try stubChatResponseFixture()
        let router = AIRouter(providers: [
            .onDevice: MockAIProvider(id: .onDevice, available: false), // simulated as unavailable
            .localMLX: provider(),
        ])
        let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "P"))
        XCTAssertEqual(resp.providerUsed, .localMLX)
        XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX")
    }

    /// When the first-choice provider succeeds, no routing note is attached.
    func testNoFallbackNoteOnFirstChoiceSuccess() async throws {
        try stubChatResponseFixture()
        // classify chain = [.localMLX, .remoteDS, .onDevice]; localMLX is the
        // first choice, so the note must be nil.
        let router = AIRouter(providers: [.localMLX: provider()])
        let resp = try await router.route(AICompletionRequest(task: .classify, prompt: "P"))
        XCTAssertEqual(resp.providerUsed, .localMLX)
        XCTAssertNil(resp.routingNote)
    }

    // MARK: - Decoding support

    /// Decode-only mirror of the chat-completions request body, used to
    /// inspect what was sent (and the request fixture).
    struct SentRequest: Decodable {
        /// One chat message: a role and its text content.
        struct Message: Decodable {
            let role: String
            let content: String
        }

        let model: String
        let messages: [Message]
        /// Decoded from the `max_tokens` key; optional.
        let maxTokens: Int?
        let stream: Bool

        enum CodingKeys: String, CodingKey {
            case model, messages, stream
            case maxTokens = "max_tokens"
        }
    }
}