merge: integrate AIFabric (S2) into S3 app — unified package

- Resolve Package.swift add/add: one manifest, single AIFabric target (Sources/AI compiled once;
  no duplicate-symbol risk) + DSKit/AppFeature/DSApp + AITests + DSKitTests, AIFabric library product kept.
- import AI -> import AIFabric across AppFeature + RouterFallbackTests (S2 renamed module).
- AppModel.askMeta qualified DSKit.AskResponse (AIFabric also defines an AskResponse for RemoteDS).

swift build + swift test green (71 tests: S2 AITests + S3 DSKitTests). Frozen AIProvider interface intact.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
This commit is contained in:
Hyungi
2026-06-05 06:41:30 +09:00
25 changed files with 1584 additions and 61 deletions
+28 -11
View File
@@ -1,15 +1,26 @@
// swift-tools-version: 6.2
//
// DS multidevice app unified package (S2 AIFabric + S3 app), one repo / one manifest.
//
// AIFabric (Sources/AI, S2-owned) is compiled exactly once as a single target here, so the
// " b" duplicate-symbol concern (two packages each compiling Sources/AI) does not arise.
// It is also exposed as a library product so a future separate app package could depend on it.
//
// Ownership boundary unchanged: S2 owns Sources/AI/** + Tests/AITests/**; S3 owns DSKit / AppFeature
// / DSApp / Tests/DSKitTests. S3 consumes AIFabric read-only.
import PackageDescription
// DS multidevice app macOS-first scaffold (S3).
// Phase 1 targets: AI (S2-owned, read-only) + DSKit (models + client + fixtures) + DSKitTests.
// Phase 2 will add AppFeature (SwiftUI shell) + DSApp (executable) see plans/2026-06-04-ds-app-s3-scaffold-plan.html.
let package = Package(
name: "DSApp",
platforms: [.macOS(.v26)],
platforms: [
.macOS(.v26), // FoundationModels (OnDeviceProvider) + 3-column NavigationSplitView
],
products: [
.library(name: "AIFabric", targets: ["AIFabric"]),
],
targets: [
.target(
name: "AI",
name: "AIFabric",
path: "Sources/AI",
swiftSettings: [.swiftLanguageMode(.v6)]
),
@@ -18,14 +29,9 @@ let package = Package(
resources: [.process("Resources")],
swiftSettings: [.swiftLanguageMode(.v6)]
),
.testTarget(
name: "DSKitTests",
dependencies: ["DSKit", "AI"],
swiftSettings: [.swiftLanguageMode(.v6)]
),
.target(
name: "AppFeature",
dependencies: ["DSKit", "AI"],
dependencies: ["DSKit", "AIFabric"],
swiftSettings: [.swiftLanguageMode(.v6)]
),
.executableTarget(
@@ -33,5 +39,16 @@ let package = Package(
dependencies: ["AppFeature"],
swiftSettings: [.swiftLanguageMode(.v6)]
),
.testTarget(
name: "DSKitTests",
dependencies: ["DSKit", "AIFabric"],
swiftSettings: [.swiftLanguageMode(.v6)]
),
.testTarget(
name: "AITests",
dependencies: ["AIFabric"],
path: "Tests/AITests",
swiftSettings: [.swiftLanguageMode(.v6)]
),
]
)
+82
View File
@@ -0,0 +1,82 @@
// Composition.swift S2 S3 ( b) + config.
//
// INTEGRATION ( b): (S3) `AIFabric` product ** SwiftPM ** ,
// Sources/AI ( / ). S3 makeDefaultRouter(...)
// MockAIProvider . DSAskClient(HTTP) = S3 .
//
// (S2-Fa): raw URL swap
// (2026-05-17 Hermes incident ). env override . ([[feedback_hermes_config_single_source_envvar]])
import Foundation
import os
/// Endpoint and timeout settings consumed when the AIFabric providers are composed.
///
/// Create one explicitly through `init`, or let `resolved(environment:)` build it from
/// environment overrides layered on top of the built-in defaults.
public struct AIProviderConfiguration: Sendable {
    /// Base URL of the llm-router. `LocalMLXProvider` appends its `v1/...` paths to this value.
    public var localMLXBaseURL: URL
    /// Model identifier sent to the llm-router (provisional default: `gemma-macmini`).
    public var localMLXModel: String
    /// Base URL of the DS API, e.g. `https://document.hyungi.net/api` or
    /// `http://100.110.63.63:8000/api`.
    /// NOTE(review): intended for the app-side DS client; nothing in this type reads it.
    public var dsBaseURL: URL
    /// Timeout, in seconds, for completion requests.
    public var requestTimeout: TimeInterval
    /// Timeout, in seconds, for the availability probe.
    public var probeTimeout: TimeInterval

    public init(
        localMLXBaseURL: URL,
        localMLXModel: String = "gemma-macmini",
        dsBaseURL: URL,
        requestTimeout: TimeInterval = 60,
        probeTimeout: TimeInterval = 2
    ) {
        self.localMLXBaseURL = localMLXBaseURL
        self.localMLXModel = localMLXModel
        self.dsBaseURL = dsBaseURL
        self.requestTimeout = requestTimeout
        self.probeTimeout = probeTimeout
    }

    /// Builds a configuration from environment overrides, falling back to the defaults.
    ///
    /// Recognised keys: `AIFABRIC_LOCALMLX_URL`, `AIFABRIC_LOCALMLX_MODEL`, `AIFABRIC_DS_URL`.
    /// An override is ignored (the default is used) when it is blank, or — for the URL keys —
    /// when it is not an absolute `http`/`https` URL with a host. Without that check a value
    /// such as `localhost:8890` parses as a URL whose *scheme* is "localhost" and silently
    /// yields an endpoint no request can reach.
    ///
    /// - Parameter environment: Key/value pairs to read; defaults to the process environment.
    /// - Returns: A configuration with the default timeouts.
    public static func resolved(
        environment: [String: String] = ProcessInfo.processInfo.environment
    ) -> AIProviderConfiguration {
        // The literals below are compile-time constants known to parse, hence the force unwraps.
        AIProviderConfiguration(
            localMLXBaseURL: httpURL(environment["AIFABRIC_LOCALMLX_URL"])
                ?? URL(string: "http://100.76.254.116:8890")!,
            localMLXModel: nonBlank(environment["AIFABRIC_LOCALMLX_MODEL"]) ?? "gemma-macmini",
            dsBaseURL: httpURL(environment["AIFABRIC_DS_URL"])
                ?? URL(string: "https://document.hyungi.net/api")!
        )
    }

    /// Returns `raw` trimmed of surrounding whitespace, or `nil` when nothing is left.
    private static func nonBlank(_ raw: String?) -> String? {
        guard let trimmed = raw?.trimmingCharacters(in: .whitespacesAndNewlines),
              !trimmed.isEmpty
        else { return nil }
        return trimmed
    }

    /// Parses `raw` as an absolute http(s) URL with a non-empty host; `nil` otherwise.
    private static func httpURL(_ raw: String?) -> URL? {
        guard let text = nonBlank(raw),
              let url = URL(string: text),
              let scheme = url.scheme?.lowercased(),
              scheme == "http" || scheme == "https",
              url.host?.isEmpty == false
        else { return nil }
        return url
    }
}
/// Logging surface of the router.
///
/// `routerHook` is public so the app layer can hand it to the router; every message it
/// receives is written at info level, with public privacy, to the `AIRouter` category.
public enum AIFabricLog {
    /// Logger backing `routerHook`.
    static let router = Logger(subsystem: "ds-app.AIFabric", category: "AIRouter")

    /// Default log sink used by `makeDefaultRouter`.
    public static let routerHook: @Sendable (String) -> Void = { message in
        AIFabricLog.router.info("\(message, privacy: .public)")
    }
}
/// Composes the default router with all four providers registered.
///
/// - Parameters:
///   - client: DS ask client (HTTP implementation supplied by the app layer).
///   - config: Endpoints and timeouts; defaults to the environment-resolved configuration.
///   - session: URLSession used by the LocalMLX provider (defaults to `.shared`; tests inject a mock).
///   - policy: Routing policy handed to the router.
///   - log: Sink for router log lines.
/// - Returns: A router holding the remoteDS, localMLX, onDevice and specialized providers.
public func makeDefaultRouter(
    client: DSAskClient,
    config: AIProviderConfiguration = .resolved(),
    session: URLSession = .shared,
    policy: AIRoutingPolicy = .default,
    log: @escaping @Sendable (String) -> Void = AIFabricLog.routerHook
) -> AIRouter {
    var registry: [AIProviderID: any AIProvider] = [:]
    registry[.remoteDS] = RemoteDSProvider(client: client)
    registry[.localMLX] = LocalMLXProvider(
        baseURL: config.localMLXBaseURL,
        model: config.localMLXModel,
        session: session,
        requestTimeout: config.requestTimeout,
        probeTimeout: config.probeTimeout
    )
    registry[.onDevice] = OnDeviceProvider()
    // Scaffold only: always reports unavailable, so vision requests fall through to the next provider.
    registry[.specialized] = SpecializedProvider()
    return AIRouter(providers: registry, policy: policy, log: log)
}
+142 -14
View File
@@ -1,33 +1,161 @@
// LocalMLXProvider.swift S2 ( LLM ).
// LocalMLXProvider.swift S2 ( LLM ).
//
// : Gemma 4 26B (MLX) OpenAI .
// - : llm-router :8890 () MLX :8801 (Tailscale 100.76.254.116)
// - isAvailable = health ( + )
// - complete = POST /v1/chat/completions (messages: system/user , call-shape )
// .
// Gemma 4 26B, llm-router :8890 (OpenAI , wake-on-call). #4: raw MLX :8801 .
// - isAvailable = GET /v1/models probe( timeout, wake ' ' )
// - complete = POST /v1/chat/completions, messages system/user (call-shape )
//
// fixture(llm-router-chat.*.json) = PROVISIONAL_SYNTHETIC ( offline , 2026-06-04).
// OpenAI . S2-2a (S2-Ff). .
import Foundation
public struct LocalMLXProvider: AIProvider {
public let id: AIProviderID = .localMLX
/// URL (S2 /Keychain ).
/// URL (S2-Fa config ). trailing slash base, appendingPathComponent.
public let baseURL: URL
let model: String
let session: URLSession
let requestTimeout: TimeInterval
let probeTimeout: TimeInterval
public init(baseURL: URL) {
public init(
baseURL: URL,
model: String = "gemma-macmini",
session: URLSession = .shared,
requestTimeout: TimeInterval = 60,
probeTimeout: TimeInterval = 2
) {
self.baseURL = baseURL
self.model = model
self.session = session
self.requestTimeout = requestTimeout
self.probeTimeout = probeTimeout
}
// MARK: isAvailable health probe (wake )
public var isAvailable: Bool {
get async {
// S2: GET /v1/models health .
false
var req = URLRequest(url: baseURL.appendingPathComponent("v1/models"))
req.httpMethod = "GET"
req.timeoutInterval = probeTimeout
do {
let (_, resp) = try await session.data(for: req)
guard let http = resp as? HTTPURLResponse else { return false }
return (200..<300).contains(http.statusCode)
} catch {
// timeout/ false(throw ). probe wake/ X.
// '= ' complete() .
return false
}
}
}
// MARK: complete OpenAI chat/completions
/// Sends `request` to `<baseURL>/v1/chat/completions` and maps the first returned choice
/// to an `AICompletionResponse` attributed to `.localMLX`.
///
/// - Parameter request: Prompt, optional system prompt and optional token limit.
/// - Returns: The first choice's text, with latency measured around the HTTP round trip.
/// - Throws: `CancellationError` when the surrounding task is cancelled;
///   `AIProviderError.backendError` on timeout, a non-HTTP response, a non-2xx status or an
///   empty `choices` array; other transport and decoding errors are propagated unchanged.
public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
    try Task.checkCancellation()

    var req = URLRequest(url: baseURL.appendingPathComponent("v1/chat/completions"))
    req.httpMethod = "POST"
    req.setValue("application/json", forHTTPHeaderField: "Content-Type")
    req.timeoutInterval = requestTimeout
    req.httpBody = try Self.encodeRequest(request, model: model)

    let started = Date()
    let data: Data
    let resp: URLResponse
    do {
        (data, resp) = try await session.data(for: req)
    } catch let e as URLError where e.code == .timedOut {
        throw AIProviderError.backendError(id, status: -1, reason: "request timed out after \(Int(requestTimeout))s")
    } catch let e as URLError where e.code == .cancelled && Task.isCancelled {
        // URLSession reports a cancelled transfer as URLError(.cancelled). Re-throw it as
        // CancellationError so callers see a cancellation rather than a provider failure.
        // The Task.isCancelled guard keeps an invalidated session reporting its own error.
        throw CancellationError()
    }

    guard let http = resp as? HTTPURLResponse else {
        throw AIProviderError.backendError(id, status: -1, reason: "non-HTTP response")
    }
    guard (200..<300).contains(http.statusCode) else {
        // Keep at most the first 300 characters of the body as the failure reason.
        let reason = String(data: data, encoding: .utf8).map { String($0.prefix(300)) }
        throw AIProviderError.backendError(id, status: http.statusCode, reason: reason)
    }

    let decoded = try JSONDecoder().decode(OpenAIChatResponse.self, from: data)
    guard let choice = decoded.choices.first else {
        throw AIProviderError.backendError(id, status: http.statusCode, reason: "no choices in response")
    }
    return AICompletionResponse(
        text: choice.message.content,
        providerUsed: .localMLX,
        finishReason: Self.finishReason(choice.finishReason),
        citations: [],   // this backend returns no citations
        confidence: nil,
        latencyMs: Date().timeIntervalSince(started) * 1000,
        routingNote: nil // left nil here; nothing in this method sets a fallback note
    )
}
// MARK:
/// Maps an OpenAI `finish_reason` string to `AIFinishReason`.
///
/// "stop", "length" (token-limit cutoff) and every other value, including `nil`, currently
/// collapse to `.completed`.
/// NOTE(review): confirm whether "length" should map to a truncation-specific reason.
static func finishReason(_ openAI: String?) -> AIFinishReason {
    return .completed
}
/// Serialises `request` as an OpenAI chat/completions JSON body.
///
/// The body always carries two messages, system then user; a missing system prompt is sent
/// as an empty string. Streaming is disabled, no temperature is sent, and keys are emitted
/// in sorted order.
///
/// - Parameters:
///   - request: Source of the prompt, system prompt and token limit.
///   - model: Model identifier placed in the body.
/// - Returns: The encoded JSON data.
/// - Throws: Any error raised by `JSONEncoder`.
static func encodeRequest(_ request: AICompletionRequest, model: String) throws -> Data {
    let systemMessage = OpenAIChatRequest.Message(role: "system", content: request.systemPrompt ?? "")
    let userMessage = OpenAIChatRequest.Message(role: "user", content: request.prompt)
    let payload = OpenAIChatRequest(
        model: model,
        messages: [systemMessage, userMessage],
        maxTokens: request.maxTokens,
        stream: false
    )

    let encoder = JSONEncoder()
    encoder.outputFormatting = [.sortedKeys]
    return try encoder.encode(payload)
}
}
// MARK: - OpenAI wire ()
/// Request body for the OpenAI-compatible `chat/completions` endpoint.
struct OpenAIChatRequest: Encodable, Sendable {
    /// Wire names; only `maxTokens` differs from its Swift spelling.
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case stream
        case maxTokens = "max_tokens"
    }

    /// One chat turn.
    struct Message: Encodable, Sendable {
        let role: String
        let content: String
    }

    let model: String
    let messages: [Message]
    /// Omitted from the JSON when `nil`.
    let maxTokens: Int?
    let stream: Bool
}
/// Subset of the OpenAI-compatible `chat/completions` response that the provider reads.
struct OpenAIChatResponse: Decodable, Sendable {
    /// One generated alternative.
    struct Choice: Decodable, Sendable {
        /// Wire names; only `finishReason` differs from its Swift spelling.
        enum CodingKeys: String, CodingKey {
            case index
            case message
            case finishReason = "finish_reason"
        }

        /// The generated message.
        struct Message: Decodable, Sendable {
            let role: String
            let content: String
        }

        let index: Int?
        let message: Message
        let finishReason: String?
    }

    let choices: [Choice]
}
+183 -13
View File
@@ -1,26 +1,196 @@
// OnDeviceProvider.swift S2 (· ).
// OnDeviceProvider.swift S2 (· , Apple FoundationModels).
//
// : `import FoundationModels` SystemLanguageModel / LanguageModelSession .
// - isAvailable = SystemLanguageModel.default.availability == .available
// - complete = LanguageModelSession prompt (citations )
// Foundation-only (notImplemented).
// ( #3): SystemLanguageModel.default.availability() + LanguageModelSession.respond().
// ~3B/2-bit QAT quickSummarize/classify , corpusAsk ( corpusAsk RemoteDS ).
//
// : backend seam(OnDeviceModelBackend) CI/-AI Mac mock
// / , M5 Max ( default = FoundationModels).
//
// (S2-3a, M5 Max 2026-06-04): availability=available · respond()Response<String>.content ·
// GenerationError 9 case · exceededContextWindowSize · =COOPERATIVE(respond()
// mid-flight Task.cancel checkCancellation belt-and-suspenders, streamResponse ).
import Foundation
import os
// MARK: - (FoundationModels HW )
/// Availability of the on-device model as reported by a backend.
public enum OnDeviceAvailability: Sendable, Equatable {
    /// The model can be used.
    case available

    /// The model cannot be used; `reason` is a short diagnostic label.
    case unavailable(reason: String)
}
/// SDK-independent mirror of `LanguageModelSession.GenerationError` (nine cases plus `unknown`).
///
/// Backends translate SDK errors into this type so the provider can map them without
/// importing FoundationModels.
public enum OnDeviceGenerationError: Error, Sendable, Equatable {
    // Content was declined.
    case guardrailViolation
    case refusal

    // Request could not be served as issued.
    case exceededContextWindowSize
    case rateLimited
    case concurrentRequests
    case unsupportedLanguageOrLocale
    case unsupportedGuide
    case decodingFailure
    case assetsUnavailable

    /// Any error without a dedicated case; the payload is a free-form description.
    case unknown(String)
}
/// Seam between `OnDeviceProvider` and the model that actually generates text.
///
/// The live implementation is `FoundationModelsBackend`; tests can supply their own conformer.
protocol OnDeviceModelBackend: Sendable {
    /// Whether the backing model can currently be used.
    var availability: OnDeviceAvailability { get }

    /// Generates text for `prompt`.
    ///
    /// - Throws: `OnDeviceGenerationError` for model-side failures, or `CancellationError`.
    func generate(
        prompt: String,
        systemPrompt: String?,
        maxTokens: Int?
    ) async throws -> String
}
// MARK: - Provider
public struct OnDeviceProvider: AIProvider {
public let id: AIProviderID = .onDevice
private let backend: OnDeviceModelBackend
private let log: @Sendable (String) -> Void
public init() {}
public init() {
self.backend = Self.makeLiveBackend()
let logger = Logger(subsystem: "ds-app.AIFabric", category: "OnDeviceProvider")
self.log = { msg in logger.warning("\(msg, privacy: .public)") }
}
/// seam backend/log (HW ).
init(backend: OnDeviceModelBackend, log: @escaping @Sendable (String) -> Void = { _ in }) {
self.backend = backend
self.log = log
}
public var isAvailable: Bool {
get async {
// S2: FoundationModels .
false
}
get async { backend.availability == .available }
}
public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
// S2: LanguageModelSession(.default) AICompletionResponse(providerUsed: .onDevice).
throw AIProviderError.notImplemented(id)
// belt-and-suspenders: mid-flight respond() (S2-3a ).
try Task.checkCancellation()
let started = Date()
do {
let text = try await backend.generate(
prompt: request.prompt,
systemPrompt: request.systemPrompt,
maxTokens: request.maxTokens
)
return AICompletionResponse(
text: text,
providerUsed: .onDevice,
finishReason: .completed,
latencyMs: Date().timeIntervalSince(started) * 1000
)
} catch let e as OnDeviceGenerationError {
return try mapError(e)
}
// CancellationError .
}
/// Maps a backend generation error to the provider's outcome.
///
/// Guardrail violations and refusals are returned as an empty `.refused` response. Every
/// other error is thrown as `AIProviderError.unavailable`, after logging where a message
/// is defined for that case.
private func mapError(_ e: OnDeviceGenerationError) throws -> AICompletionResponse {
    let logLine: String?
    switch e {
    case .guardrailViolation, .refusal:
        return AICompletionResponse(
            text: "",
            providerUsed: .onDevice,
            finishReason: .refused,
            routingNote: "on-device refused (guardrail/refusal)"
        )
    case .rateLimited:
        logLine = "UNEXPECTED onDevice rateLimited on stateless session — 세션 재사용 버그 의심"
    case .concurrentRequests:
        logLine = "UNEXPECTED onDevice concurrentRequests on stateless session — 세션 재사용 버그 의심"
    case .exceededContextWindowSize:
        logLine = "onDevice context window(4096) exceeded — 라우터가 localMLX 로 폴백"
    case .unsupportedLanguageOrLocale:
        logLine = "onDevice unsupported language/locale — 폴백"
    case .unsupportedGuide, .decodingFailure, .assetsUnavailable:
        logLine = nil // these cases are thrown without a log line
    case .unknown(let detail):
        logLine = "onDevice unknown generation error: \(detail)"
    }

    if let logLine {
        log(logLine)
    }
    throw AIProviderError.unavailable(id)
}
/// Chooses the production backend: FoundationModels when the module can be imported,
/// otherwise a stub that always reports itself unavailable.
static func makeLiveBackend() -> OnDeviceModelBackend {
    let backend: OnDeviceModelBackend
    #if canImport(FoundationModels)
    backend = FoundationModelsBackend()
    #else
    backend = UnavailableBackend(reason: "FoundationModels not importable on this platform")
    #endif
    return backend
}
}
/// Backend used when FoundationModels cannot be imported: never available, and every
/// generation attempt fails with `.unknown`.
struct UnavailableBackend: OnDeviceModelBackend {
    /// Explanation surfaced through `availability` and the thrown error.
    let reason: String

    var availability: OnDeviceAvailability {
        return .unavailable(reason: reason)
    }

    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        let detail = "backend unavailable: \(reason)"
        throw OnDeviceGenerationError.unknown(detail)
    }
}
// MARK: - FoundationModels backend (M5 Max / Apple Intelligence)
#if canImport(FoundationModels)
import FoundationModels
/// Live backend on top of Apple's FoundationModels: reads the system model's availability
/// and generates text with a fresh `LanguageModelSession` per call.
struct FoundationModelsBackend: OnDeviceModelBackend {
/// Translates `SystemLanguageModel.default.availability` into `OnDeviceAvailability`,
/// using the SDK's reason name as the label. Unrecognised future values map to
/// "unknownReason" (inner switch) or "unknown" (outer switch).
var availability: OnDeviceAvailability {
switch SystemLanguageModel.default.availability {
case .available:
return .available
case .unavailable(let reason):
switch reason {
case .deviceNotEligible: return .unavailable(reason: "deviceNotEligible")
case .appleIntelligenceNotEnabled: return .unavailable(reason: "appleIntelligenceNotEnabled")
case .modelNotReady: return .unavailable(reason: "modelNotReady")
@unknown default: return .unavailable(reason: "unknownReason")
}
@unknown default:
return .unavailable(reason: "unknown")
}
}
/// Generates a response to `prompt` with a new session.
/// - Throws: `OnDeviceGenerationError` translated from the SDK's `GenerationError`;
///   any other error from `respond` propagates unchanged.
func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
// Instructions are supplied at session init. When systemPrompt is nil the session is
// created without instructions (LocalMLX instead sends an empty system message).
let session: LanguageModelSession
if let systemPrompt {
session = LanguageModelSession(model: .default, instructions: systemPrompt)
} else {
session = LanguageModelSession(model: .default)
}
session.prewarm() // prewarm before responding; the original note cites ~1.3ms (S2-3a)
// Only the response-token limit is passed; no temperature is set here.
let options = GenerationOptions(maximumResponseTokens: maxTokens)
do {
let response = try await session.respond(to: prompt, options: options)
return response.content // Response<String>.content : String
} catch let g as LanguageModelSession.GenerationError {
throw Self.translate(g)
}
}
/// Maps each SDK `GenerationError` case to its `OnDeviceGenerationError` counterpart.
/// Cases the SDK adds later fall into `.unknown`, carrying the error's description.
static func translate(_ g: LanguageModelSession.GenerationError) -> OnDeviceGenerationError {
switch g {
case .guardrailViolation: return .guardrailViolation
case .refusal: return .refusal
case .exceededContextWindowSize: return .exceededContextWindowSize
case .rateLimited: return .rateLimited
case .concurrentRequests: return .concurrentRequests
case .unsupportedLanguageOrLocale: return .unsupportedLanguageOrLocale
case .unsupportedGuide: return .unsupportedGuide
case .decodingFailure: return .decodingFailure
case .assetsUnavailable: return .assetsUnavailable
@unknown default: return .unknown("\(g)")
}
}
}
#endif
+127 -16
View File
@@ -1,22 +1,99 @@
// RemoteDSProvider.swift S2 ( DS RAG).
// RemoteDSProvider.swift S2 ( DS RAG).
//
// provider S1 :
// complete(corpusAsk) GET /search/ask?q=&backend= (CONTRACT.md §4, AskResponse)
// AskResponse.citations [AICitation]
// S1 (CONTRACT.md §4 / AI-ROUTING.md §4):
// complete(corpusAsk) DSAskClient.ask(query:backend:) AskResponse AICompletionResponse
// AskResponse.ai_answer text
// AskResponse.citations[] [AICitation]
// AskResponse.synthesis_status AIFinishReason
// AskResponse.backend_used routingNote ( LLM )
// backend : nil(=mac-mini-default) explicitProvider (localMLXgemma-macmini ).
// (S3 DS API client S2 ).
// AskResponse.confidence AIConfidence
// AskResponse.backend_used routingNote ( LLM )
//
// HTTP S3 client(LiveDSClient) S2 DSAskClient seam + .
// : AIProvider . RemoteDSProvider.init(client:) S2 .
import Foundation
// MARK: - S2 DS ask seam ( impl = S3)
/// Abstraction over the DS `GET /search/ask?q=&backend=` call.
///
/// The app layer supplies the HTTP implementation; tests supply a mock so no network is
/// touched. A conformer reports HTTP failures by throwing, for example
/// `AIProviderError.backendError(.remoteDS, status:, reason:)`.
public protocol DSAskClient: Sendable {
    /// Asks DS to answer `query` using the named `backend`.
    func ask(
        query: String,
        backend: String
    ) async throws -> AskResponse
}
// MARK: - DS /search/ask ( , )
//
// CodingKeys convertFromSnakeCase (S3 ). fixture: contract/fixtures/ask.json.
/// Decoded body of DS `/search/ask`, limited to the fields the provider maps.
///
/// Keys are mapped with explicit `CodingKeys`, so a plain `JSONDecoder` is sufficient.
public struct AskResponse: Decodable, Sendable {
    /// Wire names for the snake_case keys.
    enum CodingKeys: String, CodingKey {
        case aiAnswer = "ai_answer"
        case citations
        case synthesisStatus = "synthesis_status"
        case synthesisMs = "synthesis_ms"
        case confidence
        case backendUsed = "backend_used"
        case refused
    }

    public let aiAnswer: String
    public let citations: [AskCitation]
    public let synthesisStatus: String
    public let synthesisMs: Double?
    public let confidence: String?
    public let backendUsed: String?
    public let refused: Bool?

    /// Memberwise initialiser; every optional field defaults to `nil`.
    public init(
        aiAnswer: String,
        citations: [AskCitation],
        synthesisStatus: String,
        synthesisMs: Double? = nil,
        confidence: String? = nil,
        backendUsed: String? = nil,
        refused: Bool? = nil
    ) {
        self.aiAnswer = aiAnswer
        self.citations = citations
        self.synthesisStatus = synthesisStatus
        self.synthesisMs = synthesisMs
        self.confidence = confidence
        self.backendUsed = backendUsed
        self.refused = refused
    }
}
/// One citation entry of a DS `/search/ask` response.
public struct AskCitation: Decodable, Sendable {
    /// Wire names for the snake_case keys.
    enum CodingKeys: String, CodingKey {
        case n
        case docId = "doc_id"
        case title
        case sectionTitle = "section_title"
        case spanText = "span_text"
    }

    public let n: Int
    public let docId: Int
    public let title: String?
    public let sectionTitle: String?
    public let spanText: String

    /// Memberwise initialiser.
    public init(
        n: Int,
        docId: Int,
        title: String?,
        sectionTitle: String?,
        spanText: String
    ) {
        self.n = n
        self.docId = docId
        self.title = title
        self.sectionTitle = sectionTitle
        self.spanText = spanText
    }
}
// MARK: - Provider
public struct RemoteDSProvider: AIProvider {
public let id: AIProviderID = .remoteDS
private let client: DSAskClient
public init() {}
public init(client: DSAskClient) {
self.client = client
}
/// ( ). complete .
public var isAvailable: Bool {
get async { true } // ( complete backendError).
get async { true }
}
public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
@@ -24,14 +101,31 @@ public struct RemoteDSProvider: AIProvider {
guard request.task == .corpusAsk else {
throw AIProviderError.notImplemented(id)
}
// S2: DS API client.ask(q:) AskResponse .
// let r = try await dsClient.ask(q: request.prompt, backend: mappedBackend(request.explicitProvider))
// return Self.map(r)
throw AIProviderError.notImplemented(id)
try Task.checkCancellation()
let backend = Self.dsBackend(for: request.explicitProvider)
// HTTP (503 ) client throw ( ).
let response = try await client.ask(query: request.prompt, backend: backend)
return Self.map(response)
}
// MARK: (AI-ROUTING.md §4, )
/// Converts a DS ask response into an `AICompletionResponse` attributed to `.remoteDS`.
///
/// The answer text, citations and synthesis time are carried over as-is; the synthesis
/// status selects the finish reason; a confidence string that is not a known
/// `AIConfidence` raw value becomes `nil`; the backend DS reports is exposed as the
/// routing note.
static func map(_ r: AskResponse) -> AICompletionResponse {
    var mappedCitations: [AICitation] = []
    mappedCitations.reserveCapacity(r.citations.count)
    for source in r.citations {
        mappedCitations.append(
            AICitation(
                n: source.n,
                docId: source.docId,
                title: source.title,
                sectionTitle: source.sectionTitle,
                spanText: source.spanText
            )
        )
    }

    let reason = finishReason(fromSynthesisStatus: r.synthesisStatus)
    let confidence = r.confidence.flatMap { AIConfidence(rawValue: $0) }

    return AICompletionResponse(
        text: r.aiAnswer,
        providerUsed: .remoteDS,
        finishReason: reason,
        citations: mappedCitations,
        confidence: confidence,
        latencyMs: r.synthesisMs,
        routingNote: r.backendUsed
    )
}
/// AskResponse(JSON) AICompletionResponse (). S2 .
/// S3 DS client .
static func finishReason(fromSynthesisStatus status: String) -> AIFinishReason {
switch status {
case "completed": return .completed
@@ -41,4 +135,21 @@ public struct RemoteDSProvider: AIProvider {
default: return .refused
}
}
/// Chooses the DS `backend` parameter for an optional explicit provider.
///
/// Deliberately an exhaustive `switch` with no `default`, so adding an `AIProviderID` case
/// fails to compile until its backend is decided here.
static func dsBackend(for explicit: AIProviderID?) -> String {
    switch explicit {
    case .none:
        return "mac-mini-default" // no explicit provider: DS default backend
    case .some(.localMLX):
        return "gemma-macmini"
    case .some(.remoteDS), .some(.onDevice), .some(.specialized):
        return "mac-mini-default" // no dedicated DS backend for these: use the default
    }
    // TODO(qwen-macbook): once AIProviderID grows a case for 'qwen-macbook' (M5 Max Qwen VLM),
    // the exhaustive switch above forces its backend to be chosen (S2-1b b).
    // TODO(claude-cloud): the 'claude-cloud' backend currently answers 503 from DS
    // (scaffold, S2-4b); add its case here when it goes live.
}
}
@@ -0,0 +1,27 @@
// SpecializedProvider.swift S2 scaffold (GPU : rerank / embed / vision / OCR).
//
// PR = **scaffold-only**: isAvailable=false, completenotImplemented(.specialized). HTTP client/API key/cost = 0.
// : .vision [.specialized, .onDevice] specialized **dict continue** ,
// - provider ****( 'specialized unavailable onDevice' log).
//
// vision = backend + fixture ** PR**( #1):
// = MacBook M5 Max Qwen VLM http://100.118.112.84:8810 (OpenAI , wake-on-call) VLM.
// GPU Ollama embedding(bge-m3) vision .
import Foundation
/// Scaffold for the specialized provider: registered with the router but never available.
public struct SpecializedProvider: AIProvider {
    public let id: AIProviderID = .specialized

    public init() {}

    /// Always `false` while this remains a scaffold.
    public var isAvailable: Bool {
        get async {
            return false
        }
    }

    /// Always throws `AIProviderError.notImplemented`.
    // TODO(vision): wire `.specialized` to the M5 Max Qwen VLM (:8810, wake-on-call) together
    // with its call-shape and fixture in a follow-up PR.
    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        throw AIProviderError.notImplemented(id)
    }
}
+1 -1
View File
@@ -1,5 +1,5 @@
import SwiftUI
import AI
import AIFabric
public extension AIProviderID {
var displayName: String {
+1 -1
View File
@@ -1,5 +1,5 @@
import Foundation
import AI
import AIFabric
/// Renderable failure (the UI never sees a raw AIRoutingError that would break the
/// "visible error, not silent fallback" contract).
+1 -1
View File
@@ -1,5 +1,5 @@
import Foundation
import AI
import AIFabric
/// The ONE composition touch-point that names MockAIProvider. When S2 ships real providers,
/// only this file changes (mockProviders -> realProviders) AIService, views, and intents stay put.
+1 -1
View File
@@ -1,5 +1,5 @@
import SwiftUI
import AI
import AIFabric
/// RAG proof page: routes corpusAsk through AIService (-> AIRouter -> MockAIProvider). Explicit backend
/// pick sets explicitProvider; an explicit-unavailable result renders a visible, non-retrying error.
+2 -2
View File
@@ -1,7 +1,7 @@
import SwiftUI
import Observation
import DSKit
import AI
import AIFabric
/// The single app-state store driving the 3-pane shell. @MainActor @Observable: mutations are
/// main-isolated; the DSClient returns Sendable models; AIService is an actor.
@@ -35,7 +35,7 @@ public final class AppModel {
public var searchResponse: SearchResponse?
public var askQuery: String = ""
public var askResult: AIResult?
public var askMeta: AskResponse?
public var askMeta: DSKit.AskResponse? // qualified: AIFabric also defines an AskResponse
public var memoList: [MemoResponse] = []
public var memoDetail: MemoResponse?
public var digest: DigestResponse?
+84
View File
@@ -0,0 +1,84 @@
import XCTest
@testable import AIFabric
/// Routing smoke tests (S2-Fc, AI-ROUTING.md §3) run entirely against mock providers.
/// They check which provider answers, the routing note, and the error raised.
final class AIRouterSmokeTests: XCTestCase {

    // MARK: Helpers

    private func askFixture() throws -> AskResponse {
        try Fixture.decode(AskResponse.self, from: "ask.json")
    }

    /// Router with onDevice, localMLX and remoteDS working, plus the specialized scaffold.
    private func healthyRouter(log: @escaping @Sendable (String) -> Void = { _ in }) throws -> AIRouter {
        let dsClient = MockDSAskClient(response: try askFixture())
        let registry: [AIProviderID: any AIProvider] = [
            .onDevice: EchoProvider(id: .onDevice),
            .localMLX: EchoProvider(id: .localMLX),
            .remoteDS: RemoteDSProvider(client: dsClient),
            .specialized: SpecializedProvider(),
        ]
        return AIRouter(providers: registry, log: log)
    }

    /// Router holding only onDevice and localMLX echo providers with the given availability.
    private func echoRouter(onDeviceUp: Bool, localMLXUp: Bool) -> AIRouter {
        AIRouter(providers: [
            .onDevice: EchoProvider(id: .onDevice, available: onDeviceUp),
            .localMLX: EchoProvider(id: .localMLX, available: localMLXUp),
        ])
    }

    // MARK: Default routing

    func testQuickSummarizeToOnDevice() async throws {
        let request = AICompletionRequest(task: .quickSummarize, prompt: "p")
        let answer = try await healthyRouter().route(request)
        XCTAssertEqual(answer.providerUsed, .onDevice)
        XCTAssertNil(answer.routingNote)
    }

    func testCorpusAskToRemoteDSWithCitations() async throws {
        let request = AICompletionRequest(task: .corpusAsk, prompt: "p")
        let answer = try await healthyRouter().route(request)
        XCTAssertEqual(answer.providerUsed, .remoteDS)
        XCTAssertEqual(answer.citations.count, 1)
    }

    func testClassifyToLocalMLX() async throws {
        let request = AICompletionRequest(task: .classify, prompt: "p")
        let answer = try await healthyRouter().route(request)
        XCTAssertEqual(answer.providerUsed, .localMLX)
        XCTAssertNil(answer.routingNote)
    }

    // MARK: Fallback and errors

    func testVisionSpecializedUnavailableFallsToOnDeviceVisibly() async throws {
        let sink = LogSink()
        let router = try healthyRouter(log: { sink.append($0) })
        let answer = try await router.route(AICompletionRequest(task: .vision, prompt: "p"))
        XCTAssertEqual(answer.providerUsed, .onDevice)
        XCTAssertEqual(answer.routingNote, "fallback from specialized → onDevice")
        XCTAssertTrue(sink.lines.contains { $0.contains("specialized") && $0.contains("unavailable") })
    }

    func testExplicitOnDeviceUnavailableErrorsNoFallback() async throws {
        let router = echoRouter(onDeviceUp: false, localMLXUp: true)
        let request = AICompletionRequest(task: .quickSummarize, prompt: "p", explicitProvider: .onDevice)
        do {
            _ = try await router.route(request)
            XCTFail("explicit onDevice 불가 → 에러(자동 fallback X)")
        } catch let AIRoutingError.explicitProviderUnavailable(provider) {
            XCTAssertEqual(provider, .onDevice)
        }
    }

    func testRuleFallbackOnDeviceDownToLocalMLXWithNote() async throws {
        let router = echoRouter(onDeviceUp: false, localMLXUp: true)
        let answer = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
        XCTAssertEqual(answer.providerUsed, .localMLX)
        XCTAssertEqual(answer.routingNote, "fallback from onDevice → localMLX")
    }

    func testAllUnavailableYieldsNoProviderAvailable() async throws {
        let router = echoRouter(onDeviceUp: false, localMLXUp: false)
        do {
            _ = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
            XCTFail("전부 불가 → noProviderAvailable")
        } catch let AIRoutingError.noProviderAvailable(failedTask) {
            XCTAssertEqual(failedTask, .quickSummarize)
        }
    }
}
+66
View File
@@ -0,0 +1,66 @@
import XCTest
@testable import AIFabric
/// Covers configuration resolution (S2-Fa), default router composition (S2-Fb) and
/// cancellation propagation through the router (S2-Fe).
final class CompositionTests: XCTestCase {

    /// Mock DS client answering with the canonical `ask.json` fixture.
    private func fixtureClient() throws -> MockDSAskClient {
        MockDSAskClient(response: try Fixture.decode(AskResponse.self, from: "ask.json"))
    }

    // MARK: S2-Fa — configuration

    func testConfigDefaults() {
        let config = AIProviderConfiguration.resolved(environment: [:])
        XCTAssertEqual(config.localMLXBaseURL.absoluteString, "http://100.76.254.116:8890")
        XCTAssertEqual(config.localMLXModel, "gemma-macmini")
        XCTAssertEqual(config.dsBaseURL.absoluteString, "https://document.hyungi.net/api")
        XCTAssertEqual(config.probeTimeout, 2)
    }

    func testConfigEnvOverride() {
        let overrides = [
            "AIFABRIC_LOCALMLX_URL": "http://127.0.0.1:9999",
            "AIFABRIC_LOCALMLX_MODEL": "test-model",
            "AIFABRIC_DS_URL": "http://100.110.63.63:8000/api",
        ]
        let config = AIProviderConfiguration.resolved(environment: overrides)
        XCTAssertEqual(config.localMLXBaseURL.absoluteString, "http://127.0.0.1:9999")
        XCTAssertEqual(config.localMLXModel, "test-model")
        XCTAssertEqual(config.dsBaseURL.absoluteString, "http://100.110.63.63:8000/api")
    }

    // MARK: S2-Fb — composition registers all four providers

    func testMakeDefaultRouterRegistersAllFour() async throws {
        let router = makeDefaultRouter(
            client: try fixtureClient(),
            session: MockURLProtocol.session(),
            log: { _ in }
        )
        XCTAssertEqual(Set(router.providers.keys), Set(AIProviderID.allCases))

        // corpusAsk is answered by RemoteDS and carries the fixture's citation.
        let answer = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "p"))
        XCTAssertEqual(answer.providerUsed, .remoteDS)
        XCTAssertEqual(answer.citations.count, 1)
    }

    func testMakeDefaultRouterVisionFallbackVisible() async throws {
        let sink = LogSink()
        let router = makeDefaultRouter(
            client: try fixtureClient(),
            session: MockURLProtocol.session(),
            log: { sink.append($0) }
        )
        // The routing result is ignored on purpose: only the log line about the unavailable
        // specialized provider is asserted.
        _ = try? await router.route(AICompletionRequest(task: .vision, prompt: "p"))
        let sawSpecializedUnavailable = sink.lines.contains {
            $0.contains("specialized") && $0.contains("unavailable")
        }
        XCTAssertTrue(sawSpecializedUnavailable, "specialized 불가가 침묵 아닌 log 로 가시화")
    }

    // MARK: S2-Fe — cancellation

    func testCancellationPropagatesThroughRouter() async throws {
        let router = AIRouter(providers: [.localMLX: SleepingProvider(id: .localMLX)])
        let inFlight = Task {
            try await router.route(AICompletionRequest(task: .classify, prompt: "p"))
        }
        // Give the routed call time to start before cancelling it.
        try? await Task.sleep(nanoseconds: 100_000_000)
        inFlight.cancel()

        do {
            _ = try await inFlight.value
            XCTFail("취소된 생성은 CancellationError 전파")
        } catch is CancellationError {
            // Expected: the cancellation surfaced unchanged through the router.
        }
    }
}
+26
View File
@@ -0,0 +1,26 @@
// FixtureSupport.swift canonical fixture (contract/fixtures/ #filePath ).
//
// repo `contract/fixtures/` (S1 ask.json + S2
// foundationmodels-respond / llm-router-chat).
// #filePath repo canonical .
import Foundation
/// Locates and loads canonical fixtures from `<repo root>/contract/fixtures`.
enum Fixture {
    /// Repository root, found by walking three levels up from this source file
    /// (`<root>/Tests/AITests/FixtureSupport.swift`).
    static let repoRoot: URL = {
        var location = URL(fileURLWithPath: #filePath)
        for _ in 0..<3 {
            location = location.deletingLastPathComponent()
        }
        return location
    }()

    /// URL of the fixture called `name`.
    static func url(_ name: String) -> URL {
        let fixturesDirectory = repoRoot.appendingPathComponent("contract/fixtures")
        return fixturesDirectory.appendingPathComponent(name)
    }

    /// Raw bytes of the fixture called `name`.
    /// - Throws: Any error from reading the file.
    static func data(_ name: String) throws -> Data {
        let location = url(name)
        return try Data(contentsOf: location)
    }

    /// Decodes the fixture called `name` as `type`.
    /// - Throws: File-read errors, or decoding errors from `decoder`.
    static func decode<T: Decodable>(_ type: T.Type, from name: String, using decoder: JSONDecoder = JSONDecoder()) throws -> T {
        let payload = try data(name)
        return try decoder.decode(type, from: payload)
    }
}
+18
View File
@@ -0,0 +1,18 @@
import XCTest
@testable import AIFabric
/// Smoke tests: the frozen AIFabric types are visible to the test target and the canonical
/// `ask.json` fixture can be read.
final class HarnessSmokeTests: XCTestCase {
    /// The provider-ID set and the default routing policy are reachable through the AIFabric module.
    func testFrozenTypesVisible() {
        let providerIDs = AIProviderID.allCases
        XCTAssertEqual(providerIDs.count, 4)

        let corpusChain = AIRoutingPolicy.default.chain(for: .corpusAsk)
        XCTAssertEqual(corpusChain, [.remoteDS])
    }

    /// `contract/fixtures/ask.json` can be loaded through `Fixture` and is not empty.
    func testAskFixtureReadable() throws {
        let byteCount = try Fixture.data("ask.json").count
        XCTAssertGreaterThan(byteCount, 0)
    }
}
+149
View File
@@ -0,0 +1,149 @@
import XCTest
@testable import AIFabric
/// Tests for `LocalMLXProvider` against a stubbed `URLSession` (`MockURLProtocol`): availability
/// probe, response mapping, request call shape, error surfacing, and router fallback behavior.
final class LocalMLXProviderTests: XCTestCase {
private let baseURL = URL(string: "http://100.76.254.116:8890")!
// Clear the shared MockURLProtocol handler/recorder so tests do not leak state into each other.
override func tearDown() {
MockURLProtocol.reset()
super.tearDown()
}
/// Builds a provider wired to the stubbed session.
private func provider() -> LocalMLXProvider {
LocalMLXProvider(baseURL: baseURL, model: "gemma-macmini", session: MockURLProtocol.session())
}
// MARK: isAvailable probe
func testProbeAvailable() async throws {
MockURLProtocol.handler = { req in
MockURLProtocol.ok(req.url!, json: Data(#"{"data":[{"id":"gemma-macmini"}]}"#.utf8))
}
let available = await provider().isAvailable
XCTAssertTrue(available)
// The probe must be a GET to /v1/models.
XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/models")
XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "GET")
}
func testProbeUnavailableOnError() async throws {
MockURLProtocol.handler = { _ in throw URLError(.cannotConnectToHost) }
let available = await provider().isAvailable
XCTAssertFalse(available) // a transport error yields false rather than a throw
}
func testProbeUnavailableOn500() async throws {
MockURLProtocol.handler = { req in MockURLProtocol.status(req.url!, 500) }
let available = await provider().isAvailable
XCTAssertFalse(available)
}
// MARK: complete + call-shape
func testCompleteMapsResponseFixture() async throws {
let body = try Fixture.data("llm-router-chat.response.json")
MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) }
let resp = try await provider().complete(
AICompletionRequest(task: .quickSummarize, prompt: "충격시험 면제 기준을 한 문장으로 요약해줘.",
systemPrompt: "You are a concise technical assistant.", maxTokens: 512)
)
XCTAssertEqual(resp.providerUsed, .localMLX)
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertTrue(resp.citations.isEmpty)
XCTAssertNotNil(resp.latencyMs)
XCTAssertTrue(resp.text.contains("UCS-66"))
}
func testCompleteRequestCallShape() async throws {
let body = try Fixture.data("llm-router-chat.response.json")
MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) }
_ = try await provider().complete(
AICompletionRequest(task: .quickSummarize, prompt: "PROMPT_X",
systemPrompt: "SYS_Y", maxTokens: 512)
)
// The completion call must be a POST to /v1/chat/completions.
XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/chat/completions")
XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "POST")
// messages must be split into [system, user] — this call shape is load-bearing.
let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody)
let decoded = try JSONDecoder().decode(SentRequest.self, from: sent)
XCTAssertEqual(decoded.model, "gemma-macmini")
XCTAssertEqual(decoded.maxTokens, 512)
XCTAssertEqual(decoded.stream, false)
XCTAssertEqual(decoded.messages.count, 2)
XCTAssertEqual(decoded.messages[0].role, "system")
XCTAssertEqual(decoded.messages[0].content, "SYS_Y")
XCTAssertEqual(decoded.messages[1].role, "user")
XCTAssertEqual(decoded.messages[1].content, "PROMPT_X")
}
func testNilSystemPromptSendsEmptySystemMessage() async throws {
let body = try Fixture.data("llm-router-chat.response.json")
MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) }
_ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P"))
let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody)
let decoded = try JSONDecoder().decode(SentRequest.self, from: sent)
XCTAssertEqual(decoded.messages[0].role, "system")
XCTAssertEqual(decoded.messages[0].content, "") // plan S2-2c: systemPrompt ?? ""
}
func testNon200BackendError() async throws {
MockURLProtocol.handler = { req in MockURLProtocol.status(req.url!, 503, body: "model loading") }
do {
_ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P"))
XCTFail("non-200 must throw backendError, not silent empty text")
} catch let AIProviderError.backendError(id, status, reason) {
XCTAssertEqual(id, .localMLX)
XCTAssertEqual(status, 503)
XCTAssertEqual(reason, "model loading")
}
}
func testRequestFixtureMatchesEncoder() throws {
// Checks that the request fixture has the [system, user], stream == false call shape.
// NOTE(review): despite the test name, only the fixture is decoded here — no encoder output
// is compared against it.
let fixtureData = try Fixture.data("llm-router-chat.request.json")
let fixture = try JSONDecoder().decode(SentRequest.self, from: fixtureData)
XCTAssertEqual(fixture.messages.count, 2)
XCTAssertEqual(fixture.messages[0].role, "system")
XCTAssertEqual(fixture.messages[1].role, "user")
XCTAssertEqual(fixture.stream, false)
}
// MARK: rule-fallback (S2-2d): onDevice → localMLX
func testFallbackFromOnDeviceToLocalMLX() async throws {
let body = try Fixture.data("llm-router-chat.response.json")
MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) }
let router = AIRouter(providers: [
.onDevice: MockAIProvider(id: .onDevice, available: false), // unavailable, so the router must move on
.localMLX: provider(),
])
let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "P"))
XCTAssertEqual(resp.providerUsed, .localMLX)
XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX")
}
func testNoFallbackNoteOnFirstChoiceSuccess() async throws {
let body = try Fixture.data("llm-router-chat.response.json")
MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) }
// classify chain = [.localMLX, .remoteDS, .onDevice]; the first choice (localMLX) succeeds,
// so routingNote must be nil.
let router = AIRouter(providers: [.localMLX: provider()])
let resp = try await router.route(AICompletionRequest(task: .classify, prompt: "P"))
XCTAssertEqual(resp.providerUsed, .localMLX)
XCTAssertNil(resp.routingNote)
}
/// Decodable mirror of the JSON request body, used only by these tests to inspect what was sent
/// (`max_tokens` is mapped to `maxTokens`).
struct SentRequest: Decodable {
struct Message: Decodable { let role: String; let content: String }
let model: String
let messages: [Message]
let maxTokens: Int?
let stream: Bool
enum CodingKeys: String, CodingKey { case model, messages, stream; case maxTokens = "max_tokens" }
}
}
+86
View File
@@ -0,0 +1,86 @@
import Foundation
/// `URLProtocol` stub that lets a `URLSession` answer with canned responses or errors, and records
/// each request it sees so tests can assert on the call shape.
final class MockURLProtocol: URLProtocol {
    /// Maps a request to `(response, body)`. If it throws, the load fails with that error.
    nonisolated(unsafe) static var handler: (@Sendable (URLRequest) throws -> (HTTPURLResponse, Data))?
    /// Records the most recent request (URL, method, body) and the number of requests seen.
    nonisolated(unsafe) static var recorder = RequestRecorder()

    /// Drops the handler and starts over with a fresh recorder.
    static func reset() {
        recorder = RequestRecorder()
        handler = nil
    }

    // Intercept every request routed through a session that registers this protocol.
    override class func canInit(with request: URLRequest) -> Bool {
        return true
    }

    override class func canonicalRequest(for request: URLRequest) -> URLRequest {
        return request
    }

    /// Records the request, then replays the handler's canned answer to the URL loading client.
    /// With no handler installed the load fails with `URLError(.unsupportedURL)`.
    override func startLoading() {
        Self.recorder.record(request)
        if let respond = Self.handler {
            do {
                let canned = try respond(request)
                client?.urlProtocol(self, didReceive: canned.0, cacheStoragePolicy: .notAllowed)
                client?.urlProtocol(self, didLoad: canned.1)
                client?.urlProtocolDidFinishLoading(self)
            } catch {
                client?.urlProtocol(self, didFailWithError: error)
            }
        } else {
            client?.urlProtocol(self, didFailWithError: URLError(.unsupportedURL))
        }
    }

    // Nothing to cancel: startLoading answers synchronously.
    override func stopLoading() {}

    // MARK: helpers

    /// An ephemeral session whose only protocol class is this stub.
    static func session() -> URLSession {
        let configuration = URLSessionConfiguration.ephemeral
        configuration.protocolClasses = [MockURLProtocol.self]
        return URLSession(configuration: configuration)
    }

    /// A 200 response for `url` carrying `json` as its body.
    static func ok(_ url: URL, json: Data) -> (HTTPURLResponse, Data) {
        let response = HTTPURLResponse(url: url, statusCode: 200, httpVersion: nil, headerFields: nil)!
        return (response, json)
    }

    /// A response for `url` with status `code` and `body` encoded as UTF-8.
    static func status(_ url: URL, _ code: Int, body: String = "") -> (HTTPURLResponse, Data) {
        let response = HTTPURLResponse(url: url, statusCode: code, httpVersion: nil, headerFields: nil)!
        return (response, Data(body.utf8))
    }
}
/// Lock-protected record of the most recent request seen by `MockURLProtocol`.
///
/// `record(_:)` is called from `MockURLProtocol.startLoading()`, while tests read the `last…`
/// values and `callCount` afterwards. All stored state is accessed under `lock`, so the
/// `@unchecked Sendable` conformance is backed by actual synchronization.
///
/// The body is captured through `URLRequest.bodyData`, which also handles requests whose body
/// is exposed as `httpBodyStream` instead of `httpBody`.
final class RequestRecorder: @unchecked Sendable {
    private let lock = NSLock()
    private var storedURL: URL?
    private var storedMethod: String?
    private var storedBody: Data?
    private var storedCallCount = 0

    /// URL of the most recently recorded request, or `nil` if none was recorded.
    var lastURL: URL? { locked { storedURL } }
    /// HTTP method of the most recently recorded request.
    var lastMethod: String? { locked { storedMethod } }
    /// Body bytes of the most recently recorded request.
    var lastBody: Data? { locked { storedBody } }
    /// Number of requests recorded so far.
    var callCount: Int { locked { storedCallCount } }

    /// Stores `request`'s URL, method and body as the latest request and bumps `callCount`.
    func record(_ request: URLRequest) {
        // Read the body before taking the lock: `bodyData` may have to drain an input stream.
        let body = request.bodyData
        lock.lock()
        defer { lock.unlock() }
        storedCallCount += 1
        storedURL = request.url
        storedMethod = request.httpMethod
        storedBody = body
    }

    /// Runs `read` while holding `lock` and returns its result.
    private func locked<T>(_ read: () -> T) -> T {
        lock.lock()
        defer { lock.unlock() }
        return read()
    }
}
extension URLRequest {
    /// The request body as `Data`: `httpBody` when it is set, otherwise the fully drained
    /// `httpBodyStream`, otherwise `nil`. Needed because a `URLProtocol` may see the body only
    /// as a stream.
    var bodyData: Data? {
        if let inlineBody = httpBody { return inlineBody }
        guard let bodyStream = httpBodyStream else { return nil }

        bodyStream.open()
        defer { bodyStream.close() }

        let chunkSize = 8192
        var chunk = [UInt8](repeating: 0, count: chunkSize)
        var collected = Data()
        while bodyStream.hasBytesAvailable {
            let byteCount = bodyStream.read(&chunk, maxLength: chunkSize)
            // 0 = end of stream, negative = read error; stop and return what was collected.
            guard byteCount > 0 else { break }
            collected.append(contentsOf: chunk[..<byteCount])
        }
        return collected
    }
}
+201
View File
@@ -0,0 +1,201 @@
import XCTest
@testable import AIFabric
#if canImport(FoundationModels)
import FoundationModels
#endif
/// Hardware-free stand-in for the on-device model backend: reports a fixed availability and
/// answers `generate` with a fixed outcome (text or error).
struct MockOnDeviceBackend: OnDeviceModelBackend {
    let avail: OnDeviceAvailability
    let outcome: Result<String, OnDeviceGenerationError>

    /// - Parameters:
    ///   - avail: availability to report; `.available` by default.
    ///   - outcome: result every `generate` call yields; success with `"on-device ok"` by default.
    init(avail: OnDeviceAvailability = .available,
         outcome: Result<String, OnDeviceGenerationError> = .success("on-device ok")) {
        self.avail = avail
        self.outcome = outcome
    }

    var availability: OnDeviceAvailability {
        avail
    }

    /// Ignores its arguments and returns the canned text, or throws the canned error.
    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        try outcome.get()
    }
}
/// Provider that counts how many times `complete(_:)` is called, so tests can assert that it was
/// (or was not) invoked.
actor CountingProvider: AIProvider {
    nonisolated let id: AIProviderID
    let available: Bool
    /// Number of `complete(_:)` calls received so far.
    private(set) var completeCalls = 0

    init(id: AIProviderID, available: Bool) {
        self.id = id
        self.available = available
    }

    var isAvailable: Bool {
        get async { available }
    }

    /// Bumps the call counter and returns a fixed response attributed to this provider's `id`.
    func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        let response = AICompletionResponse(text: "should-not-be-called", providerUsed: id)
        completeCalls += 1
        return response
    }
}
/// Lock-protected collector of log lines, usable as the target of a `log:` closure in tests.
final class LogSink: @unchecked Sendable {
    private let lock = NSLock()
    private var storage: [String] = []

    /// Appends one log line.
    func append(_ line: String) {
        lock.lock()
        defer { lock.unlock() }
        storage.append(line)
    }

    /// Snapshot of all lines appended so far, in insertion order.
    var lines: [String] {
        lock.lock()
        let snapshot = storage
        lock.unlock()
        return snapshot
    }
}
/// Tests for `OnDeviceProvider`: availability, mapping of generation errors to finish reasons or
/// thrown errors, router integration, and two live tests that skip when the model is unavailable.
final class OnDeviceProviderTests: XCTestCase {
// MARK: availability + happy path (mock backend)
func testAvailableReturnsText() async throws {
let p = OnDeviceProvider(backend: MockOnDeviceBackend(avail: .available, outcome: .success("요약 결과")))
let available = await p.isAvailable
XCTAssertTrue(available)
let resp = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTAssertEqual(resp.providerUsed, .onDevice)
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertEqual(resp.text, "요약 결과")
XCTAssertNotNil(resp.latencyMs)
}
func testUnavailableReportsFalse() async throws {
let p = OnDeviceProvider(backend: MockOnDeviceBackend(avail: .unavailable(reason: "appleIntelligenceNotEnabled")))
let available = await p.isAvailable
XCTAssertFalse(available)
}
// MARK: GenerationError mapping (S2-3c)
/// Guardrail violations and refusals come back as a normal response with `.refused`, not a throw.
func testGuardrailAndRefusalReturnRefused() async throws {
for err in [OnDeviceGenerationError.guardrailViolation, .refusal] {
let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(err)))
let resp = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTAssertEqual(resp.finishReason, .refused, "\(err) → .refused (답변의 일종, 폴백 X)")
XCTAssertEqual(resp.providerUsed, .onDevice)
}
}
/// `rateLimited` must throw `unavailable(.onDevice)` and leave a log line naming the error.
func testRateLimitedThrowsUnavailableAndLoudLogs() async throws {
let sink = LogSink()
let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.rateLimited)),
log: { sink.append($0) })
do {
_ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTFail("rateLimited → throw unavailable")
} catch let AIProviderError.unavailable(id) {
XCTAssertEqual(id, .onDevice)
}
XCTAssertTrue(sink.lines.contains { $0.contains("rateLimited") }, "stateless 위반은 loud log")
}
/// `concurrentRequests` must throw `unavailable(.onDevice)` and leave a log line naming the error.
func testConcurrentRequestsThrowsUnavailableAndLoudLogs() async throws {
let sink = LogSink()
let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.concurrentRequests)),
log: { sink.append($0) })
do {
_ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTFail("concurrentRequests → throw unavailable")
} catch let AIProviderError.unavailable(id) {
XCTAssertEqual(id, .onDevice)
}
XCTAssertTrue(sink.lines.contains { $0.contains("concurrentRequests") })
}
/// A context-window overflow must throw `unavailable(.onDevice)`.
func testContextOverflowThrowsUnavailable() async throws {
let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.exceededContextWindowSize)))
do {
_ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTFail("exceededContextWindowSize → throw unavailable (폴백 유도)")
} catch let AIProviderError.unavailable(id) {
XCTAssertEqual(id, .onDevice)
}
}
// MARK: router integration (S2-3d)
/// When onDevice overflows its context window, the router falls back to localMLX and says so
/// in `routingNote`.
func testRouterFallsBackOnDeviceOverflowToLocalMLX() async throws {
let router = AIRouter(providers: [
.onDevice: OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.exceededContextWindowSize))),
.localMLX: MockAIProvider(id: .localMLX, available: true),
])
let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
XCTAssertEqual(resp.providerUsed, .localMLX)
XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX")
}
/// An explicitly requested but unavailable onDevice provider must throw
/// `explicitProviderUnavailable` and must not call any other provider's `complete`.
func testExplicitOnDeviceUnavailableNoFallback() async throws {
let counting = CountingProvider(id: .localMLX, available: true)
let router = AIRouter(providers: [
.onDevice: OnDeviceProvider(backend: MockOnDeviceBackend(avail: .unavailable(reason: "deviceNotEligible"))),
.localMLX: counting,
])
do {
_ = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p", explicitProvider: .onDevice))
XCTFail("explicit onDevice unavailable → explicitProviderUnavailable, 자동 폴백 금지")
} catch let AIRoutingError.explicitProviderUnavailable(id) {
XCTAssertEqual(id, .onDevice)
}
let calls = await counting.completeCalls
XCTAssertEqual(calls, 0, "명시 불가 시 타 provider complete() 호출 0")
}
// MARK: SDK GenerationError translation lock (compiled only when FoundationModels is importable)
#if canImport(FoundationModels)
func testTranslateGenerationErrorCases() {
let ctx = LanguageModelSession.GenerationError.Context(debugDescription: "test")
XCTAssertEqual(FoundationModelsBackend.translate(.exceededContextWindowSize(ctx)), .exceededContextWindowSize)
XCTAssertEqual(FoundationModelsBackend.translate(.guardrailViolation(ctx)), .guardrailViolation)
XCTAssertEqual(FoundationModelsBackend.translate(.rateLimited(ctx)), .rateLimited)
XCTAssertEqual(FoundationModelsBackend.translate(.concurrentRequests(ctx)), .concurrentRequests)
XCTAssertEqual(FoundationModelsBackend.translate(.unsupportedLanguageOrLocale(ctx)), .unsupportedLanguageOrLocale)
XCTAssertEqual(FoundationModelsBackend.translate(.assetsUnavailable(ctx)), .assetsUnavailable)
}
#endif
// MARK: live tests (skipped when the provider reports itself unavailable on this machine)
func testLiveOnDeviceIntegration() async throws {
let p = OnDeviceProvider() // default initializer; NOTE(review): presumably the real FoundationModels backend — confirm
guard await p.isAvailable else {
throw XCTSkip("FoundationModels not available on this machine — live test skipped")
}
let resp = try await p.complete(
AICompletionRequest(task: .quickSummarize,
prompt: "엘보 내경 가공의 핵심 관리 포인트를 한 문장으로 요약해줘.",
maxTokens: 120)
)
XCTAssertEqual(resp.providerUsed, .onDevice)
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertFalse(resp.text.isEmpty, "라이브 응답은 비어있지 않아야")
}
/// Starts a long generation, cancels it after ~500 ms, and — if a `CancellationError` is
/// thrown — checks that it arrived within 8 seconds of the start.
func testLiveCancellationCooperative() async throws {
let p = OnDeviceProvider()
guard await p.isAvailable else {
throw XCTSkip("FoundationModels not available — cancellation live test skipped")
}
let started = Date()
let task = Task { () -> AIFinishReason in
let r = try await p.complete(
AICompletionRequest(task: .quickSummarize,
prompt: "대한민국 압력용기 산업과 ASME 표준 채택 역사를 아주 길고 자세하게 여러 단락으로 서술해줘.",
maxTokens: 4000)
)
return r.finishReason
}
try? await Task.sleep(nanoseconds: 500_000_000)
task.cancel()
do {
_ = try await task.value
// Finishing without a throw is also accepted here: nothing is asserted on this path.
} catch is CancellationError {
let elapsed = Date().timeIntervalSince(started)
XCTAssertLessThan(elapsed, 8.0, "협조적 취소면 빠르게 중단(S2-3a: ~33ms 후)")
}
}
}
+151
View File
@@ -0,0 +1,151 @@
import XCTest
@testable import AIFabric
/// Test double for the DS ask client: replays a canned `AskResponse` (typically decoded from the
/// `ask.json` fixture) or throws a canned error, and records the arguments of the last call so
/// tests can verify the call shape.
actor MockDSAskClient: DSAskClient {
    /// Thrown by `ask(query:backend:)` when the mock was configured with neither a response nor
    /// an error. Surfacing this as a thrown error gives a readable test failure instead of
    /// trapping the whole test process on a force-unwrap.
    struct MissingCannedResponse: Error {}

    let response: AskResponse?
    let error: Error?
    /// `backend` argument of the most recent `ask` call.
    private(set) var lastBackend: String?
    /// `query` argument of the most recent `ask` call.
    private(set) var lastQuery: String?
    /// Number of `ask` calls received so far.
    private(set) var callCount = 0

    /// - Parameters:
    ///   - response: value returned by `ask` when no `error` is set.
    ///   - error: when non-nil, `ask` throws it instead of returning `response`.
    init(response: AskResponse? = nil, error: Error? = nil) {
        self.response = response
        self.error = error
    }

    /// Records the call, then throws `error` if one is configured, otherwise returns `response`.
    /// - Throws: the configured `error`, or `MissingCannedResponse` when neither was configured.
    func ask(query: String, backend: String) async throws -> AskResponse {
        callCount += 1
        lastBackend = backend
        lastQuery = query
        if let error { throw error }
        guard let response else { throw MissingCannedResponse() }
        return response
    }
}
/// Tests for `RemoteDSProvider`: decoding and mapping of the `ask.json` fixture, the backend
/// string sent to the DS client, and router behavior for `corpusAsk` (remoteDS only, errors
/// surface instead of falling back).
final class RemoteDSProviderTests: XCTestCase {
/// Decodes the canonical `ask.json` fixture.
private func askFixture() throws -> AskResponse {
try Fixture.decode(AskResponse.self, from: "ask.json")
}
// MARK: ask.json decode + mapping
func testAskJsonDecodeAndMap() throws {
let r = try askFixture()
XCTAssertEqual(r.synthesisStatus, "completed")
XCTAssertEqual(r.confidence, "high")
XCTAssertEqual(r.backendUsed, "gemma-macmini")
XCTAssertEqual(r.citations.count, 1)
XCTAssertEqual(r.citations[0].docId, 4912)
XCTAssertEqual(r.citations[0].n, 1)
XCTAssertEqual(r.citations[0].sectionTitle, "2. UCS-66 면제 곡선")
let mapped = RemoteDSProvider.map(r)
XCTAssertEqual(mapped.providerUsed, .remoteDS)
XCTAssertEqual(mapped.finishReason, .completed)
XCTAssertEqual(mapped.citations.count, 1)
XCTAssertEqual(mapped.citations[0].docId, 4912)
XCTAssertEqual(mapped.confidence, .high)
XCTAssertEqual(mapped.routingNote, "gemma-macmini")
XCTAssertEqual(mapped.latencyMs, 2841.5)
XCTAssertEqual(mapped.text, r.aiAnswer)
}
func testCompleteMapsFixture() async throws {
let mock = MockDSAskClient(response: try askFixture())
let provider = RemoteDSProvider(client: mock)
let resp = try await provider.complete(
AICompletionRequest(task: .corpusAsk, prompt: "충격시험은 언제 면제되나")
)
XCTAssertEqual(resp.providerUsed, .remoteDS)
XCTAssertEqual(resp.citations.count, 1)
XCTAssertEqual(resp.finishReason, .completed)
XCTAssertEqual(resp.routingNote, "gemma-macmini")
}
// MARK: backend selection call shape
func testBackendCallShape_nilExplicit() async throws {
let mock = MockDSAskClient(response: try askFixture())
let provider = RemoteDSProvider(client: mock)
_ = try await provider.complete(AICompletionRequest(task: .corpusAsk, prompt: "q"))
let backend = await mock.lastBackend
XCTAssertEqual(backend, "mac-mini-default") // backend sent when no explicit provider is given
}
func testBackendCallShape_localMLXExplicit() async throws {
let mock = MockDSAskClient(response: try askFixture())
let provider = RemoteDSProvider(client: mock)
_ = try await provider.complete(
AICompletionRequest(task: .corpusAsk, prompt: "q", explicitProvider: .localMLX)
)
let backend = await mock.lastBackend
XCTAssertEqual(backend, "gemma-macmini")
}
/// Pins the pure mapping: only `.localMLX` selects "gemma-macmini"; every other input,
/// including nil, selects "mac-mini-default".
func testBackendMapPure() {
XCTAssertEqual(RemoteDSProvider.dsBackend(for: nil), "mac-mini-default")
XCTAssertEqual(RemoteDSProvider.dsBackend(for: .localMLX), "gemma-macmini")
XCTAssertEqual(RemoteDSProvider.dsBackend(for: .remoteDS), "mac-mini-default")
XCTAssertEqual(RemoteDSProvider.dsBackend(for: .onDevice), "mac-mini-default")
XCTAssertEqual(RemoteDSProvider.dsBackend(for: .specialized), "mac-mini-default")
}
func testNonCorpusTaskNotImplemented() async throws {
let mock = MockDSAskClient(response: try askFixture())
let provider = RemoteDSProvider(client: mock)
do {
_ = try await provider.complete(AICompletionRequest(task: .quickSummarize, prompt: "q"))
XCTFail("non-corpus task should not be served by RemoteDS")
} catch let AIProviderError.notImplemented(id) {
XCTAssertEqual(id, .remoteDS)
}
}
// MARK: corpusAsk routing (remoteDS only, no local fallback)
func testCorpusAskRoutesToRemoteDSOnly() async throws {
let router = AIRouter(providers: [
.remoteDS: RemoteDSProvider(client: MockDSAskClient(response: try askFixture())),
.onDevice: MockAIProvider(id: .onDevice, available: true), // available, yet must not serve corpusAsk
])
let resp = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "q"))
XCTAssertEqual(resp.providerUsed, .remoteDS)
XCTAssertEqual(resp.citations.count, 1)
}
func testCorpusAskRemoteDSDown_NoLocalFallback() async throws {
// When remoteDS fails, its error must surface to the caller (no fallback to onDevice).
struct Net: Error {}
let router = AIRouter(providers: [
.remoteDS: RemoteDSProvider(client: MockDSAskClient(error: Net())),
.onDevice: MockAIProvider(id: .onDevice, available: true),
])
do {
_ = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "q"))
XCTFail("corpusAsk must not fall back to onDevice")
} catch is Net {
// Expected: the remoteDS error propagates (corpusAsk chain = [.remoteDS] only).
}
}
// MARK: S2-4b cloud backend ('claude-cloud') 503 surfaces (no silent fallback)
func testCloud503Surfaces_NoSilentFallback() async throws {
let err = AIProviderError.backendError(.remoteDS, status: 503, reason: "cloud backend pending activation")
let router = AIRouter(providers: [
.remoteDS: RemoteDSProvider(client: MockDSAskClient(error: err)),
.onDevice: MockAIProvider(id: .onDevice, available: true),
])
do {
_ = try await router.route(
AICompletionRequest(task: .corpusAsk, prompt: "q", explicitProvider: .remoteDS)
)
XCTFail("503 must surface, not fall back")
} catch let AIProviderError.backendError(id, status, _) {
XCTAssertEqual(id, .remoteDS)
XCTAssertEqual(status, 503)
}
}
}
@@ -0,0 +1,39 @@
import XCTest
@testable import AIFabric
/// Tests for the `SpecializedProvider` scaffold and for how the router handles it on `.vision`.
final class SpecializedProviderTests: XCTestCase {
    /// The scaffold reports itself unavailable and `complete` throws `notImplemented(.specialized)`.
    func testScaffoldUnavailableAndNotImplemented() async throws {
        let scaffold = SpecializedProvider()

        let isUp = await scaffold.isAvailable
        XCTAssertFalse(isUp)

        do {
            _ = try await scaffold.complete(AICompletionRequest(task: .vision, prompt: "p"))
            XCTFail("scaffold must throw notImplemented")
        } catch let AIProviderError.notImplemented(id) {
            XCTAssertEqual(id, .specialized)
        }
    }

    /// With `specialized` and `onDevice` registered, a `.vision` request ends up served by
    /// onDevice, and the skip over specialized is visible in both `routingNote` and the log.
    /// A `CountingProvider` stands in for onDevice because it reports `providerUsed` as its own
    /// id and counts `complete` calls.
    func testVisionFallsBackToOnDeviceVisibly() async throws {
        let logSink = LogSink()
        let onDeviceCounter = CountingProvider(id: .onDevice, available: true)
        let router = AIRouter(
            providers: [
                .specialized: SpecializedProvider(),
                .onDevice: onDeviceCounter,
            ],
            log: { line in logSink.append(line) }
        )

        let response = try await router.route(AICompletionRequest(task: .vision, prompt: "도면 보기"))

        XCTAssertEqual(response.providerUsed, .onDevice)
        XCTAssertEqual(response.routingNote, "fallback from specialized → onDevice")

        let completeCallCount = await onDeviceCounter.completeCalls
        XCTAssertEqual(completeCallCount, 1)

        let unavailabilityLogged = logSink.lines.contains { line in
            line.contains("specialized") && line.contains("unavailable")
        }
        XCTAssertTrue(unavailabilityLogged, "specialized 불가가 침묵 아닌 log 로 가시화")
    }
}
+28
View File
@@ -0,0 +1,28 @@
import Foundation
@testable import AIFabric
/// Provider whose response reports `providerUsed` as its own `id` and whose text is
/// `"echo:<id raw value>"`.
struct EchoProvider: AIProvider {
    let id: AIProviderID
    let available: Bool

    /// - Parameters:
    ///   - id: identity reported back in every response.
    ///   - available: value returned by `isAvailable`; `true` by default.
    init(id: AIProviderID, available: Bool = true) {
        self.id = id
        self.available = available
    }

    var isAvailable: Bool {
        get async { available }
    }

    /// Ignores the request and echoes this provider's id.
    func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        let echoedText = "echo:\(id.rawValue)"
        return AICompletionResponse(text: echoedText, providerUsed: id)
    }
}
/// Always-available provider whose `complete` sleeps for five seconds before answering, so tests
/// can cancel the surrounding task mid-flight and observe the resulting `CancellationError`.
struct SleepingProvider: AIProvider {
    let id: AIProviderID

    init(id: AIProviderID = .localMLX) {
        self.id = id
    }

    var isAvailable: Bool {
        get async { true }
    }

    func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        let fiveSecondsInNanoseconds: UInt64 = 5_000_000_000
        // Task.sleep throws CancellationError if the task is cancelled while sleeping.
        try await Task.sleep(nanoseconds: fiveSecondsInNanoseconds)
        // Second check in case cancellation arrives right after the sleep returns.
        try Task.checkCancellation()
        return AICompletionResponse(text: "done", providerUsed: id)
    }
}
+1 -1
View File
@@ -1,5 +1,5 @@
import XCTest
import AI
import AIFabric
/// Gate 4 (AI flow): proves the S2 AIRouter produces a VISIBLE routingNote on a rule-based fallback,
/// and that an explicit-provider-unavailable pick throws (no silent fallback). Sources/AI is consumed
@@ -0,0 +1,96 @@
{
"_meta": {
"fixture": "foundationmodels-respond",
"purpose": "S2-3a — Apple FoundationModels live capture (OnDeviceProvider 결선 + 테스트 동결 기준)",
"captured_on": "M5 Max MacBook Pro (128GB, Apple Intelligence)",
"captured_date": "2026-06-04",
"sdk": "macOS 26.5 SDK / FoundationModels.framework",
"note": "SDK 가 marshaling 하므로 raw request_body 는 없음. 이 파일은 응답 모양 + 에러 타입 + 취소 동작의 동결 기준."
},
"availability": {
"observed": "available",
"is_available_convenience": true,
"read_is_synchronous": true,
"api": "SystemLanguageModel.default.availability",
"enum": {
"available": "case available",
"unavailable_reasons": ["deviceNotEligible", "appleIntelligenceNotEnabled", "modelNotReady"]
},
"supports_korean": true,
"supported_language_count": 23
},
"happy_path": {
"api": "session.respond(to: String, options: GenerationOptions) async throws -> Response<String>",
"content_accessor": "response.content (Response<String>.content : String)",
"observed_content": "압력용기의 충격시험(Charpy) 면제 판정은, 용기의 압력 등급이 10MPa 이하인 경우, 충격 시험을 면제할 수 있으며, 이는 용기의 안전성을 보장하기 위한 중요한 기준입니다.",
"observed_latency_ms": 1291.3,
"transcript_entries_count": 1,
"is_responding_after": false,
"quality_note": "내용은 부정확(온디바이스 ~3B/2-bit QAT). corpusAsk 부적합·quickSummarize/classify 적합 라우팅 정합 — 사실성은 RemoteDS 코퍼스가 담당."
},
"session_init": {
"api": "LanguageModelSession(model: .default, tools: [], instructions: String?)",
"instructions_timing": "init (per-call 아님)",
"instructions_nil_handling": "systemPrompt == nil 이면 instructions 인자 생략 (빈 문자열 금지)",
"prewarm": "session.prewarm() — 동기 반환(관찰 ~1.3ms), 백그라운드 워밍",
"stateless_per_request": "호출마다 새 세션 생성 → instructions(init-time) + rateLimited/concurrentRequests(세션 상태) 둘 다 우회"
},
"generation_options": {
"api": "GenerationOptions(sampling: SamplingMode? = nil, temperature: Double? = nil, maximumResponseTokens: Int? = nil)",
"mapping": "AICompletionRequest.maxTokens -> maximumResponseTokens",
"temperature_note": "AICompletionRequest 에 temperature 필드 없음(동결) → 미설정(모델 기본). LocalMLX 와 동일 정책(둘 다 미설정)."
},
"generation_error": {
"_source": "Xcode jump-to-def / swiftinterface (LanguageModelSession.GenerationError) — authoritative, version-accurate",
"type": "LanguageModelSession.GenerationError : Error, LocalizedError",
"associated_value": "각 case 는 GenerationError.Context (refusal 은 (Refusal, Context))",
"cases": [
"exceededContextWindowSize(Context)",
"assetsUnavailable(Context)",
"guardrailViolation(Context)",
"unsupportedGuide(Context)",
"unsupportedLanguageOrLocale(Context)",
"decodingFailure(Context)",
"rateLimited(Context)",
"concurrentRequests(Context)",
"refusal(Refusal, Context)"
],
"plan_corrections": [
"plan 가정 'refusal 케이스명 없음' = 틀림 → refusal 은 별도 case 로 존재(guardrailViolation 과 구분).",
"plan 에 없던 concurrentRequests case 존재 — rateLimited 와 함께 stateless 세션에서 뜨면 세션 공유 버그 신호.",
"assetsUnavailable 정확명 확정(모델 자산 미가용)."
],
"reproduced_live": {
"exceededContextWindowSize": {
"trigger": "의도적 컨텍스트 오버플로(긴 프롬프트)",
"errorDescription": "Exceeded model context window size"
}
},
"finish_reason_mapping": {
"guardrailViolation": ".refused",
"refusal": ".refused",
"exceededContextWindowSize": ".unavailable",
"rateLimited": ".unavailable + loud log (stateless 인데 발생 = 세션 재사용 버그 신호)",
"concurrentRequests": ".unavailable + loud log (동일 — stateless 위반 신호)",
"unsupportedLanguageOrLocale": ".unavailable (+ supportedLocale 사전체크로 회피)",
"unsupportedGuide": ".unavailable",
"decodingFailure": ".unavailable",
"assetsUnavailable": ".unavailable",
"@unknown default": ".unavailable + loud log"
}
},
"cancellation": {
"_finding": "S2-Fe 전제 확정 — COOPERATIVE",
"cancel_requested_at_ms": 500,
"threw": "CancellationError",
"elapsed_ms": 533.6,
"interpretation": "respond() 는 mid-flight Task 취소를 협조적으로 honor(요청 33ms 후 CancellationError throw).",
"implication": "OnDevice complete() 에 surrounding Task.checkCancellation() 은 belt-and-suspenders(실제 중단은 respond() 내부). streamResponse 토큰단위 취소 폴백 불필요(선전환 금지)."
}
}
@@ -0,0 +1,19 @@
{
"_meta": {
"fixture": "llm-router-chat.request",
"status": "PROVISIONAL_SYNTHETIC",
"synthetic": true,
"captured_date": null,
"reason": "맥미니(hyungi-macmini 100.76.254.116) Tailscale offline(last seen 8h+, 2026-06-04) → 라이브 캡처 불가. OpenAI /v1/chat/completions 표준 스펙 기반 합성. 맥미니 복귀 시 S2-2a 라이브 재캡처로 교체(별 fixture-update PR, S2-Ff drift-check 경유).",
"endpoint": "POST http://100.76.254.116:8890/v1/chat/completions",
"call_shape_note": "messages = [system, user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\" (plan S2-2c). max_tokens = AICompletionRequest.maxTokens.",
"model_note": "model 문자열은 llm-router 가 기대하는 별칭 — 라이브 캡처로 확정 필요(provisional: 'gemma-macmini')."
},
"model": "gemma-macmini",
"messages": [
{ "role": "system", "content": "You are a concise technical assistant." },
{ "role": "user", "content": "충격시험 면제 기준을 한 문장으로 요약해줘." }
],
"max_tokens": 512,
"stream": false
}
@@ -0,0 +1,25 @@
{
"_meta": {
"fixture": "llm-router-chat.response",
"status": "PROVISIONAL_SYNTHETIC",
"synthetic": true,
"captured_date": null,
"reason": "맥미니 offline → 라이브 캡처 불가. OpenAI chat.completion 표준 응답 모양 기반 합성. 라이브 재캡처로 교체(S2-Ff).",
"shape": "OpenAI chat.completion (choices[0].message.content → text, finish_reason → AIFinishReason)"
},
"id": "chatcmpl-provisional-0001",
"object": "chat.completion",
"created": 0,
"model": "gemma-macmini",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "충격시험 면제는 재료군(Curve A~D)과 거버닝 두께에 따른 UCS-66 면제 곡선으로 MDMT에서 판정합니다."
},
"finish_reason": "stop"
}
],
"usage": { "prompt_tokens": 24, "completion_tokens": 41, "total_tokens": 65 }
}