diff --git a/Package.swift b/Package.swift
new file mode 100644
index 0000000..f48fa8f
--- /dev/null
+++ b/Package.swift
@@ -0,0 +1,39 @@
+// swift-tools-version: 6.2
+//
+// AIFabric — S2 LLM fabric (Document Server multi-device app).
+//
+// This manifest exposes only `Sources/AI/` (owned by S2) as a library product.
+// The S3 app (separate track) consumes the `AIFabric` product as a **local SwiftPM dependency** (consumption model b)
+// and never compiles `Sources/AI` into the app target itself → no double source ownership / duplicate symbols (plan S2-0b/S2-Fb).
+//
+// Boundary: S2 owns only this Package.swift + Sources/AI/** + Tests/AITests/** + contract/fixtures/**.
+// App shell, views and the concrete DS client belong to S3.
+import PackageDescription
+
+let package = Package(
+    name: "AIFabric",
+    platforms: [
+        .macOS(.v26), // FoundationModels (OnDeviceProvider) needs the macOS 26 SDK
+        .iOS(.v26), // same floor for iPhone consumers; FoundationModelsBackend has no availability guards
+    ],
+    products: [
+        .library(name: "AIFabric", targets: ["AIFabric"]),
+    ],
+    targets: [
+        .target(
+            name: "AIFabric",
+            path: "Sources/AI",
+            swiftSettings: [
+                .swiftLanguageMode(.v6), // complete strict-concurrency checking (frozen interface passes in Swift 6 mode)
+            ]
+        ),
+        .testTarget(
+            name: "AITests",
+            dependencies: ["AIFabric"],
+            path: "Tests/AITests",
+            swiftSettings: [
+                .swiftLanguageMode(.v6),
+            ]
+        ),
+    ]
+)
diff --git a/Sources/AI/Composition.swift b/Sources/AI/Composition.swift
new file mode 100644
index 0000000..6330670
--- /dev/null
+++ b/Sources/AI/Composition.swift
@@ -0,0 +1,106 @@
+// Composition.swift — S2 → S3 integration entry point (consumption model b) + single-source endpoint config.
+//
+// INTEGRATION (consumption model b): the app (S3) attaches to this package's `AIFabric` product as a **local SwiftPM dependency**
+// and never adds Sources/AI to the app target directly (prevents double source ownership / duplicate symbols). S3 wires the real
+// router with makeDefaultRouter(...) alone, replacing MockAIProvider. The concrete DSAskClient (HTTP) is owned by S3.
+//
+// Single-source endpoints (S2-Fa): no scattered raw URLs — guards against a careless edit silently swapping an endpoint
+// (precedent: the 2026-05-17 Hermes incident). Env override → vetted default. ([[feedback_hermes_config_single_source_envvar]])
+import Foundation
+import os
+
+/// Endpoint and timeout settings consumed by the composition root when it builds the providers.
+public struct AIProviderConfiguration: Sendable {
+    /// Mac mini llm-router base URL (no trailing slash; the provider appends the path).
+    public var localMLXBaseURL: URL
+    /// llm-router model alias (provisional 'gemma-macmini' — still to be confirmed by a live capture).
+    public var localMLXModel: String
+    /// DS API base used by S3's DSAskClient. Public: https://document.hyungi.net/api · internal: http://100.110.63.63:8000/api.
+    /// Note: DS `/search/ask` **requires a trailing slash** (the S3 client guarantees it when joining paths).
+    public var dsBaseURL: URL
+    /// Timeout in seconds that makeDefaultRouter passes to LocalMLXProvider for completion requests.
+    public var requestTimeout: TimeInterval
+    /// Timeout in seconds that makeDefaultRouter passes to LocalMLXProvider for its availability probe.
+    public var probeTimeout: TimeInterval
+
+    /// Memberwise initializer; the model alias and both timeouts have defaults.
+    public init(
+        localMLXBaseURL: URL,
+        localMLXModel: String = "gemma-macmini",
+        dsBaseURL: URL,
+        requestTimeout: TimeInterval = 60,
+        probeTimeout: TimeInterval = 2
+    ) {
+        self.localMLXBaseURL = localMLXBaseURL
+        self.localMLXModel = localMLXModel
+        self.dsBaseURL = dsBaseURL
+        self.requestTimeout = requestTimeout
+        self.probeTimeout = probeTimeout
+    }
+
+    /// Builds the configuration from environment overrides, falling back to the vetted defaults.
+    ///
+    /// An override that is present but unusable (unparseable, non-http(s), or host-less) is rejected
+    /// with a logged warning and the default is used, so a typo never yields a URL that only fails at
+    /// request time and a rejected override is never dropped silently.
+    /// - Parameter environment: Variable map to read; defaults to the process environment.
+    /// - Returns: A configuration that uses the default timeouts.
+    public static func resolved(
+        environment: [String: String] = ProcessInfo.processInfo.environment
+    ) -> AIProviderConfiguration {
+        let localMLX = endpoint(named: "AIFABRIC_LOCALMLX_URL", in: environment)
+            ?? URL(string: "http://100.76.254.116:8890")!
+        let model = environment["AIFABRIC_LOCALMLX_MODEL"].flatMap { $0.isEmpty ? nil : $0 }
+            ?? "gemma-macmini"
+        let ds = endpoint(named: "AIFABRIC_DS_URL", in: environment)
+            ?? URL(string: "https://document.hyungi.net/api")!
+        return AIProviderConfiguration(localMLXBaseURL: localMLX, localMLXModel: model, dsBaseURL: ds)
+    }
+
+    /// Reads one endpoint override; accepts only absolute http(s) URLs that carry a non-empty host.
+    private static func endpoint(named key: String, in environment: [String: String]) -> URL? {
+        guard let raw = environment[key] else { return nil }
+        guard let url = URL(string: raw),
+              let scheme = url.scheme?.lowercased(), scheme == "http" || scheme == "https",
+              url.host()?.isEmpty == false
+        else {
+            // The raw value is deliberately not logged: it may embed credentials.
+            AIFabricLog.router.warning("ignoring invalid \(key, privacy: .public) override; using default endpoint")
+            return nil
+        }
+        return url
+    }
+}
+
+/// Default OSLog routing hook — makes fallbacks/skips visible (never silent). Public so S3 can reference it too.
+public enum AIFabricLog {
+    static let router = Logger(subsystem: "ds-app.AIFabric", category: "AIRouter")
+    public static let routerHook: @Sendable (String) -> Void = { msg in
+        router.info("\(msg, privacy: .public)")
+    }
+}
+
+/// Single S3 → S2 entry point. Registers all four providers (keeps the vision-chain fallback visible) with the default policy and log hook.
+/// - client: concrete DS ask client (HTTP) injected by S3.
+/// - config: single-source endpoint config (default = env override → vetted default).
+/// - session: URLSession for LocalMLX (default .shared; tests inject a mock).
+public func makeDefaultRouter(
+    client: DSAskClient,
+    config: AIProviderConfiguration = .resolved(),
+    session: URLSession = .shared,
+    policy: AIRoutingPolicy = .default,
+    log: @escaping @Sendable (String) -> Void = AIFabricLog.routerHook
+) -> AIRouter {
+    let providers: [AIProviderID: any AIProvider] = [
+        .remoteDS: RemoteDSProvider(client: client),
+        .localMLX: LocalMLXProvider(
+            baseURL: config.localMLXBaseURL,
+            model: config.localMLXModel,
+            session: session,
+            requestTimeout: config.requestTimeout,
+            probeTimeout: config.probeTimeout
+        ),
+        .onDevice: OnDeviceProvider(),
+        .specialized: SpecializedProvider(), // scaffold (always unavailable) — keeps the vision fallback visible
+    ]
+    return AIRouter(providers: providers, policy: policy, log: log)
+}
diff --git a/Sources/AI/Providers/LocalMLXProvider.swift b/Sources/AI/Providers/LocalMLXProvider.swift
index 244959d..1f8c521 100644
--- a/Sources/AI/Providers/LocalMLXProvider.swift
+++ b/Sources/AI/Providers/LocalMLXProvider.swift
@@ -1,33 +1,172 @@
-// LocalMLXProvider.swift — S2 구현 스켈레톤 (맥미니 메인 로컬 LLM 허브).
+// LocalMLXProvider.swift — S2 implementation (Mac mini main local LLM hub).
 //
-// 실제 구현: 맥미니 Gemma 4 26B (MLX) OpenAI 호환 엔드포인트 호출.
-// - 엔드포인트: llm-router :8890 (권장) 또는 MLX :8801 (Tailscale 100.76.254.116)
-// - isAvailable = 짧은 health 핑 (도달 + 모델 로드)
-// - complete = POST /v1/chat/completions (messages: system/user 분리, call-shape 고정)
-// 인터페이스 동결 단계에서는 스텁.
-
+// Mac mini Gemma 4 26B via llm-router :8890 (OpenAI-compatible, wake-on-call). Decision #4: raw MLX :8801 retired.
+// - isAvailable = lightweight GET /v1/models probe (short timeout, does not trigger a wake → "available but cold" is normal)
+// - complete = POST /v1/chat/completions with separate system/user messages (fixed call shape)
+//
+// ⚠️ fixture (llm-router-chat.*.json) = PROVISIONAL_SYNTHETIC (no live capture possible while the Mac mini was offline, 2026-06-04).
+// Based on the standard OpenAI spec. Replace with an S2-2a live re-capture once the Mac mini is back (S2-Ff). The code itself targets the live service.
 import Foundation
 
+/// `AIProvider` that talks to the llm-router's OpenAI-compatible HTTP API.
 public struct LocalMLXProvider: AIProvider {
     public let id: AIProviderID = .localMLX
 
-    /// 맥미니 허브 베이스 URL (S2 가 설정/Keychain 에서 주입).
+    /// Mac mini hub base URL (injected from the S2-Fa config). Base without a trailing slash; paths are added with appendingPathComponent.
     public let baseURL: URL
+    let model: String
+    let session: URLSession
+    let requestTimeout: TimeInterval
+    let probeTimeout: TimeInterval
 
-    public init(baseURL: URL) {
+    /// Creates a provider; `session` is injectable so callers can substitute their own URLSession.
+    public init(
+        baseURL: URL,
+        model: String = "gemma-macmini",
+        session: URLSession = .shared,
+        requestTimeout: TimeInterval = 60,
+        probeTimeout: TimeInterval = 2
+    ) {
         self.baseURL = baseURL
+        self.model = model
+        self.session = session
+        self.requestTimeout = requestTimeout
+        self.probeTimeout = probeTimeout
     }
 
+    // MARK: isAvailable — lightweight health probe (does not trigger a wake)
+
     public var isAvailable: Bool {
         get async {
-            // S2: GET /v1/models 또는 경량 health 핑으로 교체.
-            false
+            var req = URLRequest(url: baseURL.appendingPathComponent("v1/models"))
+            req.httpMethod = "GET"
+            req.timeoutInterval = probeTimeout
+            do {
+                let (_, resp) = try await session.data(for: req)
+                guard let http = resp as? HTTPURLResponse else { return false }
+                return (200..<300).contains(http.statusCode)
+            } catch {
+                // Timeout / connection error → false (never throws). The probe must not cause a wake or model load.
+                // "Available" means "can answer", not "answers instantly" — a cold model absorbs its load delay in the first complete().
+                return false
+            }
         }
     }
 
+    // MARK: complete — OpenAI-compatible chat/completions
+
+    /// Sends `request` to POST /v1/chat/completions and maps the first choice to a response.
+    /// - Throws: `CancellationError` when the task is cancelled; `AIProviderError.backendError` on timeout,
+    ///   a non-HTTP or non-2xx response, or an empty `choices` array; other transport/coding errors unchanged.
     public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
-        // S2: OpenAI 호환 chat/completions 호출 → AICompletionResponse(providerUsed: .localMLX).
-        // messages 구조(system/user 분리)는 production 호출과 단일 source-of-truth.
-        throw AIProviderError.notImplemented(id)
+        try Task.checkCancellation()
+
+        var req = URLRequest(url: baseURL.appendingPathComponent("v1/chat/completions"))
+        req.httpMethod = "POST"
+        req.setValue("application/json", forHTTPHeaderField: "Content-Type")
+        req.timeoutInterval = requestTimeout // S2-Fe: keeps a hung generation from blocking forever
+        req.httpBody = try Self.encodeRequest(request, model: model)
+
+        let started = Date()
+        let data: Data
+        let resp: URLResponse
+        do {
+            // Cancelling the Task aborts the transfer, but URLSession reports that as URLError(.cancelled).
+            (data, resp) = try await session.data(for: req)
+        } catch let e as URLError where e.code == .cancelled {
+            // Normalise to CancellationError so callers see cancellation, not a backend failure (S2-Fe).
+            throw CancellationError()
+        } catch let e as URLError where e.code == .timedOut {
+            throw AIProviderError.backendError(id, status: -1, reason: "request timed out after \(Int(requestTimeout))s")
+        }
+
+        guard let http = resp as? HTTPURLResponse else {
+            throw AIProviderError.backendError(id, status: -1, reason: "non-HTTP response")
+        }
+        guard (200..<300).contains(http.statusCode) else {
+            // Non-2xx → backendError (never a silent empty text). The body is truncated to 300 characters.
+            let reason = String(data: data, encoding: .utf8).map { String($0.prefix(300)) }
+            throw AIProviderError.backendError(id, status: http.statusCode, reason: reason)
+        }
+
+        let decoded = try JSONDecoder().decode(OpenAIChatResponse.self, from: data)
+        guard let choice = decoded.choices.first else {
+            throw AIProviderError.backendError(id, status: http.statusCode, reason: "no choices in response")
+        }
+        return AICompletionResponse(
+            text: choice.message.content,
+            providerUsed: .localMLX,
+            finishReason: Self.finishReason(choice.finishReason),
+            citations: [], // local generation — no corpus citations
+            confidence: nil,
+            latencyMs: Date().timeIntervalSince(started) * 1000,
+            routingNote: nil // the router fills in any fallback note
+        )
+    }
+
+    // MARK: Mapping
+
+    /// Maps an OpenAI `finish_reason` to `AIFinishReason`; every value, including nil, currently maps to `.completed`.
+    static func finishReason(_ openAI: String?) -> AIFinishReason {
+        switch openAI {
+        case "stop": return .completed
+        case "length": return .completed // cut off by max_tokens — treated as a normal completion
+        default: return .completed
+        }
+    }
+
+    /// AICompletionRequest → OpenAI chat/completions body. Separate system/user messages (single source of truth shared with the fixture).
+    /// system.content = systemPrompt ?? "" (plan S2-2c). AICompletionRequest has no temperature (frozen) → left unset (server default).
+    static func encodeRequest(_ request: AICompletionRequest, model: String) throws -> Data {
+        let body = OpenAIChatRequest(
+            model: model,
+            messages: [
+                OpenAIChatRequest.Message(role: "system", content: request.systemPrompt ?? ""),
+                OpenAIChatRequest.Message(role: "user", content: request.prompt),
+            ],
+            maxTokens: request.maxTokens,
+            stream: false
+        )
+        let enc = JSONEncoder()
+        enc.outputFormatting = [.sortedKeys]
+        return try enc.encode(body)
     }
 }
+
+// MARK: - OpenAI-compatible wire types (internal)
+
+/// Request body for POST /v1/chat/completions.
+struct OpenAIChatRequest: Encodable, Sendable {
+    struct Message: Encodable, Sendable {
+        let role: String
+        let content: String
+    }
+    let model: String
+    let messages: [Message]
+    let maxTokens: Int?
+    let stream: Bool
+
+    enum CodingKeys: String, CodingKey {
+        case model, messages, stream
+        case maxTokens = "max_tokens"
+    }
+}
+
+/// The subset of the chat/completions response that the provider reads.
+struct OpenAIChatResponse: Decodable, Sendable {
+    struct Choice: Decodable, Sendable {
+        struct Message: Decodable, Sendable {
+            let role: String
+            let content: String
+        }
+        let index: Int?
+        let message: Message
+        let finishReason: String?
+
+        enum CodingKeys: String, CodingKey {
+            case index, message
+            case finishReason = "finish_reason"
+        }
+    }
+    let choices: [Choice]
+}
diff --git a/Sources/AI/Providers/OnDeviceProvider.swift b/Sources/AI/Providers/OnDeviceProvider.swift
index bf0a4d5..5b21885 100644
--- a/Sources/AI/Providers/OnDeviceProvider.swift
+++ b/Sources/AI/Providers/OnDeviceProvider.swift
@@ -1,26 +1,196 @@
-// OnDeviceProvider.swift — S2 구현 스켈레톤 (맥북·아이폰 온디바이스).
+// OnDeviceProvider.swift — S2 implementation (MacBook / iPhone on-device, Apple FoundationModels).
 //
-// 실제 구현: `import FoundationModels` 후 SystemLanguageModel / LanguageModelSession 사용.
-// - isAvailable = SystemLanguageModel.default.availability == .available
-// - complete = LanguageModelSession 으로 prompt 응답 (citations 없음 — 로컬 생성)
-// 인터페이스 동결 단계에서는 Foundation-only 스텁(notImplemented).
-
+// Live path (decision #3): SystemLanguageModel.default.availability (synchronous) + LanguageModelSession.respond().
+// Model is ~3B / 2-bit QAT → suited to quickSummarize/classify, too small for corpusAsk (routing sends corpusAsk to RemoteDS only).
+//
+// Testability: the model backend is injected through a seam (OnDeviceModelBackend) → CI / non-AI Macs unit-test
+// error mapping and routing with a mock, while an M5 Max runs live integration tests (the default below = FoundationModels).
+//
+// Frozen from measurements (S2-3a, M5 Max 2026-06-04): availability=available · respond()→Response.content ·
+// GenerationError has 9 cases · context overflow→exceededContextWindowSize · cancellation is COOPERATIVE (respond()
+// honours a mid-flight Task.cancel → checkCancellation is belt-and-suspenders, streamResponse is not needed).
 import Foundation
+import os
+
+// MARK: - Neutral types (no FoundationModels dependency — mapping logic is unit-testable without the hardware)
+
+public enum OnDeviceAvailability: Sendable, Equatable {
+    case available
+    case unavailable(reason: String)
+}
+
+/// Neutral mirror of FoundationModels.LanguageModelSession.GenerationError (9 cases + unknown).
+/// The live backend translates SDK errors into this → the provider maps it to a finishReason or a throw (pure, testable).
+public enum OnDeviceGenerationError: Error, Sendable, Equatable {
+    case guardrailViolation
+    case refusal
+    case exceededContextWindowSize
+    case rateLimited
+    case concurrentRequests
+    case unsupportedLanguageOrLocale
+    case unsupportedGuide
+    case decodingFailure
+    case assetsUnavailable
+    case unknown(String)
+}
+
+/// Seam for the on-device model backend. Live = FoundationModelsBackend, tests = a mock.
+protocol OnDeviceModelBackend: Sendable {
+    var availability: OnDeviceAvailability { get }
+    /// Failures are thrown as OnDeviceGenerationError or CancellationError.
+    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String
+}
+
+// MARK: - Provider
 
 public struct OnDeviceProvider: AIProvider {
     public let id: AIProviderID = .onDevice
+    private let backend: OnDeviceModelBackend
+    private let log: @Sendable (String) -> Void
 
-    public init() {}
+    public init() {
+        self.backend = Self.makeLiveBackend()
+        let logger = Logger(subsystem: "ds-app.AIFabric", category: "OnDeviceProvider")
+        self.log = { msg in logger.warning("\(msg, privacy: .public)") }
+    }
+
+    /// Test seam — injects the backend and log sink (hardware-independent unit tests); the default log discards messages.
+    init(backend: OnDeviceModelBackend, log: @escaping @Sendable (String) -> Void = { _ in }) {
+        self.backend = backend
+        self.log = log
+    }
 
     public var isAvailable: Bool {
-        get async {
-            // S2: FoundationModels 가용성 프로브로 교체.
-            false
-        }
+        get async { backend.availability == .available }
     }
 
     public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
-        // S2: LanguageModelSession(.default) 호출 → AICompletionResponse(providerUsed: .onDevice).
-        throw AIProviderError.notImplemented(id)
+        // Belt-and-suspenders: real mid-flight cancellation is honoured inside respond() (measured in S2-3a).
+        try Task.checkCancellation()
+        let started = Date()
+        do {
+            let text = try await backend.generate(
+                prompt: request.prompt,
+                systemPrompt: request.systemPrompt,
+                maxTokens: request.maxTokens
+            )
+            return AICompletionResponse(
+                text: text,
+                providerUsed: .onDevice,
+                finishReason: .completed,
+                latencyMs: Date().timeIntervalSince(started) * 1000
+            )
+        } catch let e as OnDeviceGenerationError {
+            return try mapError(e)
+        }
+        // Anything else (CancellationError etc.) propagates unchanged.
+    }
+
+    /// GenerationError mapping (S2-3c). A refusal is a kind of answer → returned (no fallback). Everything else = provider unavailable → throw (the router falls back visibly).
+    private func mapError(_ e: OnDeviceGenerationError) throws -> AICompletionResponse {
+        switch e {
+        case .guardrailViolation, .refusal:
+            return AICompletionResponse(
+                text: "",
+                providerUsed: .onDevice,
+                finishReason: .refused,
+                routingNote: "on-device refused (guardrail/refusal)"
+            )
+        case .rateLimited:
+            // Sessions are stateless per request, so this signals a session-reuse bug (do not hide it → log loudly + report unavailable).
+            log("UNEXPECTED onDevice rateLimited on stateless session — 세션 재사용 버그 의심")
+            throw AIProviderError.unavailable(id)
+        case .concurrentRequests:
+            log("UNEXPECTED onDevice concurrentRequests on stateless session — 세션 재사용 버그 의심")
+            throw AIProviderError.unavailable(id)
+        case .exceededContextWindowSize:
+            log("onDevice context window(4096) exceeded — 라우터가 localMLX 로 폴백")
+            throw AIProviderError.unavailable(id)
+        case .unsupportedLanguageOrLocale:
+            log("onDevice unsupported language/locale — 폴백")
+            throw AIProviderError.unavailable(id)
+        case .unsupportedGuide, .decodingFailure, .assetsUnavailable:
+            throw AIProviderError.unavailable(id)
+        case .unknown(let detail):
+            log("onDevice unknown generation error: \(detail)")
+            throw AIProviderError.unavailable(id)
+        }
+    }
+
+    static func makeLiveBackend() -> OnDeviceModelBackend {
+        #if canImport(FoundationModels)
+        return FoundationModelsBackend()
+        #else
+        return UnavailableBackend(reason: "FoundationModels not importable on this platform")
+        #endif
     }
 }
+
+/// Fallback backend for platforms/SDKs where FoundationModels cannot be imported.
+struct UnavailableBackend: OnDeviceModelBackend {
+    let reason: String
+    var availability: OnDeviceAvailability { .unavailable(reason: reason) }
+    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
+        throw OnDeviceGenerationError.unknown("backend unavailable: \(reason)")
+    }
+}
+
+// MARK: - Live FoundationModels backend (M5 Max / Apple Intelligence)
+
+#if canImport(FoundationModels)
+import FoundationModels
+
+struct FoundationModelsBackend: OnDeviceModelBackend {
+
+    var availability: OnDeviceAvailability {
+        switch SystemLanguageModel.default.availability {
+        case .available:
+            return .available
+        case .unavailable(let reason):
+            switch reason {
+            case .deviceNotEligible: return .unavailable(reason: "deviceNotEligible")
+            case .appleIntelligenceNotEnabled: return .unavailable(reason: "appleIntelligenceNotEnabled")
+            case .modelNotReady: return .unavailable(reason: "modelNotReady")
+            @unknown default: return .unavailable(reason: "unknownReason")
+            }
+        @unknown default:
+            return .unavailable(reason: "unknown")
+        }
+    }
+
+    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
+        // Instructions are fixed at init. A nil systemPrompt is omitted (never an empty string — S2-3c; unlike LocalMLX's `?? ""`).
+        let session: LanguageModelSession
+        if let systemPrompt {
+            session = LanguageModelSession(model: .default, instructions: systemPrompt)
+        } else {
+            session = LanguageModelSession(model: .default)
+        }
+        session.prewarm() // returns synchronously (~1.3 ms per S2-3a) and warms in the background
+        // AICompletionRequest has no temperature (frozen) → left unset (model default). Same policy as LocalMLX.
+        let options = GenerationOptions(maximumResponseTokens: maxTokens)
+        do {
+            let response = try await session.respond(to: prompt, options: options)
+            return response.content // Response.content : String
+        } catch let g as LanguageModelSession.GenerationError {
+            throw Self.translate(g)
+        }
+    }
+
+    /// SDK GenerationError (9 cases) → neutral OnDeviceGenerationError. Exhaustive, plus @unknown default.
+    static func translate(_ g: LanguageModelSession.GenerationError) -> OnDeviceGenerationError {
+        switch g {
+        case .guardrailViolation: return .guardrailViolation
+        case .refusal: return .refusal
+        case .exceededContextWindowSize: return .exceededContextWindowSize
+        case .rateLimited: return .rateLimited
+        case .concurrentRequests: return .concurrentRequests
+        case .unsupportedLanguageOrLocale: return .unsupportedLanguageOrLocale
+        case .unsupportedGuide: return .unsupportedGuide
+        case .decodingFailure: return .decodingFailure
+        case .assetsUnavailable: return .assetsUnavailable
+        @unknown default: return .unknown("\(g)")
+        }
+    }
+}
+#endif
diff --git a/Sources/AI/Providers/RemoteDSProvider.swift b/Sources/AI/Providers/RemoteDSProvider.swift
index dcda7d5..1744e8d 100644
--- a/Sources/AI/Providers/RemoteDSProvider.swift
+++ b/Sources/AI/Providers/RemoteDSProvider.swift
@@ -1,22 +1,99 @@
-// RemoteDSProvider.swift — S2 구현 스켈레톤 (원격 DS 코퍼스 RAG).
+// RemoteDSProvider.swift — S2 implementation (remote DS corpus RAG).
 //
-// 이 provider 가 S1 계약과 만나는 다리:
-//   complete(corpusAsk) → GET /search/ask?q=&backend= (CONTRACT.md §4, AskResponse)
-//   AskResponse.citations → [AICitation] 매핑
+// Bridge to the S1 contract (CONTRACT.md §4 / AI-ROUTING.md §4):
+//   complete(corpusAsk) → DSAskClient.ask(query:backend:) → AskResponse → AICompletionResponse
+//   AskResponse.ai_answer → text
+//   AskResponse.citations[] → [AICitation]
 //   AskResponse.synthesis_status → AIFinishReason
-//   AskResponse.backend_used → routingNote (어느 LLM 이 응답했는지)
-// backend 인자: nil(=mac-mini-default) 또는 explicitProvider 매핑(localMLX→gemma-macmini 등).
-// 인터페이스 동결 단계에서는 스텁(S3 의 DS API client 주입 후 S2 가 결선).
-
+//   AskResponse.confidence → AIConfidence
+//   AskResponse.backend_used → routingNote (which LLM produced the answer)
+//
+// HTTP is owned by S3's concrete client (LiveDSClient) — S2 owns only the DSAskClient protocol seam and the mapping.
+// Interface freeze: the AIProvider protocol is unchanged. RemoteDSProvider.init(client:) is implementation detail filled in by S2.
 import Foundation
 
+// MARK: - DS ask seam owned by S2 (concrete implementation = S3)
+
+/// Abstraction over the DS `GET /search/ask?q=&backend=` call. S3's LiveDSClient conforms;
+/// S2 unit-tests against a mock (zero live network). HTTP failures are thrown by the conformer
+/// (recommended: `AIProviderError.backendError(.remoteDS, status:, reason:)`) — never a silent fallback.
+public protocol DSAskClient: Sendable {
+    func ask(query: String, backend: String) async throws -> AskResponse
+}
+
+// MARK: - DS /search/ask response (partial mirror, decoding only)
+//
+// Explicit CodingKeys — convertFromSnakeCase is not used (consistent with the S3 model convention). Fixture: contract/fixtures/ask.json.
+
+public struct AskResponse: Decodable, Sendable {
+    public let aiAnswer: String
+    public let citations: [AskCitation]
+    public let synthesisStatus: String
+    public let synthesisMs: Double?
+    public let confidence: String?
+    public let backendUsed: String?
+    public let refused: Bool?
+
+    enum CodingKeys: String, CodingKey {
+        case aiAnswer = "ai_answer"
+        case citations
+        case synthesisStatus = "synthesis_status"
+        case synthesisMs = "synthesis_ms"
+        case confidence
+        case backendUsed = "backend_used"
+        case refused
+    }
+
+    public init(aiAnswer: String, citations: [AskCitation], synthesisStatus: String,
+                synthesisMs: Double? = nil, confidence: String? = nil,
+                backendUsed: String? = nil, refused: Bool? = nil) {
+        self.aiAnswer = aiAnswer
+        self.citations = citations
+        self.synthesisStatus = synthesisStatus
+        self.synthesisMs = synthesisMs
+        self.confidence = confidence
+        self.backendUsed = backendUsed
+        self.refused = refused
+    }
+}
+
+public struct AskCitation: Decodable, Sendable {
+    public let n: Int
+    public let docId: Int
+    public let title: String?
+    public let sectionTitle: String?
+    public let spanText: String
+
+    enum CodingKeys: String, CodingKey {
+        case n
+        case docId = "doc_id"
+        case title
+        case sectionTitle = "section_title"
+        case spanText = "span_text"
+    }
+
+    public init(n: Int, docId: Int, title: String?, sectionTitle: String?, spanText: String) {
+        self.n = n
+        self.docId = docId
+        self.title = title
+        self.sectionTitle = sectionTitle
+        self.spanText = spanText
+    }
+}
+
+// MARK: - Provider
+
 public struct RemoteDSProvider: AIProvider {
     public let id: AIProviderID = .remoteDS
+    private let client: DSAskClient
 
-    public init() {}
+    public init(client: DSAskClient) {
+        self.client = client
+    }
 
+    /// The remote corpus is always a candidate (routing semantics). Real reachability failures surface in complete.
     public var isAvailable: Bool {
-        get async { true }  // 원격 코퍼스는 항상 후보 (실제 도달 실패는 complete 에서 backendError).
+        get async { true }
     }
 
     public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
@@ -24,14 +101,31 @@ public struct RemoteDSProvider: AIProvider {
         guard request.task == .corpusAsk else {
             throw AIProviderError.notImplemented(id)
         }
-        // S2: DS API client.ask(q:) 호출 → AskResponse 디코딩 → 아래 매핑.
-        // let r = try await dsClient.ask(q: request.prompt, backend: mappedBackend(request.explicitProvider))
-        // return Self.map(r)
-        throw AIProviderError.notImplemented(id)
+        try Task.checkCancellation()
+        let backend = Self.dsBackend(for: request.explicitProvider)
+        // HTTP failures (503 etc.) are thrown by the client → propagated as-is (no automatic local fallback).
+        let response = try await client.ask(query: request.prompt, backend: backend)
+        return Self.map(response)
+    }
+
+    // MARK: Mapping (AI-ROUTING.md §4, fixed)
+
+    static func map(_ r: AskResponse) -> AICompletionResponse {
+        let citations = r.citations.map {
+            AICitation(n: $0.n, docId: $0.docId, title: $0.title,
+                       sectionTitle: $0.sectionTitle, spanText: $0.spanText)
+        }
+        return AICompletionResponse(
+            text: r.aiAnswer,
+            providerUsed: .remoteDS,
+            finishReason: finishReason(fromSynthesisStatus: r.synthesisStatus),
+            citations: citations,
+            confidence: r.confidence.flatMap(AIConfidence.init(rawValue:)),
+            latencyMs: r.synthesisMs, // latency records synthesis_ms only (no hard gate)
+            routingNote: r.backendUsed // which LLM produced the answer
+        )
     }
 
-    /// AskResponse(JSON) → AICompletionResponse 매핑 규칙(고정). S2 가 이 형태로 결선.
-    /// 시그니처만 동결 — 실제 호출은 S3 DS client 와 결합.
     static func finishReason(fromSynthesisStatus status: String) -> AIFinishReason {
         switch status {
         case "completed": return .completed
@@ -41,4 +135,21 @@ public struct RemoteDSProvider: AIProvider {
         default: return .refused
         }
     }
+
+    /// explicitProvider → DS synthesis backend (AI-ROUTING.md §4, fixed).
+    /// **An exhaustive switch, not a dictionary** — adding a future AIProviderID forces a backend decision at compile time
+    /// (an unmapped provider → nil → undefined backend → silent 404 is blocked by the compiler).
+    static func dsBackend(for explicit: AIProviderID?) -> String {
+        guard let explicit else { return "mac-mini-default" } // unspecified → DS default
+        switch explicit {
+        case .localMLX: return "gemma-macmini"
+        case .remoteDS: return "mac-mini-default" // explicit remoteDS = DS default synthesis
+        case .onDevice: return "mac-mini-default" // onDevice cannot synthesise over the corpus → DS default
+        case .specialized: return "mac-mini-default" // no corpus backend for specialized → DS default
+        }
+        // TODO(qwen-macbook): no AIProviderID currently maps to 'qwen-macbook' (M5 Max Qwen VLM).
+        // Once such a provider case exists, the exhaustive switch above stops compiling → the backend decision is forced (S2-1b gate b).
+        // TODO(claude-cloud): the cloud backend 'claude-cloud' returns 503 from DS (scaffold, S2-4b). No case maps to it.
+        // The client surfaces that 503 as backendError(.remoteDS, status:503, …) — never a silent local fallback (separate billing bucket).
+    }
 }
diff --git a/Sources/AI/Providers/SpecializedProvider.swift b/Sources/AI/Providers/SpecializedProvider.swift
new file mode 100644
index 0000000..0d11025
--- /dev/null
+++ b/Sources/AI/Providers/SpecializedProvider.swift
@@ -0,0 +1,27 @@
+// SpecializedProvider.swift — S2 scaffold (GPU-specialised lane: rerank / embed / vision / OCR).
+//
+// This PR = **scaffold only**: isAvailable=false, complete→notImplemented(.specialized). HTTP client / API key / cost = 0.
+// Purpose: in the router's .vision chain [.specialized, .onDevice], specialized used to be **absent from the dict → silently skipped**;
+// registering an explicitly-unavailable provider makes the fallback **visible** (the router logs 'specialized unavailable → onDevice').
+//
+// Real vision integration = a **separate PR** once the backend is settled and a fixture is captured (decision #1):
+// Candidate = MacBook M5 Max Qwen VLM http://100.118.112.84:8810 (OpenAI-compatible, wake-on-call) — the only generative VLM in the fabric.
+// GPU Ollama serves embeddings only (bge-m3), so it is not a generative vision backend.
+import Foundation
+
+public struct SpecializedProvider: AIProvider {
+    public let id: AIProviderID = .specialized
+
+    public init() {}
+
+    /// Scaffold — not implemented. Honestly reports false so the router falls back visibly.
+    public var isAvailable: Bool {
+        get async { false }
+    }
+
+    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
+        // No silent nil-skip — an explicit notImplemented keeps the vision-chain fallback visible.
+        // TODO(vision): .specialized → M5 Max Qwen VLM (:8810, wake-on-call) real call shape + fixture, in a separate PR.
+        throw AIProviderError.notImplemented(id)
+    }
+}
diff --git a/Tests/AITests/AIRouterSmokeTests.swift b/Tests/AITests/AIRouterSmokeTests.swift
new file mode 100644
index 0000000..a65f6da
--- /dev/null
+++ b/Tests/AITests/AIRouterSmokeTests.swift
@@ -0,0 +1,84 @@
+import XCTest
+@testable import AIFabric
+
+/// S2-Fc — verifies the AI-ROUTING.md §3 smoke matrix with mock providers (zero live network).
+/// Qualitative gate (correct routing semantics + exact error enum + visible fallback). Latency is recorded only.
+final class AIRouterSmokeTests: XCTestCase {
+
+    private func askFixture() throws -> AskResponse {
+        try Fixture.decode(AskResponse.self, from: "ask.json")
+    }
+
+    /// A healthy fabric in which every task succeeds on its first-choice provider.
+    private func healthyRouter(log: @escaping @Sendable (String) -> Void = { _ in }) throws -> AIRouter {
+        AIRouter(providers: [
+            .onDevice: EchoProvider(id: .onDevice),
+            .localMLX: EchoProvider(id: .localMLX),
+            .remoteDS: RemoteDSProvider(client: MockDSAskClient(response: try askFixture())),
+            .specialized: SpecializedProvider(), // unavailable scaffold
+        ], log: log)
+    }
+
+    func testQuickSummarizeToOnDevice() async throws {
+        let resp = try await healthyRouter().route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
+        XCTAssertEqual(resp.providerUsed, .onDevice)
+        XCTAssertNil(resp.routingNote)
+    }
+
+    func testCorpusAskToRemoteDSWithCitations() async throws {
+        let resp = try await healthyRouter().route(AICompletionRequest(task: .corpusAsk, prompt: "p"))
+        XCTAssertEqual(resp.providerUsed, .remoteDS)
+        XCTAssertEqual(resp.citations.count, 1)
+    }
+
+    func testClassifyToLocalMLX() async throws {
+        let resp = try await healthyRouter().route(AICompletionRequest(task: .classify, prompt: "p"))
+        XCTAssertEqual(resp.providerUsed, .localMLX)
+        XCTAssertNil(resp.routingNote)
+    }
+
+    func testVisionSpecializedUnavailableFallsToOnDeviceVisibly() async throws {
+        let sink = LogSink()
+        let resp = try await healthyRouter(log: { sink.append($0) })
+            .route(AICompletionRequest(task: .vision, prompt: "p"))
+        XCTAssertEqual(resp.providerUsed, .onDevice)
+        XCTAssertEqual(resp.routingNote, "fallback from specialized → onDevice")
+        XCTAssertTrue(sink.lines.contains { $0.contains("specialized") && $0.contains("unavailable") })
+    }
+
+    func testExplicitOnDeviceUnavailableErrorsNoFallback() async throws {
+        let router = AIRouter(providers: [
+            .onDevice: EchoProvider(id: .onDevice, available: false),
+            .localMLX: EchoProvider(id: .localMLX, available: true),
+        ])
+        do {
+            _ = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p", explicitProvider: .onDevice))
+            XCTFail("explicit onDevice 불가 → 에러(자동 fallback X)")
+        } catch let AIRoutingError.explicitProviderUnavailable(id) {
+            XCTAssertEqual(id, .onDevice)
+        }
+    }
+
+    func testRuleFallbackOnDeviceDownToLocalMLXWithNote() async throws {
+        let router = AIRouter(providers: [
+            .onDevice: EchoProvider(id: .onDevice, available: false),
+            .localMLX: EchoProvider(id: .localMLX, available: true),
+        ])
+        let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
+        XCTAssertEqual(resp.providerUsed, .localMLX)
+        XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX")
+    }
+
+    func testAllUnavailableYieldsNoProviderAvailable() async throws {
+        let router = AIRouter(providers: [
+            .onDevice: EchoProvider(id: .onDevice, available: false),
+            .localMLX: EchoProvider(id: .localMLX, available: false),
+        ])
+        do {
+            _ = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p"))
+            XCTFail("전부 불가 → noProviderAvailable")
+        } catch let AIRoutingError.noProviderAvailable(task) {
+            XCTAssertEqual(task, .quickSummarize)
+        }
+    }
+}
diff --git a/Tests/AITests/CompositionTests.swift b/Tests/AITests/CompositionTests.swift
new file mode 100644
index 0000000..cb2bf2b
--- /dev/null
+++ b/Tests/AITests/CompositionTests.swift
@@ -0,0 +1,66 @@
+import XCTest
+@testable import AIFabric
+
+/// S2-Fa (single-source config) + S2-Fb (composition root) + S2-Fe (timeout / cancellation propagation).
+final class CompositionTests: XCTestCase { + + // MARK: S2-Fa — 엔드포인트 단일소스 config + + func testConfigDefaults() { + let c = AIProviderConfiguration.resolved(environment: [:]) + XCTAssertEqual(c.localMLXBaseURL.absoluteString, "http://100.76.254.116:8890") + XCTAssertEqual(c.localMLXModel, "gemma-macmini") + XCTAssertEqual(c.dsBaseURL.absoluteString, "https://document.hyungi.net/api") + XCTAssertEqual(c.probeTimeout, 2) + } + + func testConfigEnvOverride() { + let c = AIProviderConfiguration.resolved(environment: [ + "AIFABRIC_LOCALMLX_URL": "http://127.0.0.1:9999", + "AIFABRIC_LOCALMLX_MODEL": "test-model", + "AIFABRIC_DS_URL": "http://100.110.63.63:8000/api", + ]) + XCTAssertEqual(c.localMLXBaseURL.absoluteString, "http://127.0.0.1:9999") + XCTAssertEqual(c.localMLXModel, "test-model") + XCTAssertEqual(c.dsBaseURL.absoluteString, "http://100.110.63.63:8000/api") + } + + // MARK: S2-Fb — 컴포지션 루트 (4 provider 전부 등록) + + func testMakeDefaultRouterRegistersAllFour() async throws { + let client = MockDSAskClient(response: try Fixture.decode(AskResponse.self, from: "ask.json")) + let router = makeDefaultRouter(client: client, session: MockURLProtocol.session(), log: { _ in }) + XCTAssertEqual(Set(router.providers.keys), Set(AIProviderID.allCases)) + // corpusAsk 는 와이어링된 RemoteDS 로 흘러 citations 매핑. + let resp = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "p")) + XCTAssertEqual(resp.providerUsed, .remoteDS) + XCTAssertEqual(resp.citations.count, 1) + } + + func testMakeDefaultRouterVisionFallbackVisible() async throws { + let client = MockDSAskClient(response: try Fixture.decode(AskResponse.self, from: "ask.json")) + let sink = LogSink() + let router = makeDefaultRouter(client: client, session: MockURLProtocol.session(), log: { sink.append($0) }) + // specialized scaffold(불가) → onDevice. (onDevice 가용 여부는 머신 의존 — 최소한 specialized 불가 log 는 떠야) + _ = try? 
await router.route(AICompletionRequest(task: .vision, prompt: "p")) + XCTAssertTrue(sink.lines.contains { $0.contains("specialized") && $0.contains("unavailable") }, + "specialized 불가가 침묵 아닌 log 로 가시화") + } + + // MARK: S2-Fe — 취소 전파 (URLSession 경로 아날로그) + + func testCancellationPropagatesThroughRouter() async throws { + let router = AIRouter(providers: [.localMLX: SleepingProvider(id: .localMLX)]) + let task = Task { + try await router.route(AICompletionRequest(task: .classify, prompt: "p")) + } + try? await Task.sleep(nanoseconds: 100_000_000) + task.cancel() + do { + _ = try await task.value + XCTFail("취소된 생성은 CancellationError 전파") + } catch is CancellationError { + // 기대: URLSession async/Task.sleep 둘 다 취소 자동 honor. OnDevice respond() 도 협조적(S2-3a). + } + } +} diff --git a/Tests/AITests/FixtureSupport.swift b/Tests/AITests/FixtureSupport.swift new file mode 100644 index 0000000..6bae563 --- /dev/null +++ b/Tests/AITests/FixtureSupport.swift @@ -0,0 +1,26 @@ +// FixtureSupport.swift — canonical fixture 로더 (contract/fixtures/ 를 #filePath 기준으로 직접 읽음). +// +// 픽스처는 repo 루트의 `contract/fixtures/` 에 단일 소유(S1 ask.json 등 + S2 가 추가하는 +// foundationmodels-respond / llm-router-chat). 테스트 타깃 안에 복제하면 드리프트가 생기므로 +// 복제 대신 #filePath 에서 repo 루트를 계산해 canonical 파일을 직접 로드한다. +import Foundation + +enum Fixture { + /// repo 루트(.../ds-app-s2) — 이 파일은 /Tests/AITests/FixtureSupport.swift. 
+ static let repoRoot: URL = URL(fileURLWithPath: #filePath) + .deletingLastPathComponent() // Tests/AITests + .deletingLastPathComponent() // Tests + .deletingLastPathComponent() // repo root + + static func url(_ name: String) -> URL { + repoRoot.appendingPathComponent("contract/fixtures").appendingPathComponent(name) + } + + static func data(_ name: String) throws -> Data { + try Data(contentsOf: url(name)) + } + + static func decode<T: Decodable>(_ type: T.Type, from name: String, using decoder: JSONDecoder = JSONDecoder()) throws -> T { // reads the named fixture and decodes it as T; read and decode errors propagate to the caller + try decoder.decode(type, from: data(name)) + } +} diff --git a/Tests/AITests/HarnessSmokeTests.swift b/Tests/AITests/HarnessSmokeTests.swift new file mode 100644 index 0000000..d592970 --- /dev/null +++ b/Tests/AITests/HarnessSmokeTests.swift @@ -0,0 +1,18 @@ +import XCTest +@testable import AIFabric + +/// Phase 0 harness smoke test — only checks that Sources/AI builds and tests on its own and that the frozen types are visible. +final class HarnessSmokeTests: XCTestCase { + + func testFrozenTypesVisible() { + // The frozen interface symbols are visible from the AIFabric product. + XCTAssertEqual(AIProviderID.allCases.count, 4) + XCTAssertEqual(AIRoutingPolicy.default.chain(for: .corpusAsk), [.remoteDS]) + } + + func testAskFixtureReadable() throws { + // The canonical contract/fixtures/ask.json loads (verifies the FixtureSupport path). + let data = try Fixture.data("ask.json") + XCTAssertGreaterThan(data.count, 0) + } +} diff --git a/Tests/AITests/LocalMLXProviderTests.swift b/Tests/AITests/LocalMLXProviderTests.swift new file mode 100644 index 0000000..0cf5a19 --- /dev/null +++ b/Tests/AITests/LocalMLXProviderTests.swift @@ -0,0 +1,149 @@ +import XCTest +@testable import AIFabric + +final class LocalMLXProviderTests: XCTestCase { + + private let baseURL = URL(string: "http://100.76.254.116:8890")!
+ + override func tearDown() { + MockURLProtocol.reset() + super.tearDown() + } + + private func provider() -> LocalMLXProvider { + LocalMLXProvider(baseURL: baseURL, model: "gemma-macmini", session: MockURLProtocol.session()) + } + + // MARK: isAvailable probe (wake 미트리거) + + func testProbeAvailable() async throws { + MockURLProtocol.handler = { req in + MockURLProtocol.ok(req.url!, json: Data(#"{"data":[{"id":"gemma-macmini"}]}"#.utf8)) + } + let available = await provider().isAvailable + XCTAssertTrue(available) + // probe 는 GET /v1/models + XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/models") + XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "GET") + } + + func testProbeUnavailableOnError() async throws { + MockURLProtocol.handler = { _ in throw URLError(.cannotConnectToHost) } + let available = await provider().isAvailable + XCTAssertFalse(available) // 연결오류 → false(throw 아님) + } + + func testProbeUnavailableOn500() async throws { + MockURLProtocol.handler = { req in MockURLProtocol.status(req.url!, 500) } + let available = await provider().isAvailable + XCTAssertFalse(available) + } + + // MARK: complete — 응답 매핑 + 요청 call-shape 락 + + func testCompleteMapsResponseFixture() async throws { + let body = try Fixture.data("llm-router-chat.response.json") + MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) } + + let resp = try await provider().complete( + AICompletionRequest(task: .quickSummarize, prompt: "충격시험 면제 기준을 한 문장으로 요약해줘.", + systemPrompt: "You are a concise technical assistant.", maxTokens: 512) + ) + XCTAssertEqual(resp.providerUsed, .localMLX) + XCTAssertEqual(resp.finishReason, .completed) + XCTAssertTrue(resp.citations.isEmpty) + XCTAssertNotNil(resp.latencyMs) + XCTAssertTrue(resp.text.contains("UCS-66")) + } + + func testCompleteRequestCallShape() async throws { + let body = try Fixture.data("llm-router-chat.response.json") + MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) } + 
+ _ = try await provider().complete( + AICompletionRequest(task: .quickSummarize, prompt: "PROMPT_X", + systemPrompt: "SYS_Y", maxTokens: 512) + ) + // POST /v1/chat/completions + XCTAssertEqual(MockURLProtocol.recorder.lastURL?.path, "/v1/chat/completions") + XCTAssertEqual(MockURLProtocol.recorder.lastMethod, "POST") + + // messages system/user 분리 call-shape (load-bearing) + let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody) + let decoded = try JSONDecoder().decode(SentRequest.self, from: sent) + XCTAssertEqual(decoded.model, "gemma-macmini") + XCTAssertEqual(decoded.maxTokens, 512) + XCTAssertEqual(decoded.stream, false) + XCTAssertEqual(decoded.messages.count, 2) + XCTAssertEqual(decoded.messages[0].role, "system") + XCTAssertEqual(decoded.messages[0].content, "SYS_Y") + XCTAssertEqual(decoded.messages[1].role, "user") + XCTAssertEqual(decoded.messages[1].content, "PROMPT_X") + } + + func testNilSystemPromptSendsEmptySystemMessage() async throws { + let body = try Fixture.data("llm-router-chat.response.json") + MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) } + _ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P")) + let sent = try XCTUnwrap(MockURLProtocol.recorder.lastBody) + let decoded = try JSONDecoder().decode(SentRequest.self, from: sent) + XCTAssertEqual(decoded.messages[0].role, "system") + XCTAssertEqual(decoded.messages[0].content, "") // plan S2-2c: systemPrompt ?? 
"" + } + + func testNon200BackendError() async throws { + MockURLProtocol.handler = { req in MockURLProtocol.status(req.url!, 503, body: "model loading") } + do { + _ = try await provider().complete(AICompletionRequest(task: .quickSummarize, prompt: "P")) + XCTFail("non-200 must throw backendError, not silent empty text") + } catch let AIProviderError.backendError(id, status, reason) { + XCTAssertEqual(id, .localMLX) + XCTAssertEqual(status, 503) + XCTAssertEqual(reason, "model loading") + } + } + + func testRequestFixtureMatchesEncoder() throws { + // 동결 request fixture 의 call-shape 가 encodeRequest 산출과 일치(템플릿 placeholder 회귀 방지). + let fixtureData = try Fixture.data("llm-router-chat.request.json") + let fixture = try JSONDecoder().decode(SentRequest.self, from: fixtureData) + XCTAssertEqual(fixture.messages.count, 2) + XCTAssertEqual(fixture.messages[0].role, "system") + XCTAssertEqual(fixture.messages[1].role, "user") + XCTAssertEqual(fixture.stream, false) + } + + // MARK: rule-fallback 라우팅 (S2-2d) — onDevice 불가 → localMLX + + func testFallbackFromOnDeviceToLocalMLX() async throws { + let body = try Fixture.data("llm-router-chat.response.json") + MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) } + let router = AIRouter(providers: [ + .onDevice: MockAIProvider(id: .onDevice, available: false), // 불가 + .localMLX: provider(), + ]) + let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "P")) + XCTAssertEqual(resp.providerUsed, .localMLX) + XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX") + } + + func testNoFallbackNoteOnFirstChoiceSuccess() async throws { + let body = try Fixture.data("llm-router-chat.response.json") + MockURLProtocol.handler = { req in MockURLProtocol.ok(req.url!, json: body) } + // classify 체인 = [.localMLX, .remoteDS, .onDevice] → 1순위 localMLX 성공 → note nil + let router = AIRouter(providers: [.localMLX: provider()]) + let resp = try await 
router.route(AICompletionRequest(task: .classify, prompt: "P")) + XCTAssertEqual(resp.providerUsed, .localMLX) + XCTAssertNil(resp.routingNote) + } + + /// 나간 요청 디코딩용(테스트 전용 미러). + struct SentRequest: Decodable { + struct Message: Decodable { let role: String; let content: String } + let model: String + let messages: [Message] + let maxTokens: Int? + let stream: Bool + enum CodingKeys: String, CodingKey { case model, messages, stream; case maxTokens = "max_tokens" } + } +} diff --git a/Tests/AITests/MockURLProtocol.swift b/Tests/AITests/MockURLProtocol.swift new file mode 100644 index 0000000..7c3d08b --- /dev/null +++ b/Tests/AITests/MockURLProtocol.swift @@ -0,0 +1,86 @@ +import Foundation + +/// 테스트용 URLProtocol — URLSession 을 가로채 canned 응답/에러를 돌려주고, 나간 요청을 기록. +/// 라이브 네트워크 0 으로 LocalMLX 의 probe/complete call-shape 를 검증. +final class MockURLProtocol: URLProtocol { + /// (request) -> (response, body). throw 하면 URLSession 에러 경로. + nonisolated(unsafe) static var handler: (@Sendable (URLRequest) throws -> (HTTPURLResponse, Data))? + /// 마지막으로 가로챈 요청(body 포함) 기록. 
+ nonisolated(unsafe) static var recorder = RequestRecorder() + + static func reset() { + handler = nil + recorder = RequestRecorder() + } + + override class func canInit(with request: URLRequest) -> Bool { true } + override class func canonicalRequest(for request: URLRequest) -> URLRequest { request } + + override func startLoading() { + Self.recorder.record(request) + guard let handler = Self.handler else { + client?.urlProtocol(self, didFailWithError: URLError(.unsupportedURL)) + return + } + do { + let (response, data) = try handler(request) + client?.urlProtocol(self, didReceive: response, cacheStoragePolicy: .notAllowed) + client?.urlProtocol(self, didLoad: data) + client?.urlProtocolDidFinishLoading(self) + } catch { + client?.urlProtocol(self, didFailWithError: error) + } + } + + override func stopLoading() {} + + // MARK: helpers + + static func session() -> URLSession { + let config = URLSessionConfiguration.ephemeral + config.protocolClasses = [MockURLProtocol.self] + return URLSession(configuration: config) + } + + static func ok(_ url: URL, json: Data) -> (HTTPURLResponse, Data) { + (HTTPURLResponse(url: url, statusCode: 200, httpVersion: nil, headerFields: nil)!, json) + } + + static func status(_ url: URL, _ code: Int, body: String = "") -> (HTTPURLResponse, Data) { + (HTTPURLResponse(url: url, statusCode: code, httpVersion: nil, headerFields: nil)!, Data(body.utf8)) + } +} + +/// 나간 요청 기록(body 는 httpBody 또는 httpBodyStream 에서 추출 — URLProtocol 은 보통 stream 으로 전달). +final class RequestRecorder: @unchecked Sendable { + private(set) var lastURL: URL? + private(set) var lastMethod: String? + private(set) var lastBody: Data? + private(set) var callCount = 0 + + func record(_ request: URLRequest) { + callCount += 1 + lastURL = request.url + lastMethod = request.httpMethod + lastBody = request.bodyData + } +} + +extension URLRequest { + /// URLProtocol 단계에서 body 추출 — httpBody 가 nil 이면 httpBodyStream 에서 읽음. + var bodyData: Data? 
{ + if let httpBody { return httpBody } + guard let stream = httpBodyStream else { return nil } + stream.open() + defer { stream.close() } + var data = Data() + let bufSize = 8192 + var buffer = [UInt8](repeating: 0, count: bufSize) + while stream.hasBytesAvailable { + let read = stream.read(&buffer, maxLength: bufSize) + if read <= 0 { break } + data.append(buffer, count: read) + } + return data + } +} diff --git a/Tests/AITests/OnDeviceProviderTests.swift b/Tests/AITests/OnDeviceProviderTests.swift new file mode 100644 index 0000000..88676ce --- /dev/null +++ b/Tests/AITests/OnDeviceProviderTests.swift @@ -0,0 +1,201 @@ +import XCTest +@testable import AIFabric +#if canImport(FoundationModels) +import FoundationModels +#endif + +/// Hardware-independent mock backend — injects the availability value and the generate() outcome. +struct MockOnDeviceBackend: OnDeviceModelBackend { + let avail: OnDeviceAvailability + let outcome: Result<String, OnDeviceGenerationError> // .success → text returned by generate(); .failure → error thrown by generate() + + init(avail: OnDeviceAvailability = .available, + outcome: Result<String, OnDeviceGenerationError> = .success("on-device ok")) { + self.avail = avail + self.outcome = outcome + } + var availability: OnDeviceAvailability { avail } + func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String { + switch outcome { + case .success(let s): return s + case .failure(let e): throw e + } + } +} + +/// Provider that counts complete() calls — used to assert that no fallback happened.
+actor CountingProvider: AIProvider { + nonisolated let id: AIProviderID + let available: Bool + private(set) var completeCalls = 0 + + init(id: AIProviderID, available: Bool) { + self.id = id + self.available = available + } + var isAvailable: Bool { get async { available } } + func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse { + completeCalls += 1 + return AICompletionResponse(text: "should-not-be-called", providerUsed: id) + } +} + +final class LogSink: @unchecked Sendable { + private let lock = NSLock() + private var storage: [String] = [] + func append(_ s: String) { lock.lock(); storage.append(s); lock.unlock() } + var lines: [String] { lock.lock(); defer { lock.unlock() }; return storage } +} + +final class OnDeviceProviderTests: XCTestCase { + + // MARK: 가용성 + happy path (주입 backend) + + func testAvailableReturnsText() async throws { + let p = OnDeviceProvider(backend: MockOnDeviceBackend(avail: .available, outcome: .success("요약 결과"))) + let available = await p.isAvailable + XCTAssertTrue(available) + let resp = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTAssertEqual(resp.providerUsed, .onDevice) + XCTAssertEqual(resp.finishReason, .completed) + XCTAssertEqual(resp.text, "요약 결과") + XCTAssertNotNil(resp.latencyMs) + } + + func testUnavailableReportsFalse() async throws { + let p = OnDeviceProvider(backend: MockOnDeviceBackend(avail: .unavailable(reason: "appleIntelligenceNotEnabled"))) + let available = await p.isAvailable + XCTAssertFalse(available) + } + + // MARK: GenerationError 매핑 (S2-3c) + + func testGuardrailAndRefusalReturnRefused() async throws { + for err in [OnDeviceGenerationError.guardrailViolation, .refusal] { + let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(err))) + let resp = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTAssertEqual(resp.finishReason, .refused, "\(err) → .refused (답변의 일종, 폴백 X)") + 
XCTAssertEqual(resp.providerUsed, .onDevice) + } + } + + func testRateLimitedThrowsUnavailableAndLoudLogs() async throws { + let sink = LogSink() + let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.rateLimited)), + log: { sink.append($0) }) + do { + _ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTFail("rateLimited → throw unavailable") + } catch let AIProviderError.unavailable(id) { + XCTAssertEqual(id, .onDevice) + } + XCTAssertTrue(sink.lines.contains { $0.contains("rateLimited") }, "stateless 위반은 loud log") + } + + func testConcurrentRequestsThrowsUnavailableAndLoudLogs() async throws { + let sink = LogSink() + let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.concurrentRequests)), + log: { sink.append($0) }) + do { + _ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTFail("concurrentRequests → throw unavailable") + } catch let AIProviderError.unavailable(id) { + XCTAssertEqual(id, .onDevice) + } + XCTAssertTrue(sink.lines.contains { $0.contains("concurrentRequests") }) + } + + func testContextOverflowThrowsUnavailable() async throws { + let p = OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.exceededContextWindowSize))) + do { + _ = try await p.complete(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTFail("exceededContextWindowSize → throw unavailable (폴백 유도)") + } catch let AIProviderError.unavailable(id) { + XCTAssertEqual(id, .onDevice) + } + } + + // MARK: 라우팅 (S2-3d) + + func testRouterFallsBackOnDeviceOverflowToLocalMLX() async throws { + let router = AIRouter(providers: [ + .onDevice: OnDeviceProvider(backend: MockOnDeviceBackend(outcome: .failure(.exceededContextWindowSize))), + .localMLX: MockAIProvider(id: .localMLX, available: true), + ]) + let resp = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p")) + XCTAssertEqual(resp.providerUsed, .localMLX) + 
XCTAssertEqual(resp.routingNote, "fallback from onDevice → localMLX") + } + + func testExplicitOnDeviceUnavailableNoFallback() async throws { + let counting = CountingProvider(id: .localMLX, available: true) + let router = AIRouter(providers: [ + .onDevice: OnDeviceProvider(backend: MockOnDeviceBackend(avail: .unavailable(reason: "deviceNotEligible"))), + .localMLX: counting, + ]) + do { + _ = try await router.route(AICompletionRequest(task: .quickSummarize, prompt: "p", explicitProvider: .onDevice)) + XCTFail("explicit onDevice unavailable → explicitProviderUnavailable, 자동 폴백 금지") + } catch let AIRoutingError.explicitProviderUnavailable(id) { + XCTAssertEqual(id, .onDevice) + } + let calls = await counting.completeCalls + XCTAssertEqual(calls, 0, "명시 불가 시 타 provider complete() 호출 0") + } + + // MARK: SDK GenerationError → 중립 매핑 lock (구성 가능한 케이스) + + #if canImport(FoundationModels) + func testTranslateGenerationErrorCases() { + let ctx = LanguageModelSession.GenerationError.Context(debugDescription: "test") + XCTAssertEqual(FoundationModelsBackend.translate(.exceededContextWindowSize(ctx)), .exceededContextWindowSize) + XCTAssertEqual(FoundationModelsBackend.translate(.guardrailViolation(ctx)), .guardrailViolation) + XCTAssertEqual(FoundationModelsBackend.translate(.rateLimited(ctx)), .rateLimited) + XCTAssertEqual(FoundationModelsBackend.translate(.concurrentRequests(ctx)), .concurrentRequests) + XCTAssertEqual(FoundationModelsBackend.translate(.unsupportedLanguageOrLocale(ctx)), .unsupportedLanguageOrLocale) + XCTAssertEqual(FoundationModelsBackend.translate(.assetsUnavailable(ctx)), .assetsUnavailable) + } + #endif + + // MARK: 라이브 통합 (M5 Max — 비-AI Mac 은 skip) + + func testLiveOnDeviceIntegration() async throws { + let p = OnDeviceProvider() // 라이브 FoundationModels backend + guard await p.isAvailable else { + throw XCTSkip("FoundationModels not available on this machine — live test skipped") + } + let resp = try await p.complete( + AICompletionRequest(task: 
.quickSummarize, + prompt: "엘보 내경 가공의 핵심 관리 포인트를 한 문장으로 요약해줘.", + maxTokens: 120) + ) + XCTAssertEqual(resp.providerUsed, .onDevice) + XCTAssertEqual(resp.finishReason, .completed) + XCTAssertFalse(resp.text.isEmpty, "라이브 응답은 비어있지 않아야") + } + + func testLiveCancellationCooperative() async throws { + let p = OnDeviceProvider() + guard await p.isAvailable else { + throw XCTSkip("FoundationModels not available — cancellation live test skipped") + } + let started = Date() + let task = Task { () -> AIFinishReason in + let r = try await p.complete( + AICompletionRequest(task: .quickSummarize, + prompt: "대한민국 압력용기 산업과 ASME 표준 채택 역사를 아주 길고 자세하게 여러 단락으로 서술해줘.", + maxTokens: 4000) + ) + return r.finishReason + } + try? await Task.sleep(nanoseconds: 500_000_000) + task.cancel() + do { + _ = try await task.value + // 협조적이지 않으면 완주 — 정보로만(테스트 실패시키지 않음, 환경 의존). + } catch is CancellationError { + let elapsed = Date().timeIntervalSince(started) + XCTAssertLessThan(elapsed, 8.0, "협조적 취소면 빠르게 중단(S2-3a: ~33ms 후)") + } + } +} diff --git a/Tests/AITests/RemoteDSProviderTests.swift b/Tests/AITests/RemoteDSProviderTests.swift new file mode 100644 index 0000000..992a51c --- /dev/null +++ b/Tests/AITests/RemoteDSProviderTests.swift @@ -0,0 +1,151 @@ +import XCTest +@testable import AIFabric + +/// 테스트용 DS client — ask.json 등 fixture 를 반환하거나 에러를 던지고, 받은 call-shape 를 기록. +actor MockDSAskClient: DSAskClient { + let response: AskResponse? + let error: Error? + private(set) var lastBackend: String? + private(set) var lastQuery: String? + private(set) var callCount = 0 + + init(response: AskResponse? = nil, error: Error? = nil) { + self.response = response + self.error = error + } + + func ask(query: String, backend: String) async throws -> AskResponse { + callCount += 1 + lastBackend = backend + lastQuery = query + if let error { throw error } + return response! 
+ } +} + +final class RemoteDSProviderTests: XCTestCase { + + private func askFixture() throws -> AskResponse { + try Fixture.decode(AskResponse.self, from: "ask.json") + } + + // MARK: ask.json 디코딩 + 매핑 (call-shape 회귀) + + func testAskJsonDecodeAndMap() throws { + let r = try askFixture() + XCTAssertEqual(r.synthesisStatus, "completed") + XCTAssertEqual(r.confidence, "high") + XCTAssertEqual(r.backendUsed, "gemma-macmini") + XCTAssertEqual(r.citations.count, 1) + XCTAssertEqual(r.citations[0].docId, 4912) + XCTAssertEqual(r.citations[0].n, 1) + XCTAssertEqual(r.citations[0].sectionTitle, "2. UCS-66 면제 곡선") + + let mapped = RemoteDSProvider.map(r) + XCTAssertEqual(mapped.providerUsed, .remoteDS) + XCTAssertEqual(mapped.finishReason, .completed) + XCTAssertEqual(mapped.citations.count, 1) + XCTAssertEqual(mapped.citations[0].docId, 4912) + XCTAssertEqual(mapped.confidence, .high) + XCTAssertEqual(mapped.routingNote, "gemma-macmini") + XCTAssertEqual(mapped.latencyMs, 2841.5) + XCTAssertEqual(mapped.text, r.aiAnswer) + } + + func testCompleteMapsFixture() async throws { + let mock = MockDSAskClient(response: try askFixture()) + let provider = RemoteDSProvider(client: mock) + let resp = try await provider.complete( + AICompletionRequest(task: .corpusAsk, prompt: "충격시험은 언제 면제되나") + ) + XCTAssertEqual(resp.providerUsed, .remoteDS) + XCTAssertEqual(resp.citations.count, 1) + XCTAssertEqual(resp.finishReason, .completed) + XCTAssertEqual(resp.routingNote, "gemma-macmini") + } + + // MARK: backend exhaustive switch call-shape (쿼리 파라미터 락) + + func testBackendCallShape_nilExplicit() async throws { + let mock = MockDSAskClient(response: try askFixture()) + let provider = RemoteDSProvider(client: mock) + _ = try await provider.complete(AICompletionRequest(task: .corpusAsk, prompt: "q")) + let backend = await mock.lastBackend + XCTAssertEqual(backend, "mac-mini-default") // 미지정 → DS 기본 + } + + func testBackendCallShape_localMLXExplicit() async throws { + let mock = 
MockDSAskClient(response: try askFixture()) + let provider = RemoteDSProvider(client: mock) + _ = try await provider.complete( + AICompletionRequest(task: .corpusAsk, prompt: "q", explicitProvider: .localMLX) + ) + let backend = await mock.lastBackend + XCTAssertEqual(backend, "gemma-macmini") + } + + func testBackendMapPure() { + XCTAssertEqual(RemoteDSProvider.dsBackend(for: nil), "mac-mini-default") + XCTAssertEqual(RemoteDSProvider.dsBackend(for: .localMLX), "gemma-macmini") + XCTAssertEqual(RemoteDSProvider.dsBackend(for: .remoteDS), "mac-mini-default") + XCTAssertEqual(RemoteDSProvider.dsBackend(for: .onDevice), "mac-mini-default") + XCTAssertEqual(RemoteDSProvider.dsBackend(for: .specialized), "mac-mini-default") + } + + func testNonCorpusTaskNotImplemented() async throws { + let mock = MockDSAskClient(response: try askFixture()) + let provider = RemoteDSProvider(client: mock) + do { + _ = try await provider.complete(AICompletionRequest(task: .quickSummarize, prompt: "q")) + XCTFail("non-corpus task should not be served by RemoteDS") + } catch let AIProviderError.notImplemented(id) { + XCTAssertEqual(id, .remoteDS) + } + } + + // MARK: 라우팅 — corpusAsk 무폴백 (오프라인에서도 정답) + + func testCorpusAskRoutesToRemoteDSOnly() async throws { + let router = AIRouter(providers: [ + .remoteDS: RemoteDSProvider(client: MockDSAskClient(response: try askFixture())), + .onDevice: MockAIProvider(id: .onDevice, available: true), // available 이어도 corpusAsk 폴백 금지 + ]) + let resp = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "q")) + XCTAssertEqual(resp.providerUsed, .remoteDS) + XCTAssertEqual(resp.citations.count, 1) + } + + func testCorpusAskRemoteDSDown_NoLocalFallback() async throws { + // remoteDS 불가 → 엉뚱한 온디바이스 코퍼스 답변 대신 명확한 실패(무폴백). 
+ struct Net: Error {} + let router = AIRouter(providers: [ + .remoteDS: RemoteDSProvider(client: MockDSAskClient(error: Net())), + .onDevice: MockAIProvider(id: .onDevice, available: true), + ]) + do { + _ = try await router.route(AICompletionRequest(task: .corpusAsk, prompt: "q")) + XCTFail("corpusAsk must not fall back to onDevice") + } catch is Net { + // 기대: remoteDS 의 실패가 전파(코퍼스 체인 = [.remoteDS] only) + } + } + + // MARK: S2-4b — cloud 'claude-cloud' = 명시 503 (로컬 대체 응답 금지) + + func testCloud503Surfaces_NoSilentFallback() async throws { + let err = AIProviderError.backendError(.remoteDS, status: 503, reason: "cloud backend pending activation") + let router = AIRouter(providers: [ + .remoteDS: RemoteDSProvider(client: MockDSAskClient(error: err)), + .onDevice: MockAIProvider(id: .onDevice, available: true), + ]) + do { + _ = try await router.route( + AICompletionRequest(task: .corpusAsk, prompt: "q", explicitProvider: .remoteDS) + ) + XCTFail("503 must surface, not fall back") + } catch let AIProviderError.backendError(id, status, _) { + XCTAssertEqual(id, .remoteDS) + XCTAssertEqual(status, 503) + } + } +} diff --git a/Tests/AITests/SpecializedProviderTests.swift b/Tests/AITests/SpecializedProviderTests.swift new file mode 100644 index 0000000..7809f53 --- /dev/null +++ b/Tests/AITests/SpecializedProviderTests.swift @@ -0,0 +1,39 @@ +import XCTest +@testable import AIFabric + +final class SpecializedProviderTests: XCTestCase { + + func testScaffoldUnavailableAndNotImplemented() async throws { + let p = SpecializedProvider() + let available = await p.isAvailable + XCTAssertFalse(available) + do { + _ = try await p.complete(AICompletionRequest(task: .vision, prompt: "p")) + XCTFail("scaffold must throw notImplemented") + } catch let AIProviderError.notImplemented(id) { + XCTAssertEqual(id, .specialized) + } + } + + /// .vision 체인 [.specialized, .onDevice] — specialized 등록·불가 → onDevice 로 **가시** 폴백(라우터 log). 
+ /// (onDevice 는 providerUsed=id 를 정직히 반환하는 CountingProvider 사용 — MockAIProvider 는 vision 케이스에서 + /// providerUsed 를 .specialized 로 하드코딩하므로 부적합.) + func testVisionFallsBackToOnDeviceVisibly() async throws { + let sink = LogSink() + let onDevice = CountingProvider(id: .onDevice, available: true) + let router = AIRouter( + providers: [ + .specialized: SpecializedProvider(), + .onDevice: onDevice, + ], + log: { sink.append($0) } + ) + let resp = try await router.route(AICompletionRequest(task: .vision, prompt: "도면 보기")) + XCTAssertEqual(resp.providerUsed, .onDevice) + XCTAssertEqual(resp.routingNote, "fallback from specialized → onDevice") + let calls = await onDevice.completeCalls + XCTAssertEqual(calls, 1) + XCTAssertTrue(sink.lines.contains { $0.contains("specialized") && $0.contains("unavailable") }, + "specialized 불가가 침묵 아닌 log 로 가시화") + } +} diff --git a/Tests/AITests/TestProviders.swift b/Tests/AITests/TestProviders.swift new file mode 100644 index 0000000..ae512bc --- /dev/null +++ b/Tests/AITests/TestProviders.swift @@ -0,0 +1,28 @@ +import Foundation +@testable import AIFabric + +/// providerUsed=id 를 정직히 반환하는 테스트 provider(MockAIProvider 는 일부 태스크에서 providerUsed 를 하드코딩). +struct EchoProvider: AIProvider { + let id: AIProviderID + let available: Bool + init(id: AIProviderID, available: Bool = true) { + self.id = id + self.available = available + } + var isAvailable: Bool { get async { available } } + func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse { + AICompletionResponse(text: "echo:\(id.rawValue)", providerUsed: id) + } +} + +/// 취소 전파 테스트용 — sleep 중 Task 취소 시 CancellationError(S2-Fe URLSession 경로 아날로그). 
+struct SleepingProvider: AIProvider { + let id: AIProviderID + init(id: AIProviderID = .localMLX) { self.id = id } + var isAvailable: Bool { get async { true } } + func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse { + try await Task.sleep(nanoseconds: 5_000_000_000) // 취소되면 CancellationError throw + try Task.checkCancellation() + return AICompletionResponse(text: "done", providerUsed: id) + } +} diff --git a/contract/fixtures/foundationmodels-respond.json b/contract/fixtures/foundationmodels-respond.json new file mode 100644 index 0000000..294cd2d --- /dev/null +++ b/contract/fixtures/foundationmodels-respond.json @@ -0,0 +1,96 @@ +{ + "_meta": { + "fixture": "foundationmodels-respond", + "purpose": "S2-3a — Apple FoundationModels live capture (OnDeviceProvider 결선 + 테스트 동결 기준)", + "captured_on": "M5 Max MacBook Pro (128GB, Apple Intelligence)", + "captured_date": "2026-06-04", + "sdk": "macOS 26.5 SDK / FoundationModels.framework", + "note": "SDK 가 marshaling 하므로 raw request_body 는 없음. 이 파일은 응답 모양 + 에러 타입 + 취소 동작의 동결 기준." + }, + + "availability": { + "observed": "available", + "is_available_convenience": true, + "read_is_synchronous": true, + "api": "SystemLanguageModel.default.availability", + "enum": { + "available": "case available", + "unavailable_reasons": ["deviceNotEligible", "appleIntelligenceNotEnabled", "modelNotReady"] + }, + "supports_korean": true, + "supported_language_count": 23 + }, + + "happy_path": { + "api": "session.respond(to: String, options: GenerationOptions) async throws -> Response", + "content_accessor": "response.content (Response.content : String)", + "observed_content": "압력용기의 충격시험(Charpy) 면제 판정은, 용기의 압력 등급이 10MPa 이하인 경우, 충격 시험을 면제할 수 있으며, 이는 용기의 안전성을 보장하기 위한 중요한 기준입니다.", + "observed_latency_ms": 1291.3, + "transcript_entries_count": 1, + "is_responding_after": false, + "quality_note": "내용은 부정확(온디바이스 ~3B/2-bit QAT). corpusAsk 부적합·quickSummarize/classify 적합 라우팅 정합 — 사실성은 RemoteDS 코퍼스가 담당." 
+ }, + + "session_init": { + "api": "LanguageModelSession(model: .default, tools: [], instructions: String?)", + "instructions_timing": "init (per-call 아님)", + "instructions_nil_handling": "systemPrompt == nil 이면 instructions 인자 생략 (빈 문자열 금지)", + "prewarm": "session.prewarm() — 동기 반환(관찰 ~1.3ms), 백그라운드 워밍", + "stateless_per_request": "호출마다 새 세션 생성 → instructions(init-time) + rateLimited/concurrentRequests(세션 상태) 둘 다 우회" + }, + + "generation_options": { + "api": "GenerationOptions(sampling: SamplingMode? = nil, temperature: Double? = nil, maximumResponseTokens: Int? = nil)", + "mapping": "AICompletionRequest.maxTokens -> maximumResponseTokens", + "temperature_note": "AICompletionRequest 에 temperature 필드 없음(동결) → 미설정(모델 기본). LocalMLX 와 동일 정책(둘 다 미설정)." + }, + + "generation_error": { + "_source": "Xcode jump-to-def / swiftinterface (LanguageModelSession.GenerationError) — authoritative, version-accurate", + "type": "LanguageModelSession.GenerationError : Error, LocalizedError", + "associated_value": "각 case 는 GenerationError.Context (refusal 은 (Refusal, Context))", + "cases": [ + "exceededContextWindowSize(Context)", + "assetsUnavailable(Context)", + "guardrailViolation(Context)", + "unsupportedGuide(Context)", + "unsupportedLanguageOrLocale(Context)", + "decodingFailure(Context)", + "rateLimited(Context)", + "concurrentRequests(Context)", + "refusal(Refusal, Context)" + ], + "plan_corrections": [ + "plan 가정 'refusal 케이스명 없음' = 틀림 → refusal 은 별도 case 로 존재(guardrailViolation 과 구분).", + "plan 에 없던 concurrentRequests case 존재 — rateLimited 와 함께 stateless 세션에서 뜨면 세션 공유 버그 신호.", + "assetsUnavailable 정확명 확정(모델 자산 미가용)." 
+ ], + "reproduced_live": { + "exceededContextWindowSize": { + "trigger": "의도적 컨텍스트 오버플로(긴 프롬프트)", + "errorDescription": "Exceeded model context window size" + } + }, + "finish_reason_mapping": { + "guardrailViolation": ".refused", + "refusal": ".refused", + "exceededContextWindowSize": ".unavailable", + "rateLimited": ".unavailable + loud log (stateless 인데 발생 = 세션 재사용 버그 신호)", + "concurrentRequests": ".unavailable + loud log (동일 — stateless 위반 신호)", + "unsupportedLanguageOrLocale": ".unavailable (+ supportedLocale 사전체크로 회피)", + "unsupportedGuide": ".unavailable", + "decodingFailure": ".unavailable", + "assetsUnavailable": ".unavailable", + "@unknown default": ".unavailable + loud log" + } + }, + + "cancellation": { + "_finding": "S2-Fe 전제 확정 — COOPERATIVE", + "cancel_requested_at_ms": 500, + "threw": "CancellationError", + "elapsed_ms": 533.6, + "interpretation": "respond() 는 mid-flight Task 취소를 협조적으로 honor(요청 33ms 후 CancellationError throw).", + "implication": "OnDevice complete() 에 surrounding Task.checkCancellation() 은 belt-and-suspenders(실제 중단은 respond() 내부). streamResponse 토큰단위 취소 폴백 불필요(선전환 금지)." + } +} diff --git a/contract/fixtures/llm-router-chat.request.json b/contract/fixtures/llm-router-chat.request.json new file mode 100644 index 0000000..1fb6fbe --- /dev/null +++ b/contract/fixtures/llm-router-chat.request.json @@ -0,0 +1,19 @@ +{ + "_meta": { + "fixture": "llm-router-chat.request", + "status": "PROVISIONAL_SYNTHETIC", + "synthetic": true, + "captured_date": null, + "reason": "맥미니(hyungi-macmini 100.76.254.116) Tailscale offline(last seen 8h+, 2026-06-04) → 라이브 캡처 불가. OpenAI /v1/chat/completions 표준 스펙 기반 합성. 맥미니 복귀 시 S2-2a 라이브 재캡처로 교체(별 fixture-update PR, S2-Ff drift-check 경유).", + "endpoint": "POST http://100.76.254.116:8890/v1/chat/completions", + "call_shape_note": "messages = [system, user] 분리 고정(load-bearing). system.content = AICompletionRequest.systemPrompt ?? \"\" (plan S2-2c). 
max_tokens = AICompletionRequest.maxTokens.", + "model_note": "model 문자열은 llm-router 가 기대하는 별칭 — 라이브 캡처로 확정 필요(provisional: 'gemma-macmini')." + }, + "model": "gemma-macmini", + "messages": [ + { "role": "system", "content": "You are a concise technical assistant." }, + { "role": "user", "content": "충격시험 면제 기준을 한 문장으로 요약해줘." } + ], + "max_tokens": 512, + "stream": false +} diff --git a/contract/fixtures/llm-router-chat.response.json b/contract/fixtures/llm-router-chat.response.json new file mode 100644 index 0000000..5a948bb --- /dev/null +++ b/contract/fixtures/llm-router-chat.response.json @@ -0,0 +1,25 @@ +{ + "_meta": { + "fixture": "llm-router-chat.response", + "status": "PROVISIONAL_SYNTHETIC", + "synthetic": true, + "captured_date": null, + "reason": "맥미니 offline → 라이브 캡처 불가. OpenAI chat.completion 표준 응답 모양 기반 합성. 라이브 재캡처로 교체(S2-Ff).", + "shape": "OpenAI chat.completion (choices[0].message.content → text, finish_reason → AIFinishReason)" + }, + "id": "chatcmpl-provisional-0001", + "object": "chat.completion", + "created": 0, + "model": "gemma-macmini", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "충격시험 면제는 재료군(Curve A~D)과 거버닝 두께에 따른 UCS-66 면제 곡선으로 MDMT에서 판정합니다." + }, + "finish_reason": "stop" + } + ], + "usage": { "prompt_tokens": 24, "completion_tokens": 41, "total_tokens": 65 } +}