f512d94c74
git-subtree-dir: clients/ds-app git-subtree-mainline:a24e3e6f22git-subtree-split:5206cf3b0c
197 lines
8.8 KiB
Swift
197 lines
8.8 KiB
Swift
// OnDeviceProvider.swift — S2 구현 (맥북·아이폰 온디바이스, Apple FoundationModels).
|
|
//
|
|
// 라이브 경로(결정 #3): SystemLanguageModel.default.availability(동기) + LanguageModelSession.respond().
|
|
// 모델 ~3B/2-bit QAT → quickSummarize/classify 적합, corpusAsk 무리(라우팅이 corpusAsk 를 RemoteDS 로만 보냄).
|
|
//
|
|
// 테스트 가능성: 모델 backend 를 seam(OnDeviceModelBackend)으로 주입 → CI/비-AI Mac 은 mock 으로
|
|
// 에러매핑/라우팅 단위테스트, M5 Max 는 라이브 통합테스트(아래 default = FoundationModels).
|
|
//
|
|
// 실측 동결(S2-3a, M5 Max 2026-06-04): availability=available · respond()→Response<String>.content ·
|
|
// GenerationError 9 case · 컨텍스트 오버플로→exceededContextWindowSize · 취소=COOPERATIVE(respond() 가
|
|
// mid-flight Task.cancel 협조 → checkCancellation 은 belt-and-suspenders, streamResponse 불필요).
|
|
import Foundation
|
|
import os
|
|
|
|
// MARK: - 중립 타입 (FoundationModels 비의존 — 매핑 로직을 HW 없이 단위테스트)
|
|
|
|
/// SDK-neutral description of whether the on-device model can be used.
///
/// Contains no FoundationModels types, so code that consumes it can be
/// unit-tested on machines without the framework or the hardware.
public enum OnDeviceAvailability: Sendable, Equatable {

    /// The model is ready to serve requests.
    case available

    /// The model cannot be used; `reason` carries a short explanation.
    case unavailable(reason: String)
}
|
|
|
|
/// SDK-neutral mirror of `FoundationModels.LanguageModelSession.GenerationError`
/// (its nine cases plus a catch-all `unknown`).
///
/// The live backend translates SDK errors into this type so that the provider's
/// mapping to a finish reason or a thrown error stays pure and testable.
/// Case order is kept stable on purpose.
public enum OnDeviceGenerationError: Error, Sendable, Equatable {

    // The model declined to answer.
    case guardrailViolation, refusal

    // The request did not fit, or the session was over-used.
    case exceededContextWindowSize, rateLimited, concurrentRequests

    // The request or environment is not supported.
    case unsupportedLanguageOrLocale, unsupportedGuide

    // Generation failed, or model assets are missing.
    case decodingFailure, assetsUnavailable

    /// Any failure without a dedicated case; the payload is a free-form description.
    case unknown(String)
}
|
|
|
|
/// Seam between the provider and the concrete on-device model.
///
/// The live implementation is `FoundationModelsBackend`; tests supply a mock.
protocol OnDeviceModelBackend: Sendable {

    /// Current readiness of the underlying model.
    var availability: OnDeviceAvailability { get }

    /// Generates a completion for `prompt`.
    ///
    /// - Parameters:
    ///   - prompt: The user prompt.
    ///   - systemPrompt: Optional instructions for the model; `nil` means none.
    ///   - maxTokens: Optional upper bound on the number of response tokens.
    /// - Returns: The generated text.
    /// - Throws: `OnDeviceGenerationError` or `CancellationError` on failure.
    func generate(
        prompt: String,
        systemPrompt: String?,
        maxTokens: Int?
    ) async throws -> String
}
|
|
|
|
// MARK: - Provider
|
|
|
|
/// `AIProvider` that serves completions from the on-device model through an
/// injectable `OnDeviceModelBackend`.
public struct OnDeviceProvider: AIProvider {

    public let id: AIProviderID = .onDevice

    private let backend: OnDeviceModelBackend
    private let log: @Sendable (String) -> Void

    /// Creates a provider wired to the live backend and the unified logging system.
    public init() {
        let osLogger = Logger(subsystem: "ds-app.AIFabric", category: "OnDeviceProvider")
        self.init(
            backend: Self.makeLiveBackend(),
            log: { message in osLogger.warning("\(message, privacy: .public)") }
        )
    }

    /// Test seam: injects the backend and the log sink so unit tests do not
    /// depend on hardware.
    init(backend: OnDeviceModelBackend, log: @escaping @Sendable (String) -> Void = { _ in }) {
        self.backend = backend
        self.log = log
    }

    /// `true` only when the backend reports `.available`.
    public var isAvailable: Bool {
        get async {
            switch backend.availability {
            case .available:
                return true
            case .unavailable:
                return false
            }
        }
    }

    /// Runs one completion against the backend.
    ///
    /// - Parameter request: Supplies the prompt, optional system prompt and token cap.
    /// - Returns: A `.completed` response with the generated text and latency, or
    ///   the response produced by `mapError` for refusal-type failures.
    /// - Throws: Whatever `mapError` throws for other `OnDeviceGenerationError`s;
    ///   any other error (e.g. `CancellationError`) propagates unchanged.
    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        // Belt-and-suspenders: mid-flight cancellation is handled cooperatively
        // inside respond() (measured in S2-3a).
        try Task.checkCancellation()

        let startedAt = Date()
        let generated: String
        do {
            generated = try await backend.generate(
                prompt: request.prompt,
                systemPrompt: request.systemPrompt,
                maxTokens: request.maxTokens
            )
        } catch let failure as OnDeviceGenerationError {
            return try mapError(failure)
        }
        // Errors of any other type have already propagated to the caller.

        let elapsedMs = Date().timeIntervalSince(startedAt) * 1000
        return AICompletionResponse(
            text: generated,
            providerUsed: .onDevice,
            finishReason: .completed,
            latencyMs: elapsedMs
        )
    }

    /// Maps a generation error to a response or a thrown error (S2-3c).
    ///
    /// A refusal counts as a kind of answer, so it is returned without fallback.
    /// Every other case means this provider cannot serve the request and ends
    /// in `AIProviderError.unavailable`, so the router can fall back visibly.
    private func mapError(_ e: OnDeviceGenerationError) throws -> AICompletionResponse {
        switch e {
        case .guardrailViolation, .refusal:
            return AICompletionResponse(
                text: "",
                providerUsed: .onDevice,
                finishReason: .refused,
                routingNote: "on-device refused (guardrail/refusal)"
            )

        // Sessions are stateless per request, so these two signal a
        // session-reuse bug: log loudly instead of hiding it.
        case .rateLimited:
            log("UNEXPECTED onDevice rateLimited on stateless session — 세션 재사용 버그 의심")
        case .concurrentRequests:
            log("UNEXPECTED onDevice concurrentRequests on stateless session — 세션 재사용 버그 의심")

        case .exceededContextWindowSize:
            log("onDevice context window(4096) exceeded — 라우터가 localMLX 로 폴백")
        case .unsupportedLanguageOrLocale:
            log("onDevice unsupported language/locale — 폴백")

        case .unsupportedGuide, .decodingFailure, .assetsUnavailable:
            // Not logged; falls through to the shared throw below.
            break

        case .unknown(let detail):
            log("onDevice unknown generation error: \(detail)")
        }

        // Every non-refusal case ends here.
        throw AIProviderError.unavailable(id)
    }

    /// Picks the live backend: FoundationModels when the module can be imported,
    /// otherwise a backend that always reports unavailable.
    static func makeLiveBackend() -> OnDeviceModelBackend {
        #if !canImport(FoundationModels)
        return UnavailableBackend(reason: "FoundationModels not importable on this platform")
        #else
        return FoundationModelsBackend()
        #endif
    }
}
|
|
|
|
/// Fallback backend for platforms or SDKs where FoundationModels is unavailable.
///
/// Always reports `.unavailable(reason:)`, and every generation attempt fails.
struct UnavailableBackend: OnDeviceModelBackend {

    /// Why the real backend could not be used.
    let reason: String

    var availability: OnDeviceAvailability {
        return OnDeviceAvailability.unavailable(reason: reason)
    }

    /// Never produces text.
    ///
    /// - Throws: `OnDeviceGenerationError.unknown` describing the unavailability.
    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        let detail = "backend unavailable: \(reason)"
        throw OnDeviceGenerationError.unknown(detail)
    }
}
|
|
|
|
// MARK: - 라이브 FoundationModels backend (M5 Max / Apple Intelligence)
|
|
|
|
#if canImport(FoundationModels)
|
|
import FoundationModels
|
|
|
|
/// Live backend built on Apple's FoundationModels framework.
///
/// A fresh `LanguageModelSession` is created for every request, so the backend
/// holds no state between calls.
struct FoundationModelsBackend: OnDeviceModelBackend {

    /// Translates `SystemLanguageModel.default.availability` into the neutral
    /// `OnDeviceAvailability`, using the SDK case name as the reason string.
    var availability: OnDeviceAvailability {
        switch SystemLanguageModel.default.availability {
        case .available:
            return .available
        case .unavailable(let reason):
            switch reason {
            case .deviceNotEligible: return .unavailable(reason: "deviceNotEligible")
            case .appleIntelligenceNotEnabled: return .unavailable(reason: "appleIntelligenceNotEnabled")
            case .modelNotReady: return .unavailable(reason: "modelNotReady")
            @unknown default: return .unavailable(reason: "unknownReason")
            }
        @unknown default:
            return .unavailable(reason: "unknown")
        }
    }

    /// Generates a completion with a single-use session.
    ///
    /// - Parameters:
    ///   - prompt: The user prompt passed to `respond(to:options:)`.
    ///   - systemPrompt: Session instructions; when `nil` the session is created
    ///     without instructions.
    ///   - maxTokens: Forwarded as `maximumResponseTokens`; `nil` leaves it unset.
    /// - Returns: The response content as a `String`.
    /// - Throws: `OnDeviceGenerationError` for every generation failure, or
    ///   `CancellationError` when the task was cancelled. No other error type
    ///   escapes, which matches the `OnDeviceModelBackend` contract.
    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        // Instructions are fixed at session init. When systemPrompt is nil they are
        // omitted entirely: never pass an empty string (S2-3c; this deliberately
        // differs from LocalMLX's `?? ""`).
        let session: LanguageModelSession
        if let systemPrompt {
            session = LanguageModelSession(model: .default, instructions: systemPrompt)
        } else {
            session = LanguageModelSession(model: .default)
        }

        // Returns synchronously (~1.3 ms measured in S2-3a); warming continues in
        // the background.
        session.prewarm()

        // AICompletionRequest has no temperature field (frozen), so it is left at
        // the model default. Same policy as LocalMLX.
        let options = GenerationOptions(maximumResponseTokens: maxTokens)

        do {
            let response = try await session.respond(to: prompt, options: options)
            return response.content // Response<String>.content : String
        } catch let generationError as LanguageModelSession.GenerationError {
            throw Self.translate(generationError)
        } catch let cancellation as CancellationError {
            // Cancellation must stay recognisable to callers; rethrow untouched.
            throw cancellation
        } catch {
            // Any other SDK error would otherwise leak raw and bypass the
            // provider's error mapping. Report it as `.unknown` so it is logged
            // and surfaced like every other generation failure.
            throw OnDeviceGenerationError.unknown(String(describing: error))
        }
    }

    /// Converts the SDK's `GenerationError` (nine cases) into the neutral
    /// `OnDeviceGenerationError`. The switch is exhaustive, and `@unknown default`
    /// covers cases added by future SDKs.
    static func translate(_ g: LanguageModelSession.GenerationError) -> OnDeviceGenerationError {
        switch g {
        case .guardrailViolation: return .guardrailViolation
        case .refusal: return .refusal
        case .exceededContextWindowSize: return .exceededContextWindowSize
        case .rateLimited: return .rateLimited
        case .concurrentRequests: return .concurrentRequests
        case .unsupportedLanguageOrLocale: return .unsupportedLanguageOrLocale
        case .unsupportedGuide: return .unsupportedGuide
        case .decodingFailure: return .decodingFailure
        case .assetsUnavailable: return .assetsUnavailable
        @unknown default: return .unknown("\(g)")
        }
    }
}
|
|
#endif
|