Files
hyungi_document_server/clients/ds-app/Sources/AI/Providers/OnDeviceProvider.swift
T

197 lines
8.8 KiB
Swift

// OnDeviceProvider.swift — S2 on-device provider (Apple FoundationModels).
//
// API surface (#3): SystemLanguageModel.default.availability + LanguageModelSession.respond().
// The ~3B / 2-bit QAT model targets quickSummarize/classify, not corpusAsk (corpusAsk is served by RemoteDS).
//
// Testing: the backend seam (OnDeviceModelBackend) lets CI and non-AI Macs substitute a mock;
// the live path runs on the M5 Max (default backend = FoundationModels).
//
// Verified (S2-3a, M5 Max, 2026-06-04): availability = available · respond() yields Response<String>.content ·
// GenerationError has 9 cases · exceededContextWindowSize · cancellation = COOPERATIVE (respond() vs.
// mid-flight Task.cancel; checkCancellation is belt-and-suspenders; streamResponse not used).
import Foundation
import os
// MARK: - Availability (no FoundationModels / hardware dependency)

/// Whether the on-device model can currently serve requests.
public enum OnDeviceAvailability: Equatable, Sendable {
    /// The model is ready to generate.
    case available

    /// The model cannot be used; `reason` is a short machine-readable explanation.
    case unavailable(reason: String)
}
/// SDK-independent mirror of `FoundationModels.LanguageModelSession.GenerationError`
/// (its 9 cases plus a catch-all `unknown`).
///
/// Backends translate SDK errors into this type so that the provider can decide
/// between returning a finish reason and throwing, without importing the SDK.
public enum OnDeviceGenerationError: Error, Equatable, Sendable {
    // Content-policy outcomes.
    case guardrailViolation
    case refusal

    // Capacity / session-state outcomes.
    case exceededContextWindowSize
    case rateLimited
    case concurrentRequests

    // Capability / asset outcomes.
    case unsupportedLanguageOrLocale
    case unsupportedGuide
    case decodingFailure
    case assetsUnavailable

    /// Anything that has no dedicated case; the payload is a free-form description.
    case unknown(String)
}
/// Seam between `OnDeviceProvider` and the concrete model runtime.
/// The live implementation is `FoundationModelsBackend`; tests supply a mock.
protocol OnDeviceModelBackend: Sendable {
    /// Current availability of the underlying model.
    var availability: OnDeviceAvailability { get }

    /// Generates a completion for `prompt`.
    ///
    /// - Parameters:
    ///   - prompt: The user prompt to respond to.
    ///   - systemPrompt: Optional instructions for the model.
    ///   - maxTokens: Optional cap on the number of response tokens.
    /// - Returns: The generated text.
    /// - Throws: `OnDeviceGenerationError` or `CancellationError`.
    func generate(
        prompt: String,
        systemPrompt: String?,
        maxTokens: Int?
    ) async throws -> String
}
// MARK: - Provider
/// `AIProvider` that serves completions from an on-device model reached through an
/// `OnDeviceModelBackend`.
///
/// Outcome mapping:
/// - success → response with `finishReason: .completed`;
/// - guardrail violation / refusal → empty response with `finishReason: .refused`;
/// - every other `OnDeviceGenerationError` → `AIProviderError.unavailable(id)` is thrown;
/// - any other error from the backend (e.g. `CancellationError`) propagates unchanged.
public struct OnDeviceProvider: AIProvider {
    public let id: AIProviderID = .onDevice

    private let backend: OnDeviceModelBackend
    private let log: @Sendable (String) -> Void

    /// Creates a provider wired to the live backend, logging warnings through `os.Logger`.
    public init() {
        self.backend = Self.makeLiveBackend()
        let logger = Logger(subsystem: "ds-app.AIFabric", category: "OnDeviceProvider")
        self.log = { msg in logger.warning("\(msg, privacy: .public)") }
    }

    /// Test seam: injects the backend and the log sink.
    ///
    /// - Parameters:
    ///   - backend: The model backend to call.
    ///   - log: Receives diagnostic messages; defaults to discarding them.
    init(backend: OnDeviceModelBackend, log: @escaping @Sendable (String) -> Void = { _ in }) {
        self.backend = backend
        self.log = log
    }

    /// `true` when the backend reports `.available`.
    public var isAvailable: Bool {
        get async { backend.availability == .available }
    }

    /// Runs one completion against the backend.
    ///
    /// - Parameter request: Supplies `prompt`, `systemPrompt` and `maxTokens`.
    /// - Returns: A `.completed` response with the generated text and latency, or an
    ///   empty `.refused` response for guardrail/refusal outcomes.
    /// - Throws: `CancellationError` if the task is already cancelled on entry,
    ///   `AIProviderError.unavailable(id)` for non-refusal generation errors, and any
    ///   other error the backend throws, unchanged.
    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        // Belt-and-suspenders pre-check; mid-flight cancellation is left to the backend call.
        try Task.checkCancellation()

        // Monotonic uptime instead of wall-clock `Date()`: a clock adjustment during
        // generation must not skew the reported latency or make it negative.
        let startedUptime = ProcessInfo.processInfo.systemUptime
        do {
            let text = try await backend.generate(
                prompt: request.prompt,
                systemPrompt: request.systemPrompt,
                maxTokens: request.maxTokens
            )
            return AICompletionResponse(
                text: text,
                providerUsed: .onDevice,
                finishReason: .completed,
                latencyMs: (ProcessInfo.processInfo.systemUptime - startedUptime) * 1000
            )
        } catch let e as OnDeviceGenerationError {
            return try mapError(e)
        }
        // Errors of any other type (notably CancellationError) are not caught here.
    }

    /// Maps a generation error to either a `.refused` response or a thrown
    /// `AIProviderError.unavailable(id)`.
    ///
    /// Every throwing branch logs first, so the reason for the failure is recorded.
    private func mapError(_ e: OnDeviceGenerationError) throws -> AICompletionResponse {
        switch e {
        case .guardrailViolation, .refusal:
            return AICompletionResponse(
                text: "",
                providerUsed: .onDevice,
                finishReason: .refused,
                routingNote: "on-device refused (guardrail/refusal)"
            )
        case .rateLimited:
            // Not expected with one session per request, hence the loud log.
            log("UNEXPECTED onDevice rateLimited on stateless session — 세션 재사용 버그 의심")
            throw AIProviderError.unavailable(id)
        case .concurrentRequests:
            log("UNEXPECTED onDevice concurrentRequests on stateless session — 세션 재사용 버그 의심")
            throw AIProviderError.unavailable(id)
        case .exceededContextWindowSize:
            log("onDevice context window(4096) exceeded — 라우터가 localMLX 로 폴백")
            throw AIProviderError.unavailable(id)
        case .unsupportedLanguageOrLocale:
            log("onDevice unsupported language/locale — 폴백")
            throw AIProviderError.unavailable(id)
        case .unsupportedGuide, .decodingFailure, .assetsUnavailable:
            // Previously silent; logged like the sibling branches so the failure is diagnosable.
            log("onDevice generation unavailable (\(e)) — 폴백")
            throw AIProviderError.unavailable(id)
        case .unknown(let detail):
            log("onDevice unknown generation error: \(detail)")
            throw AIProviderError.unavailable(id)
        }
    }

    /// Returns the FoundationModels backend when that module can be imported,
    /// otherwise a backend that always reports itself unavailable.
    static func makeLiveBackend() -> OnDeviceModelBackend {
        #if canImport(FoundationModels)
        return FoundationModelsBackend()
        #else
        return UnavailableBackend(reason: "FoundationModels not importable on this platform")
        #endif
    }
}
/// Placeholder backend that always reports itself unavailable and fails every
/// generation request with `OnDeviceGenerationError.unknown`.
struct UnavailableBackend: OnDeviceModelBackend {
    /// Explanation surfaced through `availability` and in the thrown error.
    let reason: String

    var availability: OnDeviceAvailability {
        get { return OnDeviceAvailability.unavailable(reason: reason) }
    }

    /// Always throws; the arguments are ignored.
    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        let detail = "backend unavailable: \(reason)"
        throw OnDeviceGenerationError.unknown(detail)
    }
}
// MARK: - FoundationModels backend (M5 Max / Apple Intelligence)
#if canImport(FoundationModels)
import FoundationModels
/// Live backend built on Apple's FoundationModels `SystemLanguageModel.default`.
///
/// A fresh `LanguageModelSession` is created for every `generate` call.
struct FoundationModelsBackend: OnDeviceModelBackend {
    /// Translates the SDK availability into the SDK-independent `OnDeviceAvailability`,
    /// carrying the unavailability reason as a short string.
    var availability: OnDeviceAvailability {
        switch SystemLanguageModel.default.availability {
        case .available:
            return .available
        case .unavailable(let reason):
            switch reason {
            case .deviceNotEligible: return .unavailable(reason: "deviceNotEligible")
            case .appleIntelligenceNotEnabled: return .unavailable(reason: "appleIntelligenceNotEnabled")
            case .modelNotReady: return .unavailable(reason: "modelNotReady")
            @unknown default: return .unavailable(reason: "unknownReason")
            }
        @unknown default:
            return .unavailable(reason: "unknown")
        }
    }

    /// Generates a response to `prompt` with a new session.
    ///
    /// - Parameters:
    ///   - prompt: The prompt passed to `respond(to:options:)`.
    ///   - systemPrompt: Session instructions; when `nil` the session is created without any.
    ///   - maxTokens: Forwarded as `maximumResponseTokens`.
    /// - Returns: The `content` string of the SDK response.
    /// - Throws: `OnDeviceGenerationError` for every failure except cancellation, which is
    ///   rethrown as the original `CancellationError`.
    func generate(prompt: String, systemPrompt: String?, maxTokens: Int?) async throws -> String {
        // Instructions are supplied at session creation, so the nil case needs its own initializer call.
        // NOTE(review): the original comment referenced S2-3c and LocalMLX's `?? ""` handling — confirm intent.
        let session: LanguageModelSession
        if let systemPrompt {
            session = LanguageModelSession(model: .default, instructions: systemPrompt)
        } else {
            session = LanguageModelSession(model: .default)
        }
        session.prewarm() // cheap (~1.3 ms per the original S2-3a note)

        // Only the token cap is forwarded; no temperature is set here.
        let options = GenerationOptions(maximumResponseTokens: maxTokens)
        do {
            let response = try await session.respond(to: prompt, options: options)
            return response.content // Response<String>.content : String
        } catch let g as LanguageModelSession.GenerationError {
            throw Self.translate(g)
        } catch let cancellation as CancellationError {
            // Cancellation is part of the backend contract and must stay recognisable.
            throw cancellation
        } catch {
            // Any other SDK/runtime error is normalised so callers that only handle
            // OnDeviceGenerationError still see it instead of an untyped error escaping.
            throw OnDeviceGenerationError.unknown(String(describing: error))
        }
    }

    /// Maps each SDK `GenerationError` case to its `OnDeviceGenerationError` counterpart;
    /// cases unknown to this build become `.unknown` carrying the error's description.
    static func translate(_ g: LanguageModelSession.GenerationError) -> OnDeviceGenerationError {
        switch g {
        case .guardrailViolation: return .guardrailViolation
        case .refusal: return .refusal
        case .exceededContextWindowSize: return .exceededContextWindowSize
        case .rateLimited: return .rateLimited
        case .concurrentRequests: return .concurrentRequests
        case .unsupportedLanguageOrLocale: return .unsupportedLanguageOrLocale
        case .unsupportedGuide: return .unsupportedGuide
        case .decodingFailure: return .decodingFailure
        case .assetsUnavailable: return .assetsUnavailable
        @unknown default: return .unknown("\(g)")
        }
    }
}
#endif