Files
hyungi_document_server/clients/ds-app/Sources/AI/Providers/LocalMLXProvider.swift
T

162 lines
6.4 KiB
Swift

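// LocalMLXProvider.swift — S2 (local LLM provider).
//
// Targets Gemma 4 26B through llm-router on :8890 (OpenAI-compatible, wake-on-call).
// Note #4 concerns the raw MLX server on :8801 (presumably that it is not called directly — TODO confirm).
// - isAvailable = GET /v1/models probe (bounded by a timeout; NOTE(review): the original remark about wake behavior was lost — confirm).
// - complete = POST /v1/chat/completions; messages are a system/user pair (call shape follows the fixture).
//
// fixture (llm-router-chat.*.json) = CAPTURED_LIVE (2026-06-05, Tailscale 100.76.254.116:8890).
// model='mac-mini-default' (presumably the request alias) vs. model='mlx-community/gemma-4-26b-a4b-it-8bit' (presumably the resolved model) — TODO confirm.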
import Foundation
/// AI provider that talks to an OpenAI-style HTTP server rooted at `baseURL`.
///
/// - `isAvailable` probes `GET v1/models`.
/// - `complete(_:)` sends `POST v1/chat/completions` with one system and one user message.
public struct LocalMLXProvider: AIProvider {
    /// Identifier of this provider; always `.localMLX`.
    public let id: AIProviderID = .localMLX

    /// Base URL of the server. Endpoint paths are added with `appendingPathComponent`.
    public let baseURL: URL

    /// Value sent in the `model` field of every chat request.
    let model: String
    /// Session used for both the availability probe and the completion request.
    let session: URLSession
    /// Timeout, in seconds, applied to the completion request.
    let requestTimeout: TimeInterval
    /// Timeout, in seconds, applied to the availability probe.
    let probeTimeout: TimeInterval

    /// Creates a provider.
    ///
    /// - Parameters:
    ///   - baseURL: Root URL that the `v1/...` endpoint paths are appended to.
    ///   - model: Model name placed in each request. The default `"mac-mini-default"` is,
    ///     per the original note, an llm-router alias that resolves to gemma-4-26b
    ///     (NOTE(review): confirm against `/v1/models`).
    ///   - session: URL session that performs the requests.
    ///   - requestTimeout: Timeout in seconds for `complete(_:)`.
    ///   - probeTimeout: Timeout in seconds for `isAvailable`.
    public init(
        baseURL: URL,
        model: String = "mac-mini-default",
        session: URLSession = .shared,
        requestTimeout: TimeInterval = 60,
        probeTimeout: TimeInterval = 2
    ) {
        self.session = session
        self.baseURL = baseURL
        self.model = model
        self.probeTimeout = probeTimeout
        self.requestTimeout = requestTimeout
    }

    // MARK: - Availability probe

    /// `true` only when `GET v1/models` answers with a 2xx HTTP status.
    ///
    /// The probe never throws: any error raised by the session (a timeout included)
    /// and any non-HTTP response are reported as `false`.
    public var isAvailable: Bool {
        get async {
            var probe = URLRequest(url: baseURL.appendingPathComponent("v1/models"))
            probe.httpMethod = "GET"
            probe.timeoutInterval = probeTimeout

            guard
                let result = try? await session.data(for: probe),
                let httpResponse = result.1 as? HTTPURLResponse
            else {
                return false
            }
            return 200..<300 ~= httpResponse.statusCode
        }
    }

    // MARK: - Completion (OpenAI chat/completions)

    /// Sends `request` to `POST v1/chat/completions` and returns the first choice.
    ///
    /// - Parameter request: Completion request; see `encodeRequest(_:model:)` for how it is serialized.
    /// - Returns: A response carrying the first choice's content, `.localMLX` as the provider,
    ///   no citations, no confidence, no routing note, and the elapsed time in milliseconds.
    /// - Throws: `CancellationError` if the task is already cancelled on entry;
    ///   `AIProviderError.backendError` for a request timeout (status `-1`), a non-HTTP
    ///   response (status `-1`), a non-2xx status, or an empty `choices` array;
    ///   any other session, encoding, or decoding error is propagated unchanged.
    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        try Task.checkCancellation()

        let endpoint = baseURL.appendingPathComponent("v1/chat/completions")
        var urlRequest = URLRequest(url: endpoint)
        urlRequest.httpMethod = "POST"
        urlRequest.setValue("application/json", forHTTPHeaderField: "Content-Type")
        urlRequest.timeoutInterval = requestTimeout
        urlRequest.httpBody = try Self.encodeRequest(request, model: model)

        let startedAt = Date()
        let payload: Data
        let response: URLResponse
        do {
            (payload, response) = try await session.data(for: urlRequest)
        } catch let timeout as URLError where timeout.code == .timedOut {
            // Only timeouts are translated; every other error leaves this function as-is.
            throw AIProviderError.backendError(id, status: -1, reason: "request timed out after \(Int(requestTimeout))s")
        }

        guard let httpResponse = response as? HTTPURLResponse else {
            throw AIProviderError.backendError(id, status: -1, reason: "non-HTTP response")
        }

        let status = httpResponse.statusCode
        if !(200..<300).contains(status) {
            // Attach at most the first 300 characters of the body; nil when it is not valid UTF-8.
            let bodyText = String(data: payload, encoding: .utf8)
            let reason = bodyText.map { String($0.prefix(300)) }
            throw AIProviderError.backendError(id, status: status, reason: reason)
        }

        let chat = try JSONDecoder().decode(OpenAIChatResponse.self, from: payload)
        guard let firstChoice = chat.choices.first else {
            throw AIProviderError.backendError(id, status: status, reason: "no choices in response")
        }

        return AICompletionResponse(
            text: firstChoice.message.content,
            providerUsed: .localMLX,
            finishReason: Self.finishReason(firstChoice.finishReason),
            citations: [],
            confidence: nil,
            latencyMs: Date().timeIntervalSince(startedAt) * 1000,
            routingNote: nil
        )
    }

    // MARK: - Helpers

    /// Maps an OpenAI `finish_reason` string to `AIFinishReason`.
    ///
    /// Every input currently maps to `.completed`: `"stop"`, `"length"` (the max_tokens case),
    /// `nil`, and any other value.
    static func finishReason(_ openAI: String?) -> AIFinishReason {
        switch openAI {
        case "stop", "length":
            return .completed
        default:
            return .completed
        }
    }

    /// Serializes `request` into an OpenAI chat/completions JSON body.
    ///
    /// The body always holds two messages, in this order: a `system` message whose content is
    /// `request.systemPrompt` (or `""` when that is nil) and a `user` message with `request.prompt`.
    /// `stream` is always `false`. Keys are emitted in sorted order.
    ///
    /// - Parameters:
    ///   - request: Source of the prompts and the `maxTokens` limit.
    ///   - model: Value for the `model` field.
    /// - Returns: The encoded JSON data.
    /// - Throws: Any error raised by `JSONEncoder`.
    static func encodeRequest(_ request: AICompletionRequest, model: String) throws -> Data {
        let systemMessage = OpenAIChatRequest.Message(role: "system", content: request.systemPrompt ?? "")
        let userMessage = OpenAIChatRequest.Message(role: "user", content: request.prompt)
        let payload = OpenAIChatRequest(
            model: model,
            messages: [systemMessage, userMessage],
            maxTokens: request.maxTokens,
            stream: false
        )

        let encoder = JSONEncoder()
        encoder.outputFormatting = .sortedKeys
        return try encoder.encode(payload)
    }
}
// MARK: - OpenAI wire ()
/// JSON body of an OpenAI-style chat/completions request.
struct OpenAIChatRequest: Encodable, Sendable {
    /// JSON key names; only `maxTokens` is renamed (to `max_tokens`).
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case maxTokens = "max_tokens"
        case stream
    }

    /// One chat message: a role name and its text.
    struct Message: Encodable, Sendable {
        let role: String
        let content: String
    }

    /// Model name to run the completion with.
    let model: String
    /// Conversation messages, in the order they are sent.
    let messages: [Message]
    /// Optional token limit; the synthesized encoder leaves the key out when this is nil.
    let maxTokens: Int?
    /// Value of the `stream` field.
    let stream: Bool
}
/// Subset of an OpenAI-style chat/completions response that this file decodes.
struct OpenAIChatResponse: Decodable, Sendable {
    /// One entry of the `choices` array.
    struct Choice: Decodable, Sendable {
        /// JSON key names; only `finishReason` is renamed (from `finish_reason`).
        enum CodingKeys: String, CodingKey {
            case index
            case message
            case finishReason = "finish_reason"
        }

        /// Message carried by a choice. Both fields are required when decoding.
        struct Message: Decodable, Sendable {
            let role: String
            let content: String
        }

        /// Position of the choice, when the server reports one.
        let index: Int?
        /// The generated message.
        let message: Message
        /// Raw `finish_reason` string, when present.
        let finishReason: String?
    }

    /// Choices returned by the server; required when decoding.
    let choices: [Choice]
}