Files
hyungi_document_server/Sources/AI/Providers/RemoteDSProvider.swift
T
hyungi 5383a93f98 feat(ai-fabric): S2 LLM 패브릭 4 provider 결선 + 컴포지션 루트
risk-first 채움(RemoteDS→LocalMLX→OnDevice→Specialized) + makeDefaultRouter 컴포지션 루트.
동결 인터페이스(AIProvider/AIRouter/MockAIProvider) 무변경. SPM AIFabric 단독 빌드·테스트(46 PASS).

- RemoteDS: DSAskClient seam + AskResponse(ask.json) 매핑 + backend exhaustive switch(qwen/cloud TODO)
- LocalMLX: GET /v1/models probe + OpenAI /v1/chat/completions system/user call-shape + non-200 backendError
- OnDevice: FoundationModels 라이브(M5 Max) availability + respond() + GenerationError 9-case 매핑 + stateless/prewarm
- Specialized: scaffold-only(명시 unavailable, vision 폴백 가시화), cloud='claude-cloud' 503
- config 단일소스(env override) + 타임아웃/취소(URLSession 자동 honor, OnDevice 협조적)

실측 동결(S2-3a, M5 Max): availability=available · 취소=COOPERATIVE(~33ms) · 오버플로=exceededContextWindowSize
  · GenerationError 9-case(refusal·concurrentRequests 추가 발견, plan 정정).
한계: LocalMLX fixture=PROVISIONAL_SYNTHETIC(맥미니 offline → 라이브 재캡처 S2-Ff 대기).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
2026-06-04 17:20:10 +09:00

156 lines
6.6 KiB
Swift

// RemoteDSProvider.swift — S2 (DS RAG provider).
//
// S1 mapping contract (CONTRACT.md §4 / AI-ROUTING.md §4):
//   complete(corpusAsk) -> DSAskClient.ask(query:backend:) -> AskResponse -> AICompletionResponse
//   AskResponse.ai_answer        -> text
//   AskResponse.citations[]      -> [AICitation]
//   AskResponse.synthesis_status -> AIFinishReason
//   AskResponse.confidence       -> AIConfidence
//   AskResponse.backend_used     -> routingNote (the LLM reported by DS)
//
// The real HTTP client (LiveDSClient) belongs to S3; S2 provides only the DSAskClient seam plus the mapping.
// Note: the AIProvider interface itself is not modified. RemoteDSProvider.init(client:) is the S2 addition.
import Foundation
// MARK: - S2 DS ask seam (real implementation = S3)

/// Abstraction over the DS `GET /search/ask?q=&backend=` endpoint.
///
/// The S3 `LiveDSClient` is expected to conform to this protocol; in S2 it is
/// satisfied by mocks so no network is involved. HTTP-level failures are the
/// conformer's responsibility and are reported by throwing
/// (for example `AIProviderError.backendError(.remoteDS, status:, reason:)`).
public protocol DSAskClient: Sendable {
    /// Sends one ask query to DS.
    ///
    /// - Parameters:
    ///   - query: The question text to ask.
    ///   - backend: The DS backend identifier string to request.
    /// - Returns: The decoded DS ask response.
    /// - Throws: Whatever error the conformer raises for a failed call.
    func ask(query: String, backend: String) async throws -> AskResponse
}
// MARK: - DS /search/ask response model
//
// CodingKeys are spelled out explicitly, so decoding does not rely on a
// `.convertFromSnakeCase` key strategy being configured on the decoder.
// Fixture: contract/fixtures/ask.json.

/// Decoded payload of a DS ask call.
///
/// Only `aiAnswer`, `citations` and `synthesisStatus` are required when
/// decoding; every other field is optional and decodes to `nil` when absent.
public struct AskResponse: Decodable, Sendable {
    // Required fields.
    /// Answer text (`ai_answer`).
    public let aiAnswer: String
    /// Citations attached to the answer (`citations`).
    public let citations: [AskCitation]
    /// Raw synthesis status string (`synthesis_status`).
    public let synthesisStatus: String

    // Optional fields.
    /// Synthesis duration (`synthesis_ms`), if reported.
    public let synthesisMs: Double?
    /// Raw confidence label (`confidence`), if reported.
    public let confidence: String?
    /// Backend identifier reported by DS (`backend_used`), if reported.
    public let backendUsed: String?
    /// Refusal flag (`refused`), if reported.
    public let refused: Bool?

    /// Maps the Swift property names onto the snake_case JSON keys.
    enum CodingKeys: String, CodingKey {
        case aiAnswer = "ai_answer"
        case citations
        case synthesisStatus = "synthesis_status"
        case synthesisMs = "synthesis_ms"
        case confidence
        case backendUsed = "backend_used"
        case refused
    }

    /// Memberwise initializer; all optional fields default to `nil`.
    public init(
        aiAnswer: String,
        citations: [AskCitation],
        synthesisStatus: String,
        synthesisMs: Double? = nil,
        confidence: String? = nil,
        backendUsed: String? = nil,
        refused: Bool? = nil
    ) {
        self.aiAnswer = aiAnswer
        self.citations = citations
        self.synthesisStatus = synthesisStatus
        self.synthesisMs = synthesisMs
        self.confidence = confidence
        self.backendUsed = backendUsed
        self.refused = refused
    }
}
/// One citation entry inside an ask response.
///
/// `n`, `docId` and `spanText` are required when decoding; `title` and
/// `sectionTitle` are optional and decode to `nil` when missing or null.
public struct AskCitation: Decodable, Sendable {
    /// Citation number (`n`).
    public let n: Int
    /// Identifier of the cited document (`doc_id`).
    public let docId: Int
    /// Document title (`title`), if present.
    public let title: String?
    /// Section title (`section_title`), if present.
    public let sectionTitle: String?
    /// Cited text span (`span_text`).
    public let spanText: String

    /// Maps the Swift property names onto the snake_case JSON keys.
    enum CodingKeys: String, CodingKey {
        case n
        case docId = "doc_id"
        case title
        case sectionTitle = "section_title"
        case spanText = "span_text"
    }

    /// Memberwise initializer; no parameter has a default value.
    public init(
        n: Int,
        docId: Int,
        title: String?,
        sectionTitle: String?,
        spanText: String
    ) {
        self.n = n
        self.docId = docId
        self.title = title
        self.sectionTitle = sectionTitle
        self.spanText = spanText
    }
}
// MARK: - Provider

/// `AIProvider` that serves `.corpusAsk` requests by delegating to a
/// `DSAskClient` and converting the returned `AskResponse` into an
/// `AICompletionResponse`.
public struct RemoteDSProvider: AIProvider {
    public let id: AIProviderID = .remoteDS

    /// Injected ask seam; every DS call made by this provider goes through it.
    private let client: DSAskClient

    /// Creates a provider backed by the given ask client.
    public init(client: DSAskClient) {
        self.client = client
    }

    /// Always `true`: no probe is performed here, so a failing backend only
    /// becomes visible when `complete(_:)` throws.
    public var isAvailable: Bool {
        get async {
            return true
        }
    }

    /// Runs one corpus-ask completion.
    ///
    /// - Parameter request: The completion request; only `.corpusAsk` tasks are handled.
    /// - Returns: The mapped DS answer.
    /// - Throws: `AIProviderError.notImplemented` for any other task,
    ///   `CancellationError` if the task was already cancelled before the call,
    ///   and any error thrown by the client, which is passed through unchanged.
    public func complete(_ request: AICompletionRequest) async throws -> AICompletionResponse {
        // This provider only handles corpusAsk; everything else is rejected up front.
        guard request.task == .corpusAsk else {
            throw AIProviderError.notImplemented(id)
        }

        // Bail out before doing any work if the caller has already cancelled.
        try Task.checkCancellation()

        // Client errors (e.g. HTTP failures) are deliberately not caught here.
        let answer = try await client.ask(
            query: request.prompt,
            backend: Self.dsBackend(for: request.explicitProvider)
        )
        return Self.map(answer)
    }

    // MARK: Mapping (AI-ROUTING.md §4)

    /// Converts a DS ask response into the provider-neutral completion response.
    ///
    /// A confidence string that `AIConfidence(rawValue:)` does not accept maps to `nil`.
    static func map(_ response: AskResponse) -> AICompletionResponse {
        let mappedCitations = response.citations.map { citation in
            AICitation(
                n: citation.n,
                docId: citation.docId,
                title: citation.title,
                sectionTitle: citation.sectionTitle,
                spanText: citation.spanText
            )
        }
        let mappedConfidence = response.confidence.flatMap { AIConfidence(rawValue: $0) }
        let reason = finishReason(fromSynthesisStatus: response.synthesisStatus)

        return AICompletionResponse(
            text: response.aiAnswer,
            providerUsed: .remoteDS,
            finishReason: reason,
            citations: mappedCitations,
            confidence: mappedConfidence,
            latencyMs: response.synthesisMs,   // latency is taken from synthesis_ms
            routingNote: response.backendUsed  // backend identifier reported by DS
        )
    }

    /// Translates a DS `synthesis_status` string into an `AIFinishReason`.
    ///
    /// Any status not listed below falls through to `.refused`.
    static func finishReason(fromSynthesisStatus status: String) -> AIFinishReason {
        switch status {
        case "completed":
            return .completed
        case "timeout":
            return .timeout
        case "no_evidence":
            return .noEvidence
        case "skipped":
            return .noEvidence
        case "backend_unavailable":
            return .unavailable
        default:
            return .refused
        }
    }

    /// Chooses the DS backend string for an optional explicit provider (AI-ROUTING.md §4).
    ///
    /// Written as an exhaustive switch with no `default`, so adding a case to
    /// `AIProviderID` fails to compile until a backend is chosen for it.
    ///
    /// TODO(qwen-macbook): `AIProviderID` has no case for 'qwen-macbook' (M5 Max Qwen VLM) yet;
    /// once one is added, this switch forces a backend decision (S2-1b b).
    /// TODO(claude-cloud): cloud backend = 'claude-cloud' is a DS 503 scaffold (S2-4b) with no
    /// case here. NOTE(review): the 503 is presumably surfaced by the client as
    /// `backendError(.remoteDS, status: 503, ...)` rather than absorbed here — confirm.
    static func dsBackend(for explicit: AIProviderID?) -> String {
        switch explicit {
        case .none:
            return "mac-mini-default"   // nothing requested explicitly
        case .some(.localMLX):
            return "gemma-macmini"
        case .some(.remoteDS):
            return "mac-mini-default"
        case .some(.onDevice):
            return "mac-mini-default"
        case .some(.specialized):
            return "mac-mini-default"
        }
    }
}