aimx/inference/provider.rs
//! # Inference Provider Configuration
//!
//! This module defines the configuration types for AI inference providers used by AIMX.
//! It supports multiple AI APIs with configurable models, capabilities, and performance characteristics.
//!
//! ## Overview
//!
//! The provider system allows AIMX workflows to interact with different AI inference services
//! through a unified interface. Providers can be configured for various use cases:
//!
//! - **Fast inference**: Quick responses for simple tasks
//! - **Standard inference**: Balanced performance for general use
//! - **Planning inference**: Advanced reasoning for complex tasks
//!
//! ## Supported APIs
//!
//! - **Ollama**: Local model inference (e.g., `http://localhost:11434`)
//! - **OpenAI**: Cloud-based inference (e.g., OpenAI API, OpenRouter)
//!
//! ## Example Usage
//!
//! ```rust
//! use aimx::{Provider, Api, Model, Capability};
//!
//! // Create a provider for local Ollama
//! let provider = Provider {
//!     api: Api::Ollama,
//!     url: "http://localhost:11434".to_string(),
//!     key: "".to_string(), // No API key needed for local Ollama
//!     model: Model::Standard,
//!     capability: Capability::Standard,
//!     fast: "mistral:latest".to_string(),
//!     standard: "llama2:latest".to_string(),
//!     planning: "codellama:latest".to_string(),
//!     temperature: 0.7,
//!     max_tokens: 2048,
//!     connection_timeout_ms: 30000,
//!     request_timeout_ms: 120000,
//! };
//! ```
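//!
//! A provider for an OpenAI-compatible endpoint follows the same shape; only the
//! API variant, URL, key, and model names change. The model names and key below
//! are illustrative placeholders, not defaults shipped with AIMX:
//!
//! ```rust
//! use aimx::{Provider, Api, Model, Capability};
//!
//! // Create a provider for an OpenAI-compatible cloud service
//! let provider = Provider {
//!     api: Api::Openai,
//!     url: "https://api.openai.com/v1".to_string(),
//!     key: "sk-example-key".to_string(), // Cloud APIs require a key
//!     model: Model::Planning,
//!     capability: Capability::Standard,
//!     fast: "gpt-4o-mini".to_string(),
//!     standard: "gpt-4o".to_string(),
//!     planning: "o1".to_string(),
//!     temperature: 0.2,
//!     max_tokens: 4096,
//!     connection_timeout_ms: 30000,
//!     request_timeout_ms: 120000,
//! };
//!
//! // `Provider::model()` resolves the configured name for the selected type
//! assert_eq!(provider.model(), "o1");
//! ```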

use serde::{Deserialize, Serialize};
use std::fmt;

/// Supported AI inference APIs
///
/// This enum defines the different AI service providers that AIMX can connect to.
/// Each API has different endpoint formats and authentication requirements.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub enum Api {
    /// Ollama API for local model inference
    ///
    /// Uses endpoint format: `http://localhost:11434/api/chat`
    /// No API key required for local instances
    Ollama,
    /// OpenAI-compatible API for cloud-based inference
    ///
    /// Uses endpoint format: `{url}/chat/completions`
    /// Requires API key authentication
    Openai,
}
62
63impl fmt::Display for Api {
64 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65 match self {
66 Api::Ollama => write!(f, "{}", "ollama"),
67 Api::Openai => write!(f, "{}", "openai"),
68 }
69 }
70}

impl Api {
    /// Create an `Api` variant from a string identifier
    ///
    /// # Arguments
    ///
    /// * `api` - String identifier; `"ollama"` maps to `Api::Ollama`, and any
    ///   other value defaults to `Api::Openai`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use aimx::Api;
    ///
    /// let ollama = Api::new("ollama");
    /// let openai = Api::new("openai");
    /// let default = Api::new("anything"); // defaults to Openai
    /// ```
    pub fn new(api: &str) -> Self {
        match api {
            "ollama" => Api::Ollama,
            _ => Api::Openai,
        }
    }
}

/// Model capability levels
///
/// Defines the expected capability level of the AI model, which influences
/// prompt complexity and response expectations.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub enum Capability {
    /// Minimal capability - simple tasks, basic responses
    /// Suitable for classification, extraction, or simple Q&A
    Minimal,
    /// Limited capability - moderate complexity
    /// Suitable for summarization, basic reasoning, or multi-step tasks
    Limited,
    /// Standard capability - full reasoning ability
    /// Suitable for complex reasoning, planning, and creative tasks
    Standard,
}

impl fmt::Display for Capability {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Capability::Minimal => write!(f, "minimal"),
            Capability::Limited => write!(f, "limited"),
            Capability::Standard => write!(f, "standard"),
        }
    }
}

impl Capability {
    /// Create a `Capability` variant from a string identifier
    ///
    /// # Arguments
    ///
    /// * `capability` - String identifier; `"minimal"` and `"limited"` map to
    ///   the corresponding variants, and any other value defaults to
    ///   `Capability::Standard`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use aimx::Capability;
    ///
    /// let minimal = Capability::new("minimal");
    /// let limited = Capability::new("limited");
    /// let standard = Capability::new("standard");
    /// let default = Capability::new("anything"); // defaults to Standard
    /// ```
    pub fn new(capability: &str) -> Self {
        match capability {
            "minimal" => Capability::Minimal,
            "limited" => Capability::Limited,
            _ => Capability::Standard,
        }
    }
}

/// Model performance types
///
/// Defines the performance characteristics and intended use cases for AI models.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub enum Model {
    /// Fast model - optimized for speed and low latency
    /// Suitable for simple classification, extraction, or quick responses
    Fast,
    /// Standard model - balanced performance for general use
    /// Suitable for most inference tasks including summarization and basic reasoning
    Standard,
    /// Planning model - optimized for complex reasoning and planning
    /// Suitable for multi-step reasoning, complex problem solving, and creative tasks
    Planning,
}

impl fmt::Display for Model {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Model::Fast => write!(f, "fast"),
            Model::Standard => write!(f, "standard"),
            Model::Planning => write!(f, "planning"),
        }
    }
}

impl Model {
    /// Create a `Model` variant from a string identifier
    ///
    /// # Arguments
    ///
    /// * `performance` - String identifier; `"fast"` and `"planning"` map to
    ///   the corresponding variants, and any other value defaults to
    ///   `Model::Standard`
    ///
    /// # Examples
    ///
    /// ```rust
    /// use aimx::Model;
    ///
    /// let fast = Model::new("fast");
    /// let planning = Model::new("planning");
    /// let standard = Model::new("standard");
    /// let default = Model::new("anything"); // defaults to Standard
    /// ```
    pub fn new(performance: &str) -> Self {
        match performance {
            "fast" => Model::Fast,
            "planning" => Model::Planning,
            _ => Model::Standard,
        }
    }
}

/// AI inference provider configuration
///
/// Contains all the settings needed to connect to and interact with an AI inference service.
/// This struct is serializable/deserializable for configuration storage.
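///
/// # Deserialization Defaults
///
/// The timeout fields fall back to their defaults (30 s connection, 2 min request)
/// when omitted from serialized configuration. A minimal sketch, assuming
/// `serde_json` is available in the build:
///
/// ```rust
/// use aimx::inference::provider::Provider;
///
/// let json = r#"{
///     "api": "Ollama",
///     "url": "http://localhost:11434",
///     "key": "",
///     "model": "Standard",
///     "capability": "Standard",
///     "fast": "mistral:latest",
///     "standard": "llama2:latest",
///     "planning": "codellama:latest",
///     "temperature": 0.7,
///     "max_tokens": 2048
/// }"#;
///
/// // The timeout fields are absent above, so serde fills in the defaults
/// let provider: Provider = serde_json::from_str(json).unwrap();
/// assert_eq!(provider.connection_timeout_ms, 30000);
/// assert_eq!(provider.request_timeout_ms, 120000);
/// ```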
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Provider {
    /// The AI service API to use
    pub api: Api,
    /// Base URL for the API endpoint
    ///
    /// Examples:
    /// - Ollama: `http://localhost:11434`
    /// - OpenAI: `https://api.openai.com/v1`
    /// - OpenRouter: `https://openrouter.ai/api/v1`
    pub url: String,
    /// API key for authentication (empty string for no authentication)
    ///
    /// For OpenAI-compatible APIs, this should be a valid API key.
    /// For Ollama, this is typically empty.
    pub key: String,
    /// Model performance type to use
    pub model: Model,
    /// Expected capability level of the model
    pub capability: Capability,
    /// Model name for fast inference tasks
    pub fast: String,
    /// Model name for standard inference tasks
    pub standard: String,
    /// Model name for planning/advanced reasoning tasks
    pub planning: String,
    /// Temperature setting for inference (0.0 to 1.0)
    ///
    /// Controls randomness:
    /// - Lower values (e.g., 0.2) = more focused and deterministic
    /// - Higher values (e.g., 0.8) = more creative and random
    pub temperature: f64,
    /// Maximum number of tokens to generate in the response
    pub max_tokens: u32,
    /// Connection timeout in milliseconds
    #[serde(default = "default_connection_timeout")]
    pub connection_timeout_ms: u64,
    /// Request timeout in milliseconds
    #[serde(default = "default_request_timeout")]
    pub request_timeout_ms: u64,
}

impl Provider {
    /// Get the actual model name based on the selected model type
    ///
    /// Returns the model name string configured for the current performance type.
    ///
    /// # Examples
    ///
    /// ```rust
    /// use aimx::inference::provider::{Provider, Api, Model, Capability};
    ///
    /// let provider = Provider {
    ///     api: Api::Ollama,
    ///     url: "http://localhost:11434".to_string(),
    ///     key: "".to_string(),
    ///     model: Model::Fast,
    ///     capability: Capability::Standard,
    ///     fast: "mistral:latest".to_string(),
    ///     standard: "llama2:latest".to_string(),
    ///     planning: "codellama:latest".to_string(),
    ///     temperature: 0.7,
    ///     max_tokens: 2048,
    ///     connection_timeout_ms: 30000,
    ///     request_timeout_ms: 120000,
    /// };
    ///
    /// assert_eq!(provider.model(), "mistral:latest");
    /// ```
    pub fn model(&self) -> &str {
        match self.model {
            Model::Fast => self.fast.as_str(),
            Model::Standard => self.standard.as_str(),
            Model::Planning => self.planning.as_str(),
        }
    }
}

/// Default connection timeout value (30 seconds)
fn default_connection_timeout() -> u64 {
    30000 // 30 seconds
}

/// Default request timeout value (2 minutes)
fn default_request_timeout() -> u64 {
    120000 // 2 minutes
}
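
// Minimal sanity tests (a sketch; the string identifiers mirror the `new()` and
// `Display` implementations above, so any renaming there must be reflected here).
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn api_new_round_trips_through_display() {
        // Each parsed variant should format back to its identifier
        assert_eq!(Api::new("ollama").to_string(), "ollama");
        assert_eq!(Api::new("openai").to_string(), "openai");
        // Unknown identifiers fall back to the OpenAI-compatible API
        assert_eq!(Api::new("unknown"), Api::Openai);
    }

    #[test]
    fn capability_and_model_default_to_standard() {
        // Both constructors treat unrecognized identifiers as Standard
        assert_eq!(Capability::new("unknown"), Capability::Standard);
        assert_eq!(Model::new("unknown"), Model::Standard);
    }

    #[test]
    fn timeout_defaults_match_documented_values() {
        // 30-second connection timeout, 2-minute request timeout
        assert_eq!(default_connection_timeout(), 30_000);
        assert_eq!(default_request_timeout(), 120_000);
    }
}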