aimx/inference/
provider.rs

1//! # Inference Provider Configuration
2//!
3//! This module defines the configuration types for AI inference providers used by AIMX.
4//! It supports multiple AI APIs with configurable models, capabilities, and performance characteristics.
5//!
6//! ## Overview
7//!
8//! The provider system allows AIMX workflows to interact with different AI inference services
9//! through a unified interface. Providers can be configured for various use cases:
10//!
11//! - **Fast inference**: Quick responses for simple tasks
12//! - **Standard inference**: Balanced performance for general use
13//! - **Planning inference**: Advanced reasoning for complex tasks
14//!
15//! ## Supported APIs
16//!
17//! - **Ollama**: Local model inference (e.g., `http://localhost:11434`)
18//! - **OpenAI**: Cloud-based inference (e.g., OpenAI API, OpenRouter)
19//!
20//! ## Example Usage
21//!
22//! ```rust
23//! use aimx::{Provider, Api, Model, Capability};
24//!
25//! // Create a provider for local Ollama
26//! let provider = Provider {
27//!     api: Api::Ollama,
28//!     url: "http://localhost:11434".to_string(),
29//!     key: "".to_string(),  // No API key needed for local Ollama
30//!     model: Model::Standard,
31//!     capability: Capability::Standard,
32//!     fast: "mistral:latest".to_string(),
33//!     standard: "llama2:latest".to_string(),
34//!     planning: "codellama:latest".to_string(),
35//!     temperature: 0.7,
36//!     max_tokens: 2048,
37//!     connection_timeout_ms: 30000,
38//!     request_timeout_ms: 120000,
39//! };
40//! ```
41
42use serde::{Deserialize, Serialize};
43use std::fmt;
44
45/// Supported AI inference APIs
46///
47/// This enum defines the different AI service providers that AIMX can connect to.
48/// Each API has different endpoint formats and authentication requirements.
49#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
50pub enum Api {
51    /// Ollama API for local model inference
52    ///
53    /// Uses endpoint format: `http://localhost:11434/api/chat`
54    /// No API key required for local instances
55    Ollama,
56    /// OpenAI-compatible API for cloud-based inference
57    ///
58    /// Uses endpoint format: `{url}/chat/completions`
59    /// Requires API key authentication
60    Openai,
61}
62
63impl fmt::Display for Api {
64    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
65        match self {
66            Api::Ollama => write!(f, "{}", "ollama"),
67            Api::Openai => write!(f, "{}", "openai"),
68        }
69    }
70}
71
72impl Api {
73    /// Create an Api variant from a string identifier
74    ///
75    /// # Arguments
76    ///
77    /// * `api` - String identifier ("ollama" or any other value defaults to "openai")
78    ///
79    /// # Examples
80    ///
81    /// ```rust
82    /// use aimx::Api;
83    ///
84    /// let ollama = Api::new("ollama");
85    /// let openai = Api::new("openai");
86    /// let default = Api::new("anything"); // defaults to Openai
87    /// ```
88    pub fn new(api: &str) -> Self {
89        match api {
90            "ollama" => Api::Ollama,
91            _ => Api::Openai,
92        }
93    }
94}
95
96/// Model capability levels
97///
98/// Defines the expected capability level of the AI model, which influences
99/// prompt complexity and response expectations.
100#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
101pub enum Capability {
102    /// Minimal capability - simple tasks, basic responses
103    /// Suitable for classification, extraction, or simple Q&A
104    Minimal,
105    /// Limited capability - moderate complexity
106    /// Suitable for summarization, basic reasoning, or multi-step tasks
107    Limited,
108    /// Standard capability - full reasoning ability
109    /// Suitable for complex reasoning, planning, and creative tasks
110    Standard,
111}
112
113impl fmt::Display for Capability {
114    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
115        match self {
116            Capability::Minimal => write!(f, "{}", "minimal"),
117            Capability::Limited => write!(f, "{}", "limited"),
118            Capability::Standard => write!(f, "{}", "standard"),
119        }
120    }
121}
122
123impl Capability {
124    /// Create a Capability variant from a string identifier
125    ///
126    /// # Arguments
127    ///
128    /// * `capability` - String identifier ("minimal", "limited", or defaults to "standard")
129    ///
130    /// # Examples
131    ///
132    /// ```rust
133    /// use aimx::Capability;
134    ///
135    /// let minimal = Capability::new("minimal");
136    /// let limited = Capability::new("limited");
137    /// let standard = Capability::new("standard");
138    /// let default = Capability::new("anything"); // defaults to Standard
139    /// ```
140    pub fn new(capability: &str) -> Self {
141        match capability {
142            "minimal" => Capability::Minimal,
143            "limited" => Capability::Limited,
144            _ => Capability::Standard,
145        }
146    }
147}
148
149/// Model performance types
150///
151/// Defines the performance characteristics and intended use cases for AI models.
152#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
153pub enum Model {
154    /// Fast model - optimized for speed and low latency
155    /// Suitable for simple classification, extraction, or quick responses
156    Fast,
157    /// Standard model - balanced performance for general use
158    /// Suitable for most inference tasks including summarization and basic reasoning
159    Standard,
160    /// Planning model - optimized for complex reasoning and planning
161    /// Suitable for multi-step reasoning, complex problem solving, and creative tasks
162    Planning,
163}
164
165impl fmt::Display for Model {
166    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
167        match self {
168            Model::Fast => write!(f, "{}", "fast"),
169            Model::Standard => write!(f, "{}", "standard"),
170            Model::Planning => write!(f, "{}", "planning"),
171        }
172    }
173}
174
175impl Model {
176    /// Create a Model variant from a string identifier
177    ///
178    /// # Arguments
179    ///
180    /// * `performance` - String identifier ("fast", "planning", or defaults to "standard")
181    ///
182    /// # Examples
183    ///
184    /// ```rust
185    /// use aimx::Model;
186    ///
187    /// let fast = Model::new("fast");
188    /// let planning = Model::new("planning");
189    /// let standard = Model::new("standard");
190    /// let default = Model::new("anything"); // defaults to Standard
191    /// ```
192    pub fn new(performance: &str) -> Self {
193        match performance {
194            "fast" => Model::Fast,
195            "planning" => Model::Planning,
196            _ => Model::Standard,
197        }
198    }
199}
200
/// AI inference provider configuration
///
/// Contains all the settings needed to connect to and interact with an AI inference service.
/// This struct is serializable/deserializable for configuration storage.
///
/// The active model name is resolved from the three configured model-name
/// fields (`fast`, `standard`, `planning`) according to the `model` field;
/// see `Provider::model()`.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct Provider {
    /// The AI service API to use
    pub api: Api,
    /// Base URL for the API endpoint
    ///
    /// Examples:
    /// - Ollama: "http://localhost:11434"
    /// - OpenAI: "<https://api.openai.com/v1>"
    /// - OpenRouter: "<https://openrouter.ai/api/v1>"
    pub url: String,
    /// API key for authentication (empty string for no authentication)
    ///
    /// For OpenAI-compatible APIs, this should be a valid API key.
    /// For Ollama, this is typically empty.
    pub key: String,
    /// Model performance type to use
    ///
    /// Selects which of `fast`, `standard`, or `planning` below is used.
    pub model: Model,
    /// Expected capability level of the model
    pub capability: Capability,
    /// Model name for fast inference tasks
    pub fast: String,
    /// Model name for standard inference tasks
    pub standard: String,
    /// Model name for planning/advanced reasoning tasks
    pub planning: String,
    /// Temperature setting for inference (0.0 to 1.0)
    ///
    /// Controls randomness:
    /// - Lower values (e.g., 0.2) = more focused and deterministic
    /// - Higher values (e.g., 0.8) = more creative and random
    ///
    /// NOTE(review): some OpenAI-compatible backends accept values up to 2.0 —
    /// confirm whether the 0.0–1.0 range is enforced elsewhere.
    pub temperature: f64,
    /// Maximum number of tokens to generate in the response
    pub max_tokens: u32,
    /// Connection timeout in milliseconds
    ///
    /// Defaults to 30_000 (30 s) when absent from serialized configuration.
    #[serde(default = "default_connection_timeout")]
    pub connection_timeout_ms: u64,
    /// Request timeout in milliseconds
    ///
    /// Defaults to 120_000 (2 min) when absent from serialized configuration.
    #[serde(default = "default_request_timeout")]
    pub request_timeout_ms: u64,
}
246
247impl Provider {
248    /// Get the actual model name based on the selected model type
249    ///
250    /// Returns the model name string configured for the current performance type.
251    ///
252    /// # Examples
253    ///
254    /// ```rust
255    /// use aimx::inference::provider::{Provider, Api, Model, Capability};
256    ///
257    /// let provider = Provider {
258    ///     api: Api::Ollama,
259    ///     url: "http://localhost:11434".to_string(),
260    ///     key: "".to_string(),
261    ///     model: Model::Fast,
262    ///     capability: Capability::Standard,
263    ///     fast: "mistral:latest".to_string(),
264    ///     standard: "llama2:latest".to_string(),
265    ///     planning: "codellama:latest".to_string(),
266    ///     temperature: 0.7,
267    ///     max_tokens: 2048,
268    ///     connection_timeout_ms: 30000,
269    ///     request_timeout_ms: 120000,
270    /// };
271    ///
272    /// assert_eq!(provider.model(), "mistral:latest");
273    /// ```
274    pub fn model(&self) -> &str {
275        match self.model {
276            Model::Fast => self.fast.as_str(),
277            Model::Standard => self.standard.as_str(),
278            Model::Planning => self.planning.as_str(),
279        }
280    }
281}
282
/// Default connection timeout in milliseconds (30 seconds), used by serde
/// when `connection_timeout_ms` is absent from serialized configuration.
fn default_connection_timeout() -> u64 {
    30_000
}
287
/// Default request timeout in milliseconds (2 minutes), used by serde
/// when `request_timeout_ms` is absent from serialized configuration.
fn default_request_timeout() -> u64 {
    120_000
}