aimx/inference/
key.rs

1//! Inference key parsing for Agentic Inference Markup (AIM) format.
2//!
3//! This module provides functionality for parsing inference keys, which are UPPERCASE C-style identifiers
4//! used in AIM files to identify different types of inference directives and their termination patterns.
5//! These keys are used to structure inference prompts and parse model responses.
6//!
//! # Key Grammar
//! The key grammar supports three different suffix patterns:
//! - `key_colon_eol`: UCID ':' whitespace? end-of-line (e.g., `INSTRUCTIONS:\n`) - indicates multi-line content follows
//! - `key_eol`: UCID whitespace? end-of-line (e.g., `SYSTEM\n`) - the key stands alone on its line, so any content follows on the next line
//! - `key_colon`: UCID ':' (e.g., `MODEL: gpt-4`) - indicates inline value follows
12//!
13//! # Examples
14//! In AIM files, inference keys are used to structure prompts:
15//! ```text
16//! // Modifier rules in AIM files
17//! SYSTEM: You are a helpful assistant
18//! INSTRUCTIONS:
19//! Summarize the following text in 100 words or less.
20//! Focus on key points and maintain clarity.
21//! MODEL: gpt-4
22//! ```
23//!
//! In model responses, keys indicate how to parse the returned content:
//! ```text
//! ANSWER: 42
//! LIST:
//! - First item
//! - Second item
//! SUMMARY
//! This is a summary on the line after the key.
//! ```
33//!
34//! The parser recognizes these keys and their different termination patterns to properly
35//! structure inference prompts and parse responses.
36
37use nom::{
38    error::{Error, ErrorKind},
39    IResult, Parser,
40    character::complete::{satisfy, char, multispace0},
41    bytes::complete::{take_while},
42    combinator::recognize,
43    branch::alt,
44    sequence::pair,
45};
46
/// The suffix pattern that terminated an inference key.
///
/// [`parse_key`] reports which of the three grammar patterns matched so the caller can
/// decide where the key's content lives. The patterns are tried in the order the variants
/// are declared here, so an earlier variant wins whenever more than one could match.
///
/// The enum is a plain tag with no payload, so it is `Copy` and can be used as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Suffix {
    /// Key followed by a colon, optional whitespace, and end-of-line
    /// (e.g., `INSTRUCTIONS:\n`).
    ///
    /// This indicates a multi-line value follows on subsequent lines.
    /// This pattern has the highest precedence in parsing.
    ColonEol,

    /// Key followed by optional whitespace and end-of-line, with no colon
    /// (e.g., `SYSTEM\n`).
    ///
    /// The key stands alone on its line, so whatever belongs to it can only appear on
    /// the lines that follow.
    /// This pattern has medium precedence in parsing.
    Eol,

    /// Key followed by a colon and further content (e.g., `MODEL: gpt-4`).
    ///
    /// This indicates an inline value follows on the same line.
    /// This pattern has the lowest precedence in parsing.
    Colon,
}
70
71/// Parse an UPPERCASE C-style identifier (UCID).
72///
73/// UCIDs follow the pattern: starting with an uppercase letter or underscore,
74/// followed by any combination of uppercase letters, digits, or underscores.
75///
76/// # Grammar
77/// ```text
78/// UCID = [A-Z_] [A-Z0-9_]*
79/// ```
80///
81/// # Examples
82/// ```rust
83/// use aimx::inference::key::parse_ucid;
84///
85/// assert_eq!(parse_ucid("INSTRUCTIONS"), Ok(("", "INSTRUCTIONS")));
86/// assert_eq!(parse_ucid("MODEL_NAME"), Ok(("", "MODEL_NAME")));
87/// assert_eq!(parse_ucid("_PRIVATE_KEY"), Ok(("", "_PRIVATE_KEY")));
88/// ```
89///
90/// # Arguments
91/// * `input` - The input string to parse
92///
93/// # Returns
94/// Returns an `IResult` containing the remaining input and the parsed identifier.
95pub fn parse_ucid(input: &str) -> IResult<&str, &str> {
96    recognize(pair(
97        satisfy(|c| (c >= 'A' && c <= 'Z') || c == '_'),
98        take_while(|c: char| (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')
99    )).parse(input)
100}
101
102/// Check if we're at the end of a line (no more content).
103///
104/// This is a helper function that succeeds only when there's no more content
105/// to parse, indicating the end of a line or input.
106fn check_eol(input: &str) -> IResult<&str, ()> {
107    if input.is_empty() {
108        Ok((input, ()))
109    } else {
110        Err(nom::Err::Error(Error::new(input, ErrorKind::Fail)))
111    }
112}
113
114/// Parse an inference key according to the AIM grammar.
115///
116/// This function parses inference keys and their suffix patterns, which indicate
117/// how the key's value should be interpreted. The parser tries each suffix pattern
118/// in order of specificity (most specific first).
119///
120/// # Grammar Patterns
121/// 1. `key_colon_eol` = UCID ':' whitespace? EOL
122/// 2. `key_eol` = UCID whitespace? EOL
123/// 3. `key_colon` = UCID ':'
124///
125/// # Examples
126/// ```rust
127/// use aimx::inference::key::{parse_key, Suffix};
128///
129/// // ColonEol pattern
130/// assert_eq!( 
131///     parse_key("INSTRUCTIONS:\n"), 
132///     Ok(("", ("INSTRUCTIONS".to_string(), Suffix::ColonEol)))
133/// );
134///
135/// // Eol pattern
136/// assert_eq!(
137///     parse_key("SYSTEM\n"),
138///     Ok(("", ("SYSTEM".to_string(), Suffix::Eol)))
139/// );
140///
141/// // Colon pattern
142/// assert_eq!(
143///     parse_key("MODEL:"),
144///     Ok(("", ("MODEL".to_string(), Suffix::ColonEol)))
145/// );
146/// ```
147///
148/// # Arguments
149/// * `input` - The input string to parse
150///
151/// # Returns
152/// Returns an `IResult` containing the remaining input and a tuple of:
153/// - The parsed key as a `String`
154/// - The detected `Suffix` pattern
155pub fn parse_key(input: &str) -> IResult<&str, (String, Suffix)> {
156    let (input, _) = multispace0(input)?;
157    let (remaining, ucid) = parse_ucid(input)?;
158    
159    // Try to parse each possible suffix in order of specificity
160    alt((
161        // key_colon_eol = UCID ':' WS? EOL
162        |input| {
163            let (input, _) = char(':')(input)?;
164            let (input, _) = multispace0(input)?;
165            let (input, _) = check_eol(input)?;
166            Ok((input, (ucid.to_string(), Suffix::ColonEol)))
167        },
168        // key_eol = UCID WS? EOL  
169        |input| {
170            let (input, _) = multispace0(input)?;
171            let (input, _) = check_eol(input)?;
172            Ok((input, (ucid.to_string(), Suffix::Eol)))
173        },
174        // key_colon = UCID ':'
175        |input| {
176            let (input, _) = char(':')(input)?;
177            Ok((input, (ucid.to_string(), Suffix::Colon)))
178        }
179    )).parse(remaining)
180}