aimx/inference/key.rs
1//! Inference key parsing for Agentic Inference Markup (AIM) format.
2//!
3//! This module provides functionality for parsing inference keys, which are UPPERCASE C-style identifiers
4//! used in AIM files to identify different types of inference directives and their termination patterns.
5//! These keys are used to structure inference prompts and parse model responses.
6//!
7//! # Key Grammar
8//! The key grammar supports three different suffix patterns:
//! - `key_colon_eol`: UCID ':' whitespace? end-of-line (e.g., `INSTRUCTIONS:\n`) - indicates multi-line content follows
//! - `key_eol`: UCID whitespace? end-of-line (e.g., `SYSTEM\n`) - indicates simple value on same line
//! - `key_colon`: UCID ':' (e.g., `MODEL:`) - indicates inline value follows
12//!
13//! # Examples
14//! In AIM files, inference keys are used to structure prompts:
15//! ```text
16//! // Modifier rules in AIM files
17//! SYSTEM: You are a helpful assistant
18//! INSTRUCTIONS:
19//! Summarize the following text in 100 words or less.
20//! Focus on key points and maintain clarity.
21//! MODEL: gpt-4
22//! ```
23//!
24//! In model responses, keys indicate how to parse the returned content:
25//! ```text
26//! ANSWER: 42
27//! LIST:
28//! - First item
29//! - Second item
30//! SUMMARY
31//! This is a summary on the same line.
32//! ```
33//!
34//! The parser recognizes these keys and their different termination patterns to properly
35//! structure inference prompts and parse responses.
36
37use nom::{
38 error::{Error, ErrorKind},
39 IResult, Parser,
40 character::complete::{satisfy, char, multispace0},
41 bytes::complete::{take_while},
42 combinator::recognize,
43 branch::alt,
44 sequence::pair,
45};
46
/// The termination pattern found directly after an inference key.
///
/// An inference key in an AIM file can end in one of three ways, and the
/// ending tells the caller how the key's value should be read. When more than
/// one pattern could apply, the parser prefers them in the order the variants
/// are declared here (`ColonEol`, then `Eol`, then `Colon`).
///
/// The type is a plain tag with no payload, so it is `Copy` and `Hash` and can
/// be passed by value or used as a map/set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Suffix {
    /// Key followed by a colon, optional whitespace, and end-of-line
    /// (e.g., `INSTRUCTIONS:\n`).
    ///
    /// Indicates that a multi-line value follows on subsequent lines.
    /// This pattern has the highest precedence in parsing.
    ColonEol,

    /// Key followed by optional whitespace and end-of-line, with no colon
    /// (e.g., `SYSTEM\n`).
    ///
    /// The key stands alone on its line; in the response parser this
    /// indicates that list items follow.
    /// This pattern has medium precedence in parsing.
    Eol,

    /// Key followed by a colon with more content after it (e.g., `MODEL: gpt-4`).
    ///
    /// Indicates that an inline value follows on the same line.
    /// This pattern has the lowest precedence in parsing.
    Colon,
}
70
71/// Parse an UPPERCASE C-style identifier (UCID).
72///
73/// UCIDs follow the pattern: starting with an uppercase letter or underscore,
74/// followed by any combination of uppercase letters, digits, or underscores.
75///
76/// # Grammar
77/// ```text
78/// UCID = [A-Z_] [A-Z0-9_]*
79/// ```
80///
81/// # Examples
82/// ```rust
83/// use aimx::inference::key::parse_ucid;
84///
85/// assert_eq!(parse_ucid("INSTRUCTIONS"), Ok(("", "INSTRUCTIONS")));
86/// assert_eq!(parse_ucid("MODEL_NAME"), Ok(("", "MODEL_NAME")));
87/// assert_eq!(parse_ucid("_PRIVATE_KEY"), Ok(("", "_PRIVATE_KEY")));
88/// ```
89///
90/// # Arguments
91/// * `input` - The input string to parse
92///
93/// # Returns
94/// Returns an `IResult` containing the remaining input and the parsed identifier.
95pub fn parse_ucid(input: &str) -> IResult<&str, &str> {
96 recognize(pair(
97 satisfy(|c| (c >= 'A' && c <= 'Z') || c == '_'),
98 take_while(|c: char| (c >= 'A' && c <= 'Z') || (c >= '0' && c <= '9') || c == '_')
99 )).parse(input)
100}
101
102/// Check if we're at the end of a line (no more content).
103///
104/// This is a helper function that succeeds only when there's no more content
105/// to parse, indicating the end of a line or input.
106fn check_eol(input: &str) -> IResult<&str, ()> {
107 if input.is_empty() {
108 Ok((input, ()))
109 } else {
110 Err(nom::Err::Error(Error::new(input, ErrorKind::Fail)))
111 }
112}
113
114/// Parse an inference key according to the AIM grammar.
115///
116/// This function parses inference keys and their suffix patterns, which indicate
117/// how the key's value should be interpreted. The parser tries each suffix pattern
118/// in order of specificity (most specific first).
119///
120/// # Grammar Patterns
121/// 1. `key_colon_eol` = UCID ':' whitespace? EOL
122/// 2. `key_eol` = UCID whitespace? EOL
123/// 3. `key_colon` = UCID ':'
124///
125/// # Examples
126/// ```rust
127/// use aimx::inference::key::{parse_key, Suffix};
128///
129/// // ColonEol pattern
130/// assert_eq!(
131/// parse_key("INSTRUCTIONS:\n"),
132/// Ok(("", ("INSTRUCTIONS".to_string(), Suffix::ColonEol)))
133/// );
134///
135/// // Eol pattern
136/// assert_eq!(
137/// parse_key("SYSTEM\n"),
138/// Ok(("", ("SYSTEM".to_string(), Suffix::Eol)))
139/// );
140///
141/// // Colon pattern
142/// assert_eq!(
143/// parse_key("MODEL:"),
144/// Ok(("", ("MODEL".to_string(), Suffix::ColonEol)))
145/// );
146/// ```
147///
148/// # Arguments
149/// * `input` - The input string to parse
150///
151/// # Returns
152/// Returns an `IResult` containing the remaining input and a tuple of:
153/// - The parsed key as a `String`
154/// - The detected `Suffix` pattern
155pub fn parse_key(input: &str) -> IResult<&str, (String, Suffix)> {
156 let (input, _) = multispace0(input)?;
157 let (remaining, ucid) = parse_ucid(input)?;
158
159 // Try to parse each possible suffix in order of specificity
160 alt((
161 // key_colon_eol = UCID ':' WS? EOL
162 |input| {
163 let (input, _) = char(':')(input)?;
164 let (input, _) = multispace0(input)?;
165 let (input, _) = check_eol(input)?;
166 Ok((input, (ucid.to_string(), Suffix::ColonEol)))
167 },
168 // key_eol = UCID WS? EOL
169 |input| {
170 let (input, _) = multispace0(input)?;
171 let (input, _) = check_eol(input)?;
172 Ok((input, (ucid.to_string(), Suffix::Eol)))
173 },
174 // key_colon = UCID ':'
175 |input| {
176 let (input, _) = char(':')(input)?;
177 Ok((input, (ucid.to_string(), Suffix::Colon)))
178 }
179 )).parse(remaining)
180}