fix(api): sanitize tuning params for reasoning models (o1/o3/o4/grok-3-mini)

Reasoning models reject temperature, top_p, frequency_penalty, and
presence_penalty with 400 errors. Instead of letting these flow through
and returning cryptic provider errors, strip them silently at the
request-builder boundary.

is_reasoning_model() classifies: o1*, o3*, o4*, grok-3-mini.
stop sequences are preserved (safe for all providers).

Tests added:
- reasoning_model_strips_tuning_params: o1-mini strips all 4 params, keeps stop
- grok_3_mini_is_reasoning_model: classification coverage for grok-3-mini, o1,
  o3-mini, and negative cases (gpt-4o, grok-3, claude)

85 api lib tests passing, 0 failing.
This commit is contained in:
YeonGyu-Kim
2026-04-08 07:32:47 +09:00
parent c667d47c70
commit b513d6e462

View File

@@ -690,6 +690,19 @@ struct ErrorBody {
message: Option<String>,
}
/// Returns true for models known to reject tuning parameters like temperature,
/// top_p, frequency_penalty, and presence_penalty. These are typically
/// reasoning/chain-of-thought models with fixed sampling.
///
/// Matching is case-insensitive on the model id so provider-casing variants
/// (e.g. "O1-Mini") classify the same way.
fn is_reasoning_model(model: &str) -> bool {
    let lowered = model.to_ascii_lowercase();
    // OpenAI reasoning families: o1*, o3*, o4* (covers -mini/-preview/-pro suffixes).
    lowered.starts_with("o1")
        || lowered.starts_with("o3")
        || lowered.starts_with("o4")
        // xAI reasoning: grok-3-mini and its variants (grok-3-mini-beta,
        // grok-3-mini-fast, ...) always run in reasoning mode. A prefix match
        // catches the variants while still excluding plain "grok-3", which
        // does accept tuning parameters.
        || lowered.starts_with("grok-3-mini")
}
fn build_chat_completion_request(request: &MessageRequest, config: OpenAiCompatConfig) -> Value {
let mut messages = Vec::new();
if let Some(system) = request.system.as_ref().filter(|value| !value.is_empty()) {
@@ -722,18 +735,23 @@ fn build_chat_completion_request(request: &MessageRequest, config: OpenAiCompatC
}
// OpenAI-compatible tuning parameters — only included when explicitly set.
if let Some(temperature) = request.temperature {
payload["temperature"] = json!(temperature);
}
if let Some(top_p) = request.top_p {
payload["top_p"] = json!(top_p);
}
if let Some(frequency_penalty) = request.frequency_penalty {
payload["frequency_penalty"] = json!(frequency_penalty);
}
if let Some(presence_penalty) = request.presence_penalty {
payload["presence_penalty"] = json!(presence_penalty);
// Reasoning models (o1/o3/o4/grok-3-mini) reject these params with 400;
// silently strip them to avoid cryptic provider errors.
if !is_reasoning_model(&request.model) {
if let Some(temperature) = request.temperature {
payload["temperature"] = json!(temperature);
}
if let Some(top_p) = request.top_p {
payload["top_p"] = json!(top_p);
}
if let Some(frequency_penalty) = request.frequency_penalty {
payload["frequency_penalty"] = json!(frequency_penalty);
}
if let Some(presence_penalty) = request.presence_penalty {
payload["presence_penalty"] = json!(presence_penalty);
}
}
// stop is generally safe for all providers
if let Some(stop) = &request.stop {
if !stop.is_empty() {
payload["stop"] = json!(stop);
@@ -1028,8 +1046,9 @@ impl StringExt for String {
#[cfg(test)]
mod tests {
use super::{
build_chat_completion_request, chat_completions_endpoint, normalize_finish_reason,
openai_tool_choice, parse_tool_arguments, OpenAiCompatClient, OpenAiCompatConfig,
build_chat_completion_request, chat_completions_endpoint, is_reasoning_model,
normalize_finish_reason, openai_tool_choice, parse_tool_arguments, OpenAiCompatClient,
OpenAiCompatConfig,
};
use crate::error::ApiError;
use crate::types::{
@@ -1206,6 +1225,40 @@ mod tests {
assert_eq!(payload["stop"], json!(["\n"]));
}
#[test]
fn reasoning_model_strips_tuning_params() {
    // o1-mini is an OpenAI reasoning model: the request builder must drop
    // all four tuning parameters while preserving the stop sequences.
    let request = MessageRequest {
        stop: Some(vec!["\n".to_string()]),
        presence_penalty: Some(0.3),
        frequency_penalty: Some(0.5),
        top_p: Some(0.9),
        temperature: Some(0.7),
        stream: false,
        messages: vec![],
        max_tokens: 1024,
        model: "o1-mini".to_string(),
        ..Default::default()
    };

    let body = build_chat_completion_request(&request, OpenAiCompatConfig::openai());

    // stop is safe for all providers and must survive untouched.
    assert_eq!(body["stop"], json!(["\n"]));
    assert!(body.get("presence_penalty").is_none());
    assert!(body.get("frequency_penalty").is_none());
    assert!(body.get("top_p").is_none(), "reasoning model should strip top_p");
    assert!(body.get("temperature").is_none(), "reasoning model should strip temperature");
}
#[test]
fn grok_3_mini_is_reasoning_model() {
    // Models that must classify as reasoning (fixed-sampling) models.
    for model in ["grok-3-mini", "o1", "o1-mini", "o3-mini"] {
        assert!(is_reasoning_model(model), "{model} should classify as reasoning");
    }
    // Near-misses and unrelated providers must not match.
    for model in ["gpt-4o", "grok-3", "claude-sonnet-4-6"] {
        assert!(!is_reasoning_model(model), "{model} should not classify as reasoning");
    }
}
#[test]
fn tuning_params_omitted_from_payload_when_none() {
let request = MessageRequest {