From eb044f0a02a48bdf5e7e1680334f5db9d6c00185 Mon Sep 17 00:00:00 2001
From: YeonGyu-Kim
Date: Thu, 9 Apr 2026 09:33:45 +0900
Subject: [PATCH] =?UTF-8?q?fix(api):=20emit=20max=5Fcompletion=5Ftokens=20?=
 =?UTF-8?q?for=20gpt-5*=20on=20OpenAI-compat=20path=20=E2=80=94=20closes?=
 =?UTF-8?q?=20ROADMAP=20#35?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

gpt-5.x models reject requests with max_tokens and require
max_completion_tokens. Detect wire model starting with 'gpt-5' and
switch the JSON key accordingly. Older models (gpt-4o etc.) continue to
receive max_tokens unchanged.

Two regression tests added:
- gpt5_uses_max_completion_tokens_not_max_tokens
- non_gpt5_uses_max_tokens

140 api tests pass, cargo fmt clean.
---
 .../crates/api/src/providers/openai_compat.rs | 52 ++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/rust/crates/api/src/providers/openai_compat.rs b/rust/crates/api/src/providers/openai_compat.rs
index 140d93e..c664365 100644
--- a/rust/crates/api/src/providers/openai_compat.rs
+++ b/rust/crates/api/src/providers/openai_compat.rs
@@ -759,9 +759,18 @@ fn build_chat_completion_request(request: &MessageRequest, config: OpenAiCompatC
     // Strip routing prefix (e.g., "openai/gpt-4" → "gpt-4") for the wire.
     let wire_model = strip_routing_prefix(&request.model);
 
+    // gpt-5* requires `max_completion_tokens`; older OpenAI models accept both.
+    // We send the correct field based on the wire model name so gpt-5.x requests
+    // don't fail with "unknown field max_tokens".
+    let max_tokens_key = if wire_model.starts_with("gpt-5") {
+        "max_completion_tokens"
+    } else {
+        "max_tokens"
+    };
+
     let mut payload = json!({
         "model": wire_model,
-        "max_tokens": request.max_tokens,
+        max_tokens_key: request.max_tokens,
         "messages": messages,
         "stream": request.stream,
     });
@@ -1451,4 +1460,45 @@ mod tests {
         assert!(payload.get("presence_penalty").is_none());
         assert!(payload.get("stop").is_none());
     }
+
+    #[test]
+    fn gpt5_uses_max_completion_tokens_not_max_tokens() {
+        // gpt-5* models require `max_completion_tokens`; legacy `max_tokens` causes
+        // a request-validation failure. Verify the correct key is emitted.
+        let request = MessageRequest {
+            model: "gpt-5.2".to_string(),
+            max_tokens: 512,
+            messages: vec![],
+            stream: false,
+            ..Default::default()
+        };
+        let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
+        assert_eq!(
+            payload["max_completion_tokens"],
+            json!(512),
+            "gpt-5.2 should emit max_completion_tokens"
+        );
+        assert!(
+            payload.get("max_tokens").is_none(),
+            "gpt-5.2 must not emit max_tokens"
+        );
+    }
+
+    #[test]
+    fn non_gpt5_uses_max_tokens() {
+        // Older OpenAI models expect `max_tokens`; verify gpt-4o is unaffected.
+        let request = MessageRequest {
+            model: "gpt-4o".to_string(),
+            max_tokens: 512,
+            messages: vec![],
+            stream: false,
+            ..Default::default()
+        };
+        let payload = build_chat_completion_request(&request, OpenAiCompatConfig::openai());
+        assert_eq!(payload["max_tokens"], json!(512));
+        assert!(
+            payload.get("max_completion_tokens").is_none(),
+            "gpt-4o must not emit max_completion_tokens"
+        );
+    }
 }
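
For reference, the key-switch logic can be exercised standalone. A minimal
sketch, assuming only serde_json; the free function max_tokens_key and the
trimmed-down payload are illustrative, not the crate's API:

    use serde_json::json;

    // Pick the token-limit field the OpenAI-compatible endpoint expects:
    // gpt-5* rejects `max_tokens`, older models still accept it.
    fn max_tokens_key(wire_model: &str) -> &'static str {
        if wire_model.starts_with("gpt-5") {
            "max_completion_tokens"
        } else {
            "max_tokens"
        }
    }

    fn main() {
        for model in ["gpt-5.2", "gpt-4o"] {
            // serde_json's json! macro accepts an expression as an object key,
            // which is what the patch relies on in build_chat_completion_request.
            let payload = json!({
                "model": model,
                max_tokens_key(model): 512
            });
            println!("{payload}");
        }
        // Prints (key order may vary):
        // {"max_completion_tokens":512,"model":"gpt-5.2"}
        // {"max_tokens":512,"model":"gpt-4o"}
    }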