Compare commits

..

1 Commit

Author SHA1 Message Date
Yeachan-Heo
fa72cd665e Block oversized requests before providers hard-fail
The runtime already tracked rough token estimates for compaction, but provider-bound
requests still relied on naive model output limits and could be sent upstream even
when the selected model could not fit the estimated prompt plus requested output.

This adds a small model token/context registry in the API layer, estimates request
size from the serialized prompt payload, and fails locally with a dedicated
context-window error before Anthropic or xAI calls are made. Focused integration
coverage asserts the preflight fires before any HTTP request leaves the process.

Constraint: Keep the first pass minimal and reusable across both Anthropic and OpenAI-compatible providers
Rejected: Auto-compact-and-retry in the same patch | broader control-flow change than the requested minimal preflight
Confidence: medium
Scope-risk: narrow
Reversibility: clean
Directive: Expand the model registry before enabling preflight for additional providers or aliases
Tested: cargo build -p api -p tools -p rusty-claude-cli; cargo test -p api
Not-tested: End-to-end CLI auto-compaction or retry behavior after a local context_window_blocked failure
2026-04-05 16:39:58 +00:00
8 changed files with 268 additions and 110 deletions
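The preflight described in the commit message turns what used to be an upstream provider rejection into a local, non-retryable ApiError::ContextWindowExceeded. A minimal caller-side sketch follows, assuming the api crate exports AnthropicClient, MessageRequest, and MessageResponse the way the integration tests below suggest; the compact-or-trim reaction is illustrative only, since auto-compact-and-retry is deliberately out of scope for this patch.

use api::{AnthropicClient, ApiError, MessageRequest, MessageResponse};

// Sketch only, not part of this diff: how a session layer might branch on the
// new local preflight failure instead of retrying the request as-is.
async fn send_or_explain(
    client: &AnthropicClient,
    request: &MessageRequest,
) -> Result<MessageResponse, ApiError> {
    let result = client.send_message(request).await;
    if let Err(ApiError::ContextWindowExceeded {
        model,
        estimated_total_tokens,
        context_window_tokens,
        ..
    }) = &result
    {
        // The error is raised before any HTTP request leaves the process and is
        // not retryable, so the caller can compact the session or lower
        // max_tokens and rebuild the request rather than retrying blindly.
        eprintln!(
            "{model}: ~{estimated_total_tokens} estimated tokens exceed the \
             {context_window_tokens}-token window; compact before retrying"
        );
    }
    result
}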

View File

@@ -8,6 +8,13 @@ pub enum ApiError {
provider: &'static str,
env_vars: &'static [&'static str],
},
ContextWindowExceeded {
model: String,
estimated_input_tokens: u32,
requested_output_tokens: u32,
estimated_total_tokens: u32,
context_window_tokens: u32,
},
ExpiredOAuthToken,
Auth(String),
InvalidApiKeyEnv(VarError),
@@ -48,6 +55,7 @@ impl ApiError {
Self::Api { retryable, .. } => *retryable,
Self::RetriesExhausted { last_error, .. } => last_error.is_retryable(),
Self::MissingCredentials { .. }
| Self::ContextWindowExceeded { .. }
| Self::ExpiredOAuthToken
| Self::Auth(_)
| Self::InvalidApiKeyEnv(_)
@@ -67,6 +75,16 @@ impl Display for ApiError {
"missing {provider} credentials; export {} before calling the {provider} API",
env_vars.join(" or ")
),
Self::ContextWindowExceeded {
model,
estimated_input_tokens,
requested_output_tokens,
estimated_total_tokens,
context_window_tokens,
} => write!(
f,
"context_window_blocked for {model}: estimated input {estimated_input_tokens} + requested output {requested_output_tokens} = {estimated_total_tokens} tokens exceeds the {context_window_tokens}-token context window; compact the session or reduce request size before retrying"
),
Self::ExpiredOAuthToken => {
write!(
f,

View File

@@ -14,7 +14,7 @@ use telemetry::{AnalyticsEvent, AnthropicRequestProfile, ClientIdentity, Session
use crate::error::ApiError;
use crate::prompt_cache::{PromptCache, PromptCacheRecord, PromptCacheStats};
use super::{Provider, ProviderFuture};
use super::{preflight_message_request, Provider, ProviderFuture};
use crate::sse::SseParser;
use crate::types::{MessageDeltaEvent, MessageRequest, MessageResponse, StreamEvent, Usage};
@@ -294,6 +294,8 @@ impl AnthropicClient {
}
}
preflight_message_request(&request)?;
let response = self.send_with_retry(&request).await?;
let request_id = request_id_from_headers(response.headers());
let mut response = response
@@ -337,6 +339,7 @@ impl AnthropicClient {
&self,
request: &MessageRequest,
) -> Result<MessageStream, ApiError> {
preflight_message_request(request)?;
let response = self
.send_with_retry(&request.clone().with_streaming())
.await?;

View File

@@ -1,6 +1,8 @@
use std::future::Future;
use std::pin::Pin;
use serde::Serialize;
use crate::error::ApiError;
use crate::types::{MessageRequest, MessageResponse};
@@ -40,6 +42,12 @@ pub struct ProviderMetadata {
pub default_base_url: &'static str,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ModelTokenLimit {
pub max_output_tokens: u32,
pub context_window_tokens: u32,
}
const MODEL_REGISTRY: &[(&str, ProviderMetadata)] = &[
(
"opus",
@@ -182,17 +190,86 @@ pub fn detect_provider_kind(model: &str) -> ProviderKind {
#[must_use]
pub fn max_tokens_for_model(model: &str) -> u32 {
model_token_limit(model).map_or_else(
|| {
let canonical = resolve_model_alias(model);
if canonical.contains("opus") {
32_000
} else {
64_000
}
},
|limit| limit.max_output_tokens,
)
}
#[must_use]
pub fn model_token_limit(model: &str) -> Option<ModelTokenLimit> {
let canonical = resolve_model_alias(model);
if canonical.contains("opus") {
32_000
} else {
64_000
match canonical.as_str() {
"claude-opus-4-6" => Some(ModelTokenLimit {
max_output_tokens: 32_000,
context_window_tokens: 200_000,
}),
"claude-sonnet-4-6" | "claude-haiku-4-5-20251213" => Some(ModelTokenLimit {
max_output_tokens: 64_000,
context_window_tokens: 200_000,
}),
"grok-3" | "grok-3-mini" => Some(ModelTokenLimit {
max_output_tokens: 64_000,
context_window_tokens: 131_072,
}),
_ => None,
}
}
pub fn preflight_message_request(request: &MessageRequest) -> Result<(), ApiError> {
let Some(limit) = model_token_limit(&request.model) else {
return Ok(());
};
let estimated_input_tokens = estimate_message_request_input_tokens(request);
let estimated_total_tokens = estimated_input_tokens.saturating_add(request.max_tokens);
if estimated_total_tokens > limit.context_window_tokens {
return Err(ApiError::ContextWindowExceeded {
model: resolve_model_alias(&request.model),
estimated_input_tokens,
requested_output_tokens: request.max_tokens,
estimated_total_tokens,
context_window_tokens: limit.context_window_tokens,
});
}
Ok(())
}
fn estimate_message_request_input_tokens(request: &MessageRequest) -> u32 {
let mut estimate = estimate_serialized_tokens(&request.messages);
estimate = estimate.saturating_add(estimate_serialized_tokens(&request.system));
estimate = estimate.saturating_add(estimate_serialized_tokens(&request.tools));
estimate = estimate.saturating_add(estimate_serialized_tokens(&request.tool_choice));
estimate
}
fn estimate_serialized_tokens<T: Serialize>(value: &T) -> u32 {
serde_json::to_vec(value)
.ok()
.map_or(0, |bytes| (bytes.len() / 4 + 1) as u32)
}
#[cfg(test)]
mod tests {
use super::{detect_provider_kind, max_tokens_for_model, resolve_model_alias, ProviderKind};
use serde_json::json;
use crate::error::ApiError;
use crate::types::{
InputContentBlock, InputMessage, MessageRequest, ToolChoice, ToolDefinition,
};
use super::{
detect_provider_kind, max_tokens_for_model, model_token_limit, preflight_message_request,
resolve_model_alias, ProviderKind,
};
#[test]
fn resolves_grok_aliases() {
@@ -215,4 +292,86 @@ mod tests {
assert_eq!(max_tokens_for_model("opus"), 32_000);
assert_eq!(max_tokens_for_model("grok-3"), 64_000);
}
#[test]
fn returns_context_window_metadata_for_supported_models() {
assert_eq!(
model_token_limit("claude-sonnet-4-6")
.expect("claude-sonnet-4-6 should be registered")
.context_window_tokens,
200_000
);
assert_eq!(
model_token_limit("grok-mini")
.expect("grok-mini should resolve to a registered model")
.context_window_tokens,
131_072
);
}
#[test]
fn preflight_blocks_requests_that_exceed_the_model_context_window() {
let request = MessageRequest {
model: "claude-sonnet-4-6".to_string(),
max_tokens: 64_000,
messages: vec![InputMessage {
role: "user".to_string(),
content: vec![InputContentBlock::Text {
text: "x".repeat(600_000),
}],
}],
system: Some("Keep the answer short.".to_string()),
tools: Some(vec![ToolDefinition {
name: "weather".to_string(),
description: Some("Fetches weather".to_string()),
input_schema: json!({
"type": "object",
"properties": { "city": { "type": "string" } },
}),
}]),
tool_choice: Some(ToolChoice::Auto),
stream: true,
};
let error = preflight_message_request(&request)
.expect_err("oversized request should be rejected before the provider call");
match error {
ApiError::ContextWindowExceeded {
model,
estimated_input_tokens,
requested_output_tokens,
estimated_total_tokens,
context_window_tokens,
} => {
assert_eq!(model, "claude-sonnet-4-6");
assert!(estimated_input_tokens > 136_000);
assert_eq!(requested_output_tokens, 64_000);
assert!(estimated_total_tokens > context_window_tokens);
assert_eq!(context_window_tokens, 200_000);
}
other => panic!("expected context-window preflight failure, got {other:?}"),
}
}
#[test]
fn preflight_skips_unknown_models() {
let request = MessageRequest {
model: "unknown-model".to_string(),
max_tokens: 64_000,
messages: vec![InputMessage {
role: "user".to_string(),
content: vec![InputContentBlock::Text {
text: "x".repeat(600_000),
}],
}],
system: None,
tools: None,
tool_choice: None,
stream: false,
};
preflight_message_request(&request)
.expect("models without context metadata should skip the guarded preflight");
}
}
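The estimator in this file is deliberately crude: the serialized JSON byte length divided by four, plus one, summed over messages, system, tools, and tool_choice. As a worked check against the oversized-request test above, the 600,000-character message serializes to a little over 600,000 bytes, so the estimate lands near 150,000 input tokens; adding the requested 64,000 output tokens gives roughly 214,000, comfortably past the 136,000-token input budget implied by the 200,000-token claude-sonnet-4-6 window. Because field names and JSON escaping inflate the byte count, the heuristic errs on the high side, which is the safe direction for a preflight. A standalone sketch of the same arithmetic (heuristic only, not a provider-accurate tokenizer):

use serde_json::json;

// Same ~4-bytes-per-token heuristic as estimate_serialized_tokens, applied to a
// payload shaped like the oversized test request. Numbers are approximate.
fn rough_tokens(bytes: &[u8]) -> u32 {
    (bytes.len() / 4 + 1) as u32
}

fn main() {
    let messages = json!([
        { "role": "user", "content": [{ "type": "text", "text": "x".repeat(600_000) }] }
    ]);
    let bytes = serde_json::to_vec(&messages).expect("serializable");
    let estimated_input = rough_tokens(&bytes); // ~150_000
    let requested_output = 64_000_u32;
    let total = estimated_input.saturating_add(requested_output);
    // claude-sonnet-4-6's window is 200_000 tokens, so ~214_000 trips the preflight.
    assert!(total > 200_000);
    println!("estimated input ~{estimated_input}, estimated total ~{total}");
}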

View File

@@ -12,7 +12,7 @@ use crate::types::{
ToolChoice, ToolDefinition, ToolResultContentBlock, Usage,
};
use super::{Provider, ProviderFuture};
use super::{preflight_message_request, Provider, ProviderFuture};
pub const DEFAULT_XAI_BASE_URL: &str = "https://api.x.ai/v1";
pub const DEFAULT_OPENAI_BASE_URL: &str = "https://api.openai.com/v1";
@@ -128,6 +128,7 @@ impl OpenAiCompatClient {
stream: false,
..request.clone()
};
preflight_message_request(&request)?;
let response = self.send_with_retry(&request).await?;
let request_id = request_id_from_headers(response.headers());
let payload = response.json::<ChatCompletionResponse>().await?;
@@ -142,6 +143,7 @@ impl OpenAiCompatClient {
&self,
request: &MessageRequest,
) -> Result<MessageStream, ApiError> {
preflight_message_request(request)?;
let response = self
.send_with_retry(&request.clone().with_streaming())
.await?;

View File

@@ -103,6 +103,41 @@ async fn send_message_posts_json_and_parses_response() {
);
}
#[tokio::test]
async fn send_message_blocks_oversized_requests_before_the_http_call() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
let server = spawn_server(
state.clone(),
vec![http_response("200 OK", "application/json", "{}")],
)
.await;
let client = AnthropicClient::new("test-key").with_base_url(server.base_url());
let error = client
.send_message(&MessageRequest {
model: "claude-sonnet-4-6".to_string(),
max_tokens: 64_000,
messages: vec![InputMessage {
role: "user".to_string(),
content: vec![InputContentBlock::Text {
text: "x".repeat(600_000),
}],
}],
system: Some("Keep the answer short.".to_string()),
tools: None,
tool_choice: None,
stream: false,
})
.await
.expect_err("oversized request should fail local context-window preflight");
assert!(matches!(error, ApiError::ContextWindowExceeded { .. }));
assert!(
state.lock().await.is_empty(),
"preflight failure should avoid any upstream HTTP request"
);
}
#[tokio::test]
async fn send_message_applies_request_profile_and_records_telemetry() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));

View File

@@ -4,10 +4,10 @@ use std::sync::Arc;
use std::sync::{Mutex as StdMutex, OnceLock};
use api::{
ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent,
InputContentBlock, InputMessage, MessageDeltaEvent, MessageRequest, OpenAiCompatClient,
OpenAiCompatConfig, OutputContentBlock, ProviderClient, StreamEvent, ToolChoice,
ToolDefinition,
ApiError, ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent,
ContentBlockStopEvent, InputContentBlock, InputMessage, MessageDeltaEvent, MessageRequest,
OpenAiCompatClient, OpenAiCompatConfig, OutputContentBlock, ProviderClient, StreamEvent,
ToolChoice, ToolDefinition,
};
use serde_json::json;
use tokio::io::{AsyncReadExt, AsyncWriteExt};
@@ -63,6 +63,42 @@ async fn send_message_uses_openai_compatible_endpoint_and_auth() {
assert_eq!(body["tools"][0]["type"], json!("function"));
}
#[tokio::test]
async fn send_message_blocks_oversized_xai_requests_before_the_http_call() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));
let server = spawn_server(
state.clone(),
vec![http_response("200 OK", "application/json", "{}")],
)
.await;
let client = OpenAiCompatClient::new("xai-test-key", OpenAiCompatConfig::xai())
.with_base_url(server.base_url());
let error = client
.send_message(&MessageRequest {
model: "grok-3".to_string(),
max_tokens: 64_000,
messages: vec![InputMessage {
role: "user".to_string(),
content: vec![InputContentBlock::Text {
text: "x".repeat(300_000),
}],
}],
system: Some("Keep the answer short.".to_string()),
tools: None,
tool_choice: None,
stream: false,
})
.await
.expect_err("oversized request should fail local context-window preflight");
assert!(matches!(error, ApiError::ContextWindowExceeded { .. }));
assert!(
state.lock().await.is_empty(),
"preflight failure should avoid any upstream HTTP request"
);
}
#[tokio::test]
async fn send_message_accepts_full_chat_completions_endpoint_override() {
let state = Arc::new(Mutex::new(Vec::<CapturedRequest>::new()));

View File

@@ -2142,22 +2142,13 @@ pub fn handle_plugins_slash_command(
}
pub fn handle_agents_slash_command(args: Option<&str>, cwd: &Path) -> std::io::Result<String> {
if let Some(args) = normalize_optional_args(args) {
if let Some(help_path) = help_path_from_args(args) {
return Ok(match help_path.as_slice() {
[] => render_agents_usage(None),
_ => render_agents_usage(Some(&help_path.join(" "))),
});
}
}
match normalize_optional_args(args) {
None | Some("list") => {
let roots = discover_definition_roots(cwd, "agents");
let agents = load_agents_from_roots(&roots)?;
Ok(render_agents_report(&agents))
}
Some(args) if is_help_arg(args) => Ok(render_agents_usage(None)),
Some("-h" | "--help" | "help") => Ok(render_agents_usage(None)),
Some(args) => Ok(render_agents_usage(Some(args))),
}
}
@@ -2171,16 +2162,6 @@ pub fn handle_mcp_slash_command(
}
pub fn handle_skills_slash_command(args: Option<&str>, cwd: &Path) -> std::io::Result<String> {
if let Some(args) = normalize_optional_args(args) {
if let Some(help_path) = help_path_from_args(args) {
return Ok(match help_path.as_slice() {
[] => render_skills_usage(None),
["install", ..] => render_skills_usage(Some("install")),
_ => render_skills_usage(Some(&help_path.join(" "))),
});
}
}
match normalize_optional_args(args) {
None | Some("list") => {
let roots = discover_skill_roots(cwd);
@@ -2196,7 +2177,7 @@ pub fn handle_skills_slash_command(args: Option<&str>, cwd: &Path) -> std::io::R
let install = install_skill(target, cwd)?;
Ok(render_skill_install_report(&install))
}
Some(args) if is_help_arg(args) => Ok(render_skills_usage(None)),
Some("-h" | "--help" | "help") => Ok(render_skills_usage(None)),
Some(args) => Ok(render_skills_usage(Some(args))),
}
}
@@ -2206,16 +2187,6 @@ fn render_mcp_report_for(
cwd: &Path,
args: Option<&str>,
) -> Result<String, runtime::ConfigError> {
if let Some(args) = normalize_optional_args(args) {
if let Some(help_path) = help_path_from_args(args) {
return Ok(match help_path.as_slice() {
[] => render_mcp_usage(None),
["show", ..] => render_mcp_usage(Some("show")),
_ => render_mcp_usage(Some(&help_path.join(" "))),
});
}
}
match normalize_optional_args(args) {
None | Some("list") => {
let runtime_config = loader.load()?;
@@ -2224,7 +2195,7 @@ fn render_mcp_report_for(
runtime_config.mcp().servers(),
))
}
Some(args) if is_help_arg(args) => Ok(render_mcp_usage(None)),
Some("-h" | "--help" | "help") => Ok(render_mcp_usage(None)),
Some("show") => Ok(render_mcp_usage(Some("show"))),
Some(args) if args.split_whitespace().next() == Some("show") => {
let mut parts = args.split_whitespace();
@@ -3065,16 +3036,6 @@ fn normalize_optional_args(args: Option<&str>) -> Option<&str> {
args.map(str::trim).filter(|value| !value.is_empty())
}
fn is_help_arg(arg: &str) -> bool {
matches!(arg, "help" | "-h" | "--help")
}
fn help_path_from_args(args: &str) -> Option<Vec<&str>> {
let parts = args.split_whitespace().collect::<Vec<_>>();
let help_index = parts.iter().position(|part| is_help_arg(part))?;
Some(parts[..help_index].to_vec())
}
fn render_agents_usage(unexpected: Option<&str>) -> String {
let mut lines = vec![
"Agents".to_string(),
@@ -4044,17 +4005,7 @@ mod tests {
let skills_unexpected =
super::handle_skills_slash_command(Some("show help"), &cwd).expect("skills usage");
assert!(skills_unexpected.contains("Unexpected show"));
let skills_install_help = super::handle_skills_slash_command(Some("install --help"), &cwd)
.expect("nested skills help");
assert!(skills_install_help.contains("Usage /skills [list|install <path>|help]"));
assert!(skills_install_help.contains("Unexpected install"));
let skills_unknown_help =
super::handle_skills_slash_command(Some("show --help"), &cwd).expect("skills help");
assert!(skills_unknown_help.contains("Usage /skills [list|install <path>|help]"));
assert!(skills_unknown_help.contains("Unexpected show"));
assert!(skills_unexpected.contains("Unexpected show help"));
let _ = fs::remove_dir_all(cwd);
}
@@ -4071,16 +4022,6 @@ mod tests {
super::handle_mcp_slash_command(Some("show alpha beta"), &cwd).expect("mcp usage");
assert!(unexpected.contains("Unexpected show alpha beta"));
let nested_help =
super::handle_mcp_slash_command(Some("show --help"), &cwd).expect("mcp help");
assert!(nested_help.contains("Usage /mcp [list|show <server>|help]"));
assert!(nested_help.contains("Unexpected show"));
let unknown_help =
super::handle_mcp_slash_command(Some("inspect --help"), &cwd).expect("mcp usage");
assert!(unknown_help.contains("Usage /mcp [list|show <server>|help]"));
assert!(unknown_help.contains("Unexpected inspect"));
let _ = fs::remove_dir_all(cwd);
}

View File

@@ -160,42 +160,6 @@ fn config_command_loads_defaults_from_standard_config_locations() {
fs::remove_dir_all(temp_dir).expect("cleanup temp dir");
}
#[test]
fn nested_help_flags_render_usage_instead_of_falling_through() {
let temp_dir = unique_temp_dir("nested-help");
fs::create_dir_all(&temp_dir).expect("temp dir should exist");
let mcp_output = command_in(&temp_dir)
.args(["mcp", "show", "--help"])
.output()
.expect("claw should launch");
assert_success(&mcp_output);
let mcp_stdout = String::from_utf8(mcp_output.stdout).expect("stdout should be utf8");
assert!(mcp_stdout.contains("Usage /mcp [list|show <server>|help]"));
assert!(mcp_stdout.contains("Unexpected show"));
assert!(!mcp_stdout.contains("server `--help` is not configured"));
let skills_output = command_in(&temp_dir)
.args(["skills", "install", "--help"])
.output()
.expect("claw should launch");
assert_success(&skills_output);
let skills_stdout = String::from_utf8(skills_output.stdout).expect("stdout should be utf8");
assert!(skills_stdout.contains("Usage /skills [list|install <path>|help]"));
assert!(skills_stdout.contains("Unexpected install"));
let unknown_output = command_in(&temp_dir)
.args(["mcp", "inspect", "--help"])
.output()
.expect("claw should launch");
assert_success(&unknown_output);
let unknown_stdout = String::from_utf8(unknown_output.stdout).expect("stdout should be utf8");
assert!(unknown_stdout.contains("Usage /mcp [list|show <server>|help]"));
assert!(unknown_stdout.contains("Unexpected inspect"));
fs::remove_dir_all(temp_dir).expect("cleanup temp dir");
}
fn command_in(cwd: &Path) -> Command {
let mut command = Command::new(env!("CARGO_BIN_EXE_claw"));
command.current_dir(cwd);