feat(tools): expose WorkerObserveCompletion tool; add provider-degraded classification tests

observe_completion() on WorkerRegistry classifies finish_reason into
Finished vs Failed (finish='unknown' + 0 tokens = provider degraded).
This logic existed in the runtime but had no tool wrapper — clawhip
could not call it. Added WorkerObserveCompletion as a first-class tool.

Tool schema:
  { worker_id, finish_reason: string, tokens_output: integer }

Handler: run_worker_observe_completion -> global_worker_registry().observe_completion()

Tests added:
- worker_observe_completion_success_finish_sets_finished_status
  finish=end_turn + tokens=512 -> status=finished
- worker_observe_completion_degraded_provider_sets_failed_status
  finish=unknown + tokens=0 -> status=failed, last_error populated

89 tool tests passing, 0 failing.
This commit is contained in:
YeonGyu-Kim
2026-04-08 04:35:05 +09:00
parent c08f060ca1
commit 9461522af5

View File

@@ -963,6 +963,21 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
}),
required_permission: PermissionMode::DangerFullAccess,
},
ToolSpec {
name: "WorkerObserveCompletion",
description: "Report session completion to the worker, classifying finish_reason into Finished or Failed (provider-degraded). Use after the opencode session completes to advance the worker to its terminal state.",
input_schema: json!({
"type": "object",
"properties": {
"worker_id": { "type": "string" },
"finish_reason": { "type": "string" },
"tokens_output": { "type": "integer", "minimum": 0 }
},
"required": ["worker_id", "finish_reason", "tokens_output"],
"additionalProperties": false
}),
required_permission: PermissionMode::DangerFullAccess,
},
ToolSpec {
name: "TeamCreate",
description: "Create a team of sub-agents for parallel task execution.",
@@ -1229,6 +1244,10 @@ fn execute_tool_with_enforcer(
}
"WorkerRestart" => from_value::<WorkerIdInput>(input).and_then(run_worker_restart),
"WorkerTerminate" => from_value::<WorkerIdInput>(input).and_then(run_worker_terminate),
"WorkerObserveCompletion" => {
from_value::<WorkerObserveCompletionInput>(input)
.and_then(run_worker_observe_completion)
}
"TeamCreate" => from_value::<TeamCreateInput>(input).and_then(run_team_create),
"TeamDelete" => from_value::<TeamDeleteInput>(input).and_then(run_team_delete),
"CronCreate" => from_value::<CronCreateInput>(input).and_then(run_cron_create),
@@ -1490,6 +1509,18 @@ fn run_worker_terminate(input: WorkerIdInput) -> Result<String, String> {
to_pretty_json(worker)
}
#[allow(clippy::needless_pass_by_value)]
fn run_worker_observe_completion(
input: WorkerObserveCompletionInput,
) -> Result<String, String> {
let worker = global_worker_registry().observe_completion(
&input.worker_id,
&input.finish_reason,
input.tokens_output,
)?;
to_pretty_json(worker)
}
#[allow(clippy::needless_pass_by_value)]
fn run_team_create(input: TeamCreateInput) -> Result<String, String> {
let task_ids: Vec<String> = input
@@ -2224,6 +2255,13 @@ struct WorkerIdInput {
worker_id: String,
}
/// Deserialized arguments for the `WorkerObserveCompletion` tool.
///
/// Mirrors the tool's JSON input schema, in which all three fields are
/// required.
#[derive(Debug, Deserialize)]
struct WorkerObserveCompletionInput {
/// Identifier of the worker whose completion is being reported.
worker_id: String,
/// Finish reason reported for the session; passed through unmodified to the
/// registry, which performs the finished/failed classification.
finish_reason: String,
/// Output token count reported for the session (schema enforces `>= 0`).
tokens_output: u64,
}
#[derive(Debug, Deserialize)]
struct WorkerObserveInput {
worker_id: String,
@@ -5739,6 +5777,63 @@ mod tests {
);
}
/// Reporting `finish_reason = "end_turn"` with a non-zero output token count
/// through the `WorkerObserveCompletion` tool must leave the worker with
/// status `"finished"` and no prompt in flight.
#[test]
fn worker_observe_completion_success_finish_sets_finished_status() {
    // Create a fresh worker so there is an id to report completion against.
    let create_args = json!({"cwd": "/tmp/observe-completion-test", "trusted_roots": ["/tmp"]});
    let create_raw =
        execute_tool("WorkerCreate", &create_args).expect("WorkerCreate should succeed");
    let create_json: serde_json::Value = serde_json::from_str(&create_raw).expect("json");
    let worker_id = create_json["worker_id"].as_str().expect("worker_id");

    // Report a normal completion for that worker.
    let observe_args = json!({
        "worker_id": worker_id,
        "finish_reason": "end_turn",
        "tokens_output": 512
    });
    let observe_raw = execute_tool("WorkerObserveCompletion", &observe_args)
        .expect("WorkerObserveCompletion should succeed");
    let worker: serde_json::Value = serde_json::from_str(&observe_raw).expect("json");

    assert_eq!(worker["status"], "finished");
    assert_eq!(worker["prompt_in_flight"], false);
}
/// Reporting `finish_reason = "unknown"` together with zero output tokens
/// through the `WorkerObserveCompletion` tool must be classified as a provider
/// failure: status `"failed"`, no prompt in flight, and a non-null
/// `last_error`.
#[test]
fn worker_observe_completion_degraded_provider_sets_failed_status() {
    // Create a fresh worker so there is an id to report completion against.
    let create_args = json!({"cwd": "/tmp/observe-degraded-test", "trusted_roots": ["/tmp"]});
    let create_raw =
        execute_tool("WorkerCreate", &create_args).expect("WorkerCreate should succeed");
    let create_json: serde_json::Value = serde_json::from_str(&create_raw).expect("json");
    let worker_id = create_json["worker_id"].as_str().expect("worker_id");

    // finish=unknown + 0 tokens is the degraded-provider signature.
    let observe_args = json!({
        "worker_id": worker_id,
        "finish_reason": "unknown",
        "tokens_output": 0
    });
    let observe_raw = execute_tool("WorkerObserveCompletion", &observe_args)
        .expect("WorkerObserveCompletion should succeed");
    let worker: serde_json::Value = serde_json::from_str(&observe_raw).expect("json");

    assert_eq!(
        worker["status"], "failed",
        "finish=unknown + 0 tokens should classify as provider failure"
    );
    assert_eq!(worker["prompt_in_flight"], false);

    // The failure must carry an error description, not just a status flip.
    let last_error = &worker["last_error"];
    assert!(
        !last_error.is_null(),
        "last_error should be populated for provider failure"
    );
}
#[test]
fn worker_tools_detect_misdelivery_and_arm_prompt_replay() {
let created = execute_tool(