mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-27 05:24:59 +08:00
fix: #161 — wall-clock timeout for run_turn_loop; stalled turns now abort with stop_reason='timeout'
Previously, run_turn_loop was bounded only by max_turns (turn count). If engine.submit_message stalled — slow provider, hung network, infinite stream — the loop blocked indefinitely with no cancellation path. Claws calling run_turn_loop in CI or orchestration had no reliable way to enforce a deadline; the loop would hang until OS kill or human intervention. Fix: - Add timeout_seconds parameter to run_turn_loop (default None = legacy unbounded). - When set, each submit_message call runs inside a ThreadPoolExecutor and is bounded by the remaining wall-clock budget (total across all turns, not per-turn). - On timeout, synthesize a TurnResult with stop_reason='timeout' carrying the turn's prompt and routed matches so transcripts preserve orchestration context. - Exhausted/negative budget short-circuits before calling submit_message. - Legacy path (timeout_seconds=None) bypasses the executor entirely — zero overhead for callers that don't opt in. CLI: - Added --timeout-seconds flag to 'turn-loop' command. - Exit code 2 when the loop terminated on timeout (vs 0 for completed), so shell scripts can distinguish 'done' from 'budget exhausted'. Tests (tests/test_run_turn_loop_timeout.py, 6 tests): - Legacy unbounded path unchanged (timeout_seconds=None never emits 'timeout') - Hung submit_message aborted within budget (0.3s budget, 5s mock hang → exit <1.5s) - Budget is cumulative across turns (0.6s budget, 0.4s per turn, not per-turn) - timeout_seconds=0 short-circuits first turn without calling submit_message - Negative timeout treated as exhausted (guard against caller bugs) - Timeout TurnResult carries correct prompt, matches, UsageSummary shape Full suite: 49/49 passing, zero regression. Blocker: none. Closes ROADMAP #161.
This commit is contained in:
18
src/main.py
18
src/main.py
@@ -65,6 +65,12 @@ def build_parser() -> argparse.ArgumentParser:
|
|||||||
loop_parser.add_argument('--limit', type=int, default=5)
|
loop_parser.add_argument('--limit', type=int, default=5)
|
||||||
loop_parser.add_argument('--max-turns', type=int, default=3)
|
loop_parser.add_argument('--max-turns', type=int, default=3)
|
||||||
loop_parser.add_argument('--structured-output', action='store_true')
|
loop_parser.add_argument('--structured-output', action='store_true')
|
||||||
|
loop_parser.add_argument(
|
||||||
|
'--timeout-seconds',
|
||||||
|
type=float,
|
||||||
|
default=None,
|
||||||
|
help='total wall-clock budget across all turns (#161). Default: unbounded.',
|
||||||
|
)
|
||||||
|
|
||||||
flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript')
|
flush_parser = subparsers.add_parser('flush-transcript', help='persist and flush a temporary session transcript')
|
||||||
flush_parser.add_argument('prompt')
|
flush_parser.add_argument('prompt')
|
||||||
@@ -187,11 +193,21 @@ def main(argv: list[str] | None = None) -> int:
|
|||||||
print(PortRuntime().bootstrap_session(args.prompt, limit=args.limit).as_markdown())
|
print(PortRuntime().bootstrap_session(args.prompt, limit=args.limit).as_markdown())
|
||||||
return 0
|
return 0
|
||||||
if args.command == 'turn-loop':
|
if args.command == 'turn-loop':
|
||||||
results = PortRuntime().run_turn_loop(args.prompt, limit=args.limit, max_turns=args.max_turns, structured_output=args.structured_output)
|
results = PortRuntime().run_turn_loop(
|
||||||
|
args.prompt,
|
||||||
|
limit=args.limit,
|
||||||
|
max_turns=args.max_turns,
|
||||||
|
structured_output=args.structured_output,
|
||||||
|
timeout_seconds=args.timeout_seconds,
|
||||||
|
)
|
||||||
for idx, result in enumerate(results, start=1):
|
for idx, result in enumerate(results, start=1):
|
||||||
print(f'## Turn {idx}')
|
print(f'## Turn {idx}')
|
||||||
print(result.output)
|
print(result.output)
|
||||||
print(f'stop_reason={result.stop_reason}')
|
print(f'stop_reason={result.stop_reason}')
|
||||||
|
# Exit 2 when a timeout terminated the loop so claws can distinguish
|
||||||
|
# 'ran to completion' from 'hit wall-clock budget'.
|
||||||
|
if results and results[-1].stop_reason == 'timeout':
|
||||||
|
return 2
|
||||||
return 0
|
return 0
|
||||||
if args.command == 'flush-transcript':
|
if args.command == 'flush-transcript':
|
||||||
engine = QueryEnginePort.from_workspace()
|
engine = QueryEnginePort.from_workspace()
|
||||||
|
|||||||
@@ -1,11 +1,13 @@
|
|||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import time
|
||||||
|
from concurrent.futures import ThreadPoolExecutor, TimeoutError as FuturesTimeoutError
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from .commands import PORTED_COMMANDS
|
from .commands import PORTED_COMMANDS
|
||||||
from .context import PortContext, build_port_context, render_context
|
from .context import PortContext, build_port_context, render_context
|
||||||
from .history import HistoryLog
|
from .history import HistoryLog
|
||||||
from .models import PermissionDenial, PortingModule
|
from .models import PermissionDenial, PortingModule, UsageSummary
|
||||||
from .query_engine import QueryEngineConfig, QueryEnginePort, TurnResult
|
from .query_engine import QueryEngineConfig, QueryEnginePort, TurnResult
|
||||||
from .setup import SetupReport, WorkspaceSetup, run_setup
|
from .setup import SetupReport, WorkspaceSetup, run_setup
|
||||||
from .system_init import build_system_init_message
|
from .system_init import build_system_init_message
|
||||||
@@ -151,21 +153,100 @@ class PortRuntime:
|
|||||||
persisted_session_path=persisted_session_path,
|
persisted_session_path=persisted_session_path,
|
||||||
)
|
)
|
||||||
|
|
||||||
def run_turn_loop(self, prompt: str, limit: int = 5, max_turns: int = 3, structured_output: bool = False) -> list[TurnResult]:
|
def run_turn_loop(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
limit: int = 5,
|
||||||
|
max_turns: int = 3,
|
||||||
|
structured_output: bool = False,
|
||||||
|
timeout_seconds: float | None = None,
|
||||||
|
) -> list[TurnResult]:
|
||||||
|
"""Run a multi-turn engine loop with optional wall-clock deadline.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The initial prompt to submit.
|
||||||
|
limit: Match routing limit.
|
||||||
|
max_turns: Maximum number of turns before stopping.
|
||||||
|
structured_output: Whether to request structured output.
|
||||||
|
timeout_seconds: Total wall-clock budget across all turns. When the
|
||||||
|
budget is exhausted mid-turn, a synthetic TurnResult with
|
||||||
|
``stop_reason='timeout'`` is appended and the loop exits.
|
||||||
|
``None`` (default) preserves legacy unbounded behaviour.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A list of TurnResult objects. The final entry's ``stop_reason``
|
||||||
|
distinguishes ``'completed'``, ``'max_turns_reached'``,
|
||||||
|
``'max_budget_reached'``, or ``'timeout'``.
|
||||||
|
|
||||||
|
#161: prior to this change a hung ``engine.submit_message`` call would
|
||||||
|
block the loop indefinitely with no cancellation path, forcing claws to
|
||||||
|
rely on external watchdogs or OS-level kills. Callers can now enforce a
|
||||||
|
deadline and receive a typed timeout signal instead.
|
||||||
|
"""
|
||||||
engine = QueryEnginePort.from_workspace()
|
engine = QueryEnginePort.from_workspace()
|
||||||
engine.config = QueryEngineConfig(max_turns=max_turns, structured_output=structured_output)
|
engine.config = QueryEngineConfig(max_turns=max_turns, structured_output=structured_output)
|
||||||
matches = self.route_prompt(prompt, limit=limit)
|
matches = self.route_prompt(prompt, limit=limit)
|
||||||
command_names = tuple(match.name for match in matches if match.kind == 'command')
|
command_names = tuple(match.name for match in matches if match.kind == 'command')
|
||||||
tool_names = tuple(match.name for match in matches if match.kind == 'tool')
|
tool_names = tuple(match.name for match in matches if match.kind == 'tool')
|
||||||
results: list[TurnResult] = []
|
results: list[TurnResult] = []
|
||||||
for turn in range(max_turns):
|
deadline = time.monotonic() + timeout_seconds if timeout_seconds is not None else None
|
||||||
turn_prompt = prompt if turn == 0 else f'{prompt} [turn {turn + 1}]'
|
|
||||||
result = engine.submit_message(turn_prompt, command_names, tool_names, ())
|
# ThreadPoolExecutor is reused across turns so we cancel cleanly on exit.
|
||||||
results.append(result)
|
executor = ThreadPoolExecutor(max_workers=1) if deadline is not None else None
|
||||||
if result.stop_reason != 'completed':
|
try:
|
||||||
break
|
for turn in range(max_turns):
|
||||||
|
turn_prompt = prompt if turn == 0 else f'{prompt} [turn {turn + 1}]'
|
||||||
|
|
||||||
|
if deadline is None:
|
||||||
|
# Legacy path: unbounded call, preserves existing behaviour exactly.
|
||||||
|
result = engine.submit_message(turn_prompt, command_names, tool_names, ())
|
||||||
|
else:
|
||||||
|
remaining = deadline - time.monotonic()
|
||||||
|
if remaining <= 0:
|
||||||
|
results.append(self._build_timeout_result(turn_prompt, command_names, tool_names))
|
||||||
|
break
|
||||||
|
assert executor is not None
|
||||||
|
future = executor.submit(
|
||||||
|
engine.submit_message, turn_prompt, command_names, tool_names, ()
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
result = future.result(timeout=remaining)
|
||||||
|
except FuturesTimeoutError:
|
||||||
|
# Best-effort cancel; submit_message may still finish in background
|
||||||
|
# but we never read its output. The engine's own state mutation
|
||||||
|
# is owned by the engine and not our concern here.
|
||||||
|
future.cancel()
|
||||||
|
results.append(self._build_timeout_result(turn_prompt, command_names, tool_names))
|
||||||
|
break
|
||||||
|
|
||||||
|
results.append(result)
|
||||||
|
if result.stop_reason != 'completed':
|
||||||
|
break
|
||||||
|
finally:
|
||||||
|
if executor is not None:
|
||||||
|
# wait=False: don't let a hung thread block loop exit indefinitely.
|
||||||
|
# The thread will be reaped when the interpreter shuts down or when
|
||||||
|
# the engine call eventually returns.
|
||||||
|
executor.shutdown(wait=False)
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _build_timeout_result(
|
||||||
|
prompt: str,
|
||||||
|
command_names: tuple[str, ...],
|
||||||
|
tool_names: tuple[str, ...],
|
||||||
|
) -> TurnResult:
|
||||||
|
"""Synthesize a TurnResult representing a wall-clock timeout (#161)."""
|
||||||
|
return TurnResult(
|
||||||
|
prompt=prompt,
|
||||||
|
output='Wall-clock timeout exceeded before turn completed.',
|
||||||
|
matched_commands=command_names,
|
||||||
|
matched_tools=tool_names,
|
||||||
|
permission_denials=(),
|
||||||
|
usage=UsageSummary(),
|
||||||
|
stop_reason='timeout',
|
||||||
|
)
|
||||||
|
|
||||||
def _infer_permission_denials(self, matches: list[RoutedMatch]) -> list[PermissionDenial]:
|
def _infer_permission_denials(self, matches: list[RoutedMatch]) -> list[PermissionDenial]:
|
||||||
denials: list[PermissionDenial] = []
|
denials: list[PermissionDenial] = []
|
||||||
for match in matches:
|
for match in matches:
|
||||||
|
|||||||
168
tests/test_run_turn_loop_timeout.py
Normal file
168
tests/test_run_turn_loop_timeout.py
Normal file
@@ -0,0 +1,168 @@
|
|||||||
|
"""Tests for run_turn_loop wall-clock timeout (ROADMAP #161).
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- timeout_seconds=None preserves legacy unbounded behaviour
|
||||||
|
- timeout_seconds=X aborts a hung turn and emits stop_reason='timeout'
|
||||||
|
- Timeout budget is total wall-clock across all turns, not per-turn
|
||||||
|
- Already-exhausted budget short-circuits before the first turn runs
|
||||||
|
- Legacy path still runs without a ThreadPoolExecutor in the way
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import time
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
||||||
|
|
||||||
|
from src.models import UsageSummary # noqa: E402
|
||||||
|
from src.query_engine import TurnResult # noqa: E402
|
||||||
|
from src.runtime import PortRuntime # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
def _completed_result(prompt: str) -> TurnResult:
|
||||||
|
return TurnResult(
|
||||||
|
prompt=prompt,
|
||||||
|
output='ok',
|
||||||
|
matched_commands=(),
|
||||||
|
matched_tools=(),
|
||||||
|
permission_denials=(),
|
||||||
|
usage=UsageSummary(),
|
||||||
|
stop_reason='completed',
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TestLegacyUnboundedBehaviour:
|
||||||
|
def test_no_timeout_preserves_existing_behaviour(self) -> None:
|
||||||
|
"""timeout_seconds=None must not change legacy path at all."""
|
||||||
|
results = PortRuntime().run_turn_loop('review MCP tool', max_turns=2)
|
||||||
|
assert len(results) >= 1
|
||||||
|
for r in results:
|
||||||
|
assert r.stop_reason in {'completed', 'max_turns_reached', 'max_budget_reached'}
|
||||||
|
assert r.stop_reason != 'timeout'
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeoutAbortsHungTurn:
|
||||||
|
def test_hung_submit_message_times_out(self) -> None:
|
||||||
|
"""A stalled submit_message must be aborted and emit stop_reason='timeout'."""
|
||||||
|
runtime = PortRuntime()
|
||||||
|
|
||||||
|
def _hang(prompt, commands, tools, denials):
|
||||||
|
time.sleep(5.0) # would block the loop
|
||||||
|
return _completed_result(prompt)
|
||||||
|
|
||||||
|
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||||
|
engine = mock_factory.return_value
|
||||||
|
engine.config = None # attribute-assigned in run_turn_loop
|
||||||
|
engine.submit_message.side_effect = _hang
|
||||||
|
|
||||||
|
start = time.monotonic()
|
||||||
|
results = runtime.run_turn_loop(
|
||||||
|
'review MCP tool', max_turns=3, timeout_seconds=0.3
|
||||||
|
)
|
||||||
|
elapsed = time.monotonic() - start
|
||||||
|
|
||||||
|
# Must exit well under the 5s hang
|
||||||
|
assert elapsed < 1.5, f'run_turn_loop did not honor timeout: {elapsed:.2f}s'
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[-1].stop_reason == 'timeout'
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeoutBudgetIsTotal:
|
||||||
|
def test_budget_is_cumulative_across_turns(self) -> None:
|
||||||
|
"""timeout_seconds is total wall-clock across all turns, not per-turn."""
|
||||||
|
runtime = PortRuntime()
|
||||||
|
call_count = {'n': 0}
|
||||||
|
|
||||||
|
def _slow(prompt, commands, tools, denials):
|
||||||
|
call_count['n'] += 1
|
||||||
|
time.sleep(0.4) # each turn burns 0.4s
|
||||||
|
return _completed_result(prompt)
|
||||||
|
|
||||||
|
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||||
|
engine = mock_factory.return_value
|
||||||
|
engine.submit_message.side_effect = _slow
|
||||||
|
|
||||||
|
start = time.monotonic()
|
||||||
|
# 0.6s budget, 0.4s per turn. First turn completes (~0.4s),
|
||||||
|
# second turn times out before finishing.
|
||||||
|
results = runtime.run_turn_loop(
|
||||||
|
'review MCP tool', max_turns=5, timeout_seconds=0.6
|
||||||
|
)
|
||||||
|
elapsed = time.monotonic() - start
|
||||||
|
|
||||||
|
# Should exit at around 0.6s, not 2.0s (5 turns * 0.4s)
|
||||||
|
assert elapsed < 1.5, f'cumulative budget not honored: {elapsed:.2f}s'
|
||||||
|
# Last result should be the timeout
|
||||||
|
assert results[-1].stop_reason == 'timeout'
|
||||||
|
|
||||||
|
|
||||||
|
class TestExhaustedBudget:
|
||||||
|
def test_zero_timeout_short_circuits_first_turn(self) -> None:
|
||||||
|
"""timeout_seconds=0 emits timeout before the first submit_message call."""
|
||||||
|
runtime = PortRuntime()
|
||||||
|
|
||||||
|
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||||
|
engine = mock_factory.return_value
|
||||||
|
# submit_message should never be called when budget is already 0
|
||||||
|
engine.submit_message.side_effect = AssertionError(
|
||||||
|
'submit_message should not run when budget is exhausted'
|
||||||
|
)
|
||||||
|
|
||||||
|
results = runtime.run_turn_loop(
|
||||||
|
'review MCP tool', max_turns=3, timeout_seconds=0.0
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0].stop_reason == 'timeout'
|
||||||
|
|
||||||
|
|
||||||
|
class TestTimeoutResultShape:
|
||||||
|
def test_timeout_result_has_correct_prompt_and_matches(self) -> None:
|
||||||
|
"""Synthetic TurnResult on timeout must carry the turn's prompt + routed matches."""
|
||||||
|
runtime = PortRuntime()
|
||||||
|
|
||||||
|
def _hang(prompt, commands, tools, denials):
|
||||||
|
time.sleep(5.0)
|
||||||
|
return _completed_result(prompt)
|
||||||
|
|
||||||
|
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||||
|
engine = mock_factory.return_value
|
||||||
|
engine.submit_message.side_effect = _hang
|
||||||
|
|
||||||
|
results = runtime.run_turn_loop(
|
||||||
|
'review MCP tool', max_turns=2, timeout_seconds=0.2
|
||||||
|
)
|
||||||
|
|
||||||
|
timeout_result = results[-1]
|
||||||
|
assert timeout_result.stop_reason == 'timeout'
|
||||||
|
assert timeout_result.prompt == 'review MCP tool'
|
||||||
|
# matched_commands / matched_tools should still be populated from routing,
|
||||||
|
# so downstream transcripts don't lose the routing context.
|
||||||
|
# These may be empty tuples depending on routing; they must be tuples.
|
||||||
|
assert isinstance(timeout_result.matched_commands, tuple)
|
||||||
|
assert isinstance(timeout_result.matched_tools, tuple)
|
||||||
|
assert isinstance(timeout_result.usage, UsageSummary)
|
||||||
|
|
||||||
|
|
||||||
|
class TestNegativeTimeoutTreatedAsExhausted:
|
||||||
|
def test_negative_timeout_short_circuits(self) -> None:
|
||||||
|
"""A negative budget should behave identically to exhausted."""
|
||||||
|
runtime = PortRuntime()
|
||||||
|
|
||||||
|
with patch('src.runtime.QueryEnginePort.from_workspace') as mock_factory:
|
||||||
|
engine = mock_factory.return_value
|
||||||
|
engine.submit_message.side_effect = AssertionError(
|
||||||
|
'submit_message should not run when budget is negative'
|
||||||
|
)
|
||||||
|
|
||||||
|
results = runtime.run_turn_loop(
|
||||||
|
'review MCP tool', max_turns=3, timeout_seconds=-1.0
|
||||||
|
)
|
||||||
|
|
||||||
|
assert len(results) == 1
|
||||||
|
assert results[0].stop_reason == 'timeout'
|
||||||
Reference in New Issue
Block a user