mirror of
https://github.com/instructkr/claw-code.git
synced 2026-04-05 23:54:50 +08:00
merge: ultraclaw/summary-compression into main
This commit is contained in:
@@ -26,6 +26,8 @@ pub mod sandbox;
|
||||
mod session;
|
||||
mod sse;
|
||||
pub mod stale_branch;
|
||||
||||||| f76311f
|
||||
pub mod summary_compression;
|
||||
pub mod task_registry;
|
||||
pub mod task_packet;
|
||||
pub mod team_cron_registry;
|
||||
|
||||
300
rust/crates/runtime/src/summary_compression.rs
Normal file
300
rust/crates/runtime/src/summary_compression.rs
Normal file
@@ -0,0 +1,300 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
const DEFAULT_MAX_CHARS: usize = 1_200;
|
||||
const DEFAULT_MAX_LINES: usize = 24;
|
||||
const DEFAULT_MAX_LINE_CHARS: usize = 160;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct SummaryCompressionBudget {
|
||||
pub max_chars: usize,
|
||||
pub max_lines: usize,
|
||||
pub max_line_chars: usize,
|
||||
}
|
||||
|
||||
impl Default for SummaryCompressionBudget {
|
||||
fn default() -> Self {
|
||||
Self {
|
||||
max_chars: DEFAULT_MAX_CHARS,
|
||||
max_lines: DEFAULT_MAX_LINES,
|
||||
max_line_chars: DEFAULT_MAX_LINE_CHARS,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct SummaryCompressionResult {
|
||||
pub summary: String,
|
||||
pub original_chars: usize,
|
||||
pub compressed_chars: usize,
|
||||
pub original_lines: usize,
|
||||
pub compressed_lines: usize,
|
||||
pub removed_duplicate_lines: usize,
|
||||
pub omitted_lines: usize,
|
||||
pub truncated: bool,
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn compress_summary(
|
||||
summary: &str,
|
||||
budget: SummaryCompressionBudget,
|
||||
) -> SummaryCompressionResult {
|
||||
let original_chars = summary.chars().count();
|
||||
let original_lines = summary.lines().count();
|
||||
|
||||
let normalized = normalize_lines(summary, budget.max_line_chars);
|
||||
if normalized.lines.is_empty() || budget.max_chars == 0 || budget.max_lines == 0 {
|
||||
return SummaryCompressionResult {
|
||||
summary: String::new(),
|
||||
original_chars,
|
||||
compressed_chars: 0,
|
||||
original_lines,
|
||||
compressed_lines: 0,
|
||||
removed_duplicate_lines: normalized.removed_duplicate_lines,
|
||||
omitted_lines: normalized.lines.len(),
|
||||
truncated: original_chars > 0,
|
||||
};
|
||||
}
|
||||
|
||||
let selected = select_line_indexes(&normalized.lines, budget);
|
||||
let mut compressed_lines = selected
|
||||
.iter()
|
||||
.map(|index| normalized.lines[*index].clone())
|
||||
.collect::<Vec<_>>();
|
||||
if compressed_lines.is_empty() {
|
||||
compressed_lines.push(truncate_line(&normalized.lines[0], budget.max_chars));
|
||||
}
|
||||
let omitted_lines = normalized
|
||||
.lines
|
||||
.len()
|
||||
.saturating_sub(compressed_lines.len());
|
||||
|
||||
if omitted_lines > 0 {
|
||||
let omission_notice = omission_notice(omitted_lines);
|
||||
push_line_with_budget(&mut compressed_lines, omission_notice, budget);
|
||||
}
|
||||
|
||||
let compressed_summary = compressed_lines.join("\n");
|
||||
|
||||
SummaryCompressionResult {
|
||||
compressed_chars: compressed_summary.chars().count(),
|
||||
compressed_lines: compressed_lines.len(),
|
||||
removed_duplicate_lines: normalized.removed_duplicate_lines,
|
||||
omitted_lines,
|
||||
truncated: compressed_summary != summary.trim(),
|
||||
summary: compressed_summary,
|
||||
original_chars,
|
||||
original_lines,
|
||||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn compress_summary_text(summary: &str) -> String {
|
||||
compress_summary(summary, SummaryCompressionBudget::default()).summary
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct NormalizedSummary {
|
||||
lines: Vec<String>,
|
||||
removed_duplicate_lines: usize,
|
||||
}
|
||||
|
||||
fn normalize_lines(summary: &str, max_line_chars: usize) -> NormalizedSummary {
|
||||
let mut seen = BTreeSet::new();
|
||||
let mut lines = Vec::new();
|
||||
let mut removed_duplicate_lines = 0;
|
||||
|
||||
for raw_line in summary.lines() {
|
||||
let normalized = collapse_inline_whitespace(raw_line);
|
||||
if normalized.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
let truncated = truncate_line(&normalized, max_line_chars);
|
||||
let dedupe_key = dedupe_key(&truncated);
|
||||
if !seen.insert(dedupe_key) {
|
||||
removed_duplicate_lines += 1;
|
||||
continue;
|
||||
}
|
||||
|
||||
lines.push(truncated);
|
||||
}
|
||||
|
||||
NormalizedSummary {
|
||||
lines,
|
||||
removed_duplicate_lines,
|
||||
}
|
||||
}
|
||||
|
||||
fn select_line_indexes(lines: &[String], budget: SummaryCompressionBudget) -> Vec<usize> {
|
||||
let mut selected = BTreeSet::<usize>::new();
|
||||
|
||||
for priority in 0..=3 {
|
||||
for (index, line) in lines.iter().enumerate() {
|
||||
if selected.contains(&index) || line_priority(line) != priority {
|
||||
continue;
|
||||
}
|
||||
|
||||
let candidate = selected
|
||||
.iter()
|
||||
.map(|selected_index| lines[*selected_index].as_str())
|
||||
.chain(std::iter::once(line.as_str()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if candidate.len() > budget.max_lines {
|
||||
continue;
|
||||
}
|
||||
|
||||
if joined_char_count(&candidate) > budget.max_chars {
|
||||
continue;
|
||||
}
|
||||
|
||||
selected.insert(index);
|
||||
}
|
||||
}
|
||||
|
||||
selected.into_iter().collect()
|
||||
}
|
||||
|
||||
fn push_line_with_budget(lines: &mut Vec<String>, line: String, budget: SummaryCompressionBudget) {
|
||||
let candidate = lines
|
||||
.iter()
|
||||
.map(String::as_str)
|
||||
.chain(std::iter::once(line.as_str()))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if candidate.len() <= budget.max_lines && joined_char_count(&candidate) <= budget.max_chars {
|
||||
lines.push(line);
|
||||
}
|
||||
}
|
||||
|
||||
fn joined_char_count(lines: &[&str]) -> usize {
|
||||
lines.iter().map(|line| line.chars().count()).sum::<usize>() + lines.len().saturating_sub(1)
|
||||
}
|
||||
|
||||
fn line_priority(line: &str) -> usize {
|
||||
if line == "Summary:" || line == "Conversation summary:" || is_core_detail(line) {
|
||||
0
|
||||
} else if is_section_header(line) {
|
||||
1
|
||||
} else if line.starts_with("- ") || line.starts_with(" - ") {
|
||||
2
|
||||
} else {
|
||||
3
|
||||
}
|
||||
}
|
||||
|
||||
fn is_core_detail(line: &str) -> bool {
|
||||
[
|
||||
"- Scope:",
|
||||
"- Current work:",
|
||||
"- Pending work:",
|
||||
"- Key files referenced:",
|
||||
"- Tools mentioned:",
|
||||
"- Recent user requests:",
|
||||
"- Previously compacted context:",
|
||||
"- Newly compacted context:",
|
||||
]
|
||||
.iter()
|
||||
.any(|prefix| line.starts_with(prefix))
|
||||
}
|
||||
|
||||
fn is_section_header(line: &str) -> bool {
|
||||
line.ends_with(':')
|
||||
}
|
||||
|
||||
fn omission_notice(omitted_lines: usize) -> String {
|
||||
format!("- … {omitted_lines} additional line(s) omitted.")
|
||||
}
|
||||
|
||||
fn collapse_inline_whitespace(line: &str) -> String {
|
||||
line.split_whitespace().collect::<Vec<_>>().join(" ")
|
||||
}
|
||||
|
||||
fn truncate_line(line: &str, max_chars: usize) -> String {
|
||||
if max_chars == 0 || line.chars().count() <= max_chars {
|
||||
return line.to_string();
|
||||
}
|
||||
|
||||
if max_chars == 1 {
|
||||
return "…".to_string();
|
||||
}
|
||||
|
||||
let mut truncated = line
|
||||
.chars()
|
||||
.take(max_chars.saturating_sub(1))
|
||||
.collect::<String>();
|
||||
truncated.push('…');
|
||||
truncated
|
||||
}
|
||||
|
||||
fn dedupe_key(line: &str) -> String {
|
||||
line.to_ascii_lowercase()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{compress_summary, compress_summary_text, SummaryCompressionBudget};
|
||||
|
||||
#[test]
|
||||
fn collapses_whitespace_and_duplicate_lines() {
|
||||
// given
|
||||
let summary = "Conversation summary:\n\n- Scope: compact earlier messages.\n- Scope: compact earlier messages.\n- Current work: update runtime module.\n";
|
||||
|
||||
// when
|
||||
let result = compress_summary(summary, SummaryCompressionBudget::default());
|
||||
|
||||
// then
|
||||
assert_eq!(result.removed_duplicate_lines, 1);
|
||||
assert!(result
|
||||
.summary
|
||||
.contains("- Scope: compact earlier messages."));
|
||||
assert!(!result.summary.contains(" compact earlier"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn keeps_core_lines_when_budget_is_tight() {
|
||||
// given
|
||||
let summary = [
|
||||
"Conversation summary:",
|
||||
"- Scope: 18 earlier messages compacted.",
|
||||
"- Current work: finish summary compression.",
|
||||
"- Key timeline:",
|
||||
" - user: asked for a working implementation.",
|
||||
" - assistant: inspected runtime compaction flow.",
|
||||
" - tool: cargo check succeeded.",
|
||||
]
|
||||
.join("\n");
|
||||
|
||||
// when
|
||||
let result = compress_summary(
|
||||
&summary,
|
||||
SummaryCompressionBudget {
|
||||
max_chars: 120,
|
||||
max_lines: 3,
|
||||
max_line_chars: 80,
|
||||
},
|
||||
);
|
||||
|
||||
// then
|
||||
assert!(result.summary.contains("Conversation summary:"));
|
||||
assert!(result
|
||||
.summary
|
||||
.contains("- Scope: 18 earlier messages compacted."));
|
||||
assert!(result
|
||||
.summary
|
||||
.contains("- Current work: finish summary compression."));
|
||||
assert!(result.omitted_lines > 0);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn provides_a_default_text_only_helper() {
|
||||
// given
|
||||
let summary = "Summary:\n\nA short line.";
|
||||
|
||||
// when
|
||||
let compressed = compress_summary_text(summary);
|
||||
|
||||
// then
|
||||
assert_eq!(compressed, "Summary:\nA short line.");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user