feat: b5-context-compress — batch 5 wave 2

This commit is contained in:
YeonGyu-Kim
2026-04-07 15:15:42 +09:00
parent 260bac321f
commit 86c3667836

View File

@@ -249,13 +249,14 @@ impl TerminalRenderer {
#[must_use]
pub fn render_markdown(&self, markdown: &str) -> String {
let normalized = normalize_nested_fences(markdown);
let mut output = String::new();
let mut state = RenderState::default();
let mut code_language = String::new();
let mut code_buffer = String::new();
let mut in_code_block = false;
for event in Parser::new_ext(markdown, Options::all()) {
for event in Parser::new_ext(&normalized, Options::all()) {
self.render_event(
event,
&mut state,
@@ -634,6 +635,178 @@ fn apply_code_block_background(line: &str) -> String {
format!("\u{1b}[48;5;236m{with_background}\u{1b}[0m{trailing_newline}")
}
/// Pre-process raw markdown so that fenced code blocks whose body contains
/// fence markers of equal or greater length are wrapped with a longer fence.
///
/// LLMs frequently emit triple-backtick code blocks that contain triple-backtick
/// examples. CommonMark (and pulldown-cmark) treats the inner marker as the
/// closing fence, breaking the render. This function detects the situation and
/// upgrades the outer fence to use enough backticks (or tildes) that the inner
/// markers become ordinary content.
fn normalize_nested_fences(markdown: &str) -> String {
// A fence line is either "labeled" (has an info string ⇒ always an opener)
// or "bare" (no info string ⇒ could be opener or closer).
#[derive(Debug, Clone)]
struct FenceLine {
char: char,
len: usize,
has_info: bool,
indent: usize,
}
fn parse_fence_line(line: &str) -> Option<FenceLine> {
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
let indent = trimmed.chars().take_while(|c| *c == ' ').count();
if indent > 3 {
return None;
}
let rest = &trimmed[indent..];
let ch = rest.chars().next()?;
if ch != '`' && ch != '~' {
return None;
}
let len = rest.chars().take_while(|c| *c == ch).count();
if len < 3 {
return None;
}
let after = &rest[len..];
if ch == '`' && after.contains('`') {
return None;
}
let has_info = !after.trim().is_empty();
Some(FenceLine {
char: ch,
len,
has_info,
indent,
})
}
let lines: Vec<&str> = markdown.split_inclusive('\n').collect();
// Handle final line that may lack trailing newline.
// split_inclusive already keeps the original chunks, including a
// final chunk without '\n' if the input doesn't end with one.
// First pass: classify every line.
let fence_info: Vec<Option<FenceLine>> = lines.iter().map(|l| parse_fence_line(l)).collect();
// Second pass: pair openers with closers using a stack, recording
// (opener_idx, closer_idx) pairs plus the max fence length found between
// them.
struct StackEntry {
line_idx: usize,
fence: FenceLine,
}
let mut stack: Vec<StackEntry> = Vec::new();
// Paired blocks: (opener_line, closer_line, max_inner_fence_len)
let mut pairs: Vec<(usize, usize, usize)> = Vec::new();
for (i, fi) in fence_info.iter().enumerate() {
let Some(fl) = fi else { continue };
if fl.has_info {
// Labeled fence ⇒ always an opener.
stack.push(StackEntry {
line_idx: i,
fence: fl.clone(),
});
} else {
// Bare fence ⇒ try to close the top of the stack if compatible.
let closes_top = stack
.last()
.is_some_and(|top| top.fence.char == fl.char && fl.len >= top.fence.len);
if closes_top {
let opener = stack.pop().unwrap();
// Find max fence length of any fence line strictly between
// opener and closer (these are the nested fences).
let inner_max = fence_info[opener.line_idx + 1..i]
.iter()
.filter_map(|fi| fi.as_ref().map(|f| f.len))
.max()
.unwrap_or(0);
pairs.push((opener.line_idx, i, inner_max));
} else {
// Treat as opener.
stack.push(StackEntry {
line_idx: i,
fence: fl.clone(),
});
}
}
}
// Determine which lines need rewriting. A pair needs rewriting when
// its opener length <= max inner fence length.
struct Rewrite {
char: char,
new_len: usize,
indent: usize,
}
let mut rewrites: std::collections::HashMap<usize, Rewrite> = std::collections::HashMap::new();
for (opener_idx, closer_idx, inner_max) in &pairs {
let opener_fl = fence_info[*opener_idx].as_ref().unwrap();
if opener_fl.len <= *inner_max {
let new_len = inner_max + 1;
let info_part = {
let trimmed = lines[*opener_idx]
.trim_end_matches('\n')
.trim_end_matches('\r');
let rest = &trimmed[opener_fl.indent..];
rest[opener_fl.len..].to_string()
};
rewrites.insert(
*opener_idx,
Rewrite {
char: opener_fl.char,
new_len,
indent: opener_fl.indent,
},
);
let closer_fl = fence_info[*closer_idx].as_ref().unwrap();
rewrites.insert(
*closer_idx,
Rewrite {
char: closer_fl.char,
new_len,
indent: closer_fl.indent,
},
);
// Store info string only in the opener; closer keeps the trailing
// portion which is already handled through the original line.
// Actually, we rebuild both lines from scratch below, including
// the info string for the opener.
let _ = info_part; // consumed in rebuild
}
}
if rewrites.is_empty() {
return markdown.to_string();
}
// Rebuild.
let mut out = String::with_capacity(markdown.len() + rewrites.len() * 4);
for (i, line) in lines.iter().enumerate() {
if let Some(rw) = rewrites.get(&i) {
let fence_str: String = std::iter::repeat(rw.char).take(rw.new_len).collect();
let indent_str: String = std::iter::repeat(' ').take(rw.indent).collect();
// Recover the original info string (if any) and trailing newline.
let trimmed = line.trim_end_matches('\n').trim_end_matches('\r');
let fi = fence_info[i].as_ref().unwrap();
let info = &trimmed[fi.indent + fi.len..];
let trailing = &line[trimmed.len()..];
out.push_str(&indent_str);
out.push_str(&fence_str);
out.push_str(info);
out.push_str(trailing);
} else {
out.push_str(line);
}
}
out
}
fn find_stream_safe_boundary(markdown: &str) -> Option<usize> {
let mut open_fence: Option<FenceMarker> = None;
let mut last_boundary = None;