From 284163be911ccf0d38687399034ee6c1b1f8639a Mon Sep 17 00:00:00 2001 From: Jobdori Date: Fri, 3 Apr 2026 17:09:54 +0900 Subject: [PATCH] =?UTF-8?q?feat(file=5Fops):=20add=20edge-case=20guards=20?= =?UTF-8?q?=E2=80=94=20binary=20detection,=20size=20limits,=20workspace=20?= =?UTF-8?q?boundary,=20symlink=20escape?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses PARITY.md file-tool edge cases: - Binary file detection: read_file rejects files with NUL bytes in first 8KB - Size limits: read_file rejects files >10MB, write_file rejects content >10MB - Workspace boundary enforcement: read_file_in_workspace, write_file_in_workspace, edit_file_in_workspace validate resolved paths stay within workspace root - Symlink escape detection: is_symlink_escape checks if a symlink resolves outside workspace boundaries - Path traversal prevention: validate_workspace_boundary catches ../ escapes after canonicalization 4 new tests (binary, oversize write, workspace boundary, symlink escape). Total: 142 runtime tests green. --- rust/crates/runtime/src/file_ops.rs | 196 +++++++++++++++++++++++++++- 1 file changed, 195 insertions(+), 1 deletion(-) diff --git a/rust/crates/runtime/src/file_ops.rs b/rust/crates/runtime/src/file_ops.rs index a647b85..770efd4 100644 --- a/rust/crates/runtime/src/file_ops.rs +++ b/rust/crates/runtime/src/file_ops.rs @@ -9,6 +9,39 @@ use regex::RegexBuilder; use serde::{Deserialize, Serialize}; use walkdir::WalkDir; +/// Maximum file size that can be read (10 MB). +const MAX_READ_SIZE: u64 = 10 * 1024 * 1024; + +/// Maximum file size that can be written (10 MB). +const MAX_WRITE_SIZE: usize = 10 * 1024 * 1024; + +/// Check whether a file appears to contain binary content by examining +/// the first chunk for NUL bytes. +fn is_binary_file(path: &Path) -> io::Result { + use std::io::Read; + let mut file = fs::File::open(path)?; + let mut buffer = [0u8; 8192]; + let bytes_read = file.read(&mut buffer)?; + Ok(buffer[..bytes_read].contains(&0)) +} + +/// Validate that a resolved path stays within the given workspace root. +/// Returns the canonical path on success, or an error if the path escapes +/// the workspace boundary (e.g. via `../` traversal or symlink). +fn validate_workspace_boundary(resolved: &Path, workspace_root: &Path) -> io::Result<()> { + if !resolved.starts_with(workspace_root) { + return Err(io::Error::new( + io::ErrorKind::PermissionDenied, + format!( + "path {} escapes workspace boundary {}", + resolved.display(), + workspace_root.display() + ), + )); + } + Ok(()) +} + #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] pub struct TextFilePayload { #[serde(rename = "filePath")] @@ -135,6 +168,28 @@ pub fn read_file( limit: Option, ) -> io::Result { let absolute_path = normalize_path(path)?; + + // Check file size before reading + let metadata = fs::metadata(&absolute_path)?; + if metadata.len() > MAX_READ_SIZE { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "file is too large ({} bytes, max {} bytes)", + metadata.len(), + MAX_READ_SIZE + ), + )); + } + + // Detect binary files + if is_binary_file(&absolute_path)? { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + "file appears to be binary", + )); + } + let content = fs::read_to_string(&absolute_path)?; let lines: Vec<&str> = content.lines().collect(); let start_index = offset.unwrap_or(0).min(lines.len()); @@ -156,6 +211,17 @@ pub fn read_file( } pub fn write_file(path: &str, content: &str) -> io::Result { + if content.len() > MAX_WRITE_SIZE { + return Err(io::Error::new( + io::ErrorKind::InvalidData, + format!( + "content is too large ({} bytes, max {} bytes)", + content.len(), + MAX_WRITE_SIZE + ), + )); + } + let absolute_path = normalize_path_allow_missing(path)?; let original_file = fs::read_to_string(&absolute_path).ok(); if let Some(parent) = absolute_path.parent() { @@ -477,11 +543,72 @@ fn normalize_path_allow_missing(path: &str) -> io::Result { Ok(candidate) } +/// Read a file with workspace boundary enforcement. +pub fn read_file_in_workspace( + path: &str, + offset: Option, + limit: Option, + workspace_root: &Path, +) -> io::Result { + let absolute_path = normalize_path(path)?; + let canonical_root = workspace_root + .canonicalize() + .unwrap_or_else(|_| workspace_root.to_path_buf()); + validate_workspace_boundary(&absolute_path, &canonical_root)?; + read_file(path, offset, limit) +} + +/// Write a file with workspace boundary enforcement. +pub fn write_file_in_workspace( + path: &str, + content: &str, + workspace_root: &Path, +) -> io::Result { + let absolute_path = normalize_path_allow_missing(path)?; + let canonical_root = workspace_root + .canonicalize() + .unwrap_or_else(|_| workspace_root.to_path_buf()); + validate_workspace_boundary(&absolute_path, &canonical_root)?; + write_file(path, content) +} + +/// Edit a file with workspace boundary enforcement. +pub fn edit_file_in_workspace( + path: &str, + old_string: &str, + new_string: &str, + replace_all: bool, + workspace_root: &Path, +) -> io::Result { + let absolute_path = normalize_path(path)?; + let canonical_root = workspace_root + .canonicalize() + .unwrap_or_else(|_| workspace_root.to_path_buf()); + validate_workspace_boundary(&absolute_path, &canonical_root)?; + edit_file(path, old_string, new_string, replace_all) +} + +/// Check whether a path is a symlink that resolves outside the workspace. +pub fn is_symlink_escape(path: &Path, workspace_root: &Path) -> io::Result { + let metadata = fs::symlink_metadata(path)?; + if !metadata.is_symlink() { + return Ok(false); + } + let resolved = path.canonicalize()?; + let canonical_root = workspace_root + .canonicalize() + .unwrap_or_else(|_| workspace_root.to_path_buf()); + Ok(!resolved.starts_with(&canonical_root)) +} + #[cfg(test)] mod tests { use std::time::{SystemTime, UNIX_EPOCH}; - use super::{edit_file, glob_search, grep_search, read_file, write_file, GrepSearchInput}; + use super::{ + edit_file, glob_search, grep_search, is_symlink_escape, read_file, read_file_in_workspace, + write_file, GrepSearchInput, MAX_WRITE_SIZE, + }; fn temp_path(name: &str) -> std::path::PathBuf { let unique = SystemTime::now() @@ -513,6 +640,73 @@ mod tests { assert!(output.replace_all); } + #[test] + fn rejects_binary_files() { + let path = temp_path("binary-test.bin"); + std::fs::write(&path, b"\x00\x01\x02\x03binary content").expect("write should succeed"); + let result = read_file(path.to_string_lossy().as_ref(), None, None); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert_eq!(error.kind(), std::io::ErrorKind::InvalidData); + assert!(error.to_string().contains("binary")); + } + + #[test] + fn rejects_oversized_writes() { + let path = temp_path("oversize-write.txt"); + let huge = "x".repeat(MAX_WRITE_SIZE + 1); + let result = write_file(path.to_string_lossy().as_ref(), &huge); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert_eq!(error.kind(), std::io::ErrorKind::InvalidData); + assert!(error.to_string().contains("too large")); + } + + #[test] + fn enforces_workspace_boundary() { + let workspace = temp_path("workspace-boundary"); + std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); + let inside = workspace.join("inside.txt"); + write_file(inside.to_string_lossy().as_ref(), "safe content") + .expect("write inside workspace should succeed"); + + // Reading inside workspace should succeed + let result = + read_file_in_workspace(inside.to_string_lossy().as_ref(), None, None, &workspace); + assert!(result.is_ok()); + + // Reading outside workspace should fail + let outside = temp_path("outside-boundary.txt"); + write_file(outside.to_string_lossy().as_ref(), "unsafe content") + .expect("write outside should succeed"); + let result = + read_file_in_workspace(outside.to_string_lossy().as_ref(), None, None, &workspace); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert_eq!(error.kind(), std::io::ErrorKind::PermissionDenied); + assert!(error.to_string().contains("escapes workspace")); + } + + #[test] + fn detects_symlink_escape() { + let workspace = temp_path("symlink-workspace"); + std::fs::create_dir_all(&workspace).expect("workspace dir should be created"); + let outside = temp_path("symlink-target.txt"); + std::fs::write(&outside, "target content").expect("target should write"); + + let link_path = workspace.join("escape-link.txt"); + #[cfg(unix)] + { + std::os::unix::fs::symlink(&outside, &link_path).expect("symlink should create"); + assert!(is_symlink_escape(&link_path, &workspace).expect("check should succeed")); + } + + // Non-symlink file should not be an escape + let normal = workspace.join("normal.txt"); + std::fs::write(&normal, "normal content").expect("normal file should write"); + assert!(!is_symlink_escape(&normal, &workspace).expect("check should succeed")); + } + #[test] fn globs_and_greps_directory() { let dir = temp_path("search-dir");