# NOTE: reconstructed from the post-patch side of the diff hunks. The first
# hunk begins at file line 7, so anything on file lines 1-6 is not shown here.
from typing import TYPE_CHECKING

from langchain_core.tools import BaseTool, tool
from loguru import logger

from docker_agent_sandbox.tools._utils import _MAX_OUTPUT_CHARS

if TYPE_CHECKING:
    from docker_agent_sandbox.sandbox import DockerSandbox


def make_read_file_tool(sandbox: "DockerSandbox") -> BaseTool:
    """Return a read_file tool bound to *sandbox*."""

    @tool
    def read_file(path: str, start_line: int = 1, end_line: int | None = None) -> str:
        """
        Read a file at *path*, returning its contents as text.

        *path* can be absolute (/tmp/re-agent/result.csv) or relative to the
        working directory.

        *start_line* is the 1-based line number to start reading from
        (default: 1). Values below 1 are treated as 1.

        *end_line* is the last line to include, inclusive (default: read as
        many lines as the character cap allows). Use start_line/end_line to
        page through large files in chunks.

        At most 5,000 characters are returned per call. If the requested range
        exceeds this, output is truncated at a line boundary and a summary
        line is appended showing how many lines were omitted and which
        start_line to use next. A single line that is longer than the cap on
        its own is cut off at the cap and flagged as truncated.

        Returns the (possibly truncated) file contents, or an error message.
        """
        # NOTE(review): the "5,000" figure in the docstring above is assumed to
        # equal _MAX_OUTPUT_CHARS -- confirm against tools/_utils.py.
        logger.debug(
            "Reading file inside sandbox: {} start_line={} end_line={}",
            path,
            start_line,
            end_line,
        )
        try:
            data = sandbox.read_file(path)
        except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
            return f"[ERROR reading {path!r}] {exc}"

        lines = data.decode("utf-8", errors="replace").splitlines(keepends=True)
        total_lines = len(lines)

        # Clamp the start to the valid range (1-based, inclusive).
        start_idx = max(0, start_line - 1)
        first_line = start_idx + 1  # clamped 1-based start, used in messages

        # Reject an inverted range instead of emitting a nonsensical summary.
        if end_line is not None and end_line < first_line:
            return (
                f"[ERROR reading {path!r}] end_line ({end_line}) is before "
                f"start_line ({first_line})."
            )

        # Reading past the end used to return an unexplained empty string.
        # An empty file read from line 1 still returns "" as before.
        if start_idx > 0 and start_idx >= total_lines:
            return (
                f"[No lines to show: start_line={first_line} is past the end "
                f"of the file ({total_lines} lines total).]"
            )

        end_idx = total_lines if end_line is None else min(end_line, total_lines)

        # Collect whole lines until the character cap would be exceeded.
        pieces: list[str] = []
        used_chars = 0
        next_idx = start_idx  # 0-based index of the first line NOT yet emitted
        for line in lines[start_idx:end_idx]:
            if used_chars + len(line) > _MAX_OUTPUT_CHARS:
                break
            pieces.append(line)
            used_chars += len(line)
            next_idx += 1

        notes: list[str] = []

        # If not even the first requested line fits, emit a truncated copy of
        # it and advance. Otherwise the "call again" hint would point at the
        # same line and the caller could never make progress.
        if next_idx == start_idx and start_idx < end_idx:
            long_line = lines[start_idx]
            pieces.append(long_line[:_MAX_OUTPUT_CHARS])
            next_idx += 1
            hidden = len(long_line) - _MAX_OUTPUT_CHARS
            notes.append(
                f"[Line {next_idx} truncated: {hidden} of its "
                f"{len(long_line)} characters not shown.]"
            )

        # Build informative suffix.
        if next_idx < end_idx:
            omitted = end_idx - next_idx
            notes.append(
                f"[... {omitted} more lines not shown (char cap reached). "
                f"Call again with start_line={next_idx + 1}.]"
            )
        elif end_idx < total_lines:
            notes.append(
                f"[Showing lines {first_line}–{end_idx} of {total_lines} total.]"
            )

        text = "".join(pieces)
        if notes:
            text += "\n" + " ".join(notes)

        return text

    return read_file