feat: Use lines for read_file tool

2026-05-04 11:19:48 +02:00
parent eac1643d48
commit 4ee0cda29a
1 changed files with 52 additions and 19 deletions
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
 from langchain_core.tools import BaseTool, tool
 from loguru import logger

+from docker_agent_sandbox.tools._utils import _MAX_OUTPUT_CHARS
+
 if TYPE_CHECKING:
    from docker_agent_sandbox.sandbox import DockerSandbox

@@ -15,38 +17,69 @@ def make_read_file_tool(sandbox: "DockerSandbox") -> BaseTool:
    """Return a read_file tool bound to *sandbox*."""

    @tool
-    def read_file(path: str, offset: int = 0, length: int = 5000) -> str:
+    def read_file(path: str, start_line: int = 1, end_line: int | None = None) -> str:
        """
-        Read a file at *path*.
+        Read a file at *path*, returning its contents as text.

-        *path* can be absolute (``/tmp/re-agent/result.csv``) or relative to the
+        *path* can be absolute (/tmp/re-agent/result.csv) or relative to the
        working directory.

-        *offset* is the number of bytes to skip from the start of the file.
-        *length* is the maximum number of bytes to return.  If the file is
-        longer than ``offset + length``, the output is trimmed and a summary
-        line is appended showing how many bytes were omitted.
+        *start_line* is the 1-based line number to start reading from (default: 1).

-        Returns the (possibly trimmed) file contents as text, or an error message.
+        *end_line* is the last line to include, inclusive (default: read as many
+        lines as the MAX_CHARS cap allows). Use start_line/end_line to page
+        through large files in chunks.
+
+        At most 5,000 characters are returned per call. If the requested range
+        exceeds this, output is truncated and a summary line is appended showing
+        how many lines were omitted.
+
+        Returns the (possibly truncated) file contents, or an error message.
        """
        logger.debug(
-            "Reading file inside sandbox: {} offset={} length={}", path, offset, length
+            "Reading file inside sandbox: {} start_line={} end_line={}",
+            path,
+            start_line,
+            end_line,
        )
        try:
            data = sandbox.read_file(path)
        except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
            return f"[ERROR reading {path!r}] {exc}"

-        total = len(data)
-        chunk = data[offset : offset + length]
-        text = chunk.decode("utf-8", errors="replace")
+        lines = data.decode("utf-8", errors="replace").splitlines(keepends=True)
+        total_lines = len(lines)

-        suffix = ""
-        if offset + length < total:
-            remaining = total - (offset + length)
-            suffix = f"\n[... {remaining} more bytes not shown (total {total} bytes). Use offset/length to read further.]"
-        elif offset > 0 or total > length:
-            suffix = f"\n[File total: {total} bytes, showing {len(chunk)} bytes from offset {offset}.]"
-        return text + suffix
+        # Clamp to valid range (1-based, inclusive)
+        start_idx = max(0, start_line - 1)
+        end_idx = total_lines if end_line is None else min(end_line, total_lines)
+        selected = lines[start_idx:end_idx]
+
+        # Enforce character cap
+        text = ""
+        last_included_line = start_idx  # track how far we got
+        for i, line in enumerate(selected):
+            if len(text) + len(line) > _MAX_OUTPUT_CHARS:
+                break
+            text += line
+            last_included_line = start_idx + i + 1  # 1-based
+
+        # Build informative suffix
+        suffix_parts = []
+        if last_included_line < end_idx:
+            omitted = end_idx - last_included_line
+            suffix_parts.append(
+                f"[... {omitted} more lines not shown (char cap reached). "
+                f"Call again with start_line={last_included_line + 1}.]"
+            )
+        elif end_idx < total_lines:
+            suffix_parts.append(
+                f"[Showing lines {start_line}–{end_idx} of {total_lines} total.]"
+            )
+
+        if suffix_parts:
+            text += "\n" + " ".join(suffix_parts)
+
+        return text

    return read_file