# docker-agent-sandbox/docker_agent_sandbox/tools/read_file.py

"""read_file.py – tool for reading files inside the sandbox."""

from __future__ import annotations

from typing import TYPE_CHECKING

from langchain_core.tools import BaseTool, tool
from loguru import logger

from docker_agent_sandbox.tools._utils import _MAX_OUTPUT_CHARS

if TYPE_CHECKING:
    from docker_agent_sandbox.sandbox import DockerSandbox


def make_read_file_tool(sandbox: "DockerSandbox") -> BaseTool:
    """Return a read_file tool bound to *sandbox*.

    The returned tool is the nested ``read_file`` function wrapped by the
    ``@tool`` decorator. It closes over *sandbox* and calls
    ``sandbox.read_file(path)``, whose result is decoded as UTF-8 (invalid
    bytes are replaced), split into lines, sliced to the requested line range
    and capped at ``_MAX_OUTPUT_CHARS`` characters.
    """

    @tool
    def read_file(path: str, start_line: int = 1, end_line: int | None = None) -> str:
        """
        Read a file at *path*, returning its contents as text.

        *path* can be absolute (/tmp/re-agent/result.csv) or relative to the
        working directory.

        *start_line* is the 1-based line number to start reading from (default: 1).

        *end_line* is the last line to include, inclusive (default: read as many
        lines as the MAX_CHARS cap allows). Use start_line/end_line to page
        through large files in chunks.

        At most 5,000 characters are returned per call. If the requested range
        exceeds this, output is truncated and a summary line is appended showing
        how many lines were omitted. A single line that is longer than the cap
        on its own is cut off at the cap and the rest of that line is skipped.

        Returns the (possibly truncated) file contents, or an error message.
        """
        # NOTE(review): the docstring above quotes "5,000 characters" while the
        # code enforces _MAX_OUTPUT_CHARS -- confirm the two agree.
        logger.debug(
            "Reading file inside sandbox: {} start_line={} end_line={}",
            path,
            start_line,
            end_line,
        )
        try:
            data = sandbox.read_file(path)
        except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
            # Errors are reported as text so the tool call itself never raises.
            return f"[ERROR reading {path!r}] {exc}"

        lines = data.decode("utf-8", errors="replace").splitlines(keepends=True)
        total_lines = len(lines)

        # Clamp to a valid half-open index range. end_idx is never allowed to
        # drop below start_idx (or below zero): a negative slice bound would
        # silently count from the END of the file instead of yielding nothing.
        start_idx = max(0, start_line - 1)
        if end_line is None:
            end_idx = total_lines
        else:
            end_idx = max(start_idx, min(end_line, total_lines))
        selected = lines[start_idx:end_idx]

        if not selected:
            if total_lines == 0:
                # Empty file: nothing to show and nothing to explain.
                return ""
            # Start past EOF, or an inverted / non-positive range.
            return (
                f"[No lines in the requested range; the file has {total_lines} "
                f"lines.]"
            )

        # Enforce the character cap on whole lines. Pieces are collected in a
        # list and joined once to avoid repeated string concatenation.
        pieces: list[str] = []
        used_chars = 0
        last_included_line = start_idx  # 1-based number of the last line emitted
        truncation_note = ""
        for offset, line in enumerate(selected):
            if used_chars + len(line) > _MAX_OUTPUT_CHARS:
                if offset == 0:
                    # The very first line alone exceeds the cap. Emit its head
                    # and count the line as consumed; otherwise the "call
                    # again" hint below would point back at this same line and
                    # the caller could never page past it.
                    pieces.append(line[:_MAX_OUTPUT_CHARS])
                    last_included_line = start_idx + 1
                    truncation_note = (
                        f"[Line {last_included_line} truncated to "
                        f"{_MAX_OUTPUT_CHARS} of {len(line)} characters.]"
                    )
                break
            pieces.append(line)
            used_chars += len(line)
            last_included_line = start_idx + offset + 1
        text = "".join(pieces)

        # Build informative suffix
        suffix_parts = []
        if truncation_note:
            suffix_parts.append(truncation_note)
        if last_included_line < end_idx:
            omitted = end_idx - last_included_line
            suffix_parts.append(
                f"[... {omitted} more lines not shown (char cap reached). "
                f"Call again with start_line={last_included_line + 1}.]"
            )
        elif end_idx < total_lines:
            # Report the clamped start so start_line <= 0 is shown as line 1.
            suffix_parts.append(
                f"[Showing lines {start_idx + 1}–{end_idx} of {total_lines} total.]"
            )

        if suffix_parts:
            text += "\n" + " ".join(suffix_parts)

        return text

    return read_file