ci: Switch to uv

feat: Use lines for read_file tool
tests: Add more edge cases for exec command
2026-05-04 11:19:55 +02:00 · 2026-05-04 11:19:48 +02:00 · 2026-05-04 11:19:28 +02:00
4 changed files with 170 additions and 48 deletions
@@ -7,47 +7,53 @@ on:
    branches: ["main"]

 jobs:
-  test:
+  unit-tests:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4

-      - uses: actions/setup-python@v5
+      - uses: astral-sh/setup-uv@v5
        with:
-          python-version: "3.12"
+          enable-cache: true

-      - name: Install dependencies
-        run: |
-          python -m venv .venv
-          .venv/bin/pip install -e ".[dev]" -q
+      - name: Run unit tests
+        run: uv run --extra dev pytest tests/unit/ -v --tb=short

-      - name: Run tests
-        run: .venv/bin/pytest tests/ -v
+  integration-tests:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - uses: astral-sh/setup-uv@v5
+        with:
+          enable-cache: true
+
+      - name: Pull container image
+        run: docker pull python:3.11-slim
+
+      - name: Run integration tests
+        run: uv run --extra dev pytest tests/integration/ -v --tb=short

  publish:
    runs-on: ubuntu-latest
-    needs: test
+    needs: [unit-tests, integration-tests]
    if: github.event_name == 'push' && github.ref == 'refs/heads/main'

    steps:
      - uses: actions/checkout@v4

-      - uses: actions/setup-python@v5
+      - uses: astral-sh/setup-uv@v5
        with:
-          python-version: "3.12"
+          enable-cache: true

      - name: Build package
-        run: |
-          pip install build -q
-          python -m build
+        run: uv build

      - name: Publish to Gitea package registry
        env:
-          TWINE_USERNAME: ${{ github.repository_owner }}
-          TWINE_PASSWORD: ${{ secrets.GITEA_TOKEN }}
-        run: |
-          pip install twine -q
-          twine upload \
-            --repository-url ${{ gitea.server_url }}/api/packages/${{ github.repository_owner }}/pypi \
-            dist/*
+          UV_PUBLISH_URL: ${{ gitea.server_url }}/api/packages/${{ github.repository_owner }}/pypi
+          UV_PUBLISH_USERNAME: ${{ github.repository_owner }}
+          UV_PUBLISH_PASSWORD: ${{ secrets.GITEA_TOKEN }}
+        run: uv publish
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
 from langchain_core.tools import BaseTool, tool
 from loguru import logger

+from docker_agent_sandbox.tools._utils import _MAX_OUTPUT_CHARS
+
 if TYPE_CHECKING:
    from docker_agent_sandbox.sandbox import DockerSandbox

@@ -15,38 +17,69 @@ def make_read_file_tool(sandbox: "DockerSandbox") -> BaseTool:
    """Return a read_file tool bound to *sandbox*."""

    @tool
-    def read_file(path: str, offset: int = 0, length: int = 5000) -> str:
+    def read_file(path: str, start_line: int = 1, end_line: int | None = None) -> str:
        """
-        Read a file at *path*.
+        Read a file at *path*, returning its contents as text.

-        *path* can be absolute (``/tmp/re-agent/result.csv``) or relative to the
+        *path* can be absolute (/tmp/re-agent/result.csv) or relative to the
        working directory.

-        *offset* is the number of bytes to skip from the start of the file.
-        *length* is the maximum number of bytes to return.  If the file is
-        longer than ``offset + length``, the output is trimmed and a summary
-        line is appended showing how many bytes were omitted.
+        *start_line* is the 1-based line number to start reading from (default: 1).

-        Returns the (possibly trimmed) file contents as text, or an error message.
+        *end_line* is the last line to include, inclusive (default: read as many
+        lines as the MAX_CHARS cap allows). Use start_line/end_line to page
+        through large files in chunks.
+
+        At most 5,000 characters are returned per call. If the requested range
+        exceeds this, output is truncated and a summary line is appended showing
+        how many lines were omitted.
+
+        Returns the (possibly truncated) file contents, or an error message.
        """
        logger.debug(
-            "Reading file inside sandbox: {} offset={} length={}", path, offset, length
+            "Reading file inside sandbox: {} start_line={} end_line={}",
+            path,
+            start_line,
+            end_line,
        )
        try:
            data = sandbox.read_file(path)
        except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
            return f"[ERROR reading {path!r}] {exc}"

-        total = len(data)
-        chunk = data[offset : offset + length]
-        text = chunk.decode("utf-8", errors="replace")
+        lines = data.decode("utf-8", errors="replace").splitlines(keepends=True)
+        total_lines = len(lines)

-        suffix = ""
-        if offset + length < total:
-            remaining = total - (offset + length)
-            suffix = f"\n[... {remaining} more bytes not shown (total {total} bytes). Use offset/length to read further.]"
-        elif offset > 0 or total > length:
-            suffix = f"\n[File total: {total} bytes, showing {len(chunk)} bytes from offset {offset}.]"
-        return text + suffix
+        # Clamp to valid range (1-based, inclusive)
+        start_idx = max(0, start_line - 1)
+        end_idx = total_lines if end_line is None else min(end_line, total_lines)
+        selected = lines[start_idx:end_idx]
+
+        # Enforce character cap
+        text = ""
+        last_included_line = start_idx  # track how far we got
+        for i, line in enumerate(selected):
+            if len(text) + len(line) > _MAX_OUTPUT_CHARS:
+                break
+            text += line
+            last_included_line = start_idx + i + 1  # 1-based
+
+        # Build informative suffix
+        suffix_parts = []
+        if last_included_line < end_idx:
+            omitted = end_idx - last_included_line
+            suffix_parts.append(
+                f"[... {omitted} more lines not shown (char cap reached). "
+                f"Call again with start_line={last_included_line + 1}.]"
+            )
+        elif end_idx < total_lines:
+            suffix_parts.append(
+                f"[Showing lines {start_line}–{end_idx} of {total_lines} total.]"
+            )
+
+        if suffix_parts:
+            text += "\n" + " ".join(suffix_parts)
+
+        return text

    return read_file
@@ -46,12 +46,87 @@ def test_exec_returns_error_when_container_not_running():
    assert "not running" in out.lower()


+def test_exec_instant_command(sandbox: DockerSandbox):
+    code, out = sandbox.exec("true")
+    assert code == 0
+    assert out == ""
+
+
+def test_exec_instant_nonzero(sandbox: DockerSandbox):
+    code, _ = sandbox.exec("false")
+    assert code == 1
+
+
+def test_exec_delayed_command_within_timeout(sandbox: DockerSandbox):
+    code, out = sandbox.exec("sleep 1 && echo done", timeout=10)
+    assert code == 0
+    assert "done" in out
+
+
 def test_exec_timeout(sandbox: DockerSandbox):
    code, out = sandbox.exec("sleep 60", timeout=2)
    assert code == 124
    assert "timed out" in out


+def test_exec_timeout_longer_than_sleep(sandbox: DockerSandbox):
+    # Command finishes before timeout — must not raise or return 124.
+    code, out = sandbox.exec("sleep 1 && echo ok", timeout=10)
+    assert code == 0
+    assert "ok" in out
+
+
+def test_exec_and_chain_both_succeed(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo first && echo second")
+    assert code == 0
+    assert "first" in out
+    assert "second" in out
+
+
+def test_exec_and_chain_short_circuits_on_failure(sandbox: DockerSandbox):
+    code, out = sandbox.exec("false && echo should_not_print")
+    assert code != 0
+    assert "should_not_print" not in out
+
+
+def test_exec_pipe(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo hello world | tr ' ' '_'")
+    assert code == 0
+    assert "hello_world" in out
+
+
+def test_exec_pipe_exit_code_is_last_command(sandbox: DockerSandbox):
+    # grep finds no match → exit 1, even though echo succeeded
+    code, _ = sandbox.exec("echo hello | grep nomatch")
+    assert code == 1
+
+
+def test_exec_stdout_redirect_to_file(sandbox: DockerSandbox, workdir: str):
+    code, out = sandbox.exec(f"echo redirected > {workdir}/out.txt && cat {workdir}/out.txt")
+    assert code == 0
+    assert "redirected" in out
+
+
+def test_exec_stderr_redirect_to_stdout(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo err_msg >&2 2>&1")
+    assert code == 0
+    assert "err_msg" in out
+
+
+def test_exec_subshell(sandbox: DockerSandbox):
+    code, out = sandbox.exec("result=$(echo inner) && echo $result")
+    assert code == 0
+    assert "inner" in out
+
+
+def test_exec_multiline_via_semicolons(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo a; echo b; echo c")
+    assert code == 0
+    assert "a" in out
+    assert "b" in out
+    assert "c" in out
+
+
 def test_exec_working_dir_respected():
    """When working_dir is set, exec uses it as cwd."""
    sb = DockerSandbox(
@@ -7,7 +7,6 @@ exactly as they would be when called by an LLM agent.

 from __future__ import annotations

-import pytest
 from langchain_core.tools import BaseTool

 from docker_agent_sandbox import (
@@ -26,7 +25,6 @@ from docker_agent_sandbox import (
    make_write_file_tool,
 )

-
 # ---------------------------------------------------------------------------
 # bash
 # ---------------------------------------------------------------------------
@@ -113,7 +111,9 @@ def test_read_file_pagination(sandbox: DockerSandbox, workdir: str):
    content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n"
    sandbox.write_file(f"{workdir}/paged.txt", content)
    tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5}
+    )
    assert "line3" in result
    assert "line5" in result
    assert "line1" not in result
@@ -124,7 +124,9 @@ def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str):
    content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n"
    sandbox.write_file(f"{workdir}/info.txt", content)
    tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5}
+    )
    # There are 20 lines but we only requested 1-5, suffix should mention totals.
    assert "20" in result

@@ -187,7 +189,9 @@ def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str):

 def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
    tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"})
+    result = tool.invoke(
+        {"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"}
+    )
    assert result.startswith("[ERROR")


@@ -195,7 +199,9 @@ def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str):
    path = f"{workdir}/multi.txt"
    sandbox.write_file(path, "line1\nline2\nline3\n")
    tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"})
+    result = tool.invoke(
+        {"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"}
+    )
    assert result.startswith("[OK]")
    assert sandbox.read_file(path) == b"replaced\nline3\n"

@@ -438,7 +444,9 @@ def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
    sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
    sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
    tool = make_grep_tool(sandbox)
-    result = tool.invoke({"pattern": "find_me", "path": f"{workdir}/d", "recursive": True})
+    result = tool.invoke(
+        {"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
+    )
    assert "find_me" in result
    assert "a.txt" in result
Author	SHA1	Message	Date
Matte23	909b238cab	ci: Switch to uv [CI / unit-tests (push): Successful in 1m5s; CI / integration-tests (push): Failing after 47s; CI / publish (push): Has been skipped]	2026-05-04 11:19:55 +02:00
Matte23	4ee0cda29a	feat: Use lines for read_file tool	2026-05-04 11:19:48 +02:00
Matte23	eac1643d48	tests: Add more edge cases for exec command	2026-05-04 11:19:28 +02:00