Compare commits
4 Commits
9dc5b9ba50
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
|
0b2d2982ab
|
|||
|
909b238cab
|
|||
|
4ee0cda29a
|
|||
|
eac1643d48
|
+28
-22
@@ -7,47 +7,53 @@ on:
|
|||||||
branches: ["main"]
|
branches: ["main"]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
unit-tests:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-python@v5
|
- uses: astral-sh/setup-uv@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.12"
|
enable-cache: true
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Run unit tests
|
||||||
run: |
|
run: uv run --extra dev pytest tests/unit/ -v --tb=short
|
||||||
python -m venv .venv
|
|
||||||
.venv/bin/pip install -e ".[dev]" -q
|
|
||||||
|
|
||||||
- name: Run tests
|
integration-tests:
|
||||||
run: .venv/bin/pytest tests/ -v
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
|
- uses: astral-sh/setup-uv@v5
|
||||||
|
with:
|
||||||
|
enable-cache: true
|
||||||
|
|
||||||
|
- name: Pull container image
|
||||||
|
run: docker pull python:3.11-slim
|
||||||
|
|
||||||
|
- name: Run integration tests
|
||||||
|
run: uv run --extra dev pytest tests/integration/ -v --tb=short
|
||||||
|
|
||||||
publish:
|
publish:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
needs: test
|
needs: [unit-tests, integration-tests]
|
||||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
- uses: actions/setup-python@v5
|
- uses: astral-sh/setup-uv@v5
|
||||||
with:
|
with:
|
||||||
python-version: "3.12"
|
enable-cache: true
|
||||||
|
|
||||||
- name: Build package
|
- name: Build package
|
||||||
run: |
|
run: uv build
|
||||||
pip install build -q
|
|
||||||
python -m build
|
|
||||||
|
|
||||||
- name: Publish to Gitea package registry
|
- name: Publish to Gitea package registry
|
||||||
env:
|
env:
|
||||||
TWINE_USERNAME: ${{ github.repository_owner }}
|
UV_PUBLISH_URL: ${{ gitea.server_url }}/api/packages/${{ github.repository_owner }}/pypi
|
||||||
TWINE_PASSWORD: ${{ secrets.GITEA_TOKEN }}
|
UV_PUBLISH_USERNAME: ${{ github.repository_owner }}
|
||||||
run: |
|
UV_PUBLISH_PASSWORD: ${{ secrets.GITEA_TOKEN }}
|
||||||
pip install twine -q
|
run: uv publish
|
||||||
twine upload \
|
|
||||||
--repository-url ${{ gitea.server_url }}/api/packages/${{ github.repository_owner }}/pypi \
|
|
||||||
dist/*
|
|
||||||
|
|||||||
@@ -7,6 +7,8 @@ from typing import TYPE_CHECKING
|
|||||||
from langchain_core.tools import BaseTool, tool
|
from langchain_core.tools import BaseTool, tool
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from docker_agent_sandbox.tools._utils import _MAX_OUTPUT_CHARS
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from docker_agent_sandbox.sandbox import DockerSandbox
|
from docker_agent_sandbox.sandbox import DockerSandbox
|
||||||
|
|
||||||
@@ -15,38 +17,69 @@ def make_read_file_tool(sandbox: "DockerSandbox") -> BaseTool:
|
|||||||
"""Return a read_file tool bound to *sandbox*."""
|
"""Return a read_file tool bound to *sandbox*."""
|
||||||
|
|
||||||
@tool
|
@tool
|
||||||
def read_file(path: str, offset: int = 0, length: int = 5000) -> str:
|
def read_file(path: str, start_line: int = 1, end_line: int | None = None) -> str:
|
||||||
"""
|
"""
|
||||||
Read a file at *path*.
|
Read a file at *path*, returning its contents as text.
|
||||||
|
|
||||||
*path* can be absolute (``/tmp/re-agent/result.csv``) or relative to the
|
*path* can be absolute (/tmp/re-agent/result.csv) or relative to the
|
||||||
working directory.
|
working directory.
|
||||||
|
|
||||||
*offset* is the number of bytes to skip from the start of the file.
|
*start_line* is the 1-based line number to start reading from (default: 1).
|
||||||
*length* is the maximum number of bytes to return. If the file is
|
|
||||||
longer than ``offset + length``, the output is trimmed and a summary
|
|
||||||
line is appended showing how many bytes were omitted.
|
|
||||||
|
|
||||||
Returns the (possibly trimmed) file contents as text, or an error message.
|
*end_line* is the last line to include, inclusive (default: read as many
|
||||||
|
lines as the MAX_CHARS cap allows). Use start_line/end_line to page
|
||||||
|
through large files in chunks.
|
||||||
|
|
||||||
|
At most 5,000 characters are returned per call. If the requested range
|
||||||
|
exceeds this, output is truncated and a summary line is appended showing
|
||||||
|
how many lines were omitted.
|
||||||
|
|
||||||
|
Returns the (possibly truncated) file contents, or an error message.
|
||||||
"""
|
"""
|
||||||
logger.debug(
|
logger.debug(
|
||||||
"Reading file inside sandbox: {} offset={} length={}", path, offset, length
|
"Reading file inside sandbox: {} start_line={} end_line={}",
|
||||||
|
path,
|
||||||
|
start_line,
|
||||||
|
end_line,
|
||||||
)
|
)
|
||||||
try:
|
try:
|
||||||
data = sandbox.read_file(path)
|
data = sandbox.read_file(path)
|
||||||
except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
|
except (FileNotFoundError, IsADirectoryError, RuntimeError) as exc:
|
||||||
return f"[ERROR reading {path!r}] {exc}"
|
return f"[ERROR reading {path!r}] {exc}"
|
||||||
|
|
||||||
total = len(data)
|
lines = data.decode("utf-8", errors="replace").splitlines(keepends=True)
|
||||||
chunk = data[offset : offset + length]
|
total_lines = len(lines)
|
||||||
text = chunk.decode("utf-8", errors="replace")
|
|
||||||
|
|
||||||
suffix = ""
|
# Clamp to valid range (1-based, inclusive)
|
||||||
if offset + length < total:
|
start_idx = max(0, start_line - 1)
|
||||||
remaining = total - (offset + length)
|
end_idx = total_lines if end_line is None else min(end_line, total_lines)
|
||||||
suffix = f"\n[... {remaining} more bytes not shown (total {total} bytes). Use offset/length to read further.]"
|
selected = lines[start_idx:end_idx]
|
||||||
elif offset > 0 or total > length:
|
|
||||||
suffix = f"\n[File total: {total} bytes, showing {len(chunk)} bytes from offset {offset}.]"
|
# Enforce character cap
|
||||||
return text + suffix
|
text = ""
|
||||||
|
last_included_line = start_idx # track how far we got
|
||||||
|
for i, line in enumerate(selected):
|
||||||
|
if len(text) + len(line) > _MAX_OUTPUT_CHARS:
|
||||||
|
break
|
||||||
|
text += line
|
||||||
|
last_included_line = start_idx + i + 1 # 1-based
|
||||||
|
|
||||||
|
# Build informative suffix
|
||||||
|
suffix_parts = []
|
||||||
|
if last_included_line < end_idx:
|
||||||
|
omitted = end_idx - last_included_line
|
||||||
|
suffix_parts.append(
|
||||||
|
f"[... {omitted} more lines not shown (char cap reached). "
|
||||||
|
f"Call again with start_line={last_included_line + 1}.]"
|
||||||
|
)
|
||||||
|
elif end_idx < total_lines:
|
||||||
|
suffix_parts.append(
|
||||||
|
f"[Showing lines {start_line}–{end_idx} of {total_lines} total.]"
|
||||||
|
)
|
||||||
|
|
||||||
|
if suffix_parts:
|
||||||
|
text += "\n" + " ".join(suffix_parts)
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
return read_file
|
return read_file
|
||||||
|
|||||||
@@ -46,12 +46,87 @@ def test_exec_returns_error_when_container_not_running():
|
|||||||
assert "not running" in out.lower()
|
assert "not running" in out.lower()
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_instant_command(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("true")
|
||||||
|
assert code == 0
|
||||||
|
assert out == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_instant_nonzero(sandbox: DockerSandbox):
|
||||||
|
code, _ = sandbox.exec("false")
|
||||||
|
assert code == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_delayed_command_within_timeout(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("sleep 1 && echo done", timeout=10)
|
||||||
|
assert code == 0
|
||||||
|
assert "done" in out
|
||||||
|
|
||||||
|
|
||||||
def test_exec_timeout(sandbox: DockerSandbox):
|
def test_exec_timeout(sandbox: DockerSandbox):
|
||||||
code, out = sandbox.exec("sleep 60", timeout=2)
|
code, out = sandbox.exec("sleep 60", timeout=2)
|
||||||
assert code == 124
|
assert code == 124
|
||||||
assert "timed out" in out
|
assert "timed out" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_timeout_longer_than_sleep(sandbox: DockerSandbox):
|
||||||
|
# Command finishes before timeout — must not raise or return 124.
|
||||||
|
code, out = sandbox.exec("sleep 1 && echo ok", timeout=10)
|
||||||
|
assert code == 0
|
||||||
|
assert "ok" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_and_chain_both_succeed(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("echo first && echo second")
|
||||||
|
assert code == 0
|
||||||
|
assert "first" in out
|
||||||
|
assert "second" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_and_chain_short_circuits_on_failure(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("false && echo should_not_print")
|
||||||
|
assert code != 0
|
||||||
|
assert "should_not_print" not in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_pipe(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("echo hello world | tr ' ' '_'")
|
||||||
|
assert code == 0
|
||||||
|
assert "hello_world" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_pipe_exit_code_is_last_command(sandbox: DockerSandbox):
|
||||||
|
# grep finds no match → exit 1, even though echo succeeded
|
||||||
|
code, _ = sandbox.exec("echo hello | grep nomatch")
|
||||||
|
assert code == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_stdout_redirect_to_file(sandbox: DockerSandbox, workdir: str):
|
||||||
|
code, out = sandbox.exec(f"echo redirected > {workdir}/out.txt && cat {workdir}/out.txt")
|
||||||
|
assert code == 0
|
||||||
|
assert "redirected" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_stderr_redirect_to_stdout(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("echo err_msg >&2 2>&1")
|
||||||
|
assert code == 0
|
||||||
|
assert "err_msg" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_subshell(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("result=$(echo inner) && echo $result")
|
||||||
|
assert code == 0
|
||||||
|
assert "inner" in out
|
||||||
|
|
||||||
|
|
||||||
|
def test_exec_multiline_via_semicolons(sandbox: DockerSandbox):
|
||||||
|
code, out = sandbox.exec("echo a; echo b; echo c")
|
||||||
|
assert code == 0
|
||||||
|
assert "a" in out
|
||||||
|
assert "b" in out
|
||||||
|
assert "c" in out
|
||||||
|
|
||||||
|
|
||||||
def test_exec_working_dir_respected():
|
def test_exec_working_dir_respected():
|
||||||
"""When working_dir is set, exec uses it as cwd."""
|
"""When working_dir is set, exec uses it as cwd."""
|
||||||
sb = DockerSandbox(
|
sb = DockerSandbox(
|
||||||
@@ -59,6 +134,8 @@ def test_exec_working_dir_respected():
|
|||||||
image="python:3.11-slim",
|
image="python:3.11-slim",
|
||||||
command="sleep infinity",
|
command="sleep infinity",
|
||||||
working_dir="/tmp",
|
working_dir="/tmp",
|
||||||
|
cpu_limit=1,
|
||||||
|
memory_limit="256m",
|
||||||
)
|
)
|
||||||
sb.start()
|
sb.start()
|
||||||
try:
|
try:
|
||||||
@@ -174,6 +251,8 @@ def test_context_manager_stops_container():
|
|||||||
container_name="test-ctx-manager",
|
container_name="test-ctx-manager",
|
||||||
image="python:3.11-slim",
|
image="python:3.11-slim",
|
||||||
command="sleep infinity",
|
command="sleep infinity",
|
||||||
|
cpu_limit=1,
|
||||||
|
memory_limit="256m",
|
||||||
)
|
)
|
||||||
sb.start()
|
sb.start()
|
||||||
with sb:
|
with sb:
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ exactly as they would be when called by an LLM agent.
|
|||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import pytest
|
|
||||||
from langchain_core.tools import BaseTool
|
from langchain_core.tools import BaseTool
|
||||||
|
|
||||||
from docker_agent_sandbox import (
|
from docker_agent_sandbox import (
|
||||||
@@ -26,7 +25,6 @@ from docker_agent_sandbox import (
|
|||||||
make_write_file_tool,
|
make_write_file_tool,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# bash
|
# bash
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -113,7 +111,9 @@ def test_read_file_pagination(sandbox: DockerSandbox, workdir: str):
|
|||||||
content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n"
|
content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n"
|
||||||
sandbox.write_file(f"{workdir}/paged.txt", content)
|
sandbox.write_file(f"{workdir}/paged.txt", content)
|
||||||
tool = make_read_file_tool(sandbox)
|
tool = make_read_file_tool(sandbox)
|
||||||
result = tool.invoke({"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5})
|
result = tool.invoke(
|
||||||
|
{"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5}
|
||||||
|
)
|
||||||
assert "line3" in result
|
assert "line3" in result
|
||||||
assert "line5" in result
|
assert "line5" in result
|
||||||
assert "line1" not in result
|
assert "line1" not in result
|
||||||
@@ -124,7 +124,9 @@ def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str):
|
|||||||
content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n"
|
content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n"
|
||||||
sandbox.write_file(f"{workdir}/info.txt", content)
|
sandbox.write_file(f"{workdir}/info.txt", content)
|
||||||
tool = make_read_file_tool(sandbox)
|
tool = make_read_file_tool(sandbox)
|
||||||
result = tool.invoke({"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5})
|
result = tool.invoke(
|
||||||
|
{"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5}
|
||||||
|
)
|
||||||
# There are 20 lines but we only requested 1-5, suffix should mention totals.
|
# There are 20 lines but we only requested 1-5, suffix should mention totals.
|
||||||
assert "20" in result
|
assert "20" in result
|
||||||
|
|
||||||
@@ -187,7 +189,9 @@ def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str):
|
|||||||
|
|
||||||
def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
|
def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
|
||||||
tool = make_edit_file_tool(sandbox)
|
tool = make_edit_file_tool(sandbox)
|
||||||
result = tool.invoke({"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"})
|
result = tool.invoke(
|
||||||
|
{"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"}
|
||||||
|
)
|
||||||
assert result.startswith("[ERROR")
|
assert result.startswith("[ERROR")
|
||||||
|
|
||||||
|
|
||||||
@@ -195,7 +199,9 @@ def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str):
|
|||||||
path = f"{workdir}/multi.txt"
|
path = f"{workdir}/multi.txt"
|
||||||
sandbox.write_file(path, "line1\nline2\nline3\n")
|
sandbox.write_file(path, "line1\nline2\nline3\n")
|
||||||
tool = make_edit_file_tool(sandbox)
|
tool = make_edit_file_tool(sandbox)
|
||||||
result = tool.invoke({"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"})
|
result = tool.invoke(
|
||||||
|
{"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"}
|
||||||
|
)
|
||||||
assert result.startswith("[OK]")
|
assert result.startswith("[OK]")
|
||||||
assert sandbox.read_file(path) == b"replaced\nline3\n"
|
assert sandbox.read_file(path) == b"replaced\nline3\n"
|
||||||
|
|
||||||
@@ -438,7 +444,9 @@ def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
|
|||||||
sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
|
sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
|
||||||
sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
|
sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
|
||||||
tool = make_grep_tool(sandbox)
|
tool = make_grep_tool(sandbox)
|
||||||
result = tool.invoke({"pattern": "find_me", "path": f"{workdir}/d", "recursive": True})
|
result = tool.invoke(
|
||||||
|
{"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
|
||||||
|
)
|
||||||
assert "find_me" in result
|
assert "find_me" in result
|
||||||
assert "a.txt" in result
|
assert "a.txt" in result
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user