From eac1643d48a33c8c999d7106d844b0e805bf609f Mon Sep 17 00:00:00 2001
From: Matte23
Date: Mon, 4 May 2026 11:19:28 +0200
Subject: [PATCH] tests: Add more side cases for exec command

---
 tests/integration/test_sandbox.py | 76 +++++++++++++++++++++++++++++++
 tests/integration/test_tools.py   | 22 ++++++---
 2 files changed, 91 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_sandbox.py b/tests/integration/test_sandbox.py
index 3963214..6b4f403 100644
--- a/tests/integration/test_sandbox.py
+++ b/tests/integration/test_sandbox.py
@@ -46,12 +46,88 @@ def test_exec_returns_error_when_container_not_running():
     assert "not running" in out.lower()
 
 
+def test_exec_instant_command(sandbox: DockerSandbox):
+    code, out = sandbox.exec("true")
+    assert code == 0
+    assert out == ""
+
+
+def test_exec_instant_nonzero(sandbox: DockerSandbox):
+    code, _ = sandbox.exec("false")
+    assert code == 1
+
+
+def test_exec_delayed_command_within_timeout(sandbox: DockerSandbox):
+    code, out = sandbox.exec("sleep 1 && echo done", timeout=10)
+    assert code == 0
+    assert "done" in out
+
+
 def test_exec_timeout(sandbox: DockerSandbox):
     code, out = sandbox.exec("sleep 60", timeout=2)
     assert code == 124
     assert "timed out" in out
 
 
+def test_exec_timeout_longer_than_sleep(sandbox: DockerSandbox):
+    # Command finishes before timeout — must not raise or return 124.
+    code, out = sandbox.exec("sleep 1 && echo ok", timeout=10)
+    assert code == 0
+    assert "ok" in out
+
+
+def test_exec_and_chain_both_succeed(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo first && echo second")
+    assert code == 0
+    assert "first" in out
+    assert "second" in out
+
+
+def test_exec_and_chain_short_circuits_on_failure(sandbox: DockerSandbox):
+    code, out = sandbox.exec("false && echo should_not_print")
+    assert code != 0
+    assert "should_not_print" not in out
+
+
+def test_exec_pipe(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo hello world | tr ' ' '_'")
+    assert code == 0
+    assert "hello_world" in out
+
+
+def test_exec_pipe_exit_code_is_last_command(sandbox: DockerSandbox):
+    # grep finds no match → exit 1, even though echo succeeded
+    code, _ = sandbox.exec("echo hello | grep nomatch")
+    assert code == 1
+
+
+def test_exec_stdout_redirect_to_file(sandbox: DockerSandbox, workdir: str):
+    code, out = sandbox.exec(f"echo redirected > {workdir}/out.txt && cat {workdir}/out.txt")
+    assert code == 0
+    assert "redirected" in out
+
+
+def test_exec_stderr_redirect_to_stdout(sandbox: DockerSandbox):
+    # Redirections apply left to right, so wrap the writer and redirect the group.
+    code, out = sandbox.exec("{ echo err_msg >&2; } 2>&1")
+    assert code == 0
+    assert "err_msg" in out
+
+
+def test_exec_subshell(sandbox: DockerSandbox):
+    code, out = sandbox.exec("result=$(echo inner) && echo $result")
+    assert code == 0
+    assert "inner" in out
+
+
+def test_exec_multiline_via_semicolons(sandbox: DockerSandbox):
+    code, out = sandbox.exec("echo a; echo b; echo c")
+    assert code == 0
+    assert "a" in out
+    assert "b" in out
+    assert "c" in out
+
+
 def test_exec_working_dir_respected():
     """When working_dir is set, exec uses it as cwd."""
     sb = DockerSandbox(
diff --git a/tests/integration/test_tools.py b/tests/integration/test_tools.py
index 52fe998..67f5d79 100644
--- a/tests/integration/test_tools.py
+++ b/tests/integration/test_tools.py
@@ -7,7 +7,6 @@ exactly as they would be when called by an LLM agent.
 
 from __future__ import annotations
 
-import pytest
 from langchain_core.tools import BaseTool
 
 from docker_agent_sandbox import (
@@ -26,7 +25,6 @@ from docker_agent_sandbox import (
     make_write_file_tool,
 )
 
-
 # ---------------------------------------------------------------------------
 # bash
 # ---------------------------------------------------------------------------
@@ -113,7 +111,9 @@ def test_read_file_pagination(sandbox: DockerSandbox, workdir: str):
     content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n"
     sandbox.write_file(f"{workdir}/paged.txt", content)
     tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5}
+    )
     assert "line3" in result
     assert "line5" in result
     assert "line1" not in result
@@ -124,7 +124,9 @@ def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str):
     content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n"
     sandbox.write_file(f"{workdir}/info.txt", content)
     tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5}
+    )
     # There are 20 lines but we only requested 1-5, suffix should mention totals.
     assert "20" in result
 
@@ -187,7 +189,9 @@ def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str):
 
 def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
     tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"})
+    result = tool.invoke(
+        {"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"}
+    )
     assert result.startswith("[ERROR")
 
 
@@ -195,7 +199,9 @@ def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str):
     path = f"{workdir}/multi.txt"
     sandbox.write_file(path, "line1\nline2\nline3\n")
     tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"})
+    result = tool.invoke(
+        {"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"}
+    )
     assert result.startswith("[OK]")
     assert sandbox.read_file(path) == b"replaced\nline3\n"
 
@@ -438,7 +444,9 @@ def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
     sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
     sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
     tool = make_grep_tool(sandbox)
-    result = tool.invoke({"pattern": "find_me", "path": f"{workdir}/d", "recursive": True})
+    result = tool.invoke(
+        {"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
+    )
     assert "find_me" in result
     assert "a.txt" in result
 