From eac1643d48a33c8c999d7106d844b0e805bf609f Mon Sep 17 00:00:00 2001
From: Matte23 <matteo@underdesk.net>
Date: Mon, 4 May 2026 11:19:28 +0200
Subject: [PATCH] tests: Add more edge cases for exec command

---
 tests/integration/test_sandbox.py | 75 +++++++++++++++++++++++++++++++
 tests/integration/test_tools.py   | 22 ++++++---
 2 files changed, 90 insertions(+), 7 deletions(-)

diff --git a/tests/integration/test_sandbox.py b/tests/integration/test_sandbox.py
index 3963214..6b4f403 100644
--- a/tests/integration/test_sandbox.py
+++ b/tests/integration/test_sandbox.py
@@ -46,12 +46,87 @@ def test_exec_returns_error_when_container_not_running():
     assert "not running" in out.lower()
 
 
+def test_exec_instant_command(sandbox: DockerSandbox):
+    """A command that exits at once with no output yields ``(0, "")``."""
+    exit_code, output = sandbox.exec("true")
+    assert (exit_code, output) == (0, "")
+
+
+def test_exec_instant_nonzero(sandbox: DockerSandbox):
+    exit_code, _output = sandbox.exec("false")  # only the status matters here
+    assert exit_code == 1
+
+
+def test_exec_delayed_command_within_timeout(sandbox: DockerSandbox):
+    # The 1 s sleep fits inside the 10 s budget, so the trailing echo must run.
+    status, output = sandbox.exec("sleep 1 && echo done", timeout=10)
+    assert status == 0 and "done" in output
+
+
 def test_exec_timeout(sandbox: DockerSandbox):
     code, out = sandbox.exec("sleep 60", timeout=2)
     assert code == 124
     assert "timed out" in out
 
 
+def test_exec_timeout_longer_than_sleep(sandbox: DockerSandbox):
+    # Finishes before the timeout: no 124 exit code and no "timed out" marker.
+    code, out = sandbox.exec("sleep 1 && echo ok", timeout=10)
+    assert code == 0
+    assert "ok" in out and "timed out" not in out
+
+
+def test_exec_and_chain_both_succeed(sandbox: DockerSandbox):
+    """Both sides of ``&&`` produce output when the left-hand command succeeds."""
+    status, output = sandbox.exec("echo first && echo second")
+    assert status == 0
+    assert all(word in output for word in ("first", "second"))
+
+
+def test_exec_and_chain_short_circuits_on_failure(sandbox: DockerSandbox):
+    # A failing left-hand side must keep `&&` from running the echo.
+    status, output = sandbox.exec("false && echo should_not_print")
+    assert status != 0 and "should_not_print" not in output
+
+
+def test_exec_pipe(sandbox: DockerSandbox):
+    # `tr` reads echo's output through the pipe and swaps the space for "_".
+    status, piped = sandbox.exec("echo hello world | tr ' ' '_'")
+    assert status == 0 and "hello_world" in piped
+
+
+def test_exec_pipe_exit_code_is_last_command(sandbox: DockerSandbox):
+    """The reported status is grep's (1, no match), even though echo succeeded."""
+    exit_code, _output = sandbox.exec("echo hello | grep nomatch")
+    assert exit_code == 1
+
+
+def test_exec_stdout_redirect_to_file(sandbox: DockerSandbox, workdir: str):
+    target = f"{workdir}/out.txt"  # file the shell redirect writes, then cat reads
+    status, output = sandbox.exec(f"echo redirected > {target} && cat {target}")
+    assert status == 0 and "redirected" in output
+
+
+def test_exec_stderr_redirect_to_stdout(sandbox: DockerSandbox):
+    # The subshell writes to stderr; the outer `2>&1` is what folds it into stdout.
+    code, out = sandbox.exec("(echo err_msg >&2) 2>&1")
+    assert code == 0 and "err_msg" in out
+
+
+def test_exec_subshell(sandbox: DockerSandbox):
+    """Output captured with ``$(...)`` can be reused by a later command."""
+    status, output = sandbox.exec("result=$(echo inner) && echo $result")
+    assert status == 0 and "inner" in output
+
+
+def test_exec_multiline_via_semicolons(sandbox: DockerSandbox):
+    """Each ``;``-separated echo runs, and their output arrives in order."""
+    code, out = sandbox.exec("echo a; echo b; echo c")
+    assert code == 0
+    # Compare whole tokens in order: one-letter substring checks match too easily.
+    assert out.split() == ["a", "b", "c"]
+
+
 def test_exec_working_dir_respected():
     """When working_dir is set, exec uses it as cwd."""
     sb = DockerSandbox(
diff --git a/tests/integration/test_tools.py b/tests/integration/test_tools.py
index 52fe998..67f5d79 100644
--- a/tests/integration/test_tools.py
+++ b/tests/integration/test_tools.py
@@ -7,7 +7,6 @@ exactly as they would be when called by an LLM agent.
 
 from __future__ import annotations
 
-import pytest
 from langchain_core.tools import BaseTool
 
 from docker_agent_sandbox import (
@@ -26,7 +25,6 @@ from docker_agent_sandbox import (
     make_write_file_tool,
 )
 
-
 # ---------------------------------------------------------------------------
 # bash
 # ---------------------------------------------------------------------------
@@ -113,7 +111,9 @@ def test_read_file_pagination(sandbox: DockerSandbox, workdir: str):
     content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n"
     sandbox.write_file(f"{workdir}/paged.txt", content)
     tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5}
+    )
     assert "line3" in result
     assert "line5" in result
     assert "line1" not in result
@@ -124,7 +124,9 @@ def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str):
     content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n"
     sandbox.write_file(f"{workdir}/info.txt", content)
     tool = make_read_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5})
+    result = tool.invoke(
+        {"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5}
+    )
     # There are 20 lines but we only requested 1-5, suffix should mention totals.
     assert "20" in result
 
@@ -187,7 +189,9 @@ def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str):
 
 def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
     tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"})
+    result = tool.invoke(
+        {"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"}
+    )
     assert result.startswith("[ERROR")
 
 
@@ -195,7 +199,9 @@ def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str):
     path = f"{workdir}/multi.txt"
     sandbox.write_file(path, "line1\nline2\nline3\n")
     tool = make_edit_file_tool(sandbox)
-    result = tool.invoke({"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"})
+    result = tool.invoke(
+        {"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"}
+    )
     assert result.startswith("[OK]")
     assert sandbox.read_file(path) == b"replaced\nline3\n"
 
@@ -438,7 +444,9 @@ def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
     sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
     sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
     tool = make_grep_tool(sandbox)
-    result = tool.invoke({"pattern": "find_me", "path": f"{workdir}/d", "recursive": True})
+    result = tool.invoke(
+        {"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
+    )
     assert "find_me" in result
     assert "a.txt" in result