491 lines
17 KiB
Python
491 lines
17 KiB
Python
"""Integration tests for all LangChain tools.
|
|
|
|
Each tool is invoked through its public LangChain interface (``tool.invoke``)
|
|
so that argument validation, logging, and output formatting are all exercised
|
|
exactly as they would be when called by an LLM agent.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
from langchain_core.tools import BaseTool
|
|
|
|
from docker_agent_sandbox import (
|
|
DockerSandbox,
|
|
make_bash_tool,
|
|
make_copy_file_tool,
|
|
make_delete_file_tool,
|
|
make_edit_file_tool,
|
|
make_file_ops_tools,
|
|
make_grep_tool,
|
|
make_list_dir_tool,
|
|
make_make_dir_tool,
|
|
make_move_file_tool,
|
|
make_read_file_tool,
|
|
make_search_files_tool,
|
|
make_write_file_tool,
|
|
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# bash
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_bash_success(sandbox: DockerSandbox):
    """``echo hello`` yields output that starts with ``EXIT:0`` and contains ``hello``."""
    output = make_bash_tool(sandbox).invoke({"command": "echo hello"})
    assert output.startswith("EXIT:0")
    assert "hello" in output
|
|
|
|
|
|
def test_bash_nonzero_exit(sandbox: DockerSandbox):
    """``exit 3`` yields output that starts with ``EXIT:3``."""
    bash = make_bash_tool(sandbox)
    output = bash.invoke({"command": "exit 3"})
    assert output.startswith("EXIT:3")
|
|
|
|
|
|
def test_bash_stderr_included(sandbox: DockerSandbox):
    """Text written to stderr appears in the tool output alongside ``EXIT:0``."""
    output = make_bash_tool(sandbox).invoke({"command": "echo err >&2"})
    for fragment in ("EXIT:0", "err"):
        assert fragment in output
|
|
|
|
|
|
def test_bash_large_output_truncated(sandbox: DockerSandbox):
    """A 300-line output carries the ``[output truncated]`` marker."""
    # Generate 300 lines — more than the 200-line cap.
    command = "python3 -c \"print('\\n'.join(['x'] * 300))\""
    output = make_bash_tool(sandbox).invoke({"command": command})
    assert "[output truncated]" in output
|
|
|
|
|
|
def test_bash_timeout(sandbox: DockerSandbox):
    """``sleep 60`` with ``timeout`` set to 2 reports ``EXIT:124`` and ``timed out``."""
    request = {"command": "sleep 60", "timeout": 2}
    output = make_bash_tool(sandbox).invoke(request)
    assert "EXIT:124" in output
    assert "timed out" in output
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# write_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_write_file_ok(sandbox: DockerSandbox, workdir: str):
    """Writing a new file returns a message that starts with ``[OK]`` and mentions ``bytes``."""
    target = f"{workdir}/new.txt"
    reply = make_write_file_tool(sandbox).invoke({"path": target, "content": "data"})
    assert reply.startswith("[OK]")
    assert "bytes" in reply
|
|
|
|
|
|
def test_write_file_reports_byte_count(sandbox: DockerSandbox, workdir: str):
    """The write result contains the encoded byte length of the written content."""
    content = "hello"
    expected_size = len(content.encode())
    writer = make_write_file_tool(sandbox)
    reply = writer.invoke({"path": f"{workdir}/bytes.txt", "content": content})
    assert str(expected_size) in reply
|
|
|
|
|
|
def test_write_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str):
    """Writing to a path under not-yet-created directories succeeds and the file exists."""
    path = f"{workdir}/a/b/c/file.txt"
    reply = make_write_file_tool(sandbox).invoke({"path": path, "content": "nested"})
    assert reply.startswith("[OK]")

    # Confirm from inside the sandbox that a regular file now exists there.
    exit_code, _ = sandbox.exec(f"test -f {path}")
    assert exit_code == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# read_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_read_file_full(sandbox: DockerSandbox, workdir: str):
    """Reading without a line range returns every line of a three-line file."""
    sandbox.write_file(f"{workdir}/r.txt", "line1\nline2\nline3\n")
    text = make_read_file_tool(sandbox).invoke({"path": f"{workdir}/r.txt"})
    for expected in ("line1", "line2", "line3"):
        assert expected in text
|
|
|
|
|
|
def test_read_file_pagination(sandbox: DockerSandbox, workdir: str):
    """Requesting lines 3-5 of a ten-line file includes the range bounds and excludes lines outside it."""
    lines = [f"line{i}" for i in range(1, 11)]
    content = "\n".join(lines) + "\n"
    sandbox.write_file(f"{workdir}/paged.txt", content)

    reader = make_read_file_tool(sandbox)
    text = reader.invoke({"path": f"{workdir}/paged.txt", "start_line": 3, "end_line": 5})

    # Range bounds must be present ...
    assert "line3" in text
    assert "line5" in text
    # ... and the lines just outside the range must be absent.
    assert "line1" not in text
    assert "line6" not in text
|
|
|
|
|
|
def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str):
    """A partial read of a 20-line file mentions ``20`` in its output."""
    lines = [f"line{i}" for i in range(1, 21)]
    content = "\n".join(lines) + "\n"
    sandbox.write_file(f"{workdir}/info.txt", content)

    reader = make_read_file_tool(sandbox)
    text = reader.invoke({"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5})

    # There are 20 lines but we only requested 1-5, suffix should mention totals.
    assert "20" in text
|
|
|
|
|
|
def test_read_file_missing_returns_error(sandbox: DockerSandbox, workdir: str):
    """Reading a path that was never created returns a result starting with ``[ERROR``."""
    missing = f"{workdir}/does_not_exist.txt"
    reply = make_read_file_tool(sandbox).invoke({"path": missing})
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
def test_read_file_directory_returns_error(sandbox: DockerSandbox, workdir: str):
    """Reading a directory path returns a result starting with ``[ERROR``."""
    sandbox.exec(f"mkdir -p {workdir}/adir")
    reply = make_read_file_tool(sandbox).invoke({"path": f"{workdir}/adir"})
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# edit_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_edit_file_basic_replace(sandbox: DockerSandbox, workdir: str):
    """Replacing a unique substring rewrites the file with only that substring changed."""
    path = f"{workdir}/edit.txt"
    sandbox.write_file(path, "foo bar baz\n")

    edit_request = {"path": path, "old_str": "bar", "new_str": "qux"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)

    assert reply.startswith("[OK]")
    assert sandbox.read_file(path) == b"foo qux baz\n"
|
|
|
|
|
|
def test_edit_file_old_str_not_found(sandbox: DockerSandbox, workdir: str):
    """An ``old_str`` absent from the file produces an ``[ERROR]`` mentioning ``not found``."""
    path = f"{workdir}/nf.txt"
    sandbox.write_file(path, "hello\n")

    edit_request = {"path": path, "old_str": "missing", "new_str": "x"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)

    assert reply.startswith("[ERROR]")
    assert "not found" in reply
|
|
|
|
|
|
def test_edit_file_ambiguous_old_str(sandbox: DockerSandbox, workdir: str):
    """An ``old_str`` occurring twice produces an ``[ERROR]`` mentioning ``2 times``."""
    path = f"{workdir}/amb.txt"
    sandbox.write_file(path, "foo\nfoo\n")

    edit_request = {"path": path, "old_str": "foo", "new_str": "bar"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)

    assert reply.startswith("[ERROR]")
    assert "2 times" in reply
|
|
|
|
|
|
def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str):
    """Replacing a block with an empty ``new_str`` removes exactly that block.

    The file starts as three lines; the middle line (including its trailing
    newline) is replaced with the empty string, so the two surrounding lines
    must remain untouched.
    """
    path = f"{workdir}/del.txt"
    sandbox.write_file(path, "keep\nremove me\nalso keep\n")
    tool = make_edit_file_tool(sandbox)
    result = tool.invoke({"path": path, "old_str": "remove me\n", "new_str": ""})
    assert result.startswith("[OK]")
    # Compare the whole file rather than substrings: a check such as
    # ``"keep" in content`` would still pass if the "also keep" line were lost.
    assert sandbox.read_file(path) == b"keep\nalso keep\n"
|
|
|
|
|
|
def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
    """Editing a path that was never created returns a result starting with ``[ERROR``."""
    edit_request = {"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str):
    """An ``old_str`` spanning two lines is replaced by a single new line."""
    path = f"{workdir}/multi.txt"
    sandbox.write_file(path, "line1\nline2\nline3\n")

    edit_request = {"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)

    assert reply.startswith("[OK]")
    assert sandbox.read_file(path) == b"replaced\nline3\n"
|
|
|
|
|
|
def test_edit_file_over_size_limit_returns_error(sandbox: DockerSandbox, workdir: str):
    """Editing a 1,000,001-character file returns an ``[ERROR]`` mentioning ``bytes``."""
    path = f"{workdir}/big.txt"
    # Write just over 1 MB
    oversized = "x" * 1_000_001
    sandbox.write_file(path, oversized)

    edit_request = {"path": path, "old_str": "x", "new_str": "y"}
    reply = make_edit_file_tool(sandbox).invoke(edit_request)

    assert reply.startswith("[ERROR]")
    assert "bytes" in reply
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# list_dir
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_list_dir_shows_files(sandbox: DockerSandbox, workdir: str):
    """Listing a directory names both files that were written into it."""
    sandbox.write_file(f"{workdir}/a.txt", "a")
    sandbox.write_file(f"{workdir}/b.txt", "b")

    listing = make_list_dir_tool(sandbox).invoke({"path": workdir})

    for filename in ("a.txt", "b.txt"):
        assert filename in listing
|
|
|
|
|
|
def test_list_dir_missing_path_returns_error(sandbox: DockerSandbox, workdir: str):
    """Listing a path that was never created returns a result starting with ``[ERROR``."""
    missing = f"{workdir}/nonexistent"
    reply = make_list_dir_tool(sandbox).invoke({"path": missing})
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
def test_list_dir_default_path(sandbox: DockerSandbox):
    """Invoking the tool with no arguments does not produce an ``[ERROR`` result."""
    # Default path is "." — just check it doesn't crash and returns something.
    listing = make_list_dir_tool(sandbox).invoke({})
    assert "[ERROR" not in listing
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# delete_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_delete_file_removes_file(sandbox: DockerSandbox, workdir: str):
    """Deleting an existing file returns ``[OK]`` and the file is gone afterwards."""
    path = f"{workdir}/to_delete.txt"
    sandbox.write_file(path, "bye")

    reply = make_delete_file_tool(sandbox).invoke({"path": path})
    assert reply.startswith("[OK]")

    # ``test -f`` must now fail inside the sandbox.
    exit_code, _ = sandbox.exec(f"test -f {path}")
    assert exit_code != 0
|
|
|
|
|
|
def test_delete_file_missing_returns_error(sandbox: DockerSandbox, workdir: str):
    """Deleting a path that was never created returns a result starting with ``[ERROR``."""
    missing = f"{workdir}/ghost.txt"
    reply = make_delete_file_tool(sandbox).invoke({"path": missing})
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
def test_delete_file_nonempty_dir_returns_error(sandbox: DockerSandbox, workdir: str):
    """Deleting a directory that still contains a file returns a result starting with ``[ERROR``."""
    directory = f"{workdir}/nonempty"
    sandbox.exec(f"mkdir -p {directory}")
    sandbox.write_file(f"{directory}/file.txt", "x")

    reply = make_delete_file_tool(sandbox).invoke({"path": directory})
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
def test_delete_empty_directory(sandbox: DockerSandbox, workdir: str):
    """Deleting an empty directory returns ``[OK]`` and the directory is gone.

    The directory is created directly in the sandbox with ``mkdir -p`` so the
    test depends only on the delete tool.
    """
    d = f"{workdir}/emptydir"
    sandbox.exec(f"mkdir -p {d}")
    tool = make_delete_file_tool(sandbox)
    result = tool.invoke({"path": d})
    assert result.startswith("[OK]")
    # Verify the effect, not just the message: an "[OK]" reply that leaves the
    # directory in place must fail this test.
    code, _ = sandbox.exec(f"test -d {d}")
    assert code != 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# move_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_move_file_renames_file(sandbox: DockerSandbox, workdir: str):
    """Moving a file puts its content at the destination and removes the source."""
    src, dst = f"{workdir}/src.txt", f"{workdir}/dst.txt"
    sandbox.write_file(src, "move me")

    reply = make_move_file_tool(sandbox).invoke({"src": src, "dst": dst})

    assert reply.startswith("[OK]")
    assert sandbox.read_file(dst) == b"move me"
    # The source must no longer exist as a regular file.
    exit_code, _ = sandbox.exec(f"test -f {src}")
    assert exit_code != 0
|
|
|
|
|
|
def test_move_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str):
    """Moving to a destination under not-yet-created directories succeeds."""
    src, dst = f"{workdir}/mv_src.txt", f"{workdir}/new/nested/dst.txt"
    sandbox.write_file(src, "data")

    reply = make_move_file_tool(sandbox).invoke({"src": src, "dst": dst})

    assert reply.startswith("[OK]")
    assert sandbox.read_file(dst) == b"data"
|
|
|
|
|
|
def test_move_file_missing_src_returns_error(sandbox: DockerSandbox, workdir: str):
    """Moving a source that was never created returns a result starting with ``[ERROR``."""
    move_request = {"src": f"{workdir}/nope.txt", "dst": f"{workdir}/out.txt"}
    reply = make_move_file_tool(sandbox).invoke(move_request)
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# copy_file
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_copy_file_duplicates_file(sandbox: DockerSandbox, workdir: str):
    """Copying a file leaves identical content at both source and destination."""
    src, dst = f"{workdir}/orig.txt", f"{workdir}/copy.txt"
    sandbox.write_file(src, "original")

    reply = make_copy_file_tool(sandbox).invoke({"src": src, "dst": dst})

    assert reply.startswith("[OK]")
    assert sandbox.read_file(dst) == b"original"
    # Unlike a move, the source must still be present.
    assert sandbox.read_file(src) == b"original"
|
|
|
|
|
|
def test_copy_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str):
    """Copying to a destination under not-yet-created directories succeeds."""
    src, dst = f"{workdir}/cp_src.txt", f"{workdir}/deep/copy/file.txt"
    sandbox.write_file(src, "copied")

    reply = make_copy_file_tool(sandbox).invoke({"src": src, "dst": dst})

    assert reply.startswith("[OK]")
    assert sandbox.read_file(dst) == b"copied"
|
|
|
|
|
|
def test_copy_file_missing_src_returns_error(sandbox: DockerSandbox, workdir: str):
    """Copying a source that was never created returns a result starting with ``[ERROR``."""
    copy_request = {"src": f"{workdir}/ghost.txt", "dst": f"{workdir}/out.txt"}
    reply = make_copy_file_tool(sandbox).invoke(copy_request)
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# make_dir
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_make_dir_creates_directory(sandbox: DockerSandbox, workdir: str):
    """Creating a new directory returns ``[OK]`` and the directory exists afterwards."""
    path = f"{workdir}/brand_new"

    reply = make_make_dir_tool(sandbox).invoke({"path": path})
    assert reply.startswith("[OK]")

    exit_code, _ = sandbox.exec(f"test -d {path}")
    assert exit_code == 0
|
|
|
|
|
|
def test_make_dir_idempotent(sandbox: DockerSandbox, workdir: str):
    """Creating a directory that already exists still returns ``[OK]``."""
    path = f"{workdir}/existing_dir"
    # Pre-create the directory so the tool call targets an existing path.
    sandbox.exec(f"mkdir -p {path}")

    reply = make_make_dir_tool(sandbox).invoke({"path": path})
    assert reply.startswith("[OK]")
|
|
|
|
|
|
def test_make_dir_nested(sandbox: DockerSandbox, workdir: str):
    """Creating a four-level nested path returns ``[OK]`` and the leaf directory exists."""
    path = f"{workdir}/a/b/c/d"

    reply = make_make_dir_tool(sandbox).invoke({"path": path})
    assert reply.startswith("[OK]")

    exit_code, _ = sandbox.exec(f"test -d {path}")
    assert exit_code == 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# search_files
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_search_files_finds_match(sandbox: DockerSandbox, workdir: str):
    """A ``*.py`` pattern reports the ``.py`` file and omits the ``.txt`` file."""
    sandbox.write_file(f"{workdir}/target.py", "# python file")
    sandbox.write_file(f"{workdir}/other.txt", "text file")

    query = {"pattern": "*.py", "directory": workdir}
    found = make_search_files_tool(sandbox).invoke(query)

    assert "target.py" in found
    assert "other.txt" not in found
|
|
|
|
|
|
def test_search_files_no_matches(sandbox: DockerSandbox, workdir: str):
    """A pattern matching nothing returns exactly ``[no matches found]``."""
    query = {"pattern": "*.nonexistent", "directory": workdir}
    found = make_search_files_tool(sandbox).invoke(query)
    assert found == "[no matches found]"
|
|
|
|
|
|
def test_search_files_nested(sandbox: DockerSandbox, workdir: str):
    """A file inside a subdirectory is found when searching from the parent directory."""
    sandbox.write_file(f"{workdir}/sub/deep.txt", "content")

    query = {"pattern": "*.txt", "directory": workdir}
    found = make_search_files_tool(sandbox).invoke(query)

    assert "deep.txt" in found
|
|
|
|
|
|
def test_search_files_by_exact_name(sandbox: DockerSandbox, workdir: str):
    """A pattern with no wildcard finds the file bearing exactly that name."""
    sandbox.write_file(f"{workdir}/exact_name.txt", "x")

    query = {"pattern": "exact_name.txt", "directory": workdir}
    found = make_search_files_tool(sandbox).invoke(query)

    assert "exact_name.txt" in found
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# grep
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_grep_finds_pattern(sandbox: DockerSandbox, workdir: str):
    """Searching a file for ``two`` returns the line that contains it."""
    path = f"{workdir}/grep_me.txt"
    sandbox.write_file(path, "line one\nline two\nline three\n")

    matches = make_grep_tool(sandbox).invoke({"pattern": "two", "path": path})

    assert "line two" in matches
|
|
|
|
|
|
def test_grep_no_matches(sandbox: DockerSandbox, workdir: str):
    """A pattern absent from the file returns exactly ``[no matches found]``."""
    path = f"{workdir}/grep_empty.txt"
    sandbox.write_file(path, "no match here\n")

    matches = make_grep_tool(sandbox).invoke({"pattern": "zzznomatch", "path": path})

    assert matches == "[no matches found]"
|
|
|
|
|
|
def test_grep_includes_line_numbers(sandbox: DockerSandbox, workdir: str):
    """The output line reporting the ``beta`` match carries its line number, 2.

    ``beta`` is the second line of the file. The check is scoped to the output
    line(s) that mention ``beta`` and ignores the echoed file path, so a stray
    "2" elsewhere (for example inside the path) cannot satisfy it.
    """
    path = f"{workdir}/ln.txt"
    sandbox.write_file(path, "alpha\nbeta\ngamma\n")
    tool = make_grep_tool(sandbox)
    result = tool.invoke({"pattern": "beta", "path": path})

    match_lines = [line for line in result.splitlines() if "beta" in line]
    assert match_lines, f"no output line mentions the match: {result!r}"
    # Strip the path before looking for the number: workdir may itself contain
    # the digit 2, which made the original ``"2" in result`` check near-vacuous.
    assert any("2" in line.replace(path, "") for line in match_lines)
|
|
|
|
|
|
def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
    """A recursive search over a directory reports the match and the file holding it."""
    sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
    sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")

    query = {"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
    matches = make_grep_tool(sandbox).invoke(query)

    assert "find_me" in matches
    assert "a.txt" in matches
|
|
|
|
|
|
def test_grep_extended_regex(sandbox: DockerSandbox, workdir: str):
    """The pattern ``foo[0-9]+`` matches ``foo123`` and does not report ``bar456``."""
    path = f"{workdir}/regex.txt"
    sandbox.write_file(path, "foo123\nbar456\nbaz\n")

    matches = make_grep_tool(sandbox).invoke({"pattern": "foo[0-9]+", "path": path})

    assert "foo123" in matches
    assert "bar456" not in matches
|
|
|
|
|
|
def test_grep_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
    """Searching a path that was never created returns a result starting with ``[ERROR``."""
    query = {"pattern": "x", "path": f"{workdir}/no_file.txt"}
    reply = make_grep_tool(sandbox).invoke(query)
    assert reply.startswith("[ERROR")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# make_file_ops_tools assembly
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_make_file_ops_tools_returns_ten_tools(sandbox: DockerSandbox):
    """``make_file_ops_tools`` returns a collection of exactly ten items."""
    tool_count = len(make_file_ops_tools(sandbox))
    assert tool_count == 10
|
|
|
|
|
|
def test_make_file_ops_tools_all_are_base_tools(sandbox: DockerSandbox):
    """Every item returned by ``make_file_ops_tools`` is a ``BaseTool`` instance."""
    assert all(isinstance(candidate, BaseTool) for candidate in make_file_ops_tools(sandbox))
|
|
|
|
|
|
def test_make_file_ops_tools_expected_names(sandbox: DockerSandbox):
    """The set of tool names from ``make_file_ops_tools`` equals the expected ten names."""
    expected = {
        "read_file",
        "write_file",
        "edit_file",
        "list_dir",
        "delete_file",
        "move_file",
        "copy_file",
        "make_dir",
        "search_files",
        "grep",
    }
    actual = set()
    for candidate in make_file_ops_tools(sandbox):
        actual.add(candidate.name)
    assert actual == expected
|