"""Integration tests for all LangChain tools. Each tool is invoked through its public LangChain interface (``tool.invoke``) so that argument validation, logging, and output formatting are all exercised exactly as they would be when called by an LLM agent. """ from __future__ import annotations from langchain_core.tools import BaseTool from docker_agent_sandbox import ( DockerSandbox, make_bash_tool, make_copy_file_tool, make_delete_file_tool, make_edit_file_tool, make_file_ops_tools, make_grep_tool, make_list_dir_tool, make_make_dir_tool, make_move_file_tool, make_read_file_tool, make_search_files_tool, make_write_file_tool, ) # --------------------------------------------------------------------------- # bash # --------------------------------------------------------------------------- def test_bash_success(sandbox: DockerSandbox): tool = make_bash_tool(sandbox) result = tool.invoke({"command": "echo hello"}) assert result.startswith("EXIT:0") assert "hello" in result def test_bash_nonzero_exit(sandbox: DockerSandbox): tool = make_bash_tool(sandbox) result = tool.invoke({"command": "exit 3"}) assert result.startswith("EXIT:3") def test_bash_stderr_included(sandbox: DockerSandbox): tool = make_bash_tool(sandbox) result = tool.invoke({"command": "echo err >&2"}) assert "EXIT:0" in result assert "err" in result def test_bash_large_output_truncated(sandbox: DockerSandbox): tool = make_bash_tool(sandbox) # Generate 300 lines — more than the 200-line cap. 
result = tool.invoke({"command": "python3 -c \"print('\\n'.join(['x'] * 300))\""}) assert "[output truncated]" in result def test_bash_timeout(sandbox: DockerSandbox): tool = make_bash_tool(sandbox) result = tool.invoke({"command": "sleep 60", "timeout": 2}) assert "EXIT:124" in result assert "timed out" in result # --------------------------------------------------------------------------- # write_file # --------------------------------------------------------------------------- def test_write_file_ok(sandbox: DockerSandbox, workdir: str): tool = make_write_file_tool(sandbox) result = tool.invoke({"path": f"{workdir}/new.txt", "content": "data"}) assert result.startswith("[OK]") assert "bytes" in result def test_write_file_reports_byte_count(sandbox: DockerSandbox, workdir: str): tool = make_write_file_tool(sandbox) content = "hello" result = tool.invoke({"path": f"{workdir}/bytes.txt", "content": content}) assert str(len(content.encode())) in result def test_write_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str): tool = make_write_file_tool(sandbox) path = f"{workdir}/a/b/c/file.txt" result = tool.invoke({"path": path, "content": "nested"}) assert result.startswith("[OK]") # Verify the file exists code, _ = sandbox.exec(f"test -f {path}") assert code == 0 # --------------------------------------------------------------------------- # read_file # --------------------------------------------------------------------------- def test_read_file_full(sandbox: DockerSandbox, workdir: str): sandbox.write_file(f"{workdir}/r.txt", "line1\nline2\nline3\n") tool = make_read_file_tool(sandbox) result = tool.invoke({"path": f"{workdir}/r.txt"}) assert "line1" in result assert "line2" in result assert "line3" in result def test_read_file_pagination(sandbox: DockerSandbox, workdir: str): content = "\n".join(f"line{i}" for i in range(1, 11)) + "\n" sandbox.write_file(f"{workdir}/paged.txt", content) tool = make_read_file_tool(sandbox) result = tool.invoke( {"path": 
f"{workdir}/paged.txt", "start_line": 3, "end_line": 5} ) assert "line3" in result assert "line5" in result assert "line1" not in result assert "line6" not in result def test_read_file_shows_total_line_count(sandbox: DockerSandbox, workdir: str): content = "\n".join(f"line{i}" for i in range(1, 21)) + "\n" sandbox.write_file(f"{workdir}/info.txt", content) tool = make_read_file_tool(sandbox) result = tool.invoke( {"path": f"{workdir}/info.txt", "start_line": 1, "end_line": 5} ) # There are 20 lines but we only requested 1-5, suffix should mention totals. assert "20" in result def test_read_file_missing_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_read_file_tool(sandbox) result = tool.invoke({"path": f"{workdir}/does_not_exist.txt"}) assert result.startswith("[ERROR") def test_read_file_directory_returns_error(sandbox: DockerSandbox, workdir: str): sandbox.exec(f"mkdir -p {workdir}/adir") tool = make_read_file_tool(sandbox) result = tool.invoke({"path": f"{workdir}/adir"}) assert result.startswith("[ERROR") # --------------------------------------------------------------------------- # edit_file # --------------------------------------------------------------------------- def test_edit_file_basic_replace(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/edit.txt" sandbox.write_file(path, "foo bar baz\n") tool = make_edit_file_tool(sandbox) result = tool.invoke({"path": path, "old_str": "bar", "new_str": "qux"}) assert result.startswith("[OK]") assert sandbox.read_file(path) == b"foo qux baz\n" def test_edit_file_old_str_not_found(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/nf.txt" sandbox.write_file(path, "hello\n") tool = make_edit_file_tool(sandbox) result = tool.invoke({"path": path, "old_str": "missing", "new_str": "x"}) assert result.startswith("[ERROR]") assert "not found" in result def test_edit_file_ambiguous_old_str(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/amb.txt" sandbox.write_file(path, 
"foo\nfoo\n") tool = make_edit_file_tool(sandbox) result = tool.invoke({"path": path, "old_str": "foo", "new_str": "bar"}) assert result.startswith("[ERROR]") assert "2 times" in result def test_edit_file_delete_block(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/del.txt" sandbox.write_file(path, "keep\nremove me\nalso keep\n") tool = make_edit_file_tool(sandbox) result = tool.invoke({"path": path, "old_str": "remove me\n", "new_str": ""}) assert result.startswith("[OK]") content = sandbox.read_file(path).decode() assert "remove me" not in content assert "keep" in content def test_edit_file_missing_file_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_edit_file_tool(sandbox) result = tool.invoke( {"path": f"{workdir}/ghost.txt", "old_str": "x", "new_str": "y"} ) assert result.startswith("[ERROR") def test_edit_file_multiline_replace(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/multi.txt" sandbox.write_file(path, "line1\nline2\nline3\n") tool = make_edit_file_tool(sandbox) result = tool.invoke( {"path": path, "old_str": "line1\nline2\n", "new_str": "replaced\n"} ) assert result.startswith("[OK]") assert sandbox.read_file(path) == b"replaced\nline3\n" def test_edit_file_over_size_limit_returns_error(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/big.txt" # Write just over 1 MB sandbox.write_file(path, "x" * (1_000_001)) tool = make_edit_file_tool(sandbox) result = tool.invoke({"path": path, "old_str": "x", "new_str": "y"}) assert result.startswith("[ERROR]") assert "bytes" in result # --------------------------------------------------------------------------- # list_dir # --------------------------------------------------------------------------- def test_list_dir_shows_files(sandbox: DockerSandbox, workdir: str): sandbox.write_file(f"{workdir}/a.txt", "a") sandbox.write_file(f"{workdir}/b.txt", "b") tool = make_list_dir_tool(sandbox) result = tool.invoke({"path": workdir}) assert "a.txt" in result assert "b.txt" in 
result def test_list_dir_missing_path_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_list_dir_tool(sandbox) result = tool.invoke({"path": f"{workdir}/nonexistent"}) assert result.startswith("[ERROR") def test_list_dir_default_path(sandbox: DockerSandbox): # Default path is "." — just check it doesn't crash and returns something. tool = make_list_dir_tool(sandbox) result = tool.invoke({}) assert "[ERROR" not in result # --------------------------------------------------------------------------- # delete_file # --------------------------------------------------------------------------- def test_delete_file_removes_file(sandbox: DockerSandbox, workdir: str): path = f"{workdir}/to_delete.txt" sandbox.write_file(path, "bye") tool = make_delete_file_tool(sandbox) result = tool.invoke({"path": path}) assert result.startswith("[OK]") code, _ = sandbox.exec(f"test -f {path}") assert code != 0 def test_delete_file_missing_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_delete_file_tool(sandbox) result = tool.invoke({"path": f"{workdir}/ghost.txt"}) assert result.startswith("[ERROR") def test_delete_file_nonempty_dir_returns_error(sandbox: DockerSandbox, workdir: str): d = f"{workdir}/nonempty" sandbox.exec(f"mkdir -p {d}") sandbox.write_file(f"{d}/file.txt", "x") tool = make_delete_file_tool(sandbox) result = tool.invoke({"path": d}) assert result.startswith("[ERROR") def test_delete_empty_directory(sandbox: DockerSandbox, workdir: str): d = f"{workdir}/emptydir" sandbox.exec(f"mkdir -p {d}") tool = make_delete_file_tool(sandbox) result = tool.invoke({"path": d}) assert result.startswith("[OK]") # --------------------------------------------------------------------------- # move_file # --------------------------------------------------------------------------- def test_move_file_renames_file(sandbox: DockerSandbox, workdir: str): src = f"{workdir}/src.txt" dst = f"{workdir}/dst.txt" sandbox.write_file(src, "move me") tool = 
make_move_file_tool(sandbox) result = tool.invoke({"src": src, "dst": dst}) assert result.startswith("[OK]") assert sandbox.read_file(dst) == b"move me" code, _ = sandbox.exec(f"test -f {src}") assert code != 0 def test_move_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str): src = f"{workdir}/mv_src.txt" dst = f"{workdir}/new/nested/dst.txt" sandbox.write_file(src, "data") tool = make_move_file_tool(sandbox) result = tool.invoke({"src": src, "dst": dst}) assert result.startswith("[OK]") assert sandbox.read_file(dst) == b"data" def test_move_file_missing_src_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_move_file_tool(sandbox) result = tool.invoke({"src": f"{workdir}/nope.txt", "dst": f"{workdir}/out.txt"}) assert result.startswith("[ERROR") # --------------------------------------------------------------------------- # copy_file # --------------------------------------------------------------------------- def test_copy_file_duplicates_file(sandbox: DockerSandbox, workdir: str): src = f"{workdir}/orig.txt" dst = f"{workdir}/copy.txt" sandbox.write_file(src, "original") tool = make_copy_file_tool(sandbox) result = tool.invoke({"src": src, "dst": dst}) assert result.startswith("[OK]") assert sandbox.read_file(dst) == b"original" assert sandbox.read_file(src) == b"original" # source still present def test_copy_file_creates_parent_dirs(sandbox: DockerSandbox, workdir: str): src = f"{workdir}/cp_src.txt" dst = f"{workdir}/deep/copy/file.txt" sandbox.write_file(src, "copied") tool = make_copy_file_tool(sandbox) result = tool.invoke({"src": src, "dst": dst}) assert result.startswith("[OK]") assert sandbox.read_file(dst) == b"copied" def test_copy_file_missing_src_returns_error(sandbox: DockerSandbox, workdir: str): tool = make_copy_file_tool(sandbox) result = tool.invoke({"src": f"{workdir}/ghost.txt", "dst": f"{workdir}/out.txt"}) assert result.startswith("[ERROR") # 
# ---------------------------------------------------------------------------
# make_dir
# ---------------------------------------------------------------------------


def test_make_dir_creates_directory(sandbox: DockerSandbox, workdir: str):
    """Creating a new directory reports ``[OK]`` and the directory then exists."""
    path = f"{workdir}/brand_new"
    maker = make_make_dir_tool(sandbox)
    message = maker.invoke({"path": path})
    assert message.startswith("[OK]")
    status, _ = sandbox.exec(f"test -d {path}")
    assert status == 0


def test_make_dir_idempotent(sandbox: DockerSandbox, workdir: str):
    """Creating a directory that already exists still reports ``[OK]``."""
    path = f"{workdir}/existing_dir"
    sandbox.exec(f"mkdir -p {path}")
    maker = make_make_dir_tool(sandbox)
    message = maker.invoke({"path": path})
    assert message.startswith("[OK]")


def test_make_dir_nested(sandbox: DockerSandbox, workdir: str):
    """Creating a multi-level path reports ``[OK]`` and the leaf directory exists."""
    path = f"{workdir}/a/b/c/d"
    maker = make_make_dir_tool(sandbox)
    message = maker.invoke({"path": path})
    assert message.startswith("[OK]")
    status, _ = sandbox.exec(f"test -d {path}")
    assert status == 0


# ---------------------------------------------------------------------------
# search_files
# ---------------------------------------------------------------------------


def test_search_files_finds_match(sandbox: DockerSandbox, workdir: str):
    """A ``*.py`` pattern lists the Python file and omits the text file."""
    sandbox.write_file(f"{workdir}/target.py", "# python file")
    sandbox.write_file(f"{workdir}/other.txt", "text file")
    finder = make_search_files_tool(sandbox)
    request = {"pattern": "*.py", "directory": workdir}
    hits = finder.invoke(request)
    assert "target.py" in hits
    assert "other.txt" not in hits


def test_search_files_no_matches(sandbox: DockerSandbox, workdir: str):
    """A pattern matching nothing returns exactly ``[no matches found]``."""
    finder = make_search_files_tool(sandbox)
    request = {"pattern": "*.nonexistent", "directory": workdir}
    hits = finder.invoke(request)
    assert hits == "[no matches found]"


def test_search_files_nested(sandbox: DockerSandbox, workdir: str):
    """A file inside a subdirectory is found when searching from the parent."""
    sandbox.write_file(f"{workdir}/sub/deep.txt", "content")
    finder = make_search_files_tool(sandbox)
    request = {"pattern": "*.txt", "directory": workdir}
    hits = finder.invoke(request)
    assert "deep.txt" in hits


def test_search_files_by_exact_name(sandbox: DockerSandbox, workdir: str):
    """A pattern without wildcards finds the file with that exact name."""
    sandbox.write_file(f"{workdir}/exact_name.txt", "x")
    finder = make_search_files_tool(sandbox)
    request = {"pattern": "exact_name.txt", "directory": workdir}
    hits = finder.invoke(request)
    assert "exact_name.txt" in hits


# ---------------------------------------------------------------------------
# grep
# ---------------------------------------------------------------------------


def test_grep_finds_pattern(sandbox: DockerSandbox, workdir: str):
    """The line containing the pattern appears in the grep output."""
    path = f"{workdir}/grep_me.txt"
    sandbox.write_file(path, "line one\nline two\nline three\n")
    grepper = make_grep_tool(sandbox)
    matches = grepper.invoke({"pattern": "two", "path": path})
    assert "line two" in matches


def test_grep_no_matches(sandbox: DockerSandbox, workdir: str):
    """A pattern matching nothing returns exactly ``[no matches found]``."""
    path = f"{workdir}/grep_empty.txt"
    sandbox.write_file(path, "no match here\n")
    grepper = make_grep_tool(sandbox)
    matches = grepper.invoke({"pattern": "zzznomatch", "path": path})
    assert matches == "[no matches found]"


def test_grep_includes_line_numbers(sandbox: DockerSandbox, workdir: str):
    """A match on the second line yields output containing ``2``."""
    path = f"{workdir}/ln.txt"
    sandbox.write_file(path, "alpha\nbeta\ngamma\n")
    grepper = make_grep_tool(sandbox)
    matches = grepper.invoke({"pattern": "beta", "path": path})
    # "beta" sits on line 2 of the file.
    assert "2" in matches


def test_grep_recursive(sandbox: DockerSandbox, workdir: str):
    """A recursive search over a directory reports the matching text and file name."""
    sandbox.write_file(f"{workdir}/d/a.txt", "find_me\n")
    sandbox.write_file(f"{workdir}/d/b.txt", "not here\n")
    grepper = make_grep_tool(sandbox)
    request = {"pattern": "find_me", "path": f"{workdir}/d", "recursive": True}
    matches = grepper.invoke(request)
    for fragment in ("find_me", "a.txt"):
        assert fragment in matches


def test_grep_extended_regex(sandbox: DockerSandbox, workdir: str):
    """The pattern ``foo[0-9]+`` matches ``foo123`` but not ``bar456``."""
    path = f"{workdir}/regex.txt"
    sandbox.write_file(path, "foo123\nbar456\nbaz\n")
    grepper = make_grep_tool(sandbox)
    matches = grepper.invoke({"pattern": "foo[0-9]+", "path": path})
    assert "foo123" in matches
    assert "bar456" not in matches


def test_grep_missing_file_returns_error(sandbox: DockerSandbox, workdir: str):
    """Grepping a path that does not exist produces an ``[ERROR`` message."""
    grepper = make_grep_tool(sandbox)
    request = {"pattern": "x", "path": f"{workdir}/no_file.txt"}
    assert grepper.invoke(request).startswith("[ERROR")


# ---------------------------------------------------------------------------
# make_file_ops_tools assembly
# ---------------------------------------------------------------------------


def test_make_file_ops_tools_returns_ten_tools(sandbox: DockerSandbox):
    """The assembled tool collection has exactly ten entries."""
    toolbox = make_file_ops_tools(sandbox)
    assert len(toolbox) == 10


def test_make_file_ops_tools_all_are_base_tools(sandbox: DockerSandbox):
    """Every assembled tool is an instance of LangChain's ``BaseTool``."""
    toolbox = make_file_ops_tools(sandbox)
    assert all(isinstance(entry, BaseTool) for entry in toolbox)


def test_make_file_ops_tools_expected_names(sandbox: DockerSandbox):
    """The set of assembled tool names matches the expected ten names exactly."""
    expected = {
        "read_file",
        "write_file",
        "edit_file",
        "list_dir",
        "delete_file",
        "move_file",
        "copy_file",
        "make_dir",
        "search_files",
        "grep",
    }
    names = set()
    for entry in make_file_ops_tools(sandbox):
        names.add(entry.name)
    assert names == expected