diff --git a/docker_agent_sandbox/sandbox.py b/docker_agent_sandbox/sandbox.py
index 374e93b..585cf42 100644
--- a/docker_agent_sandbox/sandbox.py
+++ b/docker_agent_sandbox/sandbox.py
@@ -2,14 +2,18 @@
 
 from __future__ import annotations
 
-import concurrent.futures
 import io
+import select as _select
+import socket as _socket
 import tarfile
+from select import select as _original_select
 from pathlib import Path
 from typing import TYPE_CHECKING
+from unittest.mock import patch
 
 import docker
 import docker.errors
+from docker.utils.socket import consume_socket_output, demux_adaptor, frames_iter
 from loguru import logger
 
 if TYPE_CHECKING:
@@ -33,6 +37,9 @@ class DockerSandbox:
         ports: dict | None = None,
         environment: dict[str, str] | None = None,
         working_dir: str | None = None,
+        # TODO(security): default network_mode is "bridge", giving containers unrestricted
+        # outbound network access. For an untrusted-code sandbox this should default to
+        # "none" so callers must explicitly opt in to network access.
         network_mode: str = "bridge",
         cap_drop: list[str] | None = None,
         cap_add: list[str] | None = None,
@@ -52,6 +59,17 @@ class DockerSandbox:
         self._client: docker.DockerClient = docker.from_env()
         self._container: docker.models.containers.Container | None = None
 
+    def __enter__(self) -> "DockerSandbox":
+        """Return the sandbox itself; entering does not start the container."""
+        return self
+
+    def __exit__(self, *_: object) -> None:
+        """Call stop(), then close the Docker client even if stop() raises."""
+        try:
+            self.stop()
+        finally:
+            self._client.close()
+
     # ------------------------------------------------------------------
     # Lifecycle
     # ------------------------------------------------------------------
@@ -111,7 +129,13 @@ class DockerSandbox:
         if self._security_opt is not None:
             run_kwargs["security_opt"] = self._security_opt
 
-        self._container = self._client.containers.run(self._image, **run_kwargs)
+        try:
+            self._container = self._client.containers.run(self._image, **run_kwargs)
+        except docker.errors.ImageNotFound:
+            raise RuntimeError(
+                f"Image {self._image!r} not found locally. "
+                "Call build_image_if_missing() first, or pull/build the image manually."
+            ) from None
         self._container.reload()
 
         logger.info(
@@ -163,6 +187,8 @@ class DockerSandbox:
             raise RuntimeError("Sandbox container is not running.")
 
         p = Path(path)
+        if not p.is_absolute() and self._working_dir is not None:
+            p = Path(self._working_dir) / p
         encoded = content.encode("utf-8")
 
         self._container.exec_run(["mkdir", "-p", str(p.parent)])
@@ -182,30 +208,59 @@ class DockerSandbox:
 
     def exec(self, command: str, timeout: int = 120) -> tuple[int, str]:
         """
-        Run *command* inside the container via ``exec_run``.
+        Run *command* inside the container via the low-level exec API.
 
         Returns ``(exit_code, combined_stdout_stderr)``.
-        The call is wrapped in a thread so the *timeout* is enforced without
-        modifying the command string.
+        *timeout* is applied as a socket timeout on the exec socket, so it
+        bounds each blocking read rather than the command's total runtime.
+        Exit code 124 is returned when a read times out, and 1 when the
+        container is not running or another error is raised while running
+        the command. Output is all of stdout followed by all of stderr.
         """
         if self._container is None:
             return 1, "Sandbox container is not running."
 
-        def _run() -> tuple[int, bytes]:
-            kwargs: dict = dict(demux=False)
-            if self._working_dir is not None:
-                kwargs["workdir"] = self._working_dir
-            exit_code, output = self._container.exec_run(
-                ["bash", "-c", command], **kwargs
-            )
-            return exit_code, output or b""
+        create_kwargs: dict = {}
+        if self._working_dir is not None:
+            create_kwargs["workdir"] = self._working_dir
 
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(_run)
-            try:
-                exit_code, raw = future.result(timeout=timeout)
-                return exit_code, raw.decode("utf-8", errors="replace")
-            except concurrent.futures.TimeoutError:
-                return 124, f"Command timed out after {timeout}s"
-            except Exception as exc:
-                return 1, f"Error running command in container: {exc}"
+        # TODO(fragile): timeout enforcement relies on private docker-py internals
+        # (frames_iter, demux_adaptor, consume_socket_output from docker.utils.socket)
+        # and monkey-patches select.select for the duration of the read — not thread-safe
+        # if multiple exec() calls run concurrently. Replace when docker-py adds native
+        # per-call timeout support. See https://github.com/docker/docker-py/issues/2651
+        # NOTE(review): docker-py's socket read helper appears to prefer
+        # select.poll() when the platform provides it, in which case the
+        # select.select patch below is never hit — confirm on Linux.
+        sock = None
+        try:
+            exec_id = self._client.api.exec_create(
+                self._container.id,
+                ["bash", "-c", command],
+                stdout=True,
+                stderr=True,
+                **create_kwargs,
+            )
+            sock = self._client.api.exec_start(exec_id["Id"], socket=True)
+            sock._sock.settimeout(timeout)
+            with patch.object(
+                _select,
+                "select",
+                new=lambda rlist, wlist, xlist: _original_select(
+                    rlist, wlist, xlist, timeout
+                ),
+            ):
+                gen = (demux_adaptor(*frame) for frame in frames_iter(sock, tty=False))
+                stdout, stderr = consume_socket_output(gen, demux=True)
+
+            exit_code = self._client.api.exec_inspect(exec_id["Id"])["ExitCode"] or 0
+            output = (stdout or b"") + (stderr or b"")
+            return exit_code, output.decode("utf-8", errors="replace")
+        except _socket.timeout:
+            return 124, f"Command timed out after {timeout}s"
+        except Exception as exc:
+            return 1, f"Error running command in container: {exc}"
+        finally:
+            # exec_start(socket=True) leaves closing the socket to the caller.
+            if sock is not None:
+                sock.close()
diff --git a/docker_agent_sandbox/tools/bash.py b/docker_agent_sandbox/tools/bash.py
index f6b97ac..b1ffa63 100644
--- a/docker_agent_sandbox/tools/bash.py
+++ b/docker_agent_sandbox/tools/bash.py
@@ -28,8 +28,6 @@ def make_bash_tool(sandbox: "DockerSandbox") -> BaseTool:
 
         Returns EXIT: followed by combined stdout+stderr.
         Large outputs are truncated to stay within token limits.
-        Use for: running the target binary, processing PIN output,
-        compiling plugins, or any other shell operation during analysis.
         """
         logger.debug("Running inside sandbox: {}", command)
         exit_code, output = sandbox.exec(command, timeout=timeout)
diff --git a/pyproject.toml b/pyproject.toml
index ff4a9d6..10613e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,11 +3,14 @@ name = "docker-agent-sandbox"
 version = "0.1.0"
 requires-python = ">=3.11"
 dependencies = [
-    "docker",
-    "langchain-core",
-    "loguru",
+    "docker>=7.1.0",
+    "langchain-core>=1.2.24",
+    "loguru>=0.7.3",
 ]
 
+[project.optional-dependencies]
+dev = ["pytest"]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"