fix: Various fixes

This commit is contained in:
2026-04-02 14:41:18 +02:00
parent cd51258fb5
commit 0efa9e2720
3 changed files with 67 additions and 27 deletions

View File

@@ -2,14 +2,18 @@
from __future__ import annotations from __future__ import annotations
import concurrent.futures
import io import io
import select as _select
import socket as _socket
import tarfile import tarfile
from select import select as _original_select
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING from typing import TYPE_CHECKING
from unittest.mock import patch
import docker import docker
import docker.errors import docker.errors
from docker.utils.socket import consume_socket_output, demux_adaptor, frames_iter
from loguru import logger from loguru import logger
if TYPE_CHECKING: if TYPE_CHECKING:
@@ -33,6 +37,9 @@ class DockerSandbox:
ports: dict | None = None, ports: dict | None = None,
environment: dict[str, str] | None = None, environment: dict[str, str] | None = None,
working_dir: str | None = None, working_dir: str | None = None,
# TODO(security): default network_mode is "bridge", giving containers unrestricted
# outbound network access. For an untrusted-code sandbox this should default to
# "none" so callers must explicitly opt in to network access.
network_mode: str = "bridge", network_mode: str = "bridge",
cap_drop: list[str] | None = None, cap_drop: list[str] | None = None,
cap_add: list[str] | None = None, cap_add: list[str] | None = None,
@@ -52,6 +59,13 @@ class DockerSandbox:
self._client: docker.DockerClient = docker.from_env() self._client: docker.DockerClient = docker.from_env()
self._container: docker.models.containers.Container | None = None self._container: docker.models.containers.Container | None = None
def __enter__(self) -> "DockerSandbox":
    """Enter the ``with`` block and return this sandbox unchanged.

    The method only returns ``self``; it exists so the sandbox can be used
    as a context manager and have ``__exit__`` run the cleanup.
    """
    return self

def __exit__(self, *_: object) -> None:
    """Stop the sandbox and close the Docker client when the block exits.

    The exception details supplied by the ``with`` statement are ignored and
    ``None`` is returned, so an exception raised inside the block keeps
    propagating to the caller.
    """
    try:
        self.stop()
    finally:
        # Close the client even when stop() raises, so a failed shutdown
        # does not leave the client open.
        self._client.close()
# ------------------------------------------------------------------ # ------------------------------------------------------------------
# Lifecycle # Lifecycle
# ------------------------------------------------------------------ # ------------------------------------------------------------------
@@ -111,7 +125,13 @@ class DockerSandbox:
if self._security_opt is not None: if self._security_opt is not None:
run_kwargs["security_opt"] = self._security_opt run_kwargs["security_opt"] = self._security_opt
self._container = self._client.containers.run(self._image, **run_kwargs) try:
self._container = self._client.containers.run(self._image, **run_kwargs)
except docker.errors.ImageNotFound:
raise RuntimeError(
f"Image {self._image!r} not found locally. "
"Call build_image_if_missing() first, or pull/build the image manually."
) from None
self._container.reload() self._container.reload()
logger.info( logger.info(
@@ -163,6 +183,8 @@ class DockerSandbox:
raise RuntimeError("Sandbox container is not running.") raise RuntimeError("Sandbox container is not running.")
p = Path(path) p = Path(path)
if not p.is_absolute() and self._working_dir is not None:
p = Path(self._working_dir) / p
encoded = content.encode("utf-8") encoded = content.encode("utf-8")
self._container.exec_run(["mkdir", "-p", str(p.parent)]) self._container.exec_run(["mkdir", "-p", str(p.parent)])
@@ -182,30 +204,47 @@ class DockerSandbox:
def exec(self, command: str, timeout: int = 120) -> tuple[int, str]: def exec(self, command: str, timeout: int = 120) -> tuple[int, str]:
""" """
Run *command* inside the container via ``exec_run``. Run *command* inside the container via the low-level exec API.
Returns ``(exit_code, combined_stdout_stderr)``. Returns ``(exit_code, combined_stdout_stderr)``.
The call is wrapped in a thread so the *timeout* is enforced without Timeout is enforced via a socket-level timeout on the exec socket.
modifying the command string.
""" """
if self._container is None: if self._container is None:
return 1, "Sandbox container is not running." return 1, "Sandbox container is not running."
def _run() -> tuple[int, bytes]: create_kwargs: dict = {}
kwargs: dict = dict(demux=False) if self._working_dir is not None:
if self._working_dir is not None: create_kwargs["workdir"] = self._working_dir
kwargs["workdir"] = self._working_dir
exit_code, output = self._container.exec_run(
["bash", "-c", command], **kwargs
)
return exit_code, output or b""
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: # TODO(fragile): timeout enforcement relies on private docker-py internals
future = pool.submit(_run) # (frames_iter, demux_adaptor, consume_socket_output from docker.utils.socket)
try: # and monkey-patches select.select for the duration of the read — not thread-safe
exit_code, raw = future.result(timeout=timeout) # if multiple exec() calls run concurrently. Replace when docker-py adds native
return exit_code, raw.decode("utf-8", errors="replace") # per-call timeout support. See https://github.com/docker/docker-py/issues/2651
except concurrent.futures.TimeoutError: try:
return 124, f"Command timed out after {timeout}s" exec_id = self._client.api.exec_create(
except Exception as exc: self._container.id,
return 1, f"Error running command in container: {exc}" ["bash", "-c", command],
stdout=True,
stderr=True,
**create_kwargs,
)
sock = self._client.api.exec_start(exec_id["Id"], socket=True)
sock._sock.settimeout(timeout)
with patch.object(
_select,
"select",
new=lambda rlist, wlist, xlist: _original_select(
rlist, wlist, xlist, timeout
),
):
gen = (demux_adaptor(*frame) for frame in frames_iter(sock, tty=False))
stdout, stderr = consume_socket_output(gen, demux=True)
exit_code = self._client.api.exec_inspect(exec_id["Id"])["ExitCode"] or 0
output = (stdout or b"") + (stderr or b"")
return exit_code, output.decode("utf-8", errors="replace")
except _socket.timeout:
return 124, f"Command timed out after {timeout}s"
except Exception as exc:
return 1, f"Error running command in container: {exc}"

View File

@@ -28,8 +28,6 @@ def make_bash_tool(sandbox: "DockerSandbox") -> BaseTool:
Returns EXIT:<code> followed by combined stdout+stderr. Returns EXIT:<code> followed by combined stdout+stderr.
Large outputs are truncated to stay within token limits. Large outputs are truncated to stay within token limits.
Use for: running the target binary, processing PIN output,
compiling plugins, or any other shell operation during analysis.
""" """
logger.debug("Running inside sandbox: {}", command) logger.debug("Running inside sandbox: {}", command)
exit_code, output = sandbox.exec(command, timeout=timeout) exit_code, output = sandbox.exec(command, timeout=timeout)

View File

@@ -3,11 +3,14 @@ name = "docker-agent-sandbox"
version = "0.1.0" version = "0.1.0"
requires-python = ">=3.11" requires-python = ">=3.11"
dependencies = [ dependencies = [
"docker", "docker>=7.1.0",
"langchain-core", "langchain-core>=1.2.24",
"loguru", "loguru>=0.7.3",
] ]
[project.optional-dependencies]
dev = ["pytest"]
[build-system] [build-system]
requires = ["hatchling"] requires = ["hatchling"]
build-backend = "hatchling.build" build-backend = "hatchling.build"