diff --git a/docker_agent_sandbox/sandbox.py b/docker_agent_sandbox/sandbox.py
index 374e93b..585cf42 100644
--- a/docker_agent_sandbox/sandbox.py
+++ b/docker_agent_sandbox/sandbox.py
@@ -2,14 +2,18 @@
 
 from __future__ import annotations
 
-import concurrent.futures
 import io
+import select as _select
+import socket as _socket
 import tarfile
+from select import select as _original_select
 from pathlib import Path
 from typing import TYPE_CHECKING
+from unittest.mock import patch
 
 import docker
 import docker.errors
+from docker.utils.socket import consume_socket_output, demux_adaptor, frames_iter
 from loguru import logger
 
 if TYPE_CHECKING:
@@ -33,6 +37,9 @@ class DockerSandbox:
         ports: dict | None = None,
         environment: dict[str, str] | None = None,
         working_dir: str | None = None,
+        # TODO(security): default network_mode is "bridge", giving containers unrestricted
+        # outbound network access. For an untrusted-code sandbox this should default to
+        # "none" so callers must explicitly opt in to network access.
         network_mode: str = "bridge",
         cap_drop: list[str] | None = None,
         cap_add: list[str] | None = None,
@@ -52,6 +59,21 @@ class DockerSandbox:
         self._client: docker.DockerClient = docker.from_env()
         self._container: docker.models.containers.Container | None = None
 
+    def __enter__(self) -> "DockerSandbox":
+        """Return the sandbox itself; entering does not start the container."""
+        return self
+
+    def __exit__(self, *_: object) -> None:
+        """
+        Stop the sandbox and close the Docker client on leaving ``with``.
+
+        The client is closed even when ``stop()`` raises.
+        """
+        try:
+            self.stop()
+        finally:
+            self._client.close()
+
     # ------------------------------------------------------------------
     # Lifecycle
     # ------------------------------------------------------------------
@@ -111,7 +125,13 @@ class DockerSandbox:
         if self._security_opt is not None:
             run_kwargs["security_opt"] = self._security_opt
 
-        self._container = self._client.containers.run(self._image, **run_kwargs)
+        try:
+            self._container = self._client.containers.run(self._image, **run_kwargs)
+        except docker.errors.ImageNotFound:
+            raise RuntimeError(
+                f"Image {self._image!r} not found locally. "
+                "Call build_image_if_missing() first, or pull/build the image manually."
+            ) from None
         self._container.reload()
 
         logger.info(
@@ -163,6 +183,8 @@ class DockerSandbox:
             raise RuntimeError("Sandbox container is not running.")
 
         p = Path(path)
+        if not p.is_absolute() and self._working_dir is not None:
+            p = Path(self._working_dir) / p
         encoded = content.encode("utf-8")
 
         self._container.exec_run(["mkdir", "-p", str(p.parent)])
@@ -182,30 +204,79 @@ class DockerSandbox:
 
     def exec(self, command: str, timeout: int = 120) -> tuple[int, str]:
         """
-        Run *command* inside the container via ``exec_run``.
+        Run *command* inside the container via the low-level exec API.
 
         Returns ``(exit_code, combined_stdout_stderr)``.
-        The call is wrapped in a thread so the *timeout* is enforced without
-        modifying the command string.
+        Stdout and stderr chunks are concatenated in the order they arrive.
+        *timeout* is a wall-clock budget in seconds for reading the output;
+        when it runs out the method returns exit code 124 (the command is not
+        explicitly terminated).  Other failures return exit code 1.
         """
         if self._container is None:
             return 1, "Sandbox container is not running."
 
-        def _run() -> tuple[int, bytes]:
-            kwargs: dict = dict(demux=False)
-            if self._working_dir is not None:
-                kwargs["workdir"] = self._working_dir
-            exit_code, output = self._container.exec_run(
-                ["bash", "-c", command], **kwargs
-            )
-            return exit_code, output or b""
+        create_kwargs: dict = {}
+        if self._working_dir is not None:
+            create_kwargs["workdir"] = self._working_dir
 
-        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
-            future = pool.submit(_run)
-            try:
-                exit_code, raw = future.result(timeout=timeout)
-                return exit_code, raw.decode("utf-8", errors="replace")
-            except concurrent.futures.TimeoutError:
-                return 124, f"Command timed out after {timeout}s"
-            except Exception as exc:
-                return 1, f"Error running command in container: {exc}"
+        # ``time`` is only needed by this method, so it is imported locally.
+        import time
+
+        deadline = time.monotonic() + timeout
+
+        def _recv_exact(raw, size: int) -> bytes:
+            """Read up to *size* bytes; a short result means end of stream."""
+            buf = bytearray()
+            while len(buf) < size:
+                remaining = deadline - time.monotonic()
+                if remaining <= 0:
+                    raise _socket.timeout("exec deadline exceeded")
+                # Shrink the socket timeout on every read so the budget covers
+                # the whole command rather than restarting for each chunk.
+                raw.settimeout(remaining)
+                chunk = raw.recv(min(size - len(buf), 65536))
+                if not chunk:
+                    break
+                buf += chunk
+            return bytes(buf)
+
+        # The deadline is tracked here instead of monkey-patching
+        # ``select.select``, which is process-global and so unsafe when several
+        # exec() calls run concurrently.
+        sock = None
+        try:
+            exec_id = self._client.api.exec_create(
+                self._container.id,
+                ["bash", "-c", command],
+                stdout=True,
+                stderr=True,
+                **create_kwargs,
+            )
+            sock = self._client.api.exec_start(exec_id["Id"], socket=True)
+            # Read from the wrapped socket when there is one (``_sock``);
+            # otherwise use the returned object directly.
+            raw = getattr(sock, "_sock", sock)
+
+            # Without a TTY the stream is multiplexed: each frame is an 8-byte
+            # header (stream id, 3 padding bytes, big-endian uint32 length)
+            # followed by that many payload bytes.
+            chunks: list[bytes] = []
+            while True:
+                header = _recv_exact(raw, 8)
+                if len(header) < 8:
+                    break  # stream ended: no further frames
+                payload_len = int.from_bytes(header[4:8], "big")
+                chunks.append(_recv_exact(raw, payload_len))
+
+            exit_code = self._client.api.exec_inspect(exec_id["Id"])["ExitCode"] or 0
+            return exit_code, b"".join(chunks).decode("utf-8", errors="replace")
+        except _socket.timeout:
+            return 124, f"Command timed out after {timeout}s"
+        except Exception as exc:
+            return 1, f"Error running command in container: {exc}"
+        finally:
+            if sock is not None:
+                try:
+                    sock.close()
+                except OSError:
+                    pass  # best-effort cleanup; the result is already decided
diff --git a/docker_agent_sandbox/tools/bash.py b/docker_agent_sandbox/tools/bash.py
index f6b97ac..b1ffa63 100644
--- a/docker_agent_sandbox/tools/bash.py
+++ b/docker_agent_sandbox/tools/bash.py
@@ -28,8 +28,6 @@ def make_bash_tool(sandbox: "DockerSandbox") -> BaseTool:
 
         Returns EXIT:<code> followed by combined stdout+stderr.
         Large outputs are truncated to stay within token limits.
-        Use for: running the target binary, processing PIN output,
-        compiling plugins, or any other shell operation during analysis.
         """
         logger.debug("Running inside sandbox: {}", command)
         exit_code, output = sandbox.exec(command, timeout=timeout)
diff --git a/pyproject.toml b/pyproject.toml
index ff4a9d6..10613e3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,11 +3,14 @@ name = "docker-agent-sandbox"
 version = "0.1.0"
 requires-python = ">=3.11"
 dependencies = [
-    "docker",
-    "langchain-core",
-    "loguru",
+    "docker>=7.1.0",
+    "langchain-core>=1.2.24",
+    "loguru>=0.7.3",
 ]
 
+[project.optional-dependencies]
+dev = ["pytest"]
+
 [build-system]
 requires = ["hatchling"]
 build-backend = "hatchling.build"