feat: Initial library extraction from PIN LLM benchmark

DockerSandbox + LangChain file/shell tools extracted into a standalone package.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-04-02 11:47:44 +02:00
commit 80c2f9b159
17 changed files with 758 additions and 0 deletions

View File

@@ -0,0 +1,38 @@
"""bash.py tool for executing shell commands inside the sandbox."""
from __future__ import annotations
from typing import TYPE_CHECKING
from langchain_core.tools import BaseTool, tool
from loguru import logger
from docker_agent_sandbox.tools._utils import truncate_output
if TYPE_CHECKING:
from docker_agent_sandbox.sandbox import DockerSandbox
def make_bash_tool(sandbox: "DockerSandbox") -> BaseTool:
    """
    Build a LangChain tool that runs shell commands through *sandbox*.

    The returned tool is a closure over *sandbox*: every invocation is
    forwarded to ``sandbox.exec`` and the result is rendered as a single
    string for the model.

    As stated by the original author, the model's working directory is the
    sandbox root, and paths are the same on the host (through the bind
    mount) and inside the container.

    Args:
        sandbox: The sandbox whose ``exec`` method executes the command.

    Returns:
        The ``bash`` tool produced by the ``@tool`` decorator.
    """

    # NOTE(review): the ``@tool`` decorator is expected to expose the inner
    # docstring, the function name and the parameter names to the model as
    # the tool description/schema, so they are kept verbatim here.
    @tool
    def bash(command: str, timeout: int = 120) -> str:
        """
        Execute a shell command in the sandbox container.
        Returns EXIT:<code> followed by combined stdout+stderr.
        Large outputs are truncated to stay within token limits.
        Use for: running the target binary, processing PIN output,
        compiling plugins, or any other shell operation during analysis.
        """
        logger.debug("Running inside sandbox: {}", command)

        # ``sandbox.exec`` yields a (status, output) pair.
        status, raw_output = sandbox.exec(command, timeout=timeout)

        # Shorten the captured output before embedding it in the reply.
        shortened = truncate_output(raw_output)
        return f"EXIT:{status}\n{shortened}"

    return bash