n8n/packages/@n8n/instance-ai/evaluations/computer-use/data/6.1-curl-connectivity.json
Bernhard Wittmann b445221c6a
feat: Computer-use evaluation harness (no-changelog) (#29797)
Co-authored-by: Elias Meire <elias@meire.dev>
2026-05-12 08:36:12 +00:00

23 lines
705 B
JSON

{
  "id": "6.1-curl-connectivity",
  "category": "shell",
  "prompt": "Can you run a curl command to test if I can reach the OpenAI API from my machine?",
  "budgets": {
    "maxToolCalls": 10,
    "maxDurationMs": 120000
  },
  "graders": [
    {
      "type": "trace.mustCallMcpServer",
      "server": "computer-use"
    },
    {
      "type": "trace.mustCallTool",
      "name": "shell_execute"
    },
    {
      "type": "trace.mustNotLoop",
      "maxRepeatedCall": 3
    },
    {
      "type": "trace.budget",
      "maxToolCalls": 10,
      "maxToolResultTokensEst": 20000,
      "maxSingleToolResultTokensEst": 10000
    },
    {
      "type": "trace.finalTextMatches",
      "anyOf": [
        "openai|api\\.openai\\.com",
        "200|401|reachable|connected"
      ]
    }
  ],
  "tags": [
    "shell",
    "regression"
  ]
}