summary | refs | log | tree | commit | diff
path: root/Omni/Agent/Worker.hs
diff options
context:
space:
mode:
author Ben Sima <ben@bensima.com> 2025-12-01 10:02:12 -0500
committer Ben Sima <ben@bensima.com> 2025-12-01 10:02:12 -0500
commit fb019f46c3adcf772df2dacf688cc75c30ed6e8e (patch)
tree 1b365bac2cfd513852f73355893ffb9501ece18f /Omni/Agent/Worker.hs
parent ffeb13fb9f2543dfc9cdecf8ed6778226267b403 (diff)
Add guardrails and progress tracking to Jr agent
Implement runtime guardrails in Engine.hs:
- Cost budget limit (default 200 cents)
- Token budget limit (default 1M tokens)
- Duplicate tool call detection (same tool called N times)
- Test failure counting (bild --test failures)

Add database-backed progress tracking:
- Checkpoint events stored in agent_events table
- Progress summary retrieved on retry attempts
- Improved prompts emphasizing efficiency and autonomous operation

Worker.hs improvements:
- Uses guardrails configuration
- Reports guardrail violations via callbacks
- Better prompt structure for autonomous operation

Task-Id: t-203
Diffstat (limited to 'Omni/Agent/Worker.hs')
-rw-r--r-- Omni/Agent/Worker.hs 107
1 files changed, 59 insertions, 48 deletions
diff --git a/Omni/Agent/Worker.hs b/Omni/Agent/Worker.hs
index 38e29cb..bbdba9d 100644
--- a/Omni/Agent/Worker.hs
+++ b/Omni/Agent/Worker.hs
@@ -49,6 +49,18 @@ toMetadata pairs =
let obj = Aeson.object [(AesonKey.fromText k, Aeson.String v) | (k, v) <- pairs]
in TE.decodeUtf8 (BSL.toStrict (Aeson.encode obj))
+-- | Format guardrail result for logging
+formatGuardrailResult :: Engine.GuardrailResult -> Text
+formatGuardrailResult Engine.GuardrailOk = "OK"
+formatGuardrailResult (Engine.GuardrailCostExceeded actual limit) =
+ "Cost exceeded: " <> tshow actual <> " cents (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailTokensExceeded actual limit) =
+ "Tokens exceeded: " <> tshow actual <> " (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailDuplicateToolCalls tool count) =
+ "Duplicate tool calls: " <> tool <> " called " <> tshow count <> " times"
+formatGuardrailResult (Engine.GuardrailTestFailures count) =
+ "Test failures: " <> tshow count <> " failures"
+
runOnce :: Core.Worker -> Maybe Text -> IO ()
runOnce worker maybeTaskId = do
-- Find work
@@ -209,8 +221,8 @@ runWithEngine worker repo task = do
-- Check for retry context
maybeRetry <- TaskCore.getRetryContext (TaskCore.taskId task)
- -- Read progress file if it exists
- progressContent <- readProgressFile repo (TaskCore.taskId task)
+ -- Get progress from database (checkpoint events from previous sessions)
+ progressContent <- TaskCore.getProgressSummary (TaskCore.taskId task)
-- Build the full prompt
let ns = fromMaybe "." (TaskCore.taskNamespace task)
@@ -291,16 +303,28 @@ runWithEngine worker repo task = do
logEventText "Complete" "",
Engine.engineOnError = \err -> do
sayLog <| "[error] " <> err
- logEventText "Error" err
+ logEventText "Error" err,
+ Engine.engineOnGuardrail = \guardrailResult -> do
+ let guardrailMsg = formatGuardrailResult guardrailResult
+ sayLog <| "[guardrail] " <> guardrailMsg
+ logEventJson "Guardrail" (Aeson.toJSON guardrailResult)
}
- -- Build Agent config
- let agentCfg =
+ -- Build Agent config with guardrails
+ let guardrails =
+ Engine.Guardrails
+ { Engine.guardrailMaxCostCents = 200.0,
+ Engine.guardrailMaxTokens = 1000000,
+ Engine.guardrailMaxDuplicateToolCalls = 5,
+ Engine.guardrailMaxTestFailures = 3
+ }
+ agentCfg =
Engine.AgentConfig
{ Engine.agentModel = model,
Engine.agentTools = Tools.allTools,
Engine.agentSystemPrompt = systemPrompt,
- Engine.agentMaxIterations = 100
+ Engine.agentMaxIterations = 100,
+ Engine.agentGuardrails = guardrails
}
-- Run the agent
@@ -316,40 +340,37 @@ runWithEngine worker repo task = do
-- | Build the base prompt for the agent
buildBasePrompt :: TaskCore.Task -> Text -> FilePath -> Text
buildBasePrompt task ns repo =
- "You are a Worker Agent.\n"
+ "You are an autonomous Worker Agent.\n"
<> "Your goal is to implement the following task:\n\n"
<> formatTask task
<> "\n\nCRITICAL INSTRUCTIONS:\n"
- <> "1. Read AGENTS.md and any existing progress file for this task.\n"
- <> "2. Pick ONE specific change to implement (not everything at once).\n"
- <> "3. Analyze the codebase to understand where to make that change.\n"
- <> "4. Implement ONLY that one change.\n"
- <> "5. BEFORE finishing, you MUST run: bild --test "
+ <> "1. Read AGENTS.md first to understand the codebase conventions.\n"
+ <> "2. Complete ONE logical change (e.g., update schema + call sites + tests).\n"
+ <> "3. Run 'bild --test "
<> ns
- <> "\n"
- <> "6. Fix ALL errors from bild --test (including lint issues).\n"
- <> "7. Keep running bild --test until it passes with no errors.\n"
- <> "8. After tests pass, write progress to: _/llm/"
- <> TaskCore.taskId task
- <> "-progress.md\n"
- <> "9. Do NOT update task status or manage git.\n"
- <> "10. Only exit after bild --test passes and progress is saved.\n\n"
- <> "INCREMENTAL WORKFLOW (IMPORTANT):\n"
- <> "- DO NOT try to implement everything in one go\n"
- <> "- Make ONE focused change, test it, save progress, then stop\n"
- <> "- The task may be run multiple times to complete all changes\n"
- <> "- Each session should leave the code in a clean, testable state\n"
- <> "- If the task is already complete, just verify tests pass and note that in progress\n\n"
- <> "IMPORTANT: The git commit will fail if lint finds issues.\n"
- <> "You must fix all lint suggestions.\n\n"
+ <> "' ONCE after implementing.\n"
+ <> "4. If tests pass, you are DONE - stop immediately.\n"
+ <> "5. If tests fail, fix the issue and run tests again.\n"
+ <> "6. If tests fail 3 times on the same issue, STOP - the task will be marked for human review.\n"
+ <> "7. Do NOT update task status or manage git - the worker handles that.\n\n"
+ <> "AUTONOMOUS OPERATION (NO HUMAN IN LOOP):\n"
+ <> "- You are running autonomously without human intervention\n"
+ <> "- There is NO human to ask questions or get clarification from\n"
+ <> "- Make reasonable decisions based on the task description\n"
+ <> "- If something is truly ambiguous, implement the most straightforward interpretation\n"
+ <> "- Guardrails will stop you if you exceed cost/token budgets or make repeated mistakes\n\n"
<> "BUILD SYSTEM NOTES:\n"
- <> "- Running 'bild --test "
+ <> "- 'bild --test "
<> ns
- <> "' automatically tests ALL dependencies of that namespace\n"
- <> "- You do NOT need to run bild --test on individual files - just the main namespace ONCE\n"
- <> "- Once tests pass, do NOT re-run them unless you make more changes\n"
- <> "- The 'lint' command will be run automatically during git commit via hooks\n"
- <> "- You can run 'lint --fix' on changed files if needed, but it's optional\n\n"
+ <> "' tests ALL dependencies transitively - run it ONCE, not per-file\n"
+ <> "- Do NOT run bild --test on individual files separately\n"
+ <> "- Once tests pass, STOP - do not continue adding features or re-running tests\n"
+ <> "- Use 'lint --fix' for formatting issues (not hlint directly)\n\n"
+ <> "EFFICIENCY REQUIREMENTS:\n"
+ <> "- Do not repeat the same action multiple times\n"
+ <> "- Do not re-run passing tests\n"
+ <> "- Do not test files individually when namespace test covers them\n"
+ <> "- Aim to complete the task in under 50 tool calls\n\n"
<> "Context:\n"
<> "- Working directory: "
<> Text.pack repo
@@ -358,28 +379,18 @@ buildBasePrompt task ns repo =
<> ns
<> "\n"
--- | Read progress file for a task if it exists
-readProgressFile :: FilePath -> Text -> IO (Maybe Text)
-readProgressFile repo taskId = do
- let progressPath = repo </> "_" </> "llm" </> Text.unpack taskId <> "-progress.md"
- exists <- Directory.doesFileExist progressPath
- if exists
- then Just </ readFile progressPath
- else pure Nothing
-
-- | Build progress context prompt
buildProgressPrompt :: Maybe Text -> Text
buildProgressPrompt Nothing = ""
buildProgressPrompt (Just progress) =
- "\n\n## PROGRESS FROM PREVIOUS SESSIONS\n\n"
- <> "This task has been worked on before. Here's what has been completed:\n\n"
+ "\n\n## PROGRESS FROM PREVIOUS SESSIONS (from database)\n\n"
+ <> "This task has been worked on before. Here are the checkpoint notes:\n\n"
<> progress
<> "\n\n"
<> "IMPORTANT:\n"
- <> "- Review this progress to understand what's already done\n"
+ <> "- Review these checkpoints to understand what's already done\n"
<> "- Do NOT repeat work that's already completed\n"
- <> "- Pick the NEXT logical step that hasn't been done yet\n"
- <> "- Update the progress file after completing your change\n\n"
+ <> "- If the task appears complete, verify tests pass and exit\n\n"
-- | Build retry context prompt
buildRetryPrompt :: Maybe TaskCore.RetryContext -> Text