Add guardrails and progress tracking to Jr agent

Implement runtime guardrails in Engine.hs:
- Cost budget limit (default 200 cents)
- Token budget limit (default 1M tokens)
- Duplicate tool call detection (same tool called N times)
- Test failure counting (bild --test failures)

Add database-backed progress tracking:
- Checkpoint events stored in agent_events table
- Progress summary retrieved on retry attempts
- Improved prompts emphasizing efficiency and autonomous operation

Worker.hs improvements:
- Uses guardrails configuration
- Reports guardrail violations via callbacks
- Better prompt structure for autonomous operation

Task-Id: t-203
author: Ben Sima <ben@bensima.com> 2025-12-01 10:02:12 -0500
committer: Ben Sima <ben@bensima.com> 2025-12-01 10:02:12 -0500
commit: fb019f46c3adcf772df2dacf688cc75c30ed6e8e (patch)
tree: 1b365bac2cfd513852f73355893ffb9501ece18f /Omni/Agent/Worker.hs
parent: ffeb13fb9f2543dfc9cdecf8ed6778226267b403 (diff)
1 files changed, 59 insertions, 48 deletions
diff --git a/Omni/Agent/Worker.hs b/Omni/Agent/Worker.hs
index 38e29cb..bbdba9d 100644
--- a/Omni/Agent/Worker.hs
+++ b/Omni/Agent/Worker.hs
@@ -49,6 +49,18 @@ toMetadata pairs =
   let obj = Aeson.object [(AesonKey.fromText k, Aeson.String v) | (k, v) <- pairs]
    in TE.decodeUtf8 (BSL.toStrict (Aeson.encode obj))
 
+-- | Render a guardrail result as a one-line, human-readable message for logging
+formatGuardrailResult :: Engine.GuardrailResult -> Text
+formatGuardrailResult Engine.GuardrailOk = "OK"
+formatGuardrailResult (Engine.GuardrailCostExceeded actual limit) =
+  "Cost exceeded: " <> tshow actual <> " cents (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailTokensExceeded actual limit) =
+  "Tokens exceeded: " <> tshow actual <> " (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailDuplicateToolCalls tool count) =
+  "Duplicate tool calls: " <> tool <> " called " <> tshow count <> " times"
+formatGuardrailResult (Engine.GuardrailTestFailures count) =
+  "Test failures: " <> tshow count <> " failures"
+
 runOnce :: Core.Worker -> Maybe Text -> IO ()
 runOnce worker maybeTaskId = do
   -- Find work
@@ -209,8 +221,8 @@ runWithEngine worker repo task = do
       -- Check for retry context
       maybeRetry <- TaskCore.getRetryContext (TaskCore.taskId task)
 
-      -- Read progress file if it exists
-      progressContent <- readProgressFile repo (TaskCore.taskId task)
+      -- Get progress from database (checkpoint events from previous sessions)
+      progressContent <- TaskCore.getProgressSummary (TaskCore.taskId task)
 
       -- Build the full prompt
       let ns = fromMaybe "." (TaskCore.taskNamespace task)
@@ -291,16 +303,28 @@ runWithEngine worker repo task = do
                   logEventText "Complete" "",
                 Engine.engineOnError = \err -> do
                   sayLog <| "[error] " <> err
-                  logEventText "Error" err
+                  logEventText "Error" err,
+                Engine.engineOnGuardrail = \guardrailResult -> do
+                  let guardrailMsg = formatGuardrailResult guardrailResult
+                  sayLog <| "[guardrail] " <> guardrailMsg
+                  logEventJson "Guardrail" (Aeson.toJSON guardrailResult)
               }
 
-      -- Build Agent config
-      let agentCfg =
+      -- Build Agent config with guardrails
+      let guardrails =
+            Engine.Guardrails
+              { Engine.guardrailMaxCostCents = 200.0,
+                Engine.guardrailMaxTokens = 1000000,
+                Engine.guardrailMaxDuplicateToolCalls = 5,
+                Engine.guardrailMaxTestFailures = 3
+              }
+          agentCfg =
             Engine.AgentConfig
               { Engine.agentModel = model,
                 Engine.agentTools = Tools.allTools,
                 Engine.agentSystemPrompt = systemPrompt,
-                Engine.agentMaxIterations = 100
+                Engine.agentMaxIterations = 100,
+                Engine.agentGuardrails = guardrails
               }
 
       -- Run the agent
@@ -316,40 +340,37 @@ runWithEngine worker repo task = do
 -- | Build the base prompt for the agent
 buildBasePrompt :: TaskCore.Task -> Text -> FilePath -> Text
 buildBasePrompt task ns repo =
-  "You are a Worker Agent.\n"
+  "You are an autonomous Worker Agent.\n"
     <> "Your goal is to implement the following task:\n\n"
     <> formatTask task
     <> "\n\nCRITICAL INSTRUCTIONS:\n"
-    <> "1. Read AGENTS.md and any existing progress file for this task.\n"
-    <> "2. Pick ONE specific change to implement (not everything at once).\n"
-    <> "3. Analyze the codebase to understand where to make that change.\n"
-    <> "4. Implement ONLY that one change.\n"
-    <> "5. BEFORE finishing, you MUST run: bild --test "
+    <> "1. Read AGENTS.md first to understand the codebase conventions.\n"
+    <> "2. Complete ONE logical change (e.g., update schema + call sites + tests).\n"
+    <> "3. Run 'bild --test "
     <> ns
-    <> "\n"
-    <> "6. Fix ALL errors from bild --test (including lint issues).\n"
-    <> "7. Keep running bild --test until it passes with no errors.\n"
-    <> "8. After tests pass, write progress to: _/llm/"
-    <> TaskCore.taskId task
-    <> "-progress.md\n"
-    <> "9. Do NOT update task status or manage git.\n"
-    <> "10. Only exit after bild --test passes and progress is saved.\n\n"
-    <> "INCREMENTAL WORKFLOW (IMPORTANT):\n"
-    <> "- DO NOT try to implement everything in one go\n"
-    <> "- Make ONE focused change, test it, save progress, then stop\n"
-    <> "- The task may be run multiple times to complete all changes\n"
-    <> "- Each session should leave the code in a clean, testable state\n"
-    <> "- If the task is already complete, just verify tests pass and note that in progress\n\n"
-    <> "IMPORTANT: The git commit will fail if lint finds issues.\n"
-    <> "You must fix all lint suggestions.\n\n"
+    <> "' ONCE after implementing.\n"
+    <> "4. If tests pass, you are DONE - stop immediately.\n"
+    <> "5. If tests fail, fix the issue and run tests again.\n"
+    <> "6. If tests fail 3 times on the same issue, STOP - the task will be marked for human review.\n"
+    <> "7. Do NOT update task status or manage git - the worker handles that.\n\n"
+    <> "AUTONOMOUS OPERATION (NO HUMAN IN LOOP):\n"
+    <> "- You are running autonomously without human intervention\n"
+    <> "- There is NO human to ask questions or get clarification from\n"
+    <> "- Make reasonable decisions based on the task description\n"
+    <> "- If something is truly ambiguous, implement the most straightforward interpretation\n"
+    <> "- Guardrails will stop you if you exceed cost/token budgets or make repeated mistakes\n\n"
     <> "BUILD SYSTEM NOTES:\n"
-    <> "- Running 'bild --test "
+    <> "- 'bild --test "
     <> ns
-    <> "' automatically tests ALL dependencies of that namespace\n"
-    <> "- You do NOT need to run bild --test on individual files - just the main namespace ONCE\n"
-    <> "- Once tests pass, do NOT re-run them unless you make more changes\n"
-    <> "- The 'lint' command will be run automatically during git commit via hooks\n"
-    <> "- You can run 'lint --fix' on changed files if needed, but it's optional\n\n"
+    <> "' tests ALL dependencies transitively - run it ONCE, not per-file\n"
+    <> "- Do NOT run bild --test on individual files separately\n"
+    <> "- Once tests pass, STOP - do not continue adding features or re-running tests\n"
+    <> "- Use 'lint --fix' for formatting issues (not hlint directly)\n\n"
+    <> "EFFICIENCY REQUIREMENTS:\n"
+    <> "- Do not repeat the same action multiple times\n"
+    <> "- Do not re-run passing tests\n"
+    <> "- Do not test files individually when namespace test covers them\n"
+    <> "- Aim to complete the task in under 50 tool calls\n\n"
     <> "Context:\n"
     <> "- Working directory: "
     <> Text.pack repo
@@ -358,28 +379,18 @@ buildBasePrompt task ns repo =
     <> ns
     <> "\n"
 
--- | Read progress file for a task if it exists
-readProgressFile :: FilePath -> Text -> IO (Maybe Text)
-readProgressFile repo taskId = do
-  let progressPath = repo </> "_" </> "llm" </> Text.unpack taskId <> "-progress.md"
-  exists <- Directory.doesFileExist progressPath
-  if exists
-    then Just </ readFile progressPath
-    else pure Nothing
-
 -- | Build progress context prompt
 buildProgressPrompt :: Maybe Text -> Text
 buildProgressPrompt Nothing = ""
 buildProgressPrompt (Just progress) =
-  "\n\n## PROGRESS FROM PREVIOUS SESSIONS\n\n"
-    <> "This task has been worked on before. Here's what has been completed:\n\n"
+  "\n\n## PROGRESS FROM PREVIOUS SESSIONS (from database)\n\n"
+    <> "This task has been worked on before. Here are the checkpoint notes:\n\n"
     <> progress
     <> "\n\n"
     <> "IMPORTANT:\n"
-    <> "- Review this progress to understand what's already done\n"
+    <> "- Review these checkpoints to understand what's already done\n"
     <> "- Do NOT repeat work that's already completed\n"
-    <> "- Pick the NEXT logical step that hasn't been done yet\n"
-    <> "- Update the progress file after completing your change\n\n"
+    <> "- If the task appears complete, verify tests pass and exit\n\n"
 
 -- | Build retry context prompt
 buildRetryPrompt :: Maybe TaskCore.RetryContext -> Text
author	Ben Sima <ben@bensima.com>	2025-12-01 10:02:12 -0500
committer	Ben Sima <ben@bensima.com>	2025-12-01 10:02:12 -0500
commit	fb019f46c3adcf772df2dacf688cc75c30ed6e8e (patch)
tree	1b365bac2cfd513852f73355893ffb9501ece18f /Omni/Agent/Worker.hs
parent	ffeb13fb9f2543dfc9cdecf8ed6778226267b403 (diff)