diff options
Diffstat (limited to 'Omni/Agent/Worker.hs')
| -rw-r--r-- | Omni/Agent/Worker.hs | 107 |
1 files changed, 59 insertions, 48 deletions
diff --git a/Omni/Agent/Worker.hs b/Omni/Agent/Worker.hs index 38e29cb..bbdba9d 100644 --- a/Omni/Agent/Worker.hs +++ b/Omni/Agent/Worker.hs @@ -49,6 +49,18 @@ toMetadata pairs = let obj = Aeson.object [(AesonKey.fromText k, Aeson.String v) | (k, v) <- pairs] in TE.decodeUtf8 (BSL.toStrict (Aeson.encode obj)) +-- | Format guardrail result for logging +formatGuardrailResult :: Engine.GuardrailResult -> Text +formatGuardrailResult Engine.GuardrailOk = "OK" +formatGuardrailResult (Engine.GuardrailCostExceeded actual limit) = + "Cost exceeded: " <> tshow actual <> " cents (limit: " <> tshow limit <> ")" +formatGuardrailResult (Engine.GuardrailTokensExceeded actual limit) = + "Tokens exceeded: " <> tshow actual <> " (limit: " <> tshow limit <> ")" +formatGuardrailResult (Engine.GuardrailDuplicateToolCalls tool count) = + "Duplicate tool calls: " <> tool <> " called " <> tshow count <> " times" +formatGuardrailResult (Engine.GuardrailTestFailures count) = + "Test failures: " <> tshow count <> " failures" + runOnce :: Core.Worker -> Maybe Text -> IO () runOnce worker maybeTaskId = do -- Find work @@ -209,8 +221,8 @@ runWithEngine worker repo task = do -- Check for retry context maybeRetry <- TaskCore.getRetryContext (TaskCore.taskId task) - -- Read progress file if it exists - progressContent <- readProgressFile repo (TaskCore.taskId task) + -- Get progress from database (checkpoint events from previous sessions) + progressContent <- TaskCore.getProgressSummary (TaskCore.taskId task) -- Build the full prompt let ns = fromMaybe "." (TaskCore.taskNamespace task) @@ -291,16 +303,28 @@ runWithEngine worker repo task = do logEventText "Complete" "", Engine.engineOnError = \err -> do sayLog <| "[error] " <> err - logEventText "Error" err + logEventText "Error" err, + Engine.engineOnGuardrail = \guardrailResult -> do + let guardrailMsg = formatGuardrailResult guardrailResult + sayLog <| "[guardrail] " <> guardrailMsg + logEventJson "Guardrail" (Aeson.toJSON guardrailResult) } - -- Build Agent config - let agentCfg = + -- Build Agent config with guardrails + let guardrails = + Engine.Guardrails + { Engine.guardrailMaxCostCents = 200.0, + Engine.guardrailMaxTokens = 1000000, + Engine.guardrailMaxDuplicateToolCalls = 5, + Engine.guardrailMaxTestFailures = 3 + } + agentCfg = Engine.AgentConfig { Engine.agentModel = model, Engine.agentTools = Tools.allTools, Engine.agentSystemPrompt = systemPrompt, - Engine.agentMaxIterations = 100 + Engine.agentMaxIterations = 100, + Engine.agentGuardrails = guardrails } -- Run the agent @@ -316,40 +340,37 @@ runWithEngine worker repo task = do -- | Build the base prompt for the agent buildBasePrompt :: TaskCore.Task -> Text -> FilePath -> Text buildBasePrompt task ns repo = - "You are a Worker Agent.\n" + "You are an autonomous Worker Agent.\n" <> "Your goal is to implement the following task:\n\n" <> formatTask task <> "\n\nCRITICAL INSTRUCTIONS:\n" - <> "1. Read AGENTS.md and any existing progress file for this task.\n" - <> "2. Pick ONE specific change to implement (not everything at once).\n" - <> "3. Analyze the codebase to understand where to make that change.\n" - <> "4. Implement ONLY that one change.\n" - <> "5. BEFORE finishing, you MUST run: bild --test " + <> "1. Read AGENTS.md first to understand the codebase conventions.\n" + <> "2. Complete ONE logical change (e.g., update schema + call sites + tests).\n" + <> "3. Run 'bild --test " <> ns - <> "\n" - <> "6. Fix ALL errors from bild --test (including lint issues).\n" - <> "7. Keep running bild --test until it passes with no errors.\n" - <> "8. After tests pass, write progress to: _/llm/" - <> TaskCore.taskId task - <> "-progress.md\n" - <> "9. Do NOT update task status or manage git.\n" - <> "10. Only exit after bild --test passes and progress is saved.\n\n" - <> "INCREMENTAL WORKFLOW (IMPORTANT):\n" - <> "- DO NOT try to implement everything in one go\n" - <> "- Make ONE focused change, test it, save progress, then stop\n" - <> "- The task may be run multiple times to complete all changes\n" - <> "- Each session should leave the code in a clean, testable state\n" - <> "- If the task is already complete, just verify tests pass and note that in progress\n\n" - <> "IMPORTANT: The git commit will fail if lint finds issues.\n" - <> "You must fix all lint suggestions.\n\n" + <> "' ONCE after implementing.\n" + <> "4. If tests pass, you are DONE - stop immediately.\n" + <> "5. If tests fail, fix the issue and run tests again.\n" + <> "6. If tests fail 3 times on the same issue, STOP - the task will be marked for human review.\n" + <> "7. Do NOT update task status or manage git - the worker handles that.\n\n" + <> "AUTONOMOUS OPERATION (NO HUMAN IN LOOP):\n" + <> "- You are running autonomously without human intervention\n" + <> "- There is NO human to ask questions or get clarification from\n" + <> "- Make reasonable decisions based on the task description\n" + <> "- If something is truly ambiguous, implement the most straightforward interpretation\n" + <> "- Guardrails will stop you if you exceed cost/token budgets or make repeated mistakes\n\n" <> "BUILD SYSTEM NOTES:\n" - <> "- Running 'bild --test " + <> "- 'bild --test " <> ns - <> "' automatically tests ALL dependencies of that namespace\n" - <> "- You do NOT need to run bild --test on individual files - just the main namespace ONCE\n" - <> "- Once tests pass, do NOT re-run them unless you make more changes\n" - <> "- The 'lint' command will be run automatically during git commit via hooks\n" - <> "- You can run 'lint --fix' on changed files if needed, but it's optional\n\n" + <> "' tests ALL dependencies transitively - run it ONCE, not per-file\n" + <> "- Do NOT run bild --test on individual files separately\n" + <> "- Once tests pass, STOP - do not continue adding features or re-running tests\n" + <> "- Use 'lint --fix' for formatting issues (not hlint directly)\n\n" + <> "EFFICIENCY REQUIREMENTS:\n" + <> "- Do not repeat the same action multiple times\n" + <> "- Do not re-run passing tests\n" + <> "- Do not test files individually when namespace test covers them\n" + <> "- Aim to complete the task in under 50 tool calls\n\n" <> "Context:\n" <> "- Working directory: " <> Text.pack repo @@ -358,28 +379,18 @@ buildBasePrompt task ns repo = <> ns <> "\n" --- | Read progress file for a task if it exists -readProgressFile :: FilePath -> Text -> IO (Maybe Text) -readProgressFile repo taskId = do - let progressPath = repo </> "_" </> "llm" </> Text.unpack taskId <> "-progress.md" - exists <- Directory.doesFileExist progressPath - if exists - then Just </ readFile progressPath - else pure Nothing - -- | Build progress context prompt buildProgressPrompt :: Maybe Text -> Text buildProgressPrompt Nothing = "" buildProgressPrompt (Just progress) = - "\n\n## PROGRESS FROM PREVIOUS SESSIONS\n\n" - <> "This task has been worked on before. Here's what has been completed:\n\n" + "\n\n## PROGRESS FROM PREVIOUS SESSIONS (from database)\n\n" + <> "This task has been worked on before. Here are the checkpoint notes:\n\n" <> progress <> "\n\n" <> "IMPORTANT:\n" - <> "- Review this progress to understand what's already done\n" + <> "- Review these checkpoints to understand what's already done\n" <> "- Do NOT repeat work that's already completed\n" - <> "- Pick the NEXT logical step that hasn't been done yet\n" - <> "- Update the progress file after completing your change\n\n" + <> "- If the task appears complete, verify tests pass and exit\n\n" -- | Build retry context prompt buildRetryPrompt :: Maybe TaskCore.RetryContext -> Text |
