summaryrefslogtreecommitdiff
path: root/Omni/Agent
diff options
context:
space:
mode:
Diffstat (limited to 'Omni/Agent')
-rw-r--r--Omni/Agent/Engine.hs303
-rw-r--r--Omni/Agent/Worker.hs107
2 files changed, 296 insertions, 114 deletions
diff --git a/Omni/Agent/Engine.hs b/Omni/Agent/Engine.hs
index 7da7fa5..a2a24a1 100644
--- a/Omni/Agent/Engine.hs
+++ b/Omni/Agent/Engine.hs
@@ -19,6 +19,8 @@ module Omni.Agent.Engine
EngineConfig (..),
AgentConfig (..),
AgentResult (..),
+ Guardrails (..),
+ GuardrailResult (..),
Message (..),
Role (..),
ToolCall (..),
@@ -31,6 +33,7 @@ module Omni.Agent.Engine
defaultLLM,
defaultEngineConfig,
defaultAgentConfig,
+ defaultGuardrails,
chat,
runAgent,
main,
@@ -41,6 +44,7 @@ where
import Alpha
import Data.Aeson ((.!=), (.:), (.:?), (.=))
import qualified Data.Aeson as Aeson
+import qualified Data.Aeson.KeyMap as KeyMap
import qualified Data.ByteString.Lazy as BL
import qualified Data.CaseInsensitive as CI
import qualified Data.Map.Strict as Map
@@ -164,7 +168,57 @@ test =
forM_ roles <| \role ->
case Aeson.decode (Aeson.encode role) of
Nothing -> Test.assertFailure ("Failed to decode Role: " <> show role)
- Just decoded -> decoded Test.@=? role
+ Just decoded -> decoded Test.@=? role,
+ Test.unit "defaultGuardrails has sensible defaults" <| do
+ guardrailMaxCostCents defaultGuardrails Test.@=? 100.0
+ guardrailMaxTokens defaultGuardrails Test.@=? 500000
+ guardrailMaxDuplicateToolCalls defaultGuardrails Test.@=? 3
+ guardrailMaxTestFailures defaultGuardrails Test.@=? 3,
+ Test.unit "checkCostGuardrail detects exceeded budget" <| do
+ let g = defaultGuardrails {guardrailMaxCostCents = 50.0}
+ checkCostGuardrail g 60.0 Test.@=? GuardrailCostExceeded 60.0 50.0
+ checkCostGuardrail g 40.0 Test.@=? GuardrailOk,
+ Test.unit "checkTokenGuardrail detects exceeded budget" <| do
+ let g = defaultGuardrails {guardrailMaxTokens = 1000}
+ checkTokenGuardrail g 1500 Test.@=? GuardrailTokensExceeded 1500 1000
+ checkTokenGuardrail g 500 Test.@=? GuardrailOk,
+ Test.unit "checkDuplicateGuardrail detects repeated calls" <| do
+ let g = defaultGuardrails {guardrailMaxDuplicateToolCalls = 3}
+ counts = Map.fromList [("bash", 3), ("read_file", 1)]
+ case checkDuplicateGuardrail g counts of
+ GuardrailDuplicateToolCalls name count -> do
+ name Test.@=? "bash"
+ count Test.@=? 3
+ _ -> Test.assertFailure "Expected GuardrailDuplicateToolCalls"
+ checkDuplicateGuardrail g (Map.fromList [("bash", 2)]) Test.@=? GuardrailOk,
+ Test.unit "checkTestFailureGuardrail detects failures" <| do
+ let g = defaultGuardrails {guardrailMaxTestFailures = 3}
+ checkTestFailureGuardrail g 3 Test.@=? GuardrailTestFailures 3
+ checkTestFailureGuardrail g 2 Test.@=? GuardrailOk,
+ Test.unit "updateToolCallCounts accumulates correctly" <| do
+ let tc1 = ToolCall "1" "function" (FunctionCall "bash" "{}")
+ tc2 = ToolCall "2" "function" (FunctionCall "bash" "{}")
+ tc3 = ToolCall "3" "function" (FunctionCall "read_file" "{}")
+ counts = updateToolCallCounts Map.empty [tc1, tc2, tc3]
+ Map.lookup "bash" counts Test.@=? Just 2
+ Map.lookup "read_file" counts Test.@=? Just 1,
+ Test.unit "Guardrails JSON roundtrip" <| do
+ let g = Guardrails 75.0 100000 5 4
+ case Aeson.decode (Aeson.encode g) of
+ Nothing -> Test.assertFailure "Failed to decode Guardrails"
+ Just decoded -> decoded Test.@=? g,
+ Test.unit "GuardrailResult JSON roundtrip" <| do
+ let results =
+ [ GuardrailOk,
+ GuardrailCostExceeded 100.0 50.0,
+ GuardrailTokensExceeded 2000 1000,
+ GuardrailDuplicateToolCalls "bash" 5,
+ GuardrailTestFailures 3
+ ]
+ forM_ results <| \r ->
+ case Aeson.decode (Aeson.encode r) of
+ Nothing -> Test.assertFailure ("Failed to decode GuardrailResult: " <> show r)
+ Just decoded -> decoded Test.@=? r
]
data Tool = Tool
@@ -249,16 +303,51 @@ data AgentConfig = AgentConfig
{ agentModel :: Text,
agentTools :: [Tool],
agentSystemPrompt :: Text,
- agentMaxIterations :: Int
+ agentMaxIterations :: Int,
+ agentGuardrails :: Guardrails
}
+data Guardrails = Guardrails
+ { guardrailMaxCostCents :: Double,
+ guardrailMaxTokens :: Int,
+ guardrailMaxDuplicateToolCalls :: Int,
+ guardrailMaxTestFailures :: Int
+ }
+ deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON Guardrails
+
+instance Aeson.FromJSON Guardrails
+
+data GuardrailResult
+ = GuardrailOk
+ | GuardrailCostExceeded Double Double
+ | GuardrailTokensExceeded Int Int
+ | GuardrailDuplicateToolCalls Text Int
+ | GuardrailTestFailures Int
+ deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON GuardrailResult
+
+instance Aeson.FromJSON GuardrailResult
+
+defaultGuardrails :: Guardrails
+defaultGuardrails =
+ Guardrails
+ { guardrailMaxCostCents = 100.0,
+ guardrailMaxTokens = 500000,
+ guardrailMaxDuplicateToolCalls = 3,
+ guardrailMaxTestFailures = 3
+ }
+
defaultAgentConfig :: AgentConfig
defaultAgentConfig =
AgentConfig
{ agentModel = "gpt-4",
agentTools = [],
agentSystemPrompt = "You are a helpful assistant.",
- agentMaxIterations = 10
+ agentMaxIterations = 10,
+ agentGuardrails = defaultGuardrails
}
data EngineConfig = EngineConfig
@@ -269,7 +358,8 @@ data EngineConfig = EngineConfig
engineOnAssistant :: Text -> IO (),
engineOnToolResult :: Text -> Bool -> Text -> IO (),
engineOnComplete :: IO (),
- engineOnError :: Text -> IO ()
+ engineOnError :: Text -> IO (),
+ engineOnGuardrail :: GuardrailResult -> IO ()
}
defaultEngineConfig :: EngineConfig
@@ -282,7 +372,8 @@ defaultEngineConfig =
engineOnAssistant = \_ -> pure (),
engineOnToolResult = \_ _ _ -> pure (),
engineOnComplete = pure (),
- engineOnError = \_ -> pure ()
+ engineOnError = \_ -> pure (),
+ engineOnGuardrail = \_ -> pure ()
}
data AgentResult = AgentResult
@@ -511,72 +602,138 @@ runAgent engineCfg agentCfg userPrompt = do
initialMessages = [systemMsg, userMsg]
engineOnActivity engineCfg "Starting agent loop"
- loop llm tools toolMap initialMessages 0 0 0
+ loop llm tools toolMap initialMessages 0 0 0 0.0 Map.empty 0
where
maxIter = agentMaxIterations agentCfg
-
- loop :: LLM -> [Tool] -> Map.Map Text Tool -> [Message] -> Int -> Int -> Int -> IO (Either Text AgentResult)
- loop llm tools' toolMap msgs iteration totalCalls totalTokens
+ guardrails' = agentGuardrails agentCfg
+
+ loop ::
+ LLM ->
+ [Tool] ->
+ Map.Map Text Tool ->
+ [Message] ->
+ Int ->
+ Int ->
+ Int ->
+ Double ->
+ Map.Map Text Int ->
+ Int ->
+ IO (Either Text AgentResult)
+ loop llm tools' toolMap msgs iteration totalCalls totalTokens totalCost toolCallCounts testFailures
| iteration >= maxIter = do
let errMsg = "Max iterations (" <> tshow maxIter <> ") reached"
engineOnError engineCfg errMsg
pure <| Left errMsg
| otherwise = do
- engineOnActivity engineCfg <| "Iteration " <> tshow (iteration + 1)
- result <- chatWithUsage llm tools' msgs
- case result of
- Left err -> do
- engineOnError engineCfg err
- pure (Left err)
- Right chatRes -> do
- let msg = chatMessage chatRes
- tokens = maybe 0 usageTotalTokens (chatUsage chatRes)
- -- Use actual cost from API response when available
- -- OpenRouter returns cost in dollars, convert to cents
- cost = case chatUsage chatRes +> usageCost of
- Just actualCost -> actualCost * 100
- Nothing -> estimateCost (llmModel llm) tokens
- engineOnCost engineCfg tokens cost
- let newTokens = totalTokens + tokens
- let assistantText = msgContent msg
- unless (Text.null assistantText)
- <| engineOnAssistant engineCfg assistantText
- case msgToolCalls msg of
- Nothing -> do
- engineOnActivity engineCfg "Agent completed"
- engineOnComplete engineCfg
- pure
- <| Right
- <| AgentResult
- { resultFinalMessage = msgContent msg,
- resultToolCallCount = totalCalls,
- resultIterations = iteration + 1,
- resultTotalCost = estimateTotalCost (llmModel llm) newTokens,
- resultTotalTokens = newTokens
- }
- Just [] -> do
- engineOnActivity engineCfg "Agent completed (empty tool calls)"
- engineOnComplete engineCfg
- pure
- <| Right
- <| AgentResult
- { resultFinalMessage = msgContent msg,
- resultToolCallCount = totalCalls,
- resultIterations = iteration + 1,
- resultTotalCost = estimateTotalCost (llmModel llm) newTokens,
- resultTotalTokens = newTokens
- }
- Just tcs -> do
- toolResults <- executeToolCalls engineCfg toolMap tcs
- let newMsgs = msgs <> [msg] <> toolResults
- newCalls = totalCalls + length tcs
- loop llm tools' toolMap newMsgs (iteration + 1) newCalls newTokens
+ let guardrailViolation = findGuardrailViolation guardrails' totalCost totalTokens toolCallCounts testFailures
+ case guardrailViolation of
+ Just (g, errMsg) -> do
+ engineOnGuardrail engineCfg g
+ pure <| Left errMsg
+ Nothing -> do
+ engineOnActivity engineCfg <| "Iteration " <> tshow (iteration + 1)
+ result <- chatWithUsage llm tools' msgs
+ case result of
+ Left err -> do
+ engineOnError engineCfg err
+ pure (Left err)
+ Right chatRes -> do
+ let msg = chatMessage chatRes
+ tokens = maybe 0 usageTotalTokens (chatUsage chatRes)
+ cost = case chatUsage chatRes +> usageCost of
+ Just actualCost -> actualCost * 100
+ Nothing -> estimateCost (llmModel llm) tokens
+ engineOnCost engineCfg tokens cost
+ let newTokens = totalTokens + tokens
+ newCost = totalCost + cost
+ let assistantText = msgContent msg
+ unless (Text.null assistantText)
+ <| engineOnAssistant engineCfg assistantText
+ case msgToolCalls msg of
+ Nothing -> do
+ engineOnActivity engineCfg "Agent completed"
+ engineOnComplete engineCfg
+ pure
+ <| Right
+ <| AgentResult
+ { resultFinalMessage = msgContent msg,
+ resultToolCallCount = totalCalls,
+ resultIterations = iteration + 1,
+ resultTotalCost = newCost,
+ resultTotalTokens = newTokens
+ }
+ Just [] -> do
+ engineOnActivity engineCfg "Agent completed (empty tool calls)"
+ engineOnComplete engineCfg
+ pure
+ <| Right
+ <| AgentResult
+ { resultFinalMessage = msgContent msg,
+ resultToolCallCount = totalCalls,
+ resultIterations = iteration + 1,
+ resultTotalCost = newCost,
+ resultTotalTokens = newTokens
+ }
+ Just tcs -> do
+ (toolResults, newTestFailures) <- executeToolCallsWithTracking engineCfg toolMap tcs testFailures
+ let newMsgs = msgs <> [msg] <> toolResults
+ newCalls = totalCalls + length tcs
+ newToolCallCounts = updateToolCallCounts toolCallCounts tcs
+ loop llm tools' toolMap newMsgs (iteration + 1) newCalls newTokens newCost newToolCallCounts newTestFailures
+
+checkCostGuardrail :: Guardrails -> Double -> GuardrailResult
+checkCostGuardrail g cost
+ | cost > guardrailMaxCostCents g = GuardrailCostExceeded cost (guardrailMaxCostCents g)
+ | otherwise = GuardrailOk
+
+checkTokenGuardrail :: Guardrails -> Int -> GuardrailResult
+checkTokenGuardrail g tokens
+ | tokens > guardrailMaxTokens g = GuardrailTokensExceeded tokens (guardrailMaxTokens g)
+ | otherwise = GuardrailOk
+
+checkDuplicateGuardrail :: Guardrails -> Map.Map Text Int -> GuardrailResult
+checkDuplicateGuardrail g counts =
+ let maxAllowed = guardrailMaxDuplicateToolCalls g
+ violations = [(name, count) | (name, count) <- Map.toList counts, count >= maxAllowed]
+ in case violations of
+ ((name, count) : _) -> GuardrailDuplicateToolCalls name count
+ [] -> GuardrailOk
+
+checkTestFailureGuardrail :: Guardrails -> Int -> GuardrailResult
+checkTestFailureGuardrail g failures
+ | failures >= guardrailMaxTestFailures g = GuardrailTestFailures failures
+ | otherwise = GuardrailOk
+
+updateToolCallCounts :: Map.Map Text Int -> [ToolCall] -> Map.Map Text Int
+updateToolCallCounts =
+ foldr (\tc m -> Map.insertWith (+) (fcName (tcFunction tc)) 1 m)
+
+findGuardrailViolation :: Guardrails -> Double -> Int -> Map.Map Text Int -> Int -> Maybe (GuardrailResult, Text)
+findGuardrailViolation g cost tokens toolCallCounts testFailures =
+ case checkCostGuardrail g cost of
+ r@(GuardrailCostExceeded actual limit) ->
+ Just (r, "Guardrail: cost budget exceeded (" <> tshow actual <> "/" <> tshow limit <> " cents)")
+ _ -> case checkTokenGuardrail g tokens of
+ r@(GuardrailTokensExceeded actual limit) ->
+ Just (r, "Guardrail: token budget exceeded (" <> tshow actual <> "/" <> tshow limit <> " tokens)")
+ _ -> case checkDuplicateGuardrail g toolCallCounts of
+ r@(GuardrailDuplicateToolCalls tool count) ->
+ Just (r, "Guardrail: duplicate tool calls (" <> tool <> " called " <> tshow count <> " times)")
+ _ -> case checkTestFailureGuardrail g testFailures of
+ r@(GuardrailTestFailures count) ->
+ Just (r, "Guardrail: too many test failures (" <> tshow count <> ")")
+ _ -> Nothing
buildToolMap :: [Tool] -> Map.Map Text Tool
buildToolMap = Map.fromList <. map (\t -> (toolName t, t))
-executeToolCalls :: EngineConfig -> Map.Map Text Tool -> [ToolCall] -> IO [Message]
-executeToolCalls engineCfg toolMap = traverse executeSingle
+executeToolCallsWithTracking :: EngineConfig -> Map.Map Text Tool -> [ToolCall] -> Int -> IO ([Message], Int)
+executeToolCallsWithTracking engineCfg toolMap tcs initialFailures = do
+ results <- traverse executeSingle tcs
+ let msgs = map fst results
+ failureDeltas = map snd results
+ totalNewFailures = initialFailures + sum failureDeltas
+ pure (msgs, totalNewFailures)
where
executeSingle tc = do
let name = fcName (tcFunction tc)
@@ -588,18 +745,35 @@ executeToolCalls engineCfg toolMap = traverse executeSingle
Nothing -> do
let errMsg = "Tool not found: " <> name
engineOnToolResult engineCfg name False errMsg
- pure <| Message ToolRole errMsg Nothing (Just callId)
+ pure (Message ToolRole errMsg Nothing (Just callId), 0)
Just tool -> do
case Aeson.decode (BL.fromStrict (TE.encodeUtf8 argsText)) of
Nothing -> do
let errMsg = "Invalid JSON arguments: " <> argsText
engineOnToolResult engineCfg name False errMsg
- pure <| Message ToolRole errMsg Nothing (Just callId)
+ pure (Message ToolRole errMsg Nothing (Just callId), 0)
Just args -> do
resultValue <- toolExecute tool args
let resultText = TE.decodeUtf8 (BL.toStrict (Aeson.encode resultValue))
+ isTestCall = name == "bash" && ("bild --test" `Text.isInfixOf` argsText || "bild -t" `Text.isInfixOf` argsText)
+ isTestFailure = isTestCall && isFailureResult resultValue
+ failureDelta = if isTestFailure then 1 else 0
engineOnToolResult engineCfg name True resultText
- pure <| Message ToolRole resultText Nothing (Just callId)
+ pure (Message ToolRole resultText Nothing (Just callId), failureDelta)
+
+ isFailureResult :: Aeson.Value -> Bool
+ isFailureResult (Aeson.Object obj) =
+ case KeyMap.lookup "exit_code" obj of
+ Just (Aeson.Number n) -> n /= 0
+ _ -> False
+ isFailureResult (Aeson.String s) =
+ "error"
+ `Text.isInfixOf` Text.toLower s
+ || "failed"
+ `Text.isInfixOf` Text.toLower s
+ || "FAILED"
+ `Text.isInfixOf` s
+ isFailureResult _ = False
-- | Estimate cost in cents from token count
estimateCost :: Text -> Int -> Double
@@ -609,6 +783,3 @@ estimateCost model tokens
| "gpt-4" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
| "claude" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
| otherwise = fromIntegral tokens / 100000
-
-estimateTotalCost :: Text -> Int -> Double
-estimateTotalCost = estimateCost
diff --git a/Omni/Agent/Worker.hs b/Omni/Agent/Worker.hs
index 38e29cb..bbdba9d 100644
--- a/Omni/Agent/Worker.hs
+++ b/Omni/Agent/Worker.hs
@@ -49,6 +49,18 @@ toMetadata pairs =
let obj = Aeson.object [(AesonKey.fromText k, Aeson.String v) | (k, v) <- pairs]
in TE.decodeUtf8 (BSL.toStrict (Aeson.encode obj))
+-- | Format guardrail result for logging
+formatGuardrailResult :: Engine.GuardrailResult -> Text
+formatGuardrailResult Engine.GuardrailOk = "OK"
+formatGuardrailResult (Engine.GuardrailCostExceeded actual limit) =
+ "Cost exceeded: " <> tshow actual <> " cents (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailTokensExceeded actual limit) =
+ "Tokens exceeded: " <> tshow actual <> " (limit: " <> tshow limit <> ")"
+formatGuardrailResult (Engine.GuardrailDuplicateToolCalls tool count) =
+ "Duplicate tool calls: " <> tool <> " called " <> tshow count <> " times"
+formatGuardrailResult (Engine.GuardrailTestFailures count) =
+ "Test failures: " <> tshow count <> " failures"
+
runOnce :: Core.Worker -> Maybe Text -> IO ()
runOnce worker maybeTaskId = do
-- Find work
@@ -209,8 +221,8 @@ runWithEngine worker repo task = do
-- Check for retry context
maybeRetry <- TaskCore.getRetryContext (TaskCore.taskId task)
- -- Read progress file if it exists
- progressContent <- readProgressFile repo (TaskCore.taskId task)
+ -- Get progress from database (checkpoint events from previous sessions)
+ progressContent <- TaskCore.getProgressSummary (TaskCore.taskId task)
-- Build the full prompt
let ns = fromMaybe "." (TaskCore.taskNamespace task)
@@ -291,16 +303,28 @@ runWithEngine worker repo task = do
logEventText "Complete" "",
Engine.engineOnError = \err -> do
sayLog <| "[error] " <> err
- logEventText "Error" err
+ logEventText "Error" err,
+ Engine.engineOnGuardrail = \guardrailResult -> do
+ let guardrailMsg = formatGuardrailResult guardrailResult
+ sayLog <| "[guardrail] " <> guardrailMsg
+ logEventJson "Guardrail" (Aeson.toJSON guardrailResult)
}
- -- Build Agent config
- let agentCfg =
+ -- Build Agent config with guardrails
+ let guardrails =
+ Engine.Guardrails
+ { Engine.guardrailMaxCostCents = 200.0,
+ Engine.guardrailMaxTokens = 1000000,
+ Engine.guardrailMaxDuplicateToolCalls = 5,
+ Engine.guardrailMaxTestFailures = 3
+ }
+ agentCfg =
Engine.AgentConfig
{ Engine.agentModel = model,
Engine.agentTools = Tools.allTools,
Engine.agentSystemPrompt = systemPrompt,
- Engine.agentMaxIterations = 100
+ Engine.agentMaxIterations = 100,
+ Engine.agentGuardrails = guardrails
}
-- Run the agent
@@ -316,40 +340,37 @@ runWithEngine worker repo task = do
-- | Build the base prompt for the agent
buildBasePrompt :: TaskCore.Task -> Text -> FilePath -> Text
buildBasePrompt task ns repo =
- "You are a Worker Agent.\n"
+ "You are an autonomous Worker Agent.\n"
<> "Your goal is to implement the following task:\n\n"
<> formatTask task
<> "\n\nCRITICAL INSTRUCTIONS:\n"
- <> "1. Read AGENTS.md and any existing progress file for this task.\n"
- <> "2. Pick ONE specific change to implement (not everything at once).\n"
- <> "3. Analyze the codebase to understand where to make that change.\n"
- <> "4. Implement ONLY that one change.\n"
- <> "5. BEFORE finishing, you MUST run: bild --test "
+ <> "1. Read AGENTS.md first to understand the codebase conventions.\n"
+ <> "2. Complete ONE logical change (e.g., update schema + call sites + tests).\n"
+ <> "3. Run 'bild --test "
<> ns
- <> "\n"
- <> "6. Fix ALL errors from bild --test (including lint issues).\n"
- <> "7. Keep running bild --test until it passes with no errors.\n"
- <> "8. After tests pass, write progress to: _/llm/"
- <> TaskCore.taskId task
- <> "-progress.md\n"
- <> "9. Do NOT update task status or manage git.\n"
- <> "10. Only exit after bild --test passes and progress is saved.\n\n"
- <> "INCREMENTAL WORKFLOW (IMPORTANT):\n"
- <> "- DO NOT try to implement everything in one go\n"
- <> "- Make ONE focused change, test it, save progress, then stop\n"
- <> "- The task may be run multiple times to complete all changes\n"
- <> "- Each session should leave the code in a clean, testable state\n"
- <> "- If the task is already complete, just verify tests pass and note that in progress\n\n"
- <> "IMPORTANT: The git commit will fail if lint finds issues.\n"
- <> "You must fix all lint suggestions.\n\n"
+ <> "' ONCE after implementing.\n"
+ <> "4. If tests pass, you are DONE - stop immediately.\n"
+ <> "5. If tests fail, fix the issue and run tests again.\n"
+ <> "6. If tests fail 3 times on the same issue, STOP - the task will be marked for human review.\n"
+ <> "7. Do NOT update task status or manage git - the worker handles that.\n\n"
+ <> "AUTONOMOUS OPERATION (NO HUMAN IN LOOP):\n"
+ <> "- You are running autonomously without human intervention\n"
+ <> "- There is NO human to ask questions or get clarification from\n"
+ <> "- Make reasonable decisions based on the task description\n"
+ <> "- If something is truly ambiguous, implement the most straightforward interpretation\n"
+ <> "- Guardrails will stop you if you exceed cost/token budgets or make repeated mistakes\n\n"
<> "BUILD SYSTEM NOTES:\n"
- <> "- Running 'bild --test "
+ <> "- 'bild --test "
<> ns
- <> "' automatically tests ALL dependencies of that namespace\n"
- <> "- You do NOT need to run bild --test on individual files - just the main namespace ONCE\n"
- <> "- Once tests pass, do NOT re-run them unless you make more changes\n"
- <> "- The 'lint' command will be run automatically during git commit via hooks\n"
- <> "- You can run 'lint --fix' on changed files if needed, but it's optional\n\n"
+ <> "' tests ALL dependencies transitively - run it ONCE, not per-file\n"
+ <> "- Do NOT run bild --test on individual files separately\n"
+ <> "- Once tests pass, STOP - do not continue adding features or re-running tests\n"
+ <> "- Use 'lint --fix' for formatting issues (not hlint directly)\n\n"
+ <> "EFFICIENCY REQUIREMENTS:\n"
+ <> "- Do not repeat the same action multiple times\n"
+ <> "- Do not re-run passing tests\n"
+ <> "- Do not test files individually when namespace test covers them\n"
+ <> "- Aim to complete the task in under 50 tool calls\n\n"
<> "Context:\n"
<> "- Working directory: "
<> Text.pack repo
@@ -358,28 +379,18 @@ buildBasePrompt task ns repo =
<> ns
<> "\n"
--- | Read progress file for a task if it exists
-readProgressFile :: FilePath -> Text -> IO (Maybe Text)
-readProgressFile repo taskId = do
- let progressPath = repo </> "_" </> "llm" </> Text.unpack taskId <> "-progress.md"
- exists <- Directory.doesFileExist progressPath
- if exists
- then Just </ readFile progressPath
- else pure Nothing
-
-- | Build progress context prompt
buildProgressPrompt :: Maybe Text -> Text
buildProgressPrompt Nothing = ""
buildProgressPrompt (Just progress) =
- "\n\n## PROGRESS FROM PREVIOUS SESSIONS\n\n"
- <> "This task has been worked on before. Here's what has been completed:\n\n"
+ "\n\n## PROGRESS FROM PREVIOUS SESSIONS (from database)\n\n"
+ <> "This task has been worked on before. Here are the checkpoint notes:\n\n"
<> progress
<> "\n\n"
<> "IMPORTANT:\n"
- <> "- Review this progress to understand what's already done\n"
+ <> "- Review these checkpoints to understand what's already done\n"
<> "- Do NOT repeat work that's already completed\n"
- <> "- Pick the NEXT logical step that hasn't been done yet\n"
- <> "- Update the progress file after completing your change\n\n"
+ <> "- If the task appears complete, verify tests pass and exit\n\n"
-- | Build retry context prompt
buildRetryPrompt :: Maybe TaskCore.RetryContext -> Text