summaryrefslogtreecommitdiff
path: root/Omni/Agent/Subagent/Coder.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Omni/Agent/Subagent/Coder.hs')
-rw-r--r--Omni/Agent/Subagent/Coder.hs415
1 files changed, 415 insertions, 0 deletions
diff --git a/Omni/Agent/Subagent/Coder.hs b/Omni/Agent/Subagent/Coder.hs
new file mode 100644
index 0000000..0f5a274
--- /dev/null
+++ b/Omni/Agent/Subagent/Coder.hs
@@ -0,0 +1,415 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | Hardened Coder Subagent with automatic init/verify/commit phases.
+--
+-- Based on Anthropic's "Effective Harnesses for Long-Running Agents":
+-- https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- Key features:
+-- - Init phase: Check git status, build namespace to detect broken state
+-- - Work phase: Standard agent loop with tools
+-- - Verify phase: Run lint --fix, build namespace, run tests
+-- - Commit phase: Git commit with namespace-prefixed message
+-- - Recovery phase: Revert changes on failure
+--
+-- : out omni-agent-subagent-coder
+-- : dep aeson
+-- : dep async
+-- : dep stm
+-- : dep time
+module Omni.Agent.Subagent.Coder
+ ( -- * Types
+ CoderConfig (..),
+ CoderContext (..),
+ InitResult (..),
+ VerifyResult (..),
+
+ -- * Main Entry Point
+ runCoderSubagent,
+
+ -- * Phases (exported for testing)
+ runCoderInit,
+ runCoderVerify,
+ runCoderCommit,
+ runCoderRecovery,
+
+ -- * Helpers
+ defaultCoderConfig,
+ coderSystemPrompt,
+ coderTools,
+
+ -- * Testing
+ main,
+ test,
+ )
+where
+
+import Alpha
+import Data.Aeson ((.=))
+import qualified Data.Aeson as Aeson
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Clock
+import qualified Omni.Agent.Engine as Engine
+import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Tools as Tools
+import qualified Omni.Test as Test
+import qualified System.Exit as Exit
+import qualified System.Process as Process
+
+main :: IO ()
+main = Test.run test
+
+test :: Test.Tree
+test =
+ Test.group
+ "Omni.Agent.Subagent.Coder"
+ [ Test.unit "CoderConfig has sensible defaults" <| do
+ let cfg = defaultCoderConfig "Omni/Task" "Implement feature X"
+ coderNamespace cfg Test.@=? "Omni/Task"
+ coderTimeout cfg Test.@=? 1200,
+ Test.unit "coderSystemPrompt includes namespace" <| do
+ let cfg = defaultCoderConfig "Biz/Cloud" "Fix the bug"
+ let prompt = coderSystemPrompt cfg Nothing
+ Text.isInfixOf "Biz/Cloud" prompt Test.@=? True,
+ Test.unit "runBashCapture returns output" <| do
+ (code, out, _) <- runBashCapture "echo hello"
+ code Test.@=? Exit.ExitSuccess
+ Text.strip out Test.@=? "hello"
+ ]
+
+-- | Configuration for a Coder subagent run
+data CoderConfig = CoderConfig
+ { coderNamespace :: Text,
+ coderTask :: Text,
+ coderContext :: Maybe Text,
+ coderModel :: Text,
+ coderTimeout :: Int,
+ coderMaxCost :: Double,
+ coderMaxTokens :: Int,
+ coderMaxIterations :: Int,
+ coderMaxVerifyRetries :: Int
+ }
+ deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON CoderConfig
+
+instance Aeson.FromJSON CoderConfig
+
+-- | Runtime context gathered during init phase
+data CoderContext = CoderContext
+ { ctxGitBranch :: Text,
+ ctxInitiallyBroken :: Bool,
+ ctxChangedFiles :: [Text]
+ }
+ deriving (Show, Eq, Generic)
+
+-- | Result of the init phase
+data InitResult
+ = InitSuccess CoderContext
+ | InitBrokenState Text CoderContext
+ | InitFailed Text
+ deriving (Show, Eq)
+
+-- | Result of the verify phase
+data VerifyResult
+ = VerifySuccess
+ | VerifyLintFailed Text
+ | VerifyTypecheckFailed Text
+ | VerifyBuildFailed Text
+ | VerifyTestFailed Text
+ deriving (Show, Eq)
+
+defaultCoderConfig :: Text -> Text -> CoderConfig
+defaultCoderConfig namespace task =
+ CoderConfig
+ { coderNamespace = namespace,
+ coderTask = task,
+ coderContext = Nothing,
+ coderModel = "anthropic/claude-sonnet-4",
+ coderTimeout = 1200,
+ coderMaxCost = 200.0,
+ coderMaxTokens = 300000,
+ coderMaxIterations = 30,
+ coderMaxVerifyRetries = 3
+ }
+
+-- | Run a bash command and capture output
+runBashCapture :: Text -> IO (Exit.ExitCode, Text, Text)
+runBashCapture cmd = do
+ (code, out, err) <- Process.readProcessWithExitCode "bash" ["-c", Text.unpack cmd] ""
+ pure (code, Text.pack out, Text.pack err)
+
+-- | Phase 1: Initialize - check environment, detect broken state
+runCoderInit :: CoderConfig -> IO InitResult
+runCoderInit cfg = do
+ -- Check git status
+ (_, branchOut, _) <- runBashCapture "git branch --show-current"
+ let gitBranch = Text.strip branchOut
+
+ -- Try to build the namespace to detect broken state
+ (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1 || true")
+
+ let isBroken = buildCode /= Exit.ExitSuccess
+ let ctx =
+ CoderContext
+ { ctxGitBranch = gitBranch,
+ ctxInitiallyBroken = isBroken,
+ ctxChangedFiles = []
+ }
+
+ if isBroken
+ then pure <| InitBrokenState ("Build failed: " <> Text.take 500 (buildOut <> buildErr)) ctx
+ else pure <| InitSuccess ctx
+
+-- | Phase 3: Verify - run lint --fix, typecheck, build, test
+--
+-- Verification order:
+-- 1. lint --fix: Auto-apply formatting and simple fixes
+-- 2. typecheck.sh: Run type checker (works for Python and other languages)
+-- 3. bild: Build/compile the namespace
+-- 4. bild --test: Run tests
+runCoderVerify :: CoderConfig -> IO VerifyResult
+runCoderVerify cfg = do
+ -- First, run lint --fix to auto-apply formatting
+ _ <- runBashCapture ("lint --fix " <> coderNamespace cfg <> ".hs 2>&1")
+
+ -- Run typecheck.sh (handles Python mypy, etc.)
+ (typecheckCode, typecheckOut, typecheckErr) <-
+ runBashCapture ("typecheck.sh " <> coderNamespace cfg <> " 2>&1 || true")
+ -- Note: typecheck.sh may not exist for all namespaces, so we use || true
+ -- and only fail if it explicitly returns non-zero with actual errors
+ let typecheckFailed =
+ typecheckCode
+ /= Exit.ExitSuccess
+ && not (Text.null (Text.strip typecheckOut))
+ && not ("No such file" `Text.isInfixOf` typecheckErr)
+
+ if typecheckFailed
+ then pure <| VerifyTypecheckFailed (Text.take 1000 (typecheckOut <> typecheckErr))
+ else do
+ -- Build the namespace
+ (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1")
+ case buildCode of
+ Exit.ExitSuccess -> do
+ -- Run tests if they exist
+ (testCode, testOut, testErr) <- runBashCapture ("bild --test " <> coderNamespace cfg <> " 2>&1")
+ case testCode of
+ Exit.ExitSuccess -> pure VerifySuccess
+ _ -> pure <| VerifyTestFailed (Text.take 1000 (testOut <> testErr))
+ _ -> pure <| VerifyBuildFailed (Text.take 1000 (buildOut <> buildErr))
+
+-- | Phase 4: Commit - git commit with message
+runCoderCommit :: CoderConfig -> Text -> IO (Either Text ())
+runCoderCommit cfg summary = do
+ let commitMsg = coderNamespace cfg <> ": " <> Text.take 72 summary
+
+ -- Stage all changes
+ (addCode, _, addErr) <- runBashCapture "git add -A"
+ case addCode of
+ Exit.ExitSuccess -> do
+ -- Commit
+ (commitCode, _, commitErr) <- runBashCapture ("git commit -m " <> quoteShell commitMsg)
+ case commitCode of
+ Exit.ExitSuccess -> pure (Right ())
+ _ -> pure <| Left ("git commit failed: " <> commitErr)
+ _ -> pure <| Left ("git add failed: " <> addErr)
+
+-- | Phase 5: Recovery - revert changes on failure
+runCoderRecovery :: IO ()
+runCoderRecovery = do
+ -- Revert uncommitted changes
+ _ <- runBashCapture "git checkout -- ."
+ _ <- runBashCapture "git clean -fd"
+ pure ()
+
+-- | Quote a string for shell
+quoteShell :: Text -> Text
+quoteShell t = "'" <> Text.replace "'" "'\\''" t <> "'"
+
+-- | Tools available to the Coder subagent
+coderTools :: [Engine.Tool]
+coderTools =
+ [ Tools.readFileTool,
+ Tools.writeFileTool,
+ Tools.editFileTool,
+ Tools.runBashTool,
+ Tools.searchCodebaseTool,
+ Tools.searchAndReadTool
+ ]
+
+-- | System prompt for the Coder subagent
+coderSystemPrompt :: CoderConfig -> Maybe Text -> Text
+coderSystemPrompt cfg maybeInitState =
+ Text.unlines
+ [ "You are a specialized Coder subagent.",
+ "",
+ "## Your Task",
+ coderTask cfg,
+ "",
+ "## Namespace",
+ "You are working in namespace: " <> coderNamespace cfg,
+ "Focus your changes on files in this namespace.",
+ "",
+ maybe "" (\ctx -> "## Context\n" <> ctx <> "\n") (coderContext cfg),
+ maybe "" (\st -> "## Current State\n" <> st <> "\n") maybeInitState,
+ "## Guidelines",
+ "1. Make incremental changes - edit one file at a time",
+ "2. After each edit, consider running `lint " <> coderNamespace cfg <> ".hs` to catch issues early",
+ "3. Use `bild " <> coderNamespace cfg <> "` to check your work compiles",
+ "4. If tests exist, run `bild --test " <> coderNamespace cfg <> "` to verify",
+ "5. Keep changes focused on the task - don't refactor unrelated code",
+ "6. If you find bugs unrelated to your task, note them but don't fix them",
+ "",
+ "## Completion",
+ "When you believe you're done:",
+ "1. Ensure all edits are complete",
+ "2. The harness will automatically run lint --fix, build, and tests",
+ "3. If verification fails, you'll be asked to fix the issues",
+ "4. Provide a brief summary of what you changed",
+ "",
+ "## Important",
+ "- Do NOT commit - the harness handles git operations",
+ "- Focus on making the code changes correctly"
+ ]
+
+-- | Format verify error for agent retry prompt
+formatVerifyError :: VerifyResult -> Text
+formatVerifyError VerifySuccess = ""
+formatVerifyError (VerifyLintFailed err) = "LINT ERRORS:\n" <> err
+formatVerifyError (VerifyTypecheckFailed err) = "TYPE ERRORS:\n" <> err
+formatVerifyError (VerifyBuildFailed err) = "BUILD ERRORS:\n" <> err
+formatVerifyError (VerifyTestFailed err) = "TEST FAILURES:\n" <> err
+
+-- | Main entry point: run the Coder subagent with all phases
+--
+-- This implements Anthropic's pattern of looping back to the agent on
+-- verification failure. The agent gets up to coderMaxVerifyRetries attempts
+-- to fix verification errors before we give up.
+--
+-- See: https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- NOTE: We may want to change this behavior in the future. For some errors,
+-- immediate failure might be better than retry loops. But for now, we follow
+-- Anthropic's recommendation of letting the agent attempt to fix its mistakes.
+runCoderSubagent :: Text -> CoderConfig -> IO (Either Text Aeson.Value)
+runCoderSubagent openRouterKey cfg = do
+ startTime <- Clock.getCurrentTime
+
+ -- Phase 1: Init
+ initResult <- runCoderInit cfg
+ (ctx, initState) <- case initResult of
+ InitFailed err -> pure (Left err, Nothing)
+ InitBrokenState msg ctxVal -> pure (Right ctxVal, Just ("WARNING: Starting from broken state.\n" <> msg))
+ InitSuccess ctxVal -> pure (Right ctxVal, Nothing)
+
+ case ctx of
+ Left err -> pure <| Left ("Init failed: " <> err)
+ Right _context -> do
+ result <- runWorkVerifyLoop openRouterKey cfg initState 0 0 0.0
+ case result of
+ Left err -> do
+ runCoderRecovery
+ pure <| Left err
+ Right (finalMsg, totalTokens, totalCost, totalIters) -> do
+ -- Phase 4: Commit
+ let summary = Text.take 100 finalMsg
+ commitResult <- runCoderCommit cfg summary
+ case commitResult of
+ Left err -> do
+ runCoderRecovery
+ pure <| Left ("Commit failed: " <> err)
+ Right () -> do
+ endTime <- Clock.getCurrentTime
+ let duration = round (Clock.diffUTCTime endTime startTime)
+ pure
+ <| Right
+ <| Aeson.object
+ [ "status" .= ("success" :: Text),
+ "namespace" .= coderNamespace cfg,
+ "summary" .= finalMsg,
+ "tokens_used" .= totalTokens,
+ "cost_cents" .= totalCost,
+ "duration_seconds" .= (duration :: Int),
+ "iterations" .= totalIters
+ ]
+
+-- | Work-verify loop with retries on verification failure
+--
+-- Per Anthropic's guidance, we loop back to the agent when verification fails,
+-- giving it a chance to fix its mistakes. This is more effective than
+-- immediately failing, as the agent often can fix simple issues.
+runWorkVerifyLoop ::
+ Text ->
+ CoderConfig ->
+ Maybe Text ->
+ Int ->
+ Int ->
+ Double ->
+ IO (Either Text (Text, Int, Double, Int))
+runWorkVerifyLoop openRouterKey cfg initState verifyAttempt accTokens accCost = do
+ let isRetry = verifyAttempt > 0
+ let maxRetries = coderMaxVerifyRetries cfg
+
+ -- Build the prompt - include error context if this is a retry
+ let baseTask = coderTask cfg
+ let taskPrompt =
+ if isRetry
+ then baseTask <> "\n\n## VERIFICATION FAILED - PLEASE FIX\nThis is retry attempt " <> tshow verifyAttempt <> " of " <> tshow maxRetries <> "."
+ else baseTask
+
+ -- Phase 2: Work (run the agent)
+ let provider = Provider.defaultOpenRouter openRouterKey (coderModel cfg)
+ let systemPrompt = coderSystemPrompt cfg initState
+ let guardrails =
+ Engine.Guardrails
+ { Engine.guardrailMaxCostCents = coderMaxCost cfg - accCost,
+ Engine.guardrailMaxTokens = coderMaxTokens cfg - accTokens,
+ Engine.guardrailMaxDuplicateToolCalls = 20,
+ Engine.guardrailMaxTestFailures = 5,
+ Engine.guardrailMaxEditFailures = 10
+ }
+ let agentConfig =
+ Engine.AgentConfig
+ { Engine.agentModel = coderModel cfg,
+ Engine.agentTools = coderTools,
+ Engine.agentSystemPrompt = systemPrompt,
+ Engine.agentMaxIterations = coderMaxIterations cfg,
+ Engine.agentGuardrails = guardrails
+ }
+ let engineConfig = Engine.defaultEngineConfig
+
+ let timeoutMicros = coderTimeout cfg * 1000000
+ agentResult <-
+ race
+ (threadDelay timeoutMicros)
+ (Engine.runAgentWithProvider engineConfig provider agentConfig taskPrompt)
+
+ case agentResult of
+ Left () -> pure <| Left "Coder subagent timed out"
+ Right (Left err) -> pure <| Left ("Coder agent failed: " <> err)
+ Right (Right result) -> do
+ let newTokens = accTokens + Engine.resultTotalTokens result
+ let newCost = accCost + Engine.resultTotalCost result
+ let newIters = Engine.resultIterations result
+
+ -- Phase 3: Verify
+ verifyResult <- runCoderVerify cfg
+ case verifyResult of
+ VerifySuccess ->
+ pure <| Right (Engine.resultFinalMessage result, newTokens, newCost, newIters)
+ failure
+ | verifyAttempt >= maxRetries -> do
+ -- Max retries exceeded, give up
+ let errMsg = "Verification failed after " <> tshow (verifyAttempt + 1) <> " attempts: " <> formatVerifyError failure
+ pure <| Left errMsg
+ | otherwise -> do
+ -- Retry: loop back to agent with error context
+ -- NOTE: This follows Anthropic's pattern. We may want to change
+ -- this for certain error types in the future.
+ let errorContext = formatVerifyError failure
+ let retryState = case initState of
+ Just st -> Just (st <> "\n\n" <> errorContext)
+ Nothing -> Just errorContext
+ runWorkVerifyLoop openRouterKey cfg retryState (verifyAttempt + 1) newTokens newCost