diff options
Diffstat (limited to 'Omni')
| -rw-r--r-- | Omni/Agent/Subagent.hs | 240 | ||||
| -rw-r--r-- | Omni/Agent/Subagent/Coder.hs | 415 | ||||
| -rw-r--r-- | Omni/Agent/Telegram.hs | 36 |
3 files changed, 660 insertions, 31 deletions
diff --git a/Omni/Agent/Subagent.hs b/Omni/Agent/Subagent.hs index 3278e4c..06ef938 100644 --- a/Omni/Agent/Subagent.hs +++ b/Omni/Agent/Subagent.hs @@ -93,6 +93,7 @@ import qualified Data.UUID.V4 import qualified Omni.Agent.AuditLog as AuditLog import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Provider as Provider +import qualified Omni.Agent.Subagent.Coder as Coder import qualified Omni.Agent.Tools as Tools import qualified Omni.Agent.Tools.WebReader as WebReader import qualified Omni.Agent.Tools.WebSearch as WebSearch @@ -139,6 +140,7 @@ cleanupRegistry = do -- | A pending spawn request awaiting user confirmation data PendingSpawn = PendingSpawn { pendingId :: Text, + pendingSubagentId :: AuditLog.SubagentId, pendingConfig :: SubagentConfig, pendingChatId :: Int, pendingCreatedAt :: Clock.UTCTime @@ -151,20 +153,23 @@ pendingSpawnRegistry = unsafePerformIO (newIORef Map.empty) {-# NOINLINE pendingSpawnRegistry #-} -- | Create a new pending spawn request -createPendingSpawn :: SubagentConfig -> Int -> IO Text +-- Returns (pendingId, subagentId) - the subagentId is pre-generated so agent can track it +createPendingSpawn :: SubagentConfig -> Int -> IO (Text, Text) createPendingSpawn config chatId = do uuid <- Data.UUID.V4.nextRandom - let pendingId = Text.take 8 (Data.UUID.toText uuid) + let pid = Text.take 8 (Data.UUID.toText uuid) + subagentId <- AuditLog.newSubagentId now <- Clock.getCurrentTime let pending = PendingSpawn - { pendingId = pendingId, + { pendingId = pid, + pendingSubagentId = subagentId, pendingConfig = config, pendingChatId = chatId, pendingCreatedAt = now } - modifyIORef' pendingSpawnRegistry (Map.insert pendingId pending) - pure pendingId + modifyIORef' pendingSpawnRegistry (Map.insert pid pending) + pure (pid, AuditLog.unSubagentId subagentId) -- | Get a pending spawn by ID getPendingSpawn :: Text -> IO (Maybe PendingSpawn) @@ -262,7 +267,32 @@ test = Aeson.Object obj -> do let status = KeyMap.lookup "status" obj status Test.@=? Just (Aeson.String "awaiting_approval") - _ -> Test.assertFailure "Expected object response" + _ -> Test.assertFailure "Expected object response", + Test.unit "pending spawn create and lookup works" <| do + let config = defaultSubagentConfig WebCrawler "test pending task" + (pid, sid) <- createPendingSpawn config 12345 + when (Text.null pid) <| Test.assertFailure "pending ID should not be empty" + when (Text.null sid) <| Test.assertFailure "subagent ID should not be empty" + maybePending <- getPendingSpawn pid + case maybePending of + Nothing -> Test.assertFailure "Pending spawn not found after creation" + Just p -> do + pendingChatId p Test.@=? 12345 + subagentTask (pendingConfig p) Test.@=? "test pending task" + AuditLog.unSubagentId (pendingSubagentId p) Test.@=? sid + removePendingSpawn pid + afterRemove <- getPendingSpawn pid + afterRemove Test.@=? Nothing, + Test.unit "pending spawn registry is isolated" <| do + let config = defaultSubagentConfig Researcher "isolated test" + (pid1, _) <- createPendingSpawn config 111 + (pid2, _) <- createPendingSpawn config 222 + when (pid1 == pid2) <| Test.assertFailure "IDs should be different" + p1 <- getPendingSpawn pid1 + p2 <- getPendingSpawn pid2 + when (isNothing p1 || isNothing p2) <| Test.assertFailure "Both should exist" + removePendingSpawn pid1 + removePendingSpawn pid2 ] data SubagentRole @@ -270,6 +300,7 @@ data SubagentRole | CodeReviewer | DataExtractor | Researcher + | Coder | CustomRole Text deriving (Show, Eq, Generic) @@ -278,6 +309,7 @@ instance Aeson.ToJSON SubagentRole where toJSON CodeReviewer = Aeson.String "code_reviewer" toJSON DataExtractor = Aeson.String "data_extractor" toJSON Researcher = Aeson.String "researcher" + toJSON Coder = Aeson.String "coder" toJSON (CustomRole name) = Aeson.String name instance Aeson.FromJSON SubagentRole where @@ -287,6 +319,7 @@ instance Aeson.FromJSON SubagentRole where parseRole "code_reviewer" = pure CodeReviewer parseRole "data_extractor" = pure DataExtractor parseRole "researcher" = pure Researcher + parseRole "coder" = pure Coder parseRole name = pure (CustomRole name) data SubagentConfig = SubagentConfig @@ -298,7 +331,11 @@ data SubagentConfig = SubagentConfig subagentMaxTokens :: Int, subagentMaxIterations :: Int, subagentExtendedThinking :: Bool, - subagentContext :: Maybe Text + subagentContext :: Maybe Text, + -- | Optional task ID for tracking (not used by Coder) + subagentTaskId :: Maybe Text, + -- | Namespace for Coder role - required (e.g., "Omni/Agent/Subagent") + subagentNamespace :: Maybe Text } deriving (Show, Eq, Generic) @@ -314,7 +351,9 @@ instance Aeson.ToJSON SubagentConfig where Just ("max_tokens" .= subagentMaxTokens c), Just ("max_iterations" .= subagentMaxIterations c), Just ("extended_thinking" .= subagentExtendedThinking c), - ("context" .=) </ subagentContext c + ("context" .=) </ subagentContext c, + ("task_id" .=) </ subagentTaskId c, + ("namespace" .=) </ subagentNamespace c ] instance Aeson.FromJSON SubagentConfig where @@ -329,6 +368,8 @@ instance Aeson.FromJSON SubagentConfig where <*> (v .:? "max_iterations" .!= 20) <*> (v .:? "extended_thinking" .!= False) <*> (v .:? "context") + <*> (v .:? "task_id") + <*> (v .:? "namespace") data SubagentResult = SubagentResult { subagentOutput :: Aeson.Value, @@ -414,8 +455,13 @@ initialRunStatus = } spawnSubagentAsync :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> IO SubagentHandle -spawnSubagentAsync sessionId userId keys config = do - sid <- AuditLog.newSubagentId +spawnSubagentAsync sessionId userId keys config = + spawnSubagentAsyncWithId sessionId userId keys config Nothing + +-- | Spawn subagent with optional pre-generated ID (for pending spawn flow) +spawnSubagentAsyncWithId :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> Maybe AuditLog.SubagentId -> IO SubagentHandle +spawnSubagentAsyncWithId sessionId userId keys config maybePregenId = do + sid <- maybe AuditLog.newSubagentId pure maybePregenId startTime <- Clock.getCurrentTime statusVar <- newTVarIO initialRunStatus @@ -512,7 +558,9 @@ defaultSubagentConfig role task = subagentMaxTokens = 200000, subagentMaxIterations = 20, subagentExtendedThinking = False, - subagentContext = Nothing + subagentContext = Nothing, + subagentTaskId = Nothing, + subagentNamespace = Nothing } modelForRole :: SubagentRole -> Text @@ -520,6 +568,7 @@ modelForRole WebCrawler = "anthropic/claude-3-haiku" modelForRole CodeReviewer = "anthropic/claude-sonnet-4" modelForRole DataExtractor = "anthropic/claude-3-haiku" modelForRole Researcher = "anthropic/claude-sonnet-4" +modelForRole Coder = "anthropic/claude-sonnet-4" modelForRole (CustomRole _) = "anthropic/claude-sonnet-4" data SubagentApiKeys = SubagentApiKeys @@ -558,6 +607,8 @@ toolsForRole Researcher keys = Tools.searchCodebaseTool, Tools.searchAndReadTool ] +-- Coder uses the hardened Coder module, toolsForRole not used +toolsForRole Coder _keys = Coder.coderTools toolsForRole (CustomRole _) keys = toolsForRole Researcher keys systemPromptForRole :: SubagentRole -> Text -> Maybe Text -> Text @@ -602,6 +653,7 @@ systemPromptForRole role task maybeContext = roleDescription CodeReviewer = "code review" roleDescription DataExtractor = "data extraction" roleDescription Researcher = "research" + roleDescription Coder = "coding" roleDescription (CustomRole name) = name runSubagent :: SubagentApiKeys -> SubagentConfig -> IO SubagentResult @@ -609,6 +661,118 @@ runSubagent keys config = runSubagentWithCallbacks keys config defaultCallbacks runSubagentWithCallbacks :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult runSubagentWithCallbacks keys config callbacks = do + let role = subagentRole config + + -- Coder role uses the hardened Coder module with init/verify/commit phases + case role of + Coder -> runCoderSubagentWrapper keys config callbacks + _ -> runGenericSubagent keys config callbacks + +-- | Run Coder subagent using the hardened Coder module +runCoderSubagentWrapper :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult +runCoderSubagentWrapper keys config callbacks = do + startTime <- Clock.getCurrentTime + + -- Validate required namespace field for Coder role + let namespace = fromMaybe "" (subagentNamespace config) + + if Text.null namespace + then + pure + SubagentResult + { subagentOutput = Aeson.object ["error" .= ("Coder role requires namespace field" :: Text)], + subagentSummary = "Missing required field: namespace", + subagentConfidence = 0.0, + subagentTokensUsed = 0, + subagentCostCents = 0.0, + subagentDuration = 0, + subagentIterations = 0, + subagentStatus = SubagentError "Missing namespace" + } + else do + onSubagentStart callbacks ("Starting Coder subagent for " <> namespace <> "...") + + -- Build CoderConfig from SubagentConfig + let coderCfg = + Coder.CoderConfig + { Coder.coderNamespace = namespace, + Coder.coderTask = subagentTask config, + Coder.coderContext = subagentContext config, + Coder.coderModel = fromMaybe "anthropic/claude-sonnet-4" (subagentModel config), + Coder.coderTimeout = subagentTimeout config, + Coder.coderMaxCost = subagentMaxCost config, + Coder.coderMaxTokens = subagentMaxTokens config, + Coder.coderMaxIterations = subagentMaxIterations config, + Coder.coderMaxVerifyRetries = 3 + } + + result <- Coder.runCoderSubagent (subagentOpenRouterKey keys) coderCfg + + endTime <- Clock.getCurrentTime + let durationSecs = round (Clock.diffUTCTime endTime startTime) + + case result of + Left err -> do + onSubagentComplete callbacks + <| SubagentResult + { subagentOutput = Aeson.object ["error" .= err], + subagentSummary = "Coder failed: " <> Text.take 200 err, + subagentConfidence = 0.0, + subagentTokensUsed = 0, + subagentCostCents = 0.0, + subagentDuration = durationSecs, + subagentIterations = 0, + subagentStatus = SubagentError err + } + pure + SubagentResult + { subagentOutput = Aeson.object ["error" .= err], + subagentSummary = "Coder failed: " <> Text.take 200 err, + subagentConfidence = 0.0, + subagentTokensUsed = 0, + subagentCostCents = 0.0, + subagentDuration = durationSecs, + subagentIterations = 0, + subagentStatus = SubagentError err + } + Right jsonResult -> do + let summary = case jsonResult of + Aeson.Object obj -> case KeyMap.lookup "summary" obj of + Just (Aeson.String s) -> Text.take 200 s + _ -> "Coder completed successfully" + _ -> "Coder completed successfully" + let tokens = case jsonResult of + Aeson.Object obj -> case KeyMap.lookup "tokens_used" obj of + Just (Aeson.Number n) -> round n + _ -> 0 + _ -> 0 + let cost = case jsonResult of + Aeson.Object obj -> case KeyMap.lookup "cost_cents" obj of + Just (Aeson.Number n) -> realToFrac n + _ -> 0.0 + _ -> 0.0 + let iters = case jsonResult of + Aeson.Object obj -> case KeyMap.lookup "iterations" obj of + Just (Aeson.Number n) -> round n + _ -> 0 + _ -> 0 + let finalResult = + SubagentResult + { subagentOutput = jsonResult, + subagentSummary = summary, + subagentConfidence = 0.9, + subagentTokensUsed = tokens, + subagentCostCents = cost, + subagentDuration = durationSecs, + subagentIterations = iters, + subagentStatus = SubagentSuccess + } + onSubagentComplete callbacks finalResult + pure finalResult + +-- | Run generic (non-Coder) subagent +runGenericSubagent :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult +runGenericSubagent keys config callbacks = do startTime <- Clock.getCurrentTime let role = subagentRole config @@ -717,7 +881,8 @@ spawnSubagentTool keys = <> "then present the approval to the user. Only call with confirmed=true " <> "after the user explicitly approves. " <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), " - <> "data_extractor (structured data extraction), researcher (general research).", + <> "data_extractor (structured data extraction), researcher (general research), " + <> "coder (hardened coding with init/verify/commit phases - requires task_id and namespace).", Engine.toolJsonSchema = Aeson.object [ "type" .= ("object" :: Text), @@ -726,7 +891,7 @@ spawnSubagentTool keys = [ "role" .= Aeson.object [ "type" .= ("string" :: Text), - "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]), + "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]), "description" .= ("Subagent role determining tools and model" :: Text) ], "task" @@ -754,6 +919,16 @@ spawnSubagentTool keys = [ "type" .= ("number" :: Text), "description" .= ("Maximum cost in cents (default: 50)" :: Text) ], + "task_id" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Task ID from jr task (required for coder role)" :: Text) + ], + "namespace" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text) + ], "confirmed" .= Aeson.object [ "type" .= ("boolean" :: Text), @@ -805,6 +980,7 @@ formatApprovalRequest config = CodeReviewer -> "CodeReviewer" DataExtractor -> "DataExtractor" Researcher -> "Researcher" + Coder -> "Coder" CustomRole name -> name estimatedTime :: Int estimatedTime = subagentTimeout config `div` 60 @@ -920,10 +1096,13 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded = Engine.Tool { Engine.toolName = "spawn_subagent", Engine.toolDescription = - "Spawn a specialized subagent for a focused task. " - <> "The user will receive a confirmation button to approve the spawn. " + "Request to spawn a specialized subagent for a focused task. " + <> "The user will receive a confirmation button to approve. " + <> "IMPORTANT: The subagent does NOT start until the user clicks Approve - " + <> "do NOT say 'spawned' or 'started', say 'requested' or 'awaiting approval'. " <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), " - <> "data_extractor (structured data extraction), researcher (general research).", + <> "data_extractor (structured data extraction), researcher (general research), " + <> "coder (hardened coding with init/verify/commit - requires task_id and namespace).", Engine.toolJsonSchema = Aeson.object [ "type" .= ("object" :: Text), @@ -932,7 +1111,7 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded = [ "role" .= Aeson.object [ "type" .= ("string" :: Text), - "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]), + "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]), "description" .= ("Subagent role determining tools and model" :: Text) ], "task" @@ -959,6 +1138,16 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded = .= Aeson.object [ "type" .= ("number" :: Text), "description" .= ("Maximum cost in cents (default: 50)" :: Text) + ], + "task_id" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Task ID from jr task (required for coder role)" :: Text) + ], + "namespace" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text) ] ], "required" .= (["role", "task"] :: [Text]) @@ -971,34 +1160,35 @@ executeSpawnWithApproval _keys chatId onApprovalNeeded v = case Aeson.fromJSON v of Aeson.Error e -> pure <| Aeson.object ["error" .= ("Invalid arguments: " <> Text.pack e)] Aeson.Success config -> do - pendingId <- createPendingSpawn config chatId + (pid, subagentId) <- createPendingSpawn config chatId let roleText = case subagentRole config of WebCrawler -> "web_crawler" CodeReviewer -> "code_reviewer" DataExtractor -> "data_extractor" Researcher -> "researcher" + Coder -> "coder" CustomRole name -> name estimatedMins = subagentTimeout config `div` 60 maxCost = subagentMaxCost config - onApprovalNeeded chatId pendingId roleText (subagentTask config) estimatedMins maxCost + onApprovalNeeded chatId pid roleText (subagentTask config) estimatedMins maxCost pure <| Aeson.object [ "status" .= ("pending_approval" :: Text), - "pending_id" .= pendingId, - "message" .= ("Approval button sent to user. Waiting for confirmation." :: Text) + "subagent_id" .= subagentId, + "message" .= ("Subagent requested. User must click Approve button before it starts. Do not say spawned yet." :: Text) ] -- | Approve a pending spawn and start the subagent approveAndSpawnSubagent :: SubagentApiKeys -> Text -> IO (Either Text Text) -approveAndSpawnSubagent keys pendingId = do - maybePending <- getPendingSpawn pendingId +approveAndSpawnSubagent keys pid = do + maybePending <- getPendingSpawn pid case maybePending of Nothing -> pure (Left "Pending spawn not found or expired") Just pending -> do - removePendingSpawn pendingId + removePendingSpawn pid uuid <- Data.UUID.V4.nextRandom let sessionId = AuditLog.SessionId ("subagent-" <> Text.take 8 (Data.UUID.toText uuid)) - subHandle <- spawnSubagentAsync sessionId Nothing keys (pendingConfig pending) + subHandle <- spawnSubagentAsyncWithId sessionId Nothing keys (pendingConfig pending) (Just (pendingSubagentId pending)) registerSubagent subHandle let sid = AuditLog.unSubagentId (handleId subHandle) pure (Right sid) diff --git a/Omni/Agent/Subagent/Coder.hs b/Omni/Agent/Subagent/Coder.hs new file mode 100644 index 0000000..0f5a274 --- /dev/null +++ b/Omni/Agent/Subagent/Coder.hs @@ -0,0 +1,415 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Hardened Coder Subagent with automatic init/verify/commit phases. +-- +-- Based on Anthropic's "Effective Harnesses for Long-Running Agents": +-- https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents +-- +-- Key features: +-- - Init phase: Check git status, build namespace to detect broken state +-- - Work phase: Standard agent loop with tools +-- - Verify phase: Run lint --fix, build namespace, run tests +-- - Commit phase: Git commit with namespace-prefixed message +-- - Recovery phase: Revert changes on failure +-- +-- : out omni-agent-subagent-coder +-- : dep aeson +-- : dep async +-- : dep stm +-- : dep time +module Omni.Agent.Subagent.Coder + ( -- * Types + CoderConfig (..), + CoderContext (..), + InitResult (..), + VerifyResult (..), + + -- * Main Entry Point + runCoderSubagent, + + -- * Phases (exported for testing) + runCoderInit, + runCoderVerify, + runCoderCommit, + runCoderRecovery, + + -- * Helpers + defaultCoderConfig, + coderSystemPrompt, + coderTools, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Text as Text +import qualified Data.Time.Clock as Clock +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Agent.Provider as Provider +import qualified Omni.Agent.Tools as Tools +import qualified Omni.Test as Test +import qualified System.Exit as Exit +import qualified System.Process as Process + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Subagent.Coder" + [ Test.unit "CoderConfig has sensible defaults" <| do + let cfg = defaultCoderConfig "Omni/Task" "Implement feature X" + coderNamespace cfg Test.@=? "Omni/Task" + coderTimeout cfg Test.@=? 1200, + Test.unit "coderSystemPrompt includes namespace" <| do + let cfg = defaultCoderConfig "Biz/Cloud" "Fix the bug" + let prompt = coderSystemPrompt cfg Nothing + Text.isInfixOf "Biz/Cloud" prompt Test.@=? True, + Test.unit "runBashCapture returns output" <| do + (code, out, _) <- runBashCapture "echo hello" + code Test.@=? Exit.ExitSuccess + Text.strip out Test.@=? "hello" + ] + +-- | Configuration for a Coder subagent run +data CoderConfig = CoderConfig + { coderNamespace :: Text, + coderTask :: Text, + coderContext :: Maybe Text, + coderModel :: Text, + coderTimeout :: Int, + coderMaxCost :: Double, + coderMaxTokens :: Int, + coderMaxIterations :: Int, + coderMaxVerifyRetries :: Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON CoderConfig + +instance Aeson.FromJSON CoderConfig + +-- | Runtime context gathered during init phase +data CoderContext = CoderContext + { ctxGitBranch :: Text, + ctxInitiallyBroken :: Bool, + ctxChangedFiles :: [Text] + } + deriving (Show, Eq, Generic) + +-- | Result of the init phase +data InitResult + = InitSuccess CoderContext + | InitBrokenState Text CoderContext + | InitFailed Text + deriving (Show, Eq) + +-- | Result of the verify phase +data VerifyResult + = VerifySuccess + | VerifyLintFailed Text + | VerifyTypecheckFailed Text + | VerifyBuildFailed Text + | VerifyTestFailed Text + deriving (Show, Eq) + +defaultCoderConfig :: Text -> Text -> CoderConfig +defaultCoderConfig namespace task = + CoderConfig + { coderNamespace = namespace, + coderTask = task, + coderContext = Nothing, + coderModel = "anthropic/claude-sonnet-4", + coderTimeout = 1200, + coderMaxCost = 200.0, + coderMaxTokens = 300000, + coderMaxIterations = 30, + coderMaxVerifyRetries = 3 + } + +-- | Run a bash command and capture output +runBashCapture :: Text -> IO (Exit.ExitCode, Text, Text) +runBashCapture cmd = do + (code, out, err) <- Process.readProcessWithExitCode "bash" ["-c", Text.unpack cmd] "" + pure (code, Text.pack out, Text.pack err) + +-- | Phase 1: Initialize - check environment, detect broken state +runCoderInit :: CoderConfig -> IO InitResult +runCoderInit cfg = do + -- Check git status + (_, branchOut, _) <- runBashCapture "git branch --show-current" + let gitBranch = Text.strip branchOut + + -- Try to build the namespace to detect broken state + (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1 || true") + + let isBroken = buildCode /= Exit.ExitSuccess + let ctx = + CoderContext + { ctxGitBranch = gitBranch, + ctxInitiallyBroken = isBroken, + ctxChangedFiles = [] + } + + if isBroken + then pure <| InitBrokenState ("Build failed: " <> Text.take 500 (buildOut <> buildErr)) ctx + else pure <| InitSuccess ctx + +-- | Phase 3: Verify - run lint --fix, typecheck, build, test +-- +-- Verification order: +-- 1. lint --fix: Auto-apply formatting and simple fixes +-- 2. typecheck.sh: Run type checker (works for Python and other languages) +-- 3. bild: Build/compile the namespace +-- 4. bild --test: Run tests +runCoderVerify :: CoderConfig -> IO VerifyResult +runCoderVerify cfg = do + -- First, run lint --fix to auto-apply formatting + _ <- runBashCapture ("lint --fix " <> coderNamespace cfg <> ".hs 2>&1") + + -- Run typecheck.sh (handles Python mypy, etc.) + (typecheckCode, typecheckOut, typecheckErr) <- + runBashCapture ("typecheck.sh " <> coderNamespace cfg <> " 2>&1 || true") + -- Note: typecheck.sh may not exist for all namespaces, so we use || true + -- and only fail if it explicitly returns non-zero with actual errors + let typecheckFailed = + typecheckCode + /= Exit.ExitSuccess + && not (Text.null (Text.strip typecheckOut)) + && not ("No such file" `Text.isInfixOf` typecheckErr) + + if typecheckFailed + then pure <| VerifyTypecheckFailed (Text.take 1000 (typecheckOut <> typecheckErr)) + else do + -- Build the namespace + (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1") + case buildCode of + Exit.ExitSuccess -> do + -- Run tests if they exist + (testCode, testOut, testErr) <- runBashCapture ("bild --test " <> coderNamespace cfg <> " 2>&1") + case testCode of + Exit.ExitSuccess -> pure VerifySuccess + _ -> pure <| VerifyTestFailed (Text.take 1000 (testOut <> testErr)) + _ -> pure <| VerifyBuildFailed (Text.take 1000 (buildOut <> buildErr)) + +-- | Phase 4: Commit - git commit with message +runCoderCommit :: CoderConfig -> Text -> IO (Either Text ()) +runCoderCommit cfg summary = do + let commitMsg = coderNamespace cfg <> ": " <> Text.take 72 summary + + -- Stage all changes + (addCode, _, addErr) <- runBashCapture "git add -A" + case addCode of + Exit.ExitSuccess -> do + -- Commit + (commitCode, _, commitErr) <- runBashCapture ("git commit -m " <> quoteShell commitMsg) + case commitCode of + Exit.ExitSuccess -> pure (Right ()) + _ -> pure <| Left ("git commit failed: " <> commitErr) + _ -> pure <| Left ("git add failed: " <> addErr) + +-- | Phase 5: Recovery - revert changes on failure +runCoderRecovery :: IO () +runCoderRecovery = do + -- Revert uncommitted changes + _ <- runBashCapture "git checkout -- ." + _ <- runBashCapture "git clean -fd" + pure () + +-- | Quote a string for shell +quoteShell :: Text -> Text +quoteShell t = "'" <> Text.replace "'" "'\\''" t <> "'" + +-- | Tools available to the Coder subagent +coderTools :: [Engine.Tool] +coderTools = + [ Tools.readFileTool, + Tools.writeFileTool, + Tools.editFileTool, + Tools.runBashTool, + Tools.searchCodebaseTool, + Tools.searchAndReadTool + ] + +-- | System prompt for the Coder subagent +coderSystemPrompt :: CoderConfig -> Maybe Text -> Text +coderSystemPrompt cfg maybeInitState = + Text.unlines + [ "You are a specialized Coder subagent.", + "", + "## Your Task", + coderTask cfg, + "", + "## Namespace", + "You are working in namespace: " <> coderNamespace cfg, + "Focus your changes on files in this namespace.", + "", + maybe "" (\ctx -> "## Context\n" <> ctx <> "\n") (coderContext cfg), + maybe "" (\st -> "## Current State\n" <> st <> "\n") maybeInitState, + "## Guidelines", + "1. Make incremental changes - edit one file at a time", + "2. After each edit, consider running `lint " <> coderNamespace cfg <> ".hs` to catch issues early", + "3. Use `bild " <> coderNamespace cfg <> "` to check your work compiles", + "4. If tests exist, run `bild --test " <> coderNamespace cfg <> "` to verify", + "5. Keep changes focused on the task - don't refactor unrelated code", + "6. If you find bugs unrelated to your task, note them but don't fix them", + "", + "## Completion", + "When you believe you're done:", + "1. Ensure all edits are complete", + "2. The harness will automatically run lint --fix, build, and tests", + "3. If verification fails, you'll be asked to fix the issues", + "4. Provide a brief summary of what you changed", + "", + "## Important", + "- Do NOT commit - the harness handles git operations", + "- Focus on making the code changes correctly" + ] + +-- | Format verify error for agent retry prompt +formatVerifyError :: VerifyResult -> Text +formatVerifyError VerifySuccess = "" +formatVerifyError (VerifyLintFailed err) = "LINT ERRORS:\n" <> err +formatVerifyError (VerifyTypecheckFailed err) = "TYPE ERRORS:\n" <> err +formatVerifyError (VerifyBuildFailed err) = "BUILD ERRORS:\n" <> err +formatVerifyError (VerifyTestFailed err) = "TEST FAILURES:\n" <> err + +-- | Main entry point: run the Coder subagent with all phases +-- +-- This implements Anthropic's pattern of looping back to the agent on +-- verification failure. The agent gets up to coderMaxVerifyRetries attempts +-- to fix verification errors before we give up. +-- +-- See: https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents +-- +-- NOTE: We may want to change this behavior in the future. For some errors, +-- immediate failure might be better than retry loops. But for now, we follow +-- Anthropic's recommendation of letting the agent attempt to fix its mistakes. +runCoderSubagent :: Text -> CoderConfig -> IO (Either Text Aeson.Value) +runCoderSubagent openRouterKey cfg = do + startTime <- Clock.getCurrentTime + + -- Phase 1: Init + initResult <- runCoderInit cfg + (ctx, initState) <- case initResult of + InitFailed err -> pure (Left err, Nothing) + InitBrokenState msg ctxVal -> pure (Right ctxVal, Just ("WARNING: Starting from broken state.\n" <> msg)) + InitSuccess ctxVal -> pure (Right ctxVal, Nothing) + + case ctx of + Left err -> pure <| Left ("Init failed: " <> err) + Right _context -> do + result <- runWorkVerifyLoop openRouterKey cfg initState 0 0 0.0 + case result of + Left err -> do + runCoderRecovery + pure <| Left err + Right (finalMsg, totalTokens, totalCost, totalIters) -> do + -- Phase 4: Commit + let summary = Text.take 100 finalMsg + commitResult <- runCoderCommit cfg summary + case commitResult of + Left err -> do + runCoderRecovery + pure <| Left ("Commit failed: " <> err) + Right () -> do + endTime <- Clock.getCurrentTime + let duration = round (Clock.diffUTCTime endTime startTime) + pure + <| Right + <| Aeson.object + [ "status" .= ("success" :: Text), + "namespace" .= coderNamespace cfg, + "summary" .= finalMsg, + "tokens_used" .= totalTokens, + "cost_cents" .= totalCost, + "duration_seconds" .= (duration :: Int), + "iterations" .= totalIters + ] + +-- | Work-verify loop with retries on verification failure +-- +-- Per Anthropic's guidance, we loop back to the agent when verification fails, +-- giving it a chance to fix its mistakes. This is more effective than +-- immediately failing, as the agent often can fix simple issues. +runWorkVerifyLoop :: + Text -> + CoderConfig -> + Maybe Text -> + Int -> + Int -> + Double -> + IO (Either Text (Text, Int, Double, Int)) +runWorkVerifyLoop openRouterKey cfg initState verifyAttempt accTokens accCost = do + let isRetry = verifyAttempt > 0 + let maxRetries = coderMaxVerifyRetries cfg + + -- Build the prompt - include error context if this is a retry + let baseTask = coderTask cfg + let taskPrompt = + if isRetry + then baseTask <> "\n\n## VERIFICATION FAILED - PLEASE FIX\nThis is retry attempt " <> tshow verifyAttempt <> " of " <> tshow maxRetries <> "." + else baseTask + + -- Phase 2: Work (run the agent) + let provider = Provider.defaultOpenRouter openRouterKey (coderModel cfg) + let systemPrompt = coderSystemPrompt cfg initState + let guardrails = + Engine.Guardrails + { Engine.guardrailMaxCostCents = coderMaxCost cfg - accCost, + Engine.guardrailMaxTokens = coderMaxTokens cfg - accTokens, + Engine.guardrailMaxDuplicateToolCalls = 20, + Engine.guardrailMaxTestFailures = 5, + Engine.guardrailMaxEditFailures = 10 + } + let agentConfig = + Engine.AgentConfig + { Engine.agentModel = coderModel cfg, + Engine.agentTools = coderTools, + Engine.agentSystemPrompt = systemPrompt, + Engine.agentMaxIterations = coderMaxIterations cfg, + Engine.agentGuardrails = guardrails + } + let engineConfig = Engine.defaultEngineConfig + + let timeoutMicros = coderTimeout cfg * 1000000 + agentResult <- + race + (threadDelay timeoutMicros) + (Engine.runAgentWithProvider engineConfig provider agentConfig taskPrompt) + + case agentResult of + Left () -> pure <| Left "Coder subagent timed out" + Right (Left err) -> pure <| Left ("Coder agent failed: " <> err) + Right (Right result) -> do + let newTokens = accTokens + Engine.resultTotalTokens result + let newCost = accCost + Engine.resultTotalCost result + let newIters = Engine.resultIterations result + + -- Phase 3: Verify + verifyResult <- runCoderVerify cfg + case verifyResult of + VerifySuccess -> + pure <| Right (Engine.resultFinalMessage result, newTokens, newCost, newIters) + failure + | verifyAttempt >= maxRetries -> do + -- Max retries exceeded, give up + let errMsg = "Verification failed after " <> tshow (verifyAttempt + 1) <> " attempts: " <> formatVerifyError failure + pure <| Left errMsg + | otherwise -> do + -- Retry: loop back to agent with error context + -- NOTE: This follows Anthropic's pattern. We may want to change + -- this for certain error types in the future. + let errorContext = formatVerifyError failure + let retryState = case initState of + Just st -> Just (st <> "\n\n" <> errorContext) + Nothing -> Just errorContext + runWorkVerifyLoop openRouterKey cfg retryState (verifyAttempt + 1) newTokens newCost diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index 23a760a..9e6eca0 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -240,6 +240,8 @@ getRawUpdates cfg offset = do <> show (Types.tgPollingTimeout cfg) <> "&offset=" <> show offset + <> "&allowed_updates=" + <> "%5B%22message%22%2C%22callback_query%22%5D" result <- try <| do req0 <- HTTP.parseRequest url @@ -539,8 +541,13 @@ runTelegramBot tgConfig provider = do offset <- readTVarIO offsetVar rawUpdates <- getRawUpdates tgConfig offset forM_ rawUpdates <| \rawUpdate -> do + let hasCallbackField = case rawUpdate of + Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj) + _ -> False + when hasCallbackField <| putText <| "Raw callback update received: " <> Text.take 300 (tshow rawUpdate) case Types.parseCallbackQuery rawUpdate of Just cq -> do + putText <| "Parsed callback query: " <> Types.cqData cq let updateId = getUpdateId rawUpdate forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1)) handleCallbackQuery tgConfig cq @@ -555,7 +562,12 @@ runTelegramBot tgConfig provider = do IncomingQueue.enqueueIncoming incomingQueues IncomingQueue.defaultBatchWindowSeconds msg Nothing -> do let updateId = getUpdateId rawUpdate - putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate) + hasCallback = case rawUpdate of + Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj) + _ -> False + if hasCallback + then putText <| "Failed to parse callback_query: " <> Text.take 500 (tshow rawUpdate) + else putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate) forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1)) when (null rawUpdates) <| threadDelay 1000000 @@ -588,30 +600,42 @@ handleCallbackQuery tgConfig cq = do userId = Types.cqFromId cq userName = Types.cqFromFirstName cq putText <| "Callback query from " <> userName <> ": " <> callbackData + result <- try @SomeException <| handleCallbackQueryInner tgConfig cq chatId userId callbackData + case result of + Left err -> putText <| "Callback handler error: " <> tshow err + Right () -> pure () + +handleCallbackQueryInner :: Types.TelegramConfig -> Types.CallbackQuery -> Int -> Int -> Text -> IO () +handleCallbackQueryInner tgConfig cq chatId userId callbackData = do if not (Types.isUserAllowed tgConfig userId) then do answerCallbackQuery tgConfig (Types.cqId cq) (Just "Not authorized") putText <| "Unauthorized callback from user " <> tshow userId else case Text.splitOn ":" callbackData of ["subagent_approve", pendingId] -> do + putText <| "Approving subagent spawn: " <> pendingId answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawning subagent...") let keys = Subagent.SubagentApiKeys { Subagent.subagentOpenRouterKey = Types.tgOpenRouterApiKey tgConfig, Subagent.subagentKagiKey = Types.tgKagiApiKey tgConfig } - result <- Subagent.approveAndSpawnSubagent keys pendingId - case result of + spawnResult <- Subagent.approveAndSpawnSubagent keys pendingId + putText <| "Spawn result: " <> tshow spawnResult + case spawnResult of Left err -> do - sendMessage tgConfig chatId ("❌ Failed to spawn subagent: " <> err) + putText <| "Spawn failed: " <> err + sendMessage tgConfig chatId ("Failed to spawn subagent: " <> err) Right subagentId -> do - sendMessage tgConfig chatId ("🚀 Subagent **" <> subagentId <> "** spawned! Use `check_subagent` to monitor progress.") + putText <| "Spawn succeeded, subagent ID: " <> subagentId + sendMessage tgConfig chatId ("Subagent " <> subagentId <> " spawned! Use check_subagent to monitor progress.") ["subagent_reject", pendingId] -> do + putText <| "Rejecting subagent spawn: " <> pendingId rejected <- Subagent.rejectPendingSpawn pendingId if rejected then do answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawn cancelled") - sendMessage tgConfig chatId "❌ Subagent spawn cancelled." + sendMessage tgConfig chatId "Subagent spawn cancelled." else answerCallbackQuery tgConfig (Types.cqId cq) (Just "Already expired") _ -> do answerCallbackQuery tgConfig (Types.cqId cq) (Just "Unknown action") |
