Omni/Agent/Subagent/Coder: decouple from jr task system

Remove task_id requirement and all jr task CLI calls. The Coder subagent now only requires namespace and task description - no external task tracking needed. Changes: - Remove coderTaskId from CoderConfig - Remove jr task show/update/comment calls - Commit message uses namespace prefix instead of task ID - Recovery phase just reverts git, no task comment - Subagent.hs only validates namespace for Coder role
author: Ben Sima <ben@bensima.com> 2025-12-18 13:39:40 -0500
committer: Ben Sima <ben@bensima.com> 2025-12-18 13:39:40 -0500
commit: bd7724068938daa44dc74c28ab0aa5c45477bbfd (patch)
tree: 919201025d3f7c95656b66947b48ecc983db5e6c /Omni
parent: 133df9a099785b5eabb5ad19bcd7daa33eff9afe (diff)
3 files changed, 660 insertions, 31 deletions
diff --git a/Omni/Agent/Subagent.hs b/Omni/Agent/Subagent.hs
index 3278e4c..06ef938 100644
--- a/Omni/Agent/Subagent.hs
+++ b/Omni/Agent/Subagent.hs
@@ -93,6 +93,7 @@ import qualified Data.UUID.V4
 import qualified Omni.Agent.AuditLog as AuditLog
 import qualified Omni.Agent.Engine as Engine
 import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Subagent.Coder as Coder
 import qualified Omni.Agent.Tools as Tools
 import qualified Omni.Agent.Tools.WebReader as WebReader
 import qualified Omni.Agent.Tools.WebSearch as WebSearch
@@ -139,6 +140,7 @@ cleanupRegistry = do
 -- | A pending spawn request awaiting user confirmation
 data PendingSpawn = PendingSpawn
   { pendingId :: Text,
+    pendingSubagentId :: AuditLog.SubagentId,
     pendingConfig :: SubagentConfig,
     pendingChatId :: Int,
     pendingCreatedAt :: Clock.UTCTime
@@ -151,20 +153,23 @@ pendingSpawnRegistry = unsafePerformIO (newIORef Map.empty)
 {-# NOINLINE pendingSpawnRegistry #-}
 
 -- | Create a new pending spawn request
-createPendingSpawn :: SubagentConfig -> Int -> IO Text
+-- Returns (pendingId, subagentId) - the subagentId is pre-generated so agent can track it
+createPendingSpawn :: SubagentConfig -> Int -> IO (Text, Text)
 createPendingSpawn config chatId = do
   uuid <- Data.UUID.V4.nextRandom
-  let pendingId = Text.take 8 (Data.UUID.toText uuid)
+  let pid = Text.take 8 (Data.UUID.toText uuid)
+  subagentId <- AuditLog.newSubagentId
   now <- Clock.getCurrentTime
   let pending =
         PendingSpawn
-          { pendingId = pendingId,
+          { pendingId = pid,
+            pendingSubagentId = subagentId,
             pendingConfig = config,
             pendingChatId = chatId,
             pendingCreatedAt = now
           }
-  modifyIORef' pendingSpawnRegistry (Map.insert pendingId pending)
-  pure pendingId
+  modifyIORef' pendingSpawnRegistry (Map.insert pid pending)
+  pure (pid, AuditLog.unSubagentId subagentId)
 
 -- | Get a pending spawn by ID
 getPendingSpawn :: Text -> IO (Maybe PendingSpawn)
@@ -262,7 +267,32 @@ test =
           Aeson.Object obj -> do
             let status = KeyMap.lookup "status" obj
             status Test.@=? Just (Aeson.String "awaiting_approval")
-          _ -> Test.assertFailure "Expected object response"
+          _ -> Test.assertFailure "Expected object response",
+      Test.unit "pending spawn create and lookup works" <| do
+        let config = defaultSubagentConfig WebCrawler "test pending task"
+        (pid, sid) <- createPendingSpawn config 12345
+        when (Text.null pid) <| Test.assertFailure "pending ID should not be empty"
+        when (Text.null sid) <| Test.assertFailure "subagent ID should not be empty"
+        maybePending <- getPendingSpawn pid
+        case maybePending of
+          Nothing -> Test.assertFailure "Pending spawn not found after creation"
+          Just p -> do
+            pendingChatId p Test.@=? 12345
+            subagentTask (pendingConfig p) Test.@=? "test pending task"
+            AuditLog.unSubagentId (pendingSubagentId p) Test.@=? sid
+        removePendingSpawn pid
+        afterRemove <- getPendingSpawn pid
+        afterRemove Test.@=? Nothing,
+      Test.unit "pending spawn registry is isolated" <| do
+        let config = defaultSubagentConfig Researcher "isolated test"
+        (pid1, _) <- createPendingSpawn config 111
+        (pid2, _) <- createPendingSpawn config 222
+        when (pid1 == pid2) <| Test.assertFailure "IDs should be different"
+        p1 <- getPendingSpawn pid1
+        p2 <- getPendingSpawn pid2
+        when (isNothing p1 || isNothing p2) <| Test.assertFailure "Both should exist"
+        removePendingSpawn pid1
+        removePendingSpawn pid2
     ]
 
 data SubagentRole
@@ -270,6 +300,7 @@ data SubagentRole
   | CodeReviewer
   | DataExtractor
   | Researcher
+  | Coder
   | CustomRole Text
   deriving (Show, Eq, Generic)
 
@@ -278,6 +309,7 @@ instance Aeson.ToJSON SubagentRole where
   toJSON CodeReviewer = Aeson.String "code_reviewer"
   toJSON DataExtractor = Aeson.String "data_extractor"
   toJSON Researcher = Aeson.String "researcher"
+  toJSON Coder = Aeson.String "coder"
   toJSON (CustomRole name) = Aeson.String name
 
 instance Aeson.FromJSON SubagentRole where
@@ -287,6 +319,7 @@ instance Aeson.FromJSON SubagentRole where
       parseRole "code_reviewer" = pure CodeReviewer
       parseRole "data_extractor" = pure DataExtractor
       parseRole "researcher" = pure Researcher
+      parseRole "coder" = pure Coder
       parseRole name = pure (CustomRole name)
 
 data SubagentConfig = SubagentConfig
@@ -298,7 +331,11 @@ data SubagentConfig = SubagentConfig
     subagentMaxTokens :: Int,
     subagentMaxIterations :: Int,
     subagentExtendedThinking :: Bool,
-    subagentContext :: Maybe Text
+    subagentContext :: Maybe Text,
+    -- | Optional task ID for tracking (not used by Coder)
+    subagentTaskId :: Maybe Text,
+    -- | Namespace for Coder role - required (e.g., "Omni/Agent/Subagent")
+    subagentNamespace :: Maybe Text
   }
   deriving (Show, Eq, Generic)
 
@@ -314,7 +351,9 @@ instance Aeson.ToJSON SubagentConfig where
           Just ("max_tokens" .= subagentMaxTokens c),
           Just ("max_iterations" .= subagentMaxIterations c),
           Just ("extended_thinking" .= subagentExtendedThinking c),
-          ("context" .=) </ subagentContext c
+          ("context" .=) </ subagentContext c,
+          ("task_id" .=) </ subagentTaskId c,
+          ("namespace" .=) </ subagentNamespace c
         ]
 
 instance Aeson.FromJSON SubagentConfig where
@@ -329,6 +368,8 @@ instance Aeson.FromJSON SubagentConfig where
         <*> (v .:? "max_iterations" .!= 20)
         <*> (v .:? "extended_thinking" .!= False)
         <*> (v .:? "context")
+        <*> (v .:? "task_id")
+        <*> (v .:? "namespace")
 
 data SubagentResult = SubagentResult
   { subagentOutput :: Aeson.Value,
@@ -414,8 +455,13 @@ initialRunStatus =
     }
 
 spawnSubagentAsync :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> IO SubagentHandle
-spawnSubagentAsync sessionId userId keys config = do
-  sid <- AuditLog.newSubagentId
+spawnSubagentAsync sessionId userId keys config =
+  spawnSubagentAsyncWithId sessionId userId keys config Nothing
+
+-- | Spawn subagent with optional pre-generated ID (for pending spawn flow)
+spawnSubagentAsyncWithId :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> Maybe AuditLog.SubagentId -> IO SubagentHandle
+spawnSubagentAsyncWithId sessionId userId keys config maybePregenId = do
+  sid <- maybe AuditLog.newSubagentId pure maybePregenId
   startTime <- Clock.getCurrentTime
   statusVar <- newTVarIO initialRunStatus
 
@@ -512,7 +558,9 @@ defaultSubagentConfig role task =
       subagentMaxTokens = 200000,
       subagentMaxIterations = 20,
       subagentExtendedThinking = False,
-      subagentContext = Nothing
+      subagentContext = Nothing,
+      subagentTaskId = Nothing,
+      subagentNamespace = Nothing
     }
 
 modelForRole :: SubagentRole -> Text
@@ -520,6 +568,7 @@ modelForRole WebCrawler = "anthropic/claude-3-haiku"
 modelForRole CodeReviewer = "anthropic/claude-sonnet-4"
 modelForRole DataExtractor = "anthropic/claude-3-haiku"
 modelForRole Researcher = "anthropic/claude-sonnet-4"
+modelForRole Coder = "anthropic/claude-sonnet-4"
 modelForRole (CustomRole _) = "anthropic/claude-sonnet-4"
 
 data SubagentApiKeys = SubagentApiKeys
@@ -558,6 +607,8 @@ toolsForRole Researcher keys =
              Tools.searchCodebaseTool,
              Tools.searchAndReadTool
            ]
+-- Coder uses the hardened Coder module, toolsForRole not used
+toolsForRole Coder _keys = Coder.coderTools
 toolsForRole (CustomRole _) keys = toolsForRole Researcher keys
 
 systemPromptForRole :: SubagentRole -> Text -> Maybe Text -> Text
@@ -602,6 +653,7 @@ systemPromptForRole role task maybeContext =
     roleDescription CodeReviewer = "code review"
     roleDescription DataExtractor = "data extraction"
     roleDescription Researcher = "research"
+    roleDescription Coder = "coding"
     roleDescription (CustomRole name) = name
 
 runSubagent :: SubagentApiKeys -> SubagentConfig -> IO SubagentResult
@@ -609,6 +661,118 @@ runSubagent keys config = runSubagentWithCallbacks keys config defaultCallbacks
 
 runSubagentWithCallbacks :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
 runSubagentWithCallbacks keys config callbacks = do
+  let role = subagentRole config
+
+  -- Coder role uses the hardened Coder module with init/verify/commit phases
+  case role of
+    Coder -> runCoderSubagentWrapper keys config callbacks
+    _ -> runGenericSubagent keys config callbacks
+
+-- | Run Coder subagent using the hardened Coder module
+runCoderSubagentWrapper :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
+runCoderSubagentWrapper keys config callbacks = do
+  startTime <- Clock.getCurrentTime
+
+  -- Validate required namespace field for Coder role
+  let namespace = fromMaybe "" (subagentNamespace config)
+
+  if Text.null namespace
+    then
+      pure
+        SubagentResult
+          { subagentOutput = Aeson.object ["error" .= ("Coder role requires namespace field" :: Text)],
+            subagentSummary = "Missing required field: namespace",
+            subagentConfidence = 0.0,
+            subagentTokensUsed = 0,
+            subagentCostCents = 0.0,
+            subagentDuration = 0,
+            subagentIterations = 0,
+            subagentStatus = SubagentError "Missing namespace"
+          }
+    else do
+      onSubagentStart callbacks ("Starting Coder subagent for " <> namespace <> "...")
+
+      -- Build CoderConfig from SubagentConfig
+      let coderCfg =
+            Coder.CoderConfig
+              { Coder.coderNamespace = namespace,
+                Coder.coderTask = subagentTask config,
+                Coder.coderContext = subagentContext config,
+                Coder.coderModel = fromMaybe "anthropic/claude-sonnet-4" (subagentModel config),
+                Coder.coderTimeout = subagentTimeout config,
+                Coder.coderMaxCost = subagentMaxCost config,
+                Coder.coderMaxTokens = subagentMaxTokens config,
+                Coder.coderMaxIterations = subagentMaxIterations config,
+                Coder.coderMaxVerifyRetries = 3
+              }
+
+      result <- Coder.runCoderSubagent (subagentOpenRouterKey keys) coderCfg
+
+      endTime <- Clock.getCurrentTime
+      let durationSecs = round (Clock.diffUTCTime endTime startTime)
+
+      case result of
+        Left err -> do
+          onSubagentComplete callbacks
+            <| SubagentResult
+              { subagentOutput = Aeson.object ["error" .= err],
+                subagentSummary = "Coder failed: " <> Text.take 200 err,
+                subagentConfidence = 0.0,
+                subagentTokensUsed = 0,
+                subagentCostCents = 0.0,
+                subagentDuration = durationSecs,
+                subagentIterations = 0,
+                subagentStatus = SubagentError err
+              }
+          pure
+            SubagentResult
+              { subagentOutput = Aeson.object ["error" .= err],
+                subagentSummary = "Coder failed: " <> Text.take 200 err,
+                subagentConfidence = 0.0,
+                subagentTokensUsed = 0,
+                subagentCostCents = 0.0,
+                subagentDuration = durationSecs,
+                subagentIterations = 0,
+                subagentStatus = SubagentError err
+              }
+        Right jsonResult -> do
+          let summary = case jsonResult of
+                Aeson.Object obj -> case KeyMap.lookup "summary" obj of
+                  Just (Aeson.String s) -> Text.take 200 s
+                  _ -> "Coder completed successfully"
+                _ -> "Coder completed successfully"
+          let tokens = case jsonResult of
+                Aeson.Object obj -> case KeyMap.lookup "tokens_used" obj of
+                  Just (Aeson.Number n) -> round n
+                  _ -> 0
+                _ -> 0
+          let cost = case jsonResult of
+                Aeson.Object obj -> case KeyMap.lookup "cost_cents" obj of
+                  Just (Aeson.Number n) -> realToFrac n
+                  _ -> 0.0
+                _ -> 0.0
+          let iters = case jsonResult of
+                Aeson.Object obj -> case KeyMap.lookup "iterations" obj of
+                  Just (Aeson.Number n) -> round n
+                  _ -> 0
+                _ -> 0
+          let finalResult =
+                SubagentResult
+                  { subagentOutput = jsonResult,
+                    subagentSummary = summary,
+                    subagentConfidence = 0.9,
+                    subagentTokensUsed = tokens,
+                    subagentCostCents = cost,
+                    subagentDuration = durationSecs,
+                    subagentIterations = iters,
+                    subagentStatus = SubagentSuccess
+                  }
+          onSubagentComplete callbacks finalResult
+          pure finalResult
+
+-- | Run generic (non-Coder) subagent
+runGenericSubagent :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
+runGenericSubagent keys config callbacks = do
   startTime <- Clock.getCurrentTime
 
   let role = subagentRole config
@@ -717,7 +881,8 @@ spawnSubagentTool keys =
           <> "then present the approval to the user. Only call with confirmed=true "
           <> "after the user explicitly approves. "
           <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), "
-          <> "data_extractor (structured data extraction), researcher (general research).",
+          <> "data_extractor (structured data extraction), researcher (general research), "
+          <> "coder (hardened coding with init/verify/commit phases - requires task_id and namespace).",
       Engine.toolJsonSchema =
         Aeson.object
           [ "type" .= ("object" :: Text),
@@ -726,7 +891,7 @@ spawnSubagentTool keys =
                 [ "role"
                     .= Aeson.object
                       [ "type" .= ("string" :: Text),
-                        "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]),
+                        "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]),
                         "description" .= ("Subagent role determining tools and model" :: Text)
                       ],
                   "task"
@@ -754,6 +919,16 @@ spawnSubagentTool keys =
                       [ "type" .= ("number" :: Text),
                         "description" .= ("Maximum cost in cents (default: 50)" :: Text)
                       ],
+                  "task_id"
+                    .= Aeson.object
+                      [ "type" .= ("string" :: Text),
+                        "description" .= ("Task ID from jr task (required for coder role)" :: Text)
+                      ],
+                  "namespace"
+                    .= Aeson.object
+                      [ "type" .= ("string" :: Text),
+                        "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text)
+                      ],
                   "confirmed"
                     .= Aeson.object
                       [ "type" .= ("boolean" :: Text),
@@ -805,6 +980,7 @@ formatApprovalRequest config =
       CodeReviewer -> "CodeReviewer"
       DataExtractor -> "DataExtractor"
       Researcher -> "Researcher"
+      Coder -> "Coder"
       CustomRole name -> name
     estimatedTime :: Int
     estimatedTime = subagentTimeout config `div` 60
@@ -920,10 +1096,13 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
   Engine.Tool
     { Engine.toolName = "spawn_subagent",
       Engine.toolDescription =
-        "Spawn a specialized subagent for a focused task. "
-          <> "The user will receive a confirmation button to approve the spawn. "
+        "Request to spawn a specialized subagent for a focused task. "
+          <> "The user will receive a confirmation button to approve. "
+          <> "IMPORTANT: The subagent does NOT start until the user clicks Approve - "
+          <> "do NOT say 'spawned' or 'started', say 'requested' or 'awaiting approval'. "
           <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), "
-          <> "data_extractor (structured data extraction), researcher (general research).",
+          <> "data_extractor (structured data extraction), researcher (general research), "
+          <> "coder (hardened coding with init/verify/commit - requires task_id and namespace).",
       Engine.toolJsonSchema =
         Aeson.object
           [ "type" .= ("object" :: Text),
@@ -932,7 +1111,7 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
                 [ "role"
                     .= Aeson.object
                       [ "type" .= ("string" :: Text),
-                        "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]),
+                        "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]),
                         "description" .= ("Subagent role determining tools and model" :: Text)
                       ],
                   "task"
@@ -959,6 +1138,16 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
                     .= Aeson.object
                       [ "type" .= ("number" :: Text),
                         "description" .= ("Maximum cost in cents (default: 50)" :: Text)
+                      ],
+                  "task_id"
+                    .= Aeson.object
+                      [ "type" .= ("string" :: Text),
+                        "description" .= ("Task ID from jr task (required for coder role)" :: Text)
+                      ],
+                  "namespace"
+                    .= Aeson.object
+                      [ "type" .= ("string" :: Text),
+                        "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text)
                       ]
                 ],
             "required" .= (["role", "task"] :: [Text])
@@ -971,34 +1160,35 @@ executeSpawnWithApproval _keys chatId onApprovalNeeded v =
   case Aeson.fromJSON v of
     Aeson.Error e -> pure <| Aeson.object ["error" .= ("Invalid arguments: " <> Text.pack e)]
     Aeson.Success config -> do
-      pendingId <- createPendingSpawn config chatId
+      (pid, subagentId) <- createPendingSpawn config chatId
       let roleText = case subagentRole config of
             WebCrawler -> "web_crawler"
             CodeReviewer -> "code_reviewer"
             DataExtractor -> "data_extractor"
             Researcher -> "researcher"
+            Coder -> "coder"
             CustomRole name -> name
           estimatedMins = subagentTimeout config `div` 60
           maxCost = subagentMaxCost config
-      onApprovalNeeded chatId pendingId roleText (subagentTask config) estimatedMins maxCost
+      onApprovalNeeded chatId pid roleText (subagentTask config) estimatedMins maxCost
       pure
         <| Aeson.object
           [ "status" .= ("pending_approval" :: Text),
-            "pending_id" .= pendingId,
-            "message" .= ("Approval button sent to user. Waiting for confirmation." :: Text)
+            "subagent_id" .= subagentId,
+            "message" .= ("Subagent requested. User must click Approve button before it starts. Do not say spawned yet." :: Text)
           ]
 
 -- | Approve a pending spawn and start the subagent
 approveAndSpawnSubagent :: SubagentApiKeys -> Text -> IO (Either Text Text)
-approveAndSpawnSubagent keys pendingId = do
-  maybePending <- getPendingSpawn pendingId
+approveAndSpawnSubagent keys pid = do
+  maybePending <- getPendingSpawn pid
   case maybePending of
     Nothing -> pure (Left "Pending spawn not found or expired")
     Just pending -> do
-      removePendingSpawn pendingId
+      removePendingSpawn pid
       uuid <- Data.UUID.V4.nextRandom
       let sessionId = AuditLog.SessionId ("subagent-" <> Text.take 8 (Data.UUID.toText uuid))
-      subHandle <- spawnSubagentAsync sessionId Nothing keys (pendingConfig pending)
+      subHandle <- spawnSubagentAsyncWithId sessionId Nothing keys (pendingConfig pending) (Just (pendingSubagentId pending))
       registerSubagent subHandle
       let sid = AuditLog.unSubagentId (handleId subHandle)
       pure (Right sid)
diff --git a/Omni/Agent/Subagent/Coder.hs b/Omni/Agent/Subagent/Coder.hs
new file mode 100644
index 0000000..0f5a274
--- /dev/null
+++ b/Omni/Agent/Subagent/Coder.hs
@@ -0,0 +1,415 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | Hardened Coder Subagent with automatic init/verify/commit phases.
+--
+-- Based on Anthropic's "Effective Harnesses for Long-Running Agents":
+-- https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- Key features:
+-- - Init phase: Check git status, build namespace to detect broken state
+-- - Work phase: Standard agent loop with tools
+-- - Verify phase: Run lint --fix, build namespace, run tests
+-- - Commit phase: Git commit with namespace-prefixed message
+-- - Recovery phase: Revert changes on failure
+--
+-- : out omni-agent-subagent-coder
+-- : dep aeson
+-- : dep async
+-- : dep stm
+-- : dep time
+module Omni.Agent.Subagent.Coder
+  ( -- * Types
+    CoderConfig (..),
+    CoderContext (..),
+    InitResult (..),
+    VerifyResult (..),
+
+    -- * Main Entry Point
+    runCoderSubagent,
+
+    -- * Phases (exported for testing)
+    runCoderInit,
+    runCoderVerify,
+    runCoderCommit,
+    runCoderRecovery,
+
+    -- * Helpers
+    defaultCoderConfig,
+    coderSystemPrompt,
+    coderTools,
+
+    -- * Testing
+    main,
+    test,
+  )
+where
+
+import Alpha
+import Data.Aeson ((.=))
+import qualified Data.Aeson as Aeson
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Clock
+import qualified Omni.Agent.Engine as Engine
+import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Tools as Tools
+import qualified Omni.Test as Test
+import qualified System.Exit as Exit
+import qualified System.Process as Process
+
+main :: IO ()
+main = Test.run test
+
+test :: Test.Tree
+test =
+  Test.group
+    "Omni.Agent.Subagent.Coder"
+    [ Test.unit "CoderConfig has sensible defaults" <| do
+        let cfg = defaultCoderConfig "Omni/Task" "Implement feature X"
+        coderNamespace cfg Test.@=? "Omni/Task"
+        coderTimeout cfg Test.@=? 1200,
+      Test.unit "coderSystemPrompt includes namespace" <| do
+        let cfg = defaultCoderConfig "Biz/Cloud" "Fix the bug"
+        let prompt = coderSystemPrompt cfg Nothing
+        Text.isInfixOf "Biz/Cloud" prompt Test.@=? True,
+      Test.unit "runBashCapture returns output" <| do
+        (code, out, _) <- runBashCapture "echo hello"
+        code Test.@=? Exit.ExitSuccess
+        Text.strip out Test.@=? "hello"
+    ]
+
+-- | Configuration for a Coder subagent run
+data CoderConfig = CoderConfig
+  { coderNamespace :: Text,
+    coderTask :: Text,
+    coderContext :: Maybe Text,
+    coderModel :: Text,
+    coderTimeout :: Int,
+    coderMaxCost :: Double,
+    coderMaxTokens :: Int,
+    coderMaxIterations :: Int,
+    coderMaxVerifyRetries :: Int
+  }
+  deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON CoderConfig
+
+instance Aeson.FromJSON CoderConfig
+
+-- | Runtime context gathered during init phase
+data CoderContext = CoderContext
+  { ctxGitBranch :: Text,
+    ctxInitiallyBroken :: Bool,
+    ctxChangedFiles :: [Text]
+  }
+  deriving (Show, Eq, Generic)
+
+-- | Result of the init phase
+data InitResult
+  = InitSuccess CoderContext
+  | InitBrokenState Text CoderContext
+  | InitFailed Text
+  deriving (Show, Eq)
+
+-- | Result of the verify phase
+data VerifyResult
+  = VerifySuccess
+  | VerifyLintFailed Text
+  | VerifyTypecheckFailed Text
+  | VerifyBuildFailed Text
+  | VerifyTestFailed Text
+  deriving (Show, Eq)
+
+defaultCoderConfig :: Text -> Text -> CoderConfig
+defaultCoderConfig namespace task =
+  CoderConfig
+    { coderNamespace = namespace,
+      coderTask = task,
+      coderContext = Nothing,
+      coderModel = "anthropic/claude-sonnet-4",
+      coderTimeout = 1200,
+      coderMaxCost = 200.0,
+      coderMaxTokens = 300000,
+      coderMaxIterations = 30,
+      coderMaxVerifyRetries = 3
+    }
+
+-- | Run a bash command and capture output
+runBashCapture :: Text -> IO (Exit.ExitCode, Text, Text)
+runBashCapture cmd = do
+  (code, out, err) <- Process.readProcessWithExitCode "bash" ["-c", Text.unpack cmd] ""
+  pure (code, Text.pack out, Text.pack err)
+
+-- | Phase 1: Initialize - check environment, detect broken state
+runCoderInit :: CoderConfig -> IO InitResult
+runCoderInit cfg = do
+  -- Check git status
+  (_, branchOut, _) <- runBashCapture "git branch --show-current"
+  let gitBranch = Text.strip branchOut
+
+  -- Try to build the namespace to detect broken state
+  (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1 || true")
+
+  let isBroken = buildCode /= Exit.ExitSuccess
+  let ctx =
+        CoderContext
+          { ctxGitBranch = gitBranch,
+            ctxInitiallyBroken = isBroken,
+            ctxChangedFiles = []
+          }
+
+  if isBroken
+    then pure <| InitBrokenState ("Build failed: " <> Text.take 500 (buildOut <> buildErr)) ctx
+    else pure <| InitSuccess ctx
+
+-- | Phase 3: Verify - run lint --fix, typecheck, build, test
+--
+-- Verification order:
+-- 1. lint --fix: Auto-apply formatting and simple fixes
+-- 2. typecheck.sh: Run type checker (works for Python and other languages)
+-- 3. bild: Build/compile the namespace
+-- 4. bild --test: Run tests
+runCoderVerify :: CoderConfig -> IO VerifyResult
+runCoderVerify cfg = do
+  -- First, run lint --fix to auto-apply formatting
+  _ <- runBashCapture ("lint --fix " <> coderNamespace cfg <> ".hs 2>&1")
+
+  -- Run typecheck.sh (handles Python mypy, etc.)
+  (typecheckCode, typecheckOut, typecheckErr) <-
+    runBashCapture ("typecheck.sh " <> coderNamespace cfg <> " 2>&1 || true")
+  -- Note: typecheck.sh may not exist for all namespaces, so we use || true
+  -- and only fail if it explicitly returns non-zero with actual errors
+  let typecheckFailed =
+        typecheckCode
+          /= Exit.ExitSuccess
+          && not (Text.null (Text.strip typecheckOut))
+          && not ("No such file" `Text.isInfixOf` typecheckErr)
+
+  if typecheckFailed
+    then pure <| VerifyTypecheckFailed (Text.take 1000 (typecheckOut <> typecheckErr))
+    else do
+      -- Build the namespace
+      (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1")
+      case buildCode of
+        Exit.ExitSuccess -> do
+          -- Run tests if they exist
+          (testCode, testOut, testErr) <- runBashCapture ("bild --test " <> coderNamespace cfg <> " 2>&1")
+          case testCode of
+            Exit.ExitSuccess -> pure VerifySuccess
+            _ -> pure <| VerifyTestFailed (Text.take 1000 (testOut <> testErr))
+        _ -> pure <| VerifyBuildFailed (Text.take 1000 (buildOut <> buildErr))
+
+-- | Phase 4: Commit - git commit with message
+runCoderCommit :: CoderConfig -> Text -> IO (Either Text ())
+runCoderCommit cfg summary = do
+  let commitMsg = coderNamespace cfg <> ": " <> Text.take 72 summary
+
+  -- Stage all changes
+  (addCode, _, addErr) <- runBashCapture "git add -A"
+  case addCode of
+    Exit.ExitSuccess -> do
+      -- Commit
+      (commitCode, _, commitErr) <- runBashCapture ("git commit -m " <> quoteShell commitMsg)
+      case commitCode of
+        Exit.ExitSuccess -> pure (Right ())
+        _ -> pure <| Left ("git commit failed: " <> commitErr)
+    _ -> pure <| Left ("git add failed: " <> addErr)
+
+-- | Phase 5: Recovery - revert changes on failure
+runCoderRecovery :: IO ()
+runCoderRecovery = do
+  -- Revert uncommitted changes
+  _ <- runBashCapture "git checkout -- ."
+  _ <- runBashCapture "git clean -fd"
+  pure ()
+
+-- | Quote a string for shell
+quoteShell :: Text -> Text
+quoteShell t = "'" <> Text.replace "'" "'\\''" t <> "'"
+
+-- | Tools available to the Coder subagent
+coderTools :: [Engine.Tool]
+coderTools =
+  [ Tools.readFileTool,
+    Tools.writeFileTool,
+    Tools.editFileTool,
+    Tools.runBashTool,
+    Tools.searchCodebaseTool,
+    Tools.searchAndReadTool
+  ]
+
+-- | System prompt for the Coder subagent
+coderSystemPrompt :: CoderConfig -> Maybe Text -> Text
+coderSystemPrompt cfg maybeInitState =
+  Text.unlines
+    [ "You are a specialized Coder subagent.",
+      "",
+      "## Your Task",
+      coderTask cfg,
+      "",
+      "## Namespace",
+      "You are working in namespace: " <> coderNamespace cfg,
+      "Focus your changes on files in this namespace.",
+      "",
+      maybe "" (\ctx -> "## Context\n" <> ctx <> "\n") (coderContext cfg),
+      maybe "" (\st -> "## Current State\n" <> st <> "\n") maybeInitState,
+      "## Guidelines",
+      "1. Make incremental changes - edit one file at a time",
+      "2. After each edit, consider running `lint " <> coderNamespace cfg <> ".hs` to catch issues early",
+      "3. Use `bild " <> coderNamespace cfg <> "` to check your work compiles",
+      "4. If tests exist, run `bild --test " <> coderNamespace cfg <> "` to verify",
+      "5. Keep changes focused on the task - don't refactor unrelated code",
+      "6. If you find bugs unrelated to your task, note them but don't fix them",
+      "",
+      "## Completion",
+      "When you believe you're done:",
+      "1. Ensure all edits are complete",
+      "2. The harness will automatically run lint --fix, build, and tests",
+      "3. If verification fails, you'll be asked to fix the issues",
+      "4. Provide a brief summary of what you changed",
+      "",
+      "## Important",
+      "- Do NOT commit - the harness handles git operations",
+      "- Focus on making the code changes correctly"
+    ]
+
+-- | Format verify error for agent retry prompt
+formatVerifyError :: VerifyResult -> Text
+formatVerifyError VerifySuccess = ""
+formatVerifyError (VerifyLintFailed err) = "LINT ERRORS:\n" <> err
+formatVerifyError (VerifyTypecheckFailed err) = "TYPE ERRORS:\n" <> err
+formatVerifyError (VerifyBuildFailed err) = "BUILD ERRORS:\n" <> err
+formatVerifyError (VerifyTestFailed err) = "TEST FAILURES:\n" <> err
+
+-- | Main entry point: run the Coder subagent with all phases
+--
+-- This implements Anthropic's pattern of looping back to the agent on
+-- verification failure. The agent gets up to coderMaxVerifyRetries attempts
+-- to fix verification errors before we give up.
+--
+-- See: https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- NOTE: We may want to change this behavior in the future. For some errors,
+-- immediate failure might be better than retry loops. But for now, we follow
+-- Anthropic's recommendation of letting the agent attempt to fix its mistakes.
+runCoderSubagent :: Text -> CoderConfig -> IO (Either Text Aeson.Value)
+runCoderSubagent openRouterKey cfg = do
+  startTime <- Clock.getCurrentTime
+
+  -- Phase 1: Init
+  initResult <- runCoderInit cfg
+  (ctx, initState) <- case initResult of
+    InitFailed err -> pure (Left err, Nothing)
+    InitBrokenState msg ctxVal -> pure (Right ctxVal, Just ("WARNING: Starting from broken state.\n" <> msg))
+    InitSuccess ctxVal -> pure (Right ctxVal, Nothing)
+
+  case ctx of
+    Left err -> pure <| Left ("Init failed: " <> err)
+    Right _context -> do
+      result <- runWorkVerifyLoop openRouterKey cfg initState 0 0 0.0
+      case result of
+        Left err -> do
+          runCoderRecovery
+          pure <| Left err
+        Right (finalMsg, totalTokens, totalCost, totalIters) -> do
+          -- Phase 4: Commit
+          let summary = Text.take 100 finalMsg
+          commitResult <- runCoderCommit cfg summary
+          case commitResult of
+            Left err -> do
+              runCoderRecovery
+              pure <| Left ("Commit failed: " <> err)
+            Right () -> do
+              endTime <- Clock.getCurrentTime
+              let duration = round (Clock.diffUTCTime endTime startTime)
+              pure
+                <| Right
+                <| Aeson.object
+                  [ "status" .= ("success" :: Text),
+                    "namespace" .= coderNamespace cfg,
+                    "summary" .= finalMsg,
+                    "tokens_used" .= totalTokens,
+                    "cost_cents" .= totalCost,
+                    "duration_seconds" .= (duration :: Int),
+                    "iterations" .= totalIters
+                  ]
+
+-- | Work-verify loop with retries on verification failure
+--
+-- Per Anthropic's guidance, we loop back to the agent when verification fails,
+-- giving it a chance to fix its mistakes. This is more effective than
+-- immediately failing, as the agent often can fix simple issues.
+runWorkVerifyLoop ::
+  Text ->
+  CoderConfig ->
+  Maybe Text ->
+  Int ->
+  Int ->
+  Double ->
+  IO (Either Text (Text, Int, Double, Int))
+runWorkVerifyLoop openRouterKey cfg initState verifyAttempt accTokens accCost = do
+  let isRetry = verifyAttempt > 0
+  let maxRetries = coderMaxVerifyRetries cfg
+
+  -- Build the prompt - include error context if this is a retry
+  let baseTask = coderTask cfg
+  let taskPrompt =
+        if isRetry
+          then baseTask <> "\n\n## VERIFICATION FAILED - PLEASE FIX\nThis is retry attempt " <> tshow verifyAttempt <> " of " <> tshow maxRetries <> "."
+          else baseTask
+
+  -- Phase 2: Work (run the agent)
+  let provider = Provider.defaultOpenRouter openRouterKey (coderModel cfg)
+  let systemPrompt = coderSystemPrompt cfg initState
+  let guardrails =
+        Engine.Guardrails
+          { Engine.guardrailMaxCostCents = coderMaxCost cfg - accCost,
+            Engine.guardrailMaxTokens = coderMaxTokens cfg - accTokens,
+            Engine.guardrailMaxDuplicateToolCalls = 20,
+            Engine.guardrailMaxTestFailures = 5,
+            Engine.guardrailMaxEditFailures = 10
+          }
+  let agentConfig =
+        Engine.AgentConfig
+          { Engine.agentModel = coderModel cfg,
+            Engine.agentTools = coderTools,
+            Engine.agentSystemPrompt = systemPrompt,
+            Engine.agentMaxIterations = coderMaxIterations cfg,
+            Engine.agentGuardrails = guardrails
+          }
+  let engineConfig = Engine.defaultEngineConfig
+
+  let timeoutMicros = coderTimeout cfg * 1000000
+  agentResult <-
+    race
+      (threadDelay timeoutMicros)
+      (Engine.runAgentWithProvider engineConfig provider agentConfig taskPrompt)
+
+  case agentResult of
+    Left () -> pure <| Left "Coder subagent timed out"
+    Right (Left err) -> pure <| Left ("Coder agent failed: " <> err)
+    Right (Right result) -> do
+      let newTokens = accTokens + Engine.resultTotalTokens result
+      let newCost = accCost + Engine.resultTotalCost result
+      let newIters = Engine.resultIterations result
+
+      -- Phase 3: Verify
+      verifyResult <- runCoderVerify cfg
+      case verifyResult of
+        VerifySuccess ->
+          pure <| Right (Engine.resultFinalMessage result, newTokens, newCost, newIters)
+        failure
+          | verifyAttempt >= maxRetries -> do
+              -- Max retries exceeded, give up
+              let errMsg = "Verification failed after " <> tshow (verifyAttempt + 1) <> " attempts: " <> formatVerifyError failure
+              pure <| Left errMsg
+          | otherwise -> do
+              -- Retry: loop back to agent with error context
+              -- NOTE: This follows Anthropic's pattern. We may want to change
+              -- this for certain error types in the future.
+              let errorContext = formatVerifyError failure
+              let retryState = case initState of
+                    Just st -> Just (st <> "\n\n" <> errorContext)
+                    Nothing -> Just errorContext
+              runWorkVerifyLoop openRouterKey cfg retryState (verifyAttempt + 1) newTokens newCost
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs
index 23a760a..9e6eca0 100644
--- a/Omni/Agent/Telegram.hs
+++ b/Omni/Agent/Telegram.hs
@@ -240,6 +240,8 @@ getRawUpdates cfg offset = do
           <> show (Types.tgPollingTimeout cfg)
           <> "&offset="
           <> show offset
+          <> "&allowed_updates="
+          <> "%5B%22message%22%2C%22callback_query%22%5D"
   result <-
     try <| do
       req0 <- HTTP.parseRequest url
@@ -539,8 +541,13 @@ runTelegramBot tgConfig provider = do
     offset <- readTVarIO offsetVar
     rawUpdates <- getRawUpdates tgConfig offset
     forM_ rawUpdates <| \rawUpdate -> do
+      let hasCallbackField = case rawUpdate of
+            Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj)
+            _ -> False
+      when hasCallbackField <| putText <| "Raw callback update received: " <> Text.take 300 (tshow rawUpdate)
       case Types.parseCallbackQuery rawUpdate of
         Just cq -> do
+          putText <| "Parsed callback query: " <> Types.cqData cq
           let updateId = getUpdateId rawUpdate
           forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1))
           handleCallbackQuery tgConfig cq
@@ -555,7 +562,12 @@ runTelegramBot tgConfig provider = do
               IncomingQueue.enqueueIncoming incomingQueues IncomingQueue.defaultBatchWindowSeconds msg
             Nothing -> do
               let updateId = getUpdateId rawUpdate
-              putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate)
+                  hasCallback = case rawUpdate of
+                    Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj)
+                    _ -> False
+              if hasCallback
+                then putText <| "Failed to parse callback_query: " <> Text.take 500 (tshow rawUpdate)
+                else putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate)
               forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1))
     when (null rawUpdates) <| threadDelay 1000000
 
@@ -588,30 +600,42 @@ handleCallbackQuery tgConfig cq = do
       userId = Types.cqFromId cq
       userName = Types.cqFromFirstName cq
   putText <| "Callback query from " <> userName <> ": " <> callbackData
+  result <- try @SomeException <| handleCallbackQueryInner tgConfig cq chatId userId callbackData
+  case result of
+    Left err -> putText <| "Callback handler error: " <> tshow err
+    Right () -> pure ()
+
+handleCallbackQueryInner :: Types.TelegramConfig -> Types.CallbackQuery -> Int -> Int -> Text -> IO ()
+handleCallbackQueryInner tgConfig cq chatId userId callbackData = do
   if not (Types.isUserAllowed tgConfig userId)
     then do
       answerCallbackQuery tgConfig (Types.cqId cq) (Just "Not authorized")
       putText <| "Unauthorized callback from user " <> tshow userId
     else case Text.splitOn ":" callbackData of
       ["subagent_approve", pendingId] -> do
+        putText <| "Approving subagent spawn: " <> pendingId
         answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawning subagent...")
         let keys =
               Subagent.SubagentApiKeys
                 { Subagent.subagentOpenRouterKey = Types.tgOpenRouterApiKey tgConfig,
                   Subagent.subagentKagiKey = Types.tgKagiApiKey tgConfig
                 }
-        result <- Subagent.approveAndSpawnSubagent keys pendingId
-        case result of
+        spawnResult <- Subagent.approveAndSpawnSubagent keys pendingId
+        putText <| "Spawn result: " <> tshow spawnResult
+        case spawnResult of
           Left err -> do
-            sendMessage tgConfig chatId ("❌ Failed to spawn subagent: " <> err)
+            putText <| "Spawn failed: " <> err
+            sendMessage tgConfig chatId ("Failed to spawn subagent: " <> err)
           Right subagentId -> do
-            sendMessage tgConfig chatId ("🚀 Subagent **" <> subagentId <> "** spawned! Use `check_subagent` to monitor progress.")
+            putText <| "Spawn succeeded, subagent ID: " <> subagentId
+            sendMessage tgConfig chatId ("Subagent " <> subagentId <> " spawned! Use check_subagent to monitor progress.")
       ["subagent_reject", pendingId] -> do
+        putText <| "Rejecting subagent spawn: " <> pendingId
         rejected <- Subagent.rejectPendingSpawn pendingId
         if rejected
           then do
             answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawn cancelled")
-            sendMessage tgConfig chatId "❌ Subagent spawn cancelled."
+            sendMessage tgConfig chatId "Subagent spawn cancelled."
           else answerCallbackQuery tgConfig (Types.cqId cq) (Just "Already expired")
       _ -> do
         answerCallbackQuery tgConfig (Types.cqId cq) (Just "Unknown action")
author	Ben Sima <ben@bensima.com>	2025-12-18 13:39:40 -0500
committer	Ben Sima <ben@bensima.com>	2025-12-18 13:39:40 -0500
commit	bd7724068938daa44dc74c28ab0aa5c45477bbfd (patch)
tree	919201025d3f7c95656b66947b48ecc983db5e6c /Omni
parent	133df9a099785b5eabb5ad19bcd7daa33eff9afe (diff)