summaryrefslogtreecommitdiff
path: root/Omni
diff options
context:
space:
mode:
authorBen Sima <ben@bensima.com>2025-12-18 13:39:40 -0500
committerBen Sima <ben@bensima.com>2025-12-18 13:39:40 -0500
commitbd7724068938daa44dc74c28ab0aa5c45477bbfd (patch)
tree919201025d3f7c95656b66947b48ecc983db5e6c /Omni
parent133df9a099785b5eabb5ad19bcd7daa33eff9afe (diff)
Omni/Agent/Subagent/Coder: decouple from jr task system
Remove task_id requirement and all jr task CLI calls. The Coder subagent now only requires namespace and task description - no external task tracking needed. Changes: - Remove coderTaskId from CoderConfig - Remove jr task show/update/comment calls - Commit message uses namespace prefix instead of task ID - Recovery phase just reverts git, no task comment - Subagent.hs only validates namespace for Coder role
Diffstat (limited to 'Omni')
-rw-r--r--Omni/Agent/Subagent.hs240
-rw-r--r--Omni/Agent/Subagent/Coder.hs415
-rw-r--r--Omni/Agent/Telegram.hs36
3 files changed, 660 insertions, 31 deletions
diff --git a/Omni/Agent/Subagent.hs b/Omni/Agent/Subagent.hs
index 3278e4c..06ef938 100644
--- a/Omni/Agent/Subagent.hs
+++ b/Omni/Agent/Subagent.hs
@@ -93,6 +93,7 @@ import qualified Data.UUID.V4
import qualified Omni.Agent.AuditLog as AuditLog
import qualified Omni.Agent.Engine as Engine
import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Subagent.Coder as Coder
import qualified Omni.Agent.Tools as Tools
import qualified Omni.Agent.Tools.WebReader as WebReader
import qualified Omni.Agent.Tools.WebSearch as WebSearch
@@ -139,6 +140,7 @@ cleanupRegistry = do
-- | A pending spawn request awaiting user confirmation
data PendingSpawn = PendingSpawn
{ pendingId :: Text,
+ pendingSubagentId :: AuditLog.SubagentId,
pendingConfig :: SubagentConfig,
pendingChatId :: Int,
pendingCreatedAt :: Clock.UTCTime
@@ -151,20 +153,23 @@ pendingSpawnRegistry = unsafePerformIO (newIORef Map.empty)
{-# NOINLINE pendingSpawnRegistry #-}
-- | Create a new pending spawn request
-createPendingSpawn :: SubagentConfig -> Int -> IO Text
+-- Returns (pendingId, subagentId) - the subagentId is pre-generated so agent can track it
+createPendingSpawn :: SubagentConfig -> Int -> IO (Text, Text)
createPendingSpawn config chatId = do
uuid <- Data.UUID.V4.nextRandom
- let pendingId = Text.take 8 (Data.UUID.toText uuid)
+ let pid = Text.take 8 (Data.UUID.toText uuid)
+ subagentId <- AuditLog.newSubagentId
now <- Clock.getCurrentTime
let pending =
PendingSpawn
- { pendingId = pendingId,
+ { pendingId = pid,
+ pendingSubagentId = subagentId,
pendingConfig = config,
pendingChatId = chatId,
pendingCreatedAt = now
}
- modifyIORef' pendingSpawnRegistry (Map.insert pendingId pending)
- pure pendingId
+ modifyIORef' pendingSpawnRegistry (Map.insert pid pending)
+ pure (pid, AuditLog.unSubagentId subagentId)
-- | Get a pending spawn by ID
getPendingSpawn :: Text -> IO (Maybe PendingSpawn)
@@ -262,7 +267,32 @@ test =
Aeson.Object obj -> do
let status = KeyMap.lookup "status" obj
status Test.@=? Just (Aeson.String "awaiting_approval")
- _ -> Test.assertFailure "Expected object response"
+ _ -> Test.assertFailure "Expected object response",
+ Test.unit "pending spawn create and lookup works" <| do
+ let config = defaultSubagentConfig WebCrawler "test pending task"
+ (pid, sid) <- createPendingSpawn config 12345
+ when (Text.null pid) <| Test.assertFailure "pending ID should not be empty"
+ when (Text.null sid) <| Test.assertFailure "subagent ID should not be empty"
+ maybePending <- getPendingSpawn pid
+ case maybePending of
+ Nothing -> Test.assertFailure "Pending spawn not found after creation"
+ Just p -> do
+ pendingChatId p Test.@=? 12345
+ subagentTask (pendingConfig p) Test.@=? "test pending task"
+ AuditLog.unSubagentId (pendingSubagentId p) Test.@=? sid
+ removePendingSpawn pid
+ afterRemove <- getPendingSpawn pid
+ afterRemove Test.@=? Nothing,
+ Test.unit "pending spawn registry is isolated" <| do
+ let config = defaultSubagentConfig Researcher "isolated test"
+ (pid1, _) <- createPendingSpawn config 111
+ (pid2, _) <- createPendingSpawn config 222
+ when (pid1 == pid2) <| Test.assertFailure "IDs should be different"
+ p1 <- getPendingSpawn pid1
+ p2 <- getPendingSpawn pid2
+ when (isNothing p1 || isNothing p2) <| Test.assertFailure "Both should exist"
+ removePendingSpawn pid1
+ removePendingSpawn pid2
]
data SubagentRole
@@ -270,6 +300,7 @@ data SubagentRole
| CodeReviewer
| DataExtractor
| Researcher
+ | Coder
| CustomRole Text
deriving (Show, Eq, Generic)
@@ -278,6 +309,7 @@ instance Aeson.ToJSON SubagentRole where
toJSON CodeReviewer = Aeson.String "code_reviewer"
toJSON DataExtractor = Aeson.String "data_extractor"
toJSON Researcher = Aeson.String "researcher"
+ toJSON Coder = Aeson.String "coder"
toJSON (CustomRole name) = Aeson.String name
instance Aeson.FromJSON SubagentRole where
@@ -287,6 +319,7 @@ instance Aeson.FromJSON SubagentRole where
parseRole "code_reviewer" = pure CodeReviewer
parseRole "data_extractor" = pure DataExtractor
parseRole "researcher" = pure Researcher
+ parseRole "coder" = pure Coder
parseRole name = pure (CustomRole name)
data SubagentConfig = SubagentConfig
@@ -298,7 +331,11 @@ data SubagentConfig = SubagentConfig
subagentMaxTokens :: Int,
subagentMaxIterations :: Int,
subagentExtendedThinking :: Bool,
- subagentContext :: Maybe Text
+ subagentContext :: Maybe Text,
+ -- | Optional task ID for tracking (not used by Coder)
+ subagentTaskId :: Maybe Text,
+ -- | Namespace for Coder role - required (e.g., "Omni/Agent/Subagent")
+ subagentNamespace :: Maybe Text
}
deriving (Show, Eq, Generic)
@@ -314,7 +351,9 @@ instance Aeson.ToJSON SubagentConfig where
Just ("max_tokens" .= subagentMaxTokens c),
Just ("max_iterations" .= subagentMaxIterations c),
Just ("extended_thinking" .= subagentExtendedThinking c),
- ("context" .=) </ subagentContext c
+ ("context" .=) </ subagentContext c,
+ ("task_id" .=) </ subagentTaskId c,
+ ("namespace" .=) </ subagentNamespace c
]
instance Aeson.FromJSON SubagentConfig where
@@ -329,6 +368,8 @@ instance Aeson.FromJSON SubagentConfig where
<*> (v .:? "max_iterations" .!= 20)
<*> (v .:? "extended_thinking" .!= False)
<*> (v .:? "context")
+ <*> (v .:? "task_id")
+ <*> (v .:? "namespace")
data SubagentResult = SubagentResult
{ subagentOutput :: Aeson.Value,
@@ -414,8 +455,13 @@ initialRunStatus =
}
spawnSubagentAsync :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> IO SubagentHandle
-spawnSubagentAsync sessionId userId keys config = do
- sid <- AuditLog.newSubagentId
+spawnSubagentAsync sessionId userId keys config =
+ spawnSubagentAsyncWithId sessionId userId keys config Nothing
+
+-- | Spawn subagent with optional pre-generated ID (for pending spawn flow)
+spawnSubagentAsyncWithId :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> Maybe AuditLog.SubagentId -> IO SubagentHandle
+spawnSubagentAsyncWithId sessionId userId keys config maybePregenId = do
+ sid <- maybe AuditLog.newSubagentId pure maybePregenId
startTime <- Clock.getCurrentTime
statusVar <- newTVarIO initialRunStatus
@@ -512,7 +558,9 @@ defaultSubagentConfig role task =
subagentMaxTokens = 200000,
subagentMaxIterations = 20,
subagentExtendedThinking = False,
- subagentContext = Nothing
+ subagentContext = Nothing,
+ subagentTaskId = Nothing,
+ subagentNamespace = Nothing
}
modelForRole :: SubagentRole -> Text
@@ -520,6 +568,7 @@ modelForRole WebCrawler = "anthropic/claude-3-haiku"
modelForRole CodeReviewer = "anthropic/claude-sonnet-4"
modelForRole DataExtractor = "anthropic/claude-3-haiku"
modelForRole Researcher = "anthropic/claude-sonnet-4"
+modelForRole Coder = "anthropic/claude-sonnet-4"
modelForRole (CustomRole _) = "anthropic/claude-sonnet-4"
data SubagentApiKeys = SubagentApiKeys
@@ -558,6 +607,8 @@ toolsForRole Researcher keys =
Tools.searchCodebaseTool,
Tools.searchAndReadTool
]
+-- Coder uses the hardened Coder module, toolsForRole not used
+toolsForRole Coder _keys = Coder.coderTools
toolsForRole (CustomRole _) keys = toolsForRole Researcher keys
systemPromptForRole :: SubagentRole -> Text -> Maybe Text -> Text
@@ -602,6 +653,7 @@ systemPromptForRole role task maybeContext =
roleDescription CodeReviewer = "code review"
roleDescription DataExtractor = "data extraction"
roleDescription Researcher = "research"
+ roleDescription Coder = "coding"
roleDescription (CustomRole name) = name
runSubagent :: SubagentApiKeys -> SubagentConfig -> IO SubagentResult
@@ -609,6 +661,118 @@ runSubagent keys config = runSubagentWithCallbacks keys config defaultCallbacks
runSubagentWithCallbacks :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
runSubagentWithCallbacks keys config callbacks = do
+ let role = subagentRole config
+
+ -- Coder role uses the hardened Coder module with init/verify/commit phases
+ case role of
+ Coder -> runCoderSubagentWrapper keys config callbacks
+ _ -> runGenericSubagent keys config callbacks
+
+-- | Run Coder subagent using the hardened Coder module
+runCoderSubagentWrapper :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
+runCoderSubagentWrapper keys config callbacks = do
+ startTime <- Clock.getCurrentTime
+
+ -- Validate required namespace field for Coder role
+ let namespace = fromMaybe "" (subagentNamespace config)
+
+ if Text.null namespace
+ then
+ pure
+ SubagentResult
+ { subagentOutput = Aeson.object ["error" .= ("Coder role requires namespace field" :: Text)],
+ subagentSummary = "Missing required field: namespace",
+ subagentConfidence = 0.0,
+ subagentTokensUsed = 0,
+ subagentCostCents = 0.0,
+ subagentDuration = 0,
+ subagentIterations = 0,
+ subagentStatus = SubagentError "Missing namespace"
+ }
+ else do
+ onSubagentStart callbacks ("Starting Coder subagent for " <> namespace <> "...")
+
+ -- Build CoderConfig from SubagentConfig
+ let coderCfg =
+ Coder.CoderConfig
+ { Coder.coderNamespace = namespace,
+ Coder.coderTask = subagentTask config,
+ Coder.coderContext = subagentContext config,
+ Coder.coderModel = fromMaybe "anthropic/claude-sonnet-4" (subagentModel config),
+ Coder.coderTimeout = subagentTimeout config,
+ Coder.coderMaxCost = subagentMaxCost config,
+ Coder.coderMaxTokens = subagentMaxTokens config,
+ Coder.coderMaxIterations = subagentMaxIterations config,
+ Coder.coderMaxVerifyRetries = 3
+ }
+
+ result <- Coder.runCoderSubagent (subagentOpenRouterKey keys) coderCfg
+
+ endTime <- Clock.getCurrentTime
+ let durationSecs = round (Clock.diffUTCTime endTime startTime)
+
+ case result of
+ Left err -> do
+ onSubagentComplete callbacks
+ <| SubagentResult
+ { subagentOutput = Aeson.object ["error" .= err],
+ subagentSummary = "Coder failed: " <> Text.take 200 err,
+ subagentConfidence = 0.0,
+ subagentTokensUsed = 0,
+ subagentCostCents = 0.0,
+ subagentDuration = durationSecs,
+ subagentIterations = 0,
+ subagentStatus = SubagentError err
+ }
+ pure
+ SubagentResult
+ { subagentOutput = Aeson.object ["error" .= err],
+ subagentSummary = "Coder failed: " <> Text.take 200 err,
+ subagentConfidence = 0.0,
+ subagentTokensUsed = 0,
+ subagentCostCents = 0.0,
+ subagentDuration = durationSecs,
+ subagentIterations = 0,
+ subagentStatus = SubagentError err
+ }
+ Right jsonResult -> do
+ let summary = case jsonResult of
+ Aeson.Object obj -> case KeyMap.lookup "summary" obj of
+ Just (Aeson.String s) -> Text.take 200 s
+ _ -> "Coder completed successfully"
+ _ -> "Coder completed successfully"
+ let tokens = case jsonResult of
+ Aeson.Object obj -> case KeyMap.lookup "tokens_used" obj of
+ Just (Aeson.Number n) -> round n
+ _ -> 0
+ _ -> 0
+ let cost = case jsonResult of
+ Aeson.Object obj -> case KeyMap.lookup "cost_cents" obj of
+ Just (Aeson.Number n) -> realToFrac n
+ _ -> 0.0
+ _ -> 0.0
+ let iters = case jsonResult of
+ Aeson.Object obj -> case KeyMap.lookup "iterations" obj of
+ Just (Aeson.Number n) -> round n
+ _ -> 0
+ _ -> 0
+ let finalResult =
+ SubagentResult
+ { subagentOutput = jsonResult,
+ subagentSummary = summary,
+ subagentConfidence = 0.9,
+ subagentTokensUsed = tokens,
+ subagentCostCents = cost,
+ subagentDuration = durationSecs,
+ subagentIterations = iters,
+ subagentStatus = SubagentSuccess
+ }
+ onSubagentComplete callbacks finalResult
+ pure finalResult
+
+-- | Run generic (non-Coder) subagent
+runGenericSubagent :: SubagentApiKeys -> SubagentConfig -> SubagentCallbacks -> IO SubagentResult
+runGenericSubagent keys config callbacks = do
startTime <- Clock.getCurrentTime
let role = subagentRole config
@@ -717,7 +881,8 @@ spawnSubagentTool keys =
<> "then present the approval to the user. Only call with confirmed=true "
<> "after the user explicitly approves. "
<> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), "
- <> "data_extractor (structured data extraction), researcher (general research).",
+ <> "data_extractor (structured data extraction), researcher (general research), "
+ <> "coder (hardened coding with init/verify/commit phases - requires task_id and namespace).",
Engine.toolJsonSchema =
Aeson.object
[ "type" .= ("object" :: Text),
@@ -726,7 +891,7 @@ spawnSubagentTool keys =
[ "role"
.= Aeson.object
[ "type" .= ("string" :: Text),
- "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]),
+ "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]),
"description" .= ("Subagent role determining tools and model" :: Text)
],
"task"
@@ -754,6 +919,16 @@ spawnSubagentTool keys =
[ "type" .= ("number" :: Text),
"description" .= ("Maximum cost in cents (default: 50)" :: Text)
],
+ "task_id"
+ .= Aeson.object
+ [ "type" .= ("string" :: Text),
+ "description" .= ("Task ID from jr task (required for coder role)" :: Text)
+ ],
+ "namespace"
+ .= Aeson.object
+ [ "type" .= ("string" :: Text),
+ "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text)
+ ],
"confirmed"
.= Aeson.object
[ "type" .= ("boolean" :: Text),
@@ -805,6 +980,7 @@ formatApprovalRequest config =
CodeReviewer -> "CodeReviewer"
DataExtractor -> "DataExtractor"
Researcher -> "Researcher"
+ Coder -> "Coder"
CustomRole name -> name
estimatedTime :: Int
estimatedTime = subagentTimeout config `div` 60
@@ -920,10 +1096,13 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
Engine.Tool
{ Engine.toolName = "spawn_subagent",
Engine.toolDescription =
- "Spawn a specialized subagent for a focused task. "
- <> "The user will receive a confirmation button to approve the spawn. "
+ "Request to spawn a specialized subagent for a focused task. "
+ <> "The user will receive a confirmation button to approve. "
+ <> "IMPORTANT: The subagent does NOT start until the user clicks Approve - "
+ <> "do NOT say 'spawned' or 'started', say 'requested' or 'awaiting approval'. "
<> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), "
- <> "data_extractor (structured data extraction), researcher (general research).",
+ <> "data_extractor (structured data extraction), researcher (general research), "
+ <> "coder (hardened coding with init/verify/commit - requires task_id and namespace).",
Engine.toolJsonSchema =
Aeson.object
[ "type" .= ("object" :: Text),
@@ -932,7 +1111,7 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
[ "role"
.= Aeson.object
[ "type" .= ("string" :: Text),
- "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]),
+ "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher", "coder"] :: [Text]),
"description" .= ("Subagent role determining tools and model" :: Text)
],
"task"
@@ -959,6 +1138,16 @@ spawnSubagentToolWithApproval keys chatId onApprovalNeeded =
.= Aeson.object
[ "type" .= ("number" :: Text),
"description" .= ("Maximum cost in cents (default: 50)" :: Text)
+ ],
+ "task_id"
+ .= Aeson.object
+ [ "type" .= ("string" :: Text),
+ "description" .= ("Task ID from jr task (required for coder role)" :: Text)
+ ],
+ "namespace"
+ .= Aeson.object
+ [ "type" .= ("string" :: Text),
+ "description" .= ("Code namespace like 'Omni/Agent/Subagent' (required for coder role)" :: Text)
]
],
"required" .= (["role", "task"] :: [Text])
@@ -971,34 +1160,35 @@ executeSpawnWithApproval _keys chatId onApprovalNeeded v =
case Aeson.fromJSON v of
Aeson.Error e -> pure <| Aeson.object ["error" .= ("Invalid arguments: " <> Text.pack e)]
Aeson.Success config -> do
- pendingId <- createPendingSpawn config chatId
+ (pid, subagentId) <- createPendingSpawn config chatId
let roleText = case subagentRole config of
WebCrawler -> "web_crawler"
CodeReviewer -> "code_reviewer"
DataExtractor -> "data_extractor"
Researcher -> "researcher"
+ Coder -> "coder"
CustomRole name -> name
estimatedMins = subagentTimeout config `div` 60
maxCost = subagentMaxCost config
- onApprovalNeeded chatId pendingId roleText (subagentTask config) estimatedMins maxCost
+ onApprovalNeeded chatId pid roleText (subagentTask config) estimatedMins maxCost
pure
<| Aeson.object
[ "status" .= ("pending_approval" :: Text),
- "pending_id" .= pendingId,
- "message" .= ("Approval button sent to user. Waiting for confirmation." :: Text)
+ "subagent_id" .= subagentId,
+ "message" .= ("Subagent requested. User must click Approve button before it starts. Do not say spawned yet." :: Text)
]
-- | Approve a pending spawn and start the subagent
approveAndSpawnSubagent :: SubagentApiKeys -> Text -> IO (Either Text Text)
-approveAndSpawnSubagent keys pendingId = do
- maybePending <- getPendingSpawn pendingId
+approveAndSpawnSubagent keys pid = do
+ maybePending <- getPendingSpawn pid
case maybePending of
Nothing -> pure (Left "Pending spawn not found or expired")
Just pending -> do
- removePendingSpawn pendingId
+ removePendingSpawn pid
uuid <- Data.UUID.V4.nextRandom
let sessionId = AuditLog.SessionId ("subagent-" <> Text.take 8 (Data.UUID.toText uuid))
- subHandle <- spawnSubagentAsync sessionId Nothing keys (pendingConfig pending)
+ subHandle <- spawnSubagentAsyncWithId sessionId Nothing keys (pendingConfig pending) (Just (pendingSubagentId pending))
registerSubagent subHandle
let sid = AuditLog.unSubagentId (handleId subHandle)
pure (Right sid)
diff --git a/Omni/Agent/Subagent/Coder.hs b/Omni/Agent/Subagent/Coder.hs
new file mode 100644
index 0000000..0f5a274
--- /dev/null
+++ b/Omni/Agent/Subagent/Coder.hs
@@ -0,0 +1,415 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | Hardened Coder Subagent with automatic init/verify/commit phases.
+--
+-- Based on Anthropic's "Effective Harnesses for Long-Running Agents":
+-- https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- Key features:
+-- - Init phase: Check git status, build namespace to detect broken state
+-- - Work phase: Standard agent loop with tools
+-- - Verify phase: Run lint --fix, build namespace, run tests
+-- - Commit phase: Git commit with namespace-prefixed message
+-- - Recovery phase: Revert changes on failure
+--
+-- : out omni-agent-subagent-coder
+-- : dep aeson
+-- : dep async
+-- : dep stm
+-- : dep time
+module Omni.Agent.Subagent.Coder
+ ( -- * Types
+ CoderConfig (..),
+ CoderContext (..),
+ InitResult (..),
+ VerifyResult (..),
+
+ -- * Main Entry Point
+ runCoderSubagent,
+
+ -- * Phases (exported for testing)
+ runCoderInit,
+ runCoderVerify,
+ runCoderCommit,
+ runCoderRecovery,
+
+ -- * Helpers
+ defaultCoderConfig,
+ coderSystemPrompt,
+ coderTools,
+
+ -- * Testing
+ main,
+ test,
+ )
+where
+
+import Alpha
+import Data.Aeson ((.=))
+import qualified Data.Aeson as Aeson
+import qualified Data.Text as Text
+import qualified Data.Time.Clock as Clock
+import qualified Omni.Agent.Engine as Engine
+import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Tools as Tools
+import qualified Omni.Test as Test
+import qualified System.Exit as Exit
+import qualified System.Process as Process
+
+main :: IO ()
+main = Test.run test
+
+test :: Test.Tree
+test =
+ Test.group
+ "Omni.Agent.Subagent.Coder"
+ [ Test.unit "CoderConfig has sensible defaults" <| do
+ let cfg = defaultCoderConfig "Omni/Task" "Implement feature X"
+ coderNamespace cfg Test.@=? "Omni/Task"
+ coderTimeout cfg Test.@=? 1200,
+ Test.unit "coderSystemPrompt includes namespace" <| do
+ let cfg = defaultCoderConfig "Biz/Cloud" "Fix the bug"
+ let prompt = coderSystemPrompt cfg Nothing
+ Text.isInfixOf "Biz/Cloud" prompt Test.@=? True,
+ Test.unit "runBashCapture returns output" <| do
+ (code, out, _) <- runBashCapture "echo hello"
+ code Test.@=? Exit.ExitSuccess
+ Text.strip out Test.@=? "hello"
+ ]
+
+-- | Configuration for a Coder subagent run
+data CoderConfig = CoderConfig
+ { coderNamespace :: Text,
+ coderTask :: Text,
+ coderContext :: Maybe Text,
+ coderModel :: Text,
+ coderTimeout :: Int,
+ coderMaxCost :: Double,
+ coderMaxTokens :: Int,
+ coderMaxIterations :: Int,
+ coderMaxVerifyRetries :: Int
+ }
+ deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON CoderConfig
+
+instance Aeson.FromJSON CoderConfig
+
+-- | Runtime context gathered during init phase
+data CoderContext = CoderContext
+ { ctxGitBranch :: Text,
+ ctxInitiallyBroken :: Bool,
+ ctxChangedFiles :: [Text]
+ }
+ deriving (Show, Eq, Generic)
+
+-- | Result of the init phase
+data InitResult
+ = InitSuccess CoderContext
+ | InitBrokenState Text CoderContext
+ | InitFailed Text
+ deriving (Show, Eq)
+
+-- | Result of the verify phase
+data VerifyResult
+ = VerifySuccess
+ | VerifyLintFailed Text
+ | VerifyTypecheckFailed Text
+ | VerifyBuildFailed Text
+ | VerifyTestFailed Text
+ deriving (Show, Eq)
+
+defaultCoderConfig :: Text -> Text -> CoderConfig
+defaultCoderConfig namespace task =
+ CoderConfig
+ { coderNamespace = namespace,
+ coderTask = task,
+ coderContext = Nothing,
+ coderModel = "anthropic/claude-sonnet-4",
+ coderTimeout = 1200,
+ coderMaxCost = 200.0,
+ coderMaxTokens = 300000,
+ coderMaxIterations = 30,
+ coderMaxVerifyRetries = 3
+ }
+
+-- | Run a bash command and capture output
+runBashCapture :: Text -> IO (Exit.ExitCode, Text, Text)
+runBashCapture cmd = do
+ (code, out, err) <- Process.readProcessWithExitCode "bash" ["-c", Text.unpack cmd] ""
+ pure (code, Text.pack out, Text.pack err)
+
+-- | Phase 1: Initialize - check environment, detect broken state
+runCoderInit :: CoderConfig -> IO InitResult
+runCoderInit cfg = do
+ -- Check git status
+ (_, branchOut, _) <- runBashCapture "git branch --show-current"
+ let gitBranch = Text.strip branchOut
+
+ -- Try to build the namespace to detect broken state
+ (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1 || true")
+
+ let isBroken = buildCode /= Exit.ExitSuccess
+ let ctx =
+ CoderContext
+ { ctxGitBranch = gitBranch,
+ ctxInitiallyBroken = isBroken,
+ ctxChangedFiles = []
+ }
+
+ if isBroken
+ then pure <| InitBrokenState ("Build failed: " <> Text.take 500 (buildOut <> buildErr)) ctx
+ else pure <| InitSuccess ctx
+
+-- | Phase 3: Verify - run lint --fix, typecheck, build, test
+--
+-- Verification order:
+-- 1. lint --fix: Auto-apply formatting and simple fixes
+-- 2. typecheck.sh: Run type checker (works for Python and other languages)
+-- 3. bild: Build/compile the namespace
+-- 4. bild --test: Run tests
+runCoderVerify :: CoderConfig -> IO VerifyResult
+runCoderVerify cfg = do
+ -- First, run lint --fix to auto-apply formatting
+ _ <- runBashCapture ("lint --fix " <> coderNamespace cfg <> ".hs 2>&1")
+
+ -- Run typecheck.sh (handles Python mypy, etc.)
+ (typecheckCode, typecheckOut, typecheckErr) <-
+ runBashCapture ("typecheck.sh " <> coderNamespace cfg <> " 2>&1 || true")
+ -- Note: typecheck.sh may not exist for all namespaces, so we use || true
+ -- and only fail if it explicitly returns non-zero with actual errors
+ let typecheckFailed =
+ typecheckCode
+ /= Exit.ExitSuccess
+ && not (Text.null (Text.strip typecheckOut))
+ && not ("No such file" `Text.isInfixOf` typecheckErr)
+
+ if typecheckFailed
+ then pure <| VerifyTypecheckFailed (Text.take 1000 (typecheckOut <> typecheckErr))
+ else do
+ -- Build the namespace
+ (buildCode, buildOut, buildErr) <- runBashCapture ("bild " <> coderNamespace cfg <> " 2>&1")
+ case buildCode of
+ Exit.ExitSuccess -> do
+ -- Run tests if they exist
+ (testCode, testOut, testErr) <- runBashCapture ("bild --test " <> coderNamespace cfg <> " 2>&1")
+ case testCode of
+ Exit.ExitSuccess -> pure VerifySuccess
+ _ -> pure <| VerifyTestFailed (Text.take 1000 (testOut <> testErr))
+ _ -> pure <| VerifyBuildFailed (Text.take 1000 (buildOut <> buildErr))
+
+-- | Phase 4: Commit - git commit with message
+runCoderCommit :: CoderConfig -> Text -> IO (Either Text ())
+runCoderCommit cfg summary = do
+ let commitMsg = coderNamespace cfg <> ": " <> Text.take 72 summary
+
+ -- Stage all changes
+ (addCode, _, addErr) <- runBashCapture "git add -A"
+ case addCode of
+ Exit.ExitSuccess -> do
+ -- Commit
+ (commitCode, _, commitErr) <- runBashCapture ("git commit -m " <> quoteShell commitMsg)
+ case commitCode of
+ Exit.ExitSuccess -> pure (Right ())
+ _ -> pure <| Left ("git commit failed: " <> commitErr)
+ _ -> pure <| Left ("git add failed: " <> addErr)
+
+-- | Phase 5: Recovery - revert changes on failure
+runCoderRecovery :: IO ()
+runCoderRecovery = do
+ -- Revert uncommitted changes
+ _ <- runBashCapture "git checkout -- ."
+ _ <- runBashCapture "git clean -fd"
+ pure ()
+
+-- | Quote a string for shell
+quoteShell :: Text -> Text
+quoteShell t = "'" <> Text.replace "'" "'\\''" t <> "'"
+
+-- | Tools available to the Coder subagent
+coderTools :: [Engine.Tool]
+coderTools =
+ [ Tools.readFileTool,
+ Tools.writeFileTool,
+ Tools.editFileTool,
+ Tools.runBashTool,
+ Tools.searchCodebaseTool,
+ Tools.searchAndReadTool
+ ]
+
+-- | System prompt for the Coder subagent
+coderSystemPrompt :: CoderConfig -> Maybe Text -> Text
+coderSystemPrompt cfg maybeInitState =
+ Text.unlines
+ [ "You are a specialized Coder subagent.",
+ "",
+ "## Your Task",
+ coderTask cfg,
+ "",
+ "## Namespace",
+ "You are working in namespace: " <> coderNamespace cfg,
+ "Focus your changes on files in this namespace.",
+ "",
+ maybe "" (\ctx -> "## Context\n" <> ctx <> "\n") (coderContext cfg),
+ maybe "" (\st -> "## Current State\n" <> st <> "\n") maybeInitState,
+ "## Guidelines",
+ "1. Make incremental changes - edit one file at a time",
+ "2. After each edit, consider running `lint " <> coderNamespace cfg <> ".hs` to catch issues early",
+ "3. Use `bild " <> coderNamespace cfg <> "` to check your work compiles",
+ "4. If tests exist, run `bild --test " <> coderNamespace cfg <> "` to verify",
+ "5. Keep changes focused on the task - don't refactor unrelated code",
+ "6. If you find bugs unrelated to your task, note them but don't fix them",
+ "",
+ "## Completion",
+ "When you believe you're done:",
+ "1. Ensure all edits are complete",
+ "2. The harness will automatically run lint --fix, build, and tests",
+ "3. If verification fails, you'll be asked to fix the issues",
+ "4. Provide a brief summary of what you changed",
+ "",
+ "## Important",
+ "- Do NOT commit - the harness handles git operations",
+ "- Focus on making the code changes correctly"
+ ]
+
+-- | Format verify error for agent retry prompt
+formatVerifyError :: VerifyResult -> Text
+formatVerifyError VerifySuccess = ""
+formatVerifyError (VerifyLintFailed err) = "LINT ERRORS:\n" <> err
+formatVerifyError (VerifyTypecheckFailed err) = "TYPE ERRORS:\n" <> err
+formatVerifyError (VerifyBuildFailed err) = "BUILD ERRORS:\n" <> err
+formatVerifyError (VerifyTestFailed err) = "TEST FAILURES:\n" <> err
+
+-- | Main entry point: run the Coder subagent with all phases
+--
+-- This implements Anthropic's pattern of looping back to the agent on
+-- verification failure. The agent gets up to coderMaxVerifyRetries attempts
+-- to fix verification errors before we give up.
+--
+-- See: https://www.anthropic.com/engineering/effective-harnesses-for-long-running-agents
+--
+-- NOTE: We may want to change this behavior in the future. For some errors,
+-- immediate failure might be better than retry loops. But for now, we follow
+-- Anthropic's recommendation of letting the agent attempt to fix its mistakes.
+runCoderSubagent :: Text -> CoderConfig -> IO (Either Text Aeson.Value)
+runCoderSubagent openRouterKey cfg = do
+ startTime <- Clock.getCurrentTime
+
+ -- Phase 1: Init
+ initResult <- runCoderInit cfg
+ (ctx, initState) <- case initResult of
+ InitFailed err -> pure (Left err, Nothing)
+ InitBrokenState msg ctxVal -> pure (Right ctxVal, Just ("WARNING: Starting from broken state.\n" <> msg))
+ InitSuccess ctxVal -> pure (Right ctxVal, Nothing)
+
+ case ctx of
+ Left err -> pure <| Left ("Init failed: " <> err)
+ Right _context -> do
+ result <- runWorkVerifyLoop openRouterKey cfg initState 0 0 0.0
+ case result of
+ Left err -> do
+ runCoderRecovery
+ pure <| Left err
+ Right (finalMsg, totalTokens, totalCost, totalIters) -> do
+ -- Phase 4: Commit
+ let summary = Text.take 100 finalMsg
+ commitResult <- runCoderCommit cfg summary
+ case commitResult of
+ Left err -> do
+ runCoderRecovery
+ pure <| Left ("Commit failed: " <> err)
+ Right () -> do
+ endTime <- Clock.getCurrentTime
+ let duration = round (Clock.diffUTCTime endTime startTime)
+ pure
+ <| Right
+ <| Aeson.object
+ [ "status" .= ("success" :: Text),
+ "namespace" .= coderNamespace cfg,
+ "summary" .= finalMsg,
+ "tokens_used" .= totalTokens,
+ "cost_cents" .= totalCost,
+ "duration_seconds" .= (duration :: Int),
+ "iterations" .= totalIters
+ ]
+
+-- | Work-verify loop with retries on verification failure
+--
+-- Per Anthropic's guidance, we loop back to the agent when verification fails,
+-- giving it a chance to fix its mistakes. This is more effective than
+-- immediately failing, as the agent often can fix simple issues.
+runWorkVerifyLoop ::
+ Text ->
+ CoderConfig ->
+ Maybe Text ->
+ Int ->
+ Int ->
+ Double ->
+ IO (Either Text (Text, Int, Double, Int))
+runWorkVerifyLoop openRouterKey cfg initState verifyAttempt accTokens accCost = do
+ let isRetry = verifyAttempt > 0
+ let maxRetries = coderMaxVerifyRetries cfg
+
+ -- Build the prompt - include error context if this is a retry
+ let baseTask = coderTask cfg
+ let taskPrompt =
+ if isRetry
+ then baseTask <> "\n\n## VERIFICATION FAILED - PLEASE FIX\nThis is retry attempt " <> tshow verifyAttempt <> " of " <> tshow maxRetries <> "."
+ else baseTask
+
+ -- Phase 2: Work (run the agent)
+ let provider = Provider.defaultOpenRouter openRouterKey (coderModel cfg)
+ let systemPrompt = coderSystemPrompt cfg initState
+ let guardrails =
+ Engine.Guardrails
+ { Engine.guardrailMaxCostCents = coderMaxCost cfg - accCost,
+ Engine.guardrailMaxTokens = coderMaxTokens cfg - accTokens,
+ Engine.guardrailMaxDuplicateToolCalls = 20,
+ Engine.guardrailMaxTestFailures = 5,
+ Engine.guardrailMaxEditFailures = 10
+ }
+ let agentConfig =
+ Engine.AgentConfig
+ { Engine.agentModel = coderModel cfg,
+ Engine.agentTools = coderTools,
+ Engine.agentSystemPrompt = systemPrompt,
+ Engine.agentMaxIterations = coderMaxIterations cfg,
+ Engine.agentGuardrails = guardrails
+ }
+ let engineConfig = Engine.defaultEngineConfig
+
+ let timeoutMicros = coderTimeout cfg * 1000000
+ agentResult <-
+ race
+ (threadDelay timeoutMicros)
+ (Engine.runAgentWithProvider engineConfig provider agentConfig taskPrompt)
+
+ case agentResult of
+ Left () -> pure <| Left "Coder subagent timed out"
+ Right (Left err) -> pure <| Left ("Coder agent failed: " <> err)
+ Right (Right result) -> do
+ let newTokens = accTokens + Engine.resultTotalTokens result
+ let newCost = accCost + Engine.resultTotalCost result
+ let newIters = Engine.resultIterations result
+
+ -- Phase 3: Verify
+ verifyResult <- runCoderVerify cfg
+ case verifyResult of
+ VerifySuccess ->
+ pure <| Right (Engine.resultFinalMessage result, newTokens, newCost, newIters)
+ failure
+ | verifyAttempt >= maxRetries -> do
+ -- Max retries exceeded, give up
+ let errMsg = "Verification failed after " <> tshow (verifyAttempt + 1) <> " attempts: " <> formatVerifyError failure
+ pure <| Left errMsg
+ | otherwise -> do
+ -- Retry: loop back to agent with error context
+ -- NOTE: This follows Anthropic's pattern. We may want to change
+ -- this for certain error types in the future.
+ let errorContext = formatVerifyError failure
+ let retryState = case initState of
+ Just st -> Just (st <> "\n\n" <> errorContext)
+ Nothing -> Just errorContext
+ runWorkVerifyLoop openRouterKey cfg retryState (verifyAttempt + 1) newTokens newCost
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs
index 23a760a..9e6eca0 100644
--- a/Omni/Agent/Telegram.hs
+++ b/Omni/Agent/Telegram.hs
@@ -240,6 +240,8 @@ getRawUpdates cfg offset = do
<> show (Types.tgPollingTimeout cfg)
<> "&offset="
<> show offset
+ <> "&allowed_updates="
+ <> "%5B%22message%22%2C%22callback_query%22%5D"
result <-
try <| do
req0 <- HTTP.parseRequest url
@@ -539,8 +541,13 @@ runTelegramBot tgConfig provider = do
offset <- readTVarIO offsetVar
rawUpdates <- getRawUpdates tgConfig offset
forM_ rawUpdates <| \rawUpdate -> do
+ let hasCallbackField = case rawUpdate of
+ Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj)
+ _ -> False
+ when hasCallbackField <| putText <| "Raw callback update received: " <> Text.take 300 (tshow rawUpdate)
case Types.parseCallbackQuery rawUpdate of
Just cq -> do
+ putText <| "Parsed callback query: " <> Types.cqData cq
let updateId = getUpdateId rawUpdate
forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1))
handleCallbackQuery tgConfig cq
@@ -555,7 +562,12 @@ runTelegramBot tgConfig provider = do
IncomingQueue.enqueueIncoming incomingQueues IncomingQueue.defaultBatchWindowSeconds msg
Nothing -> do
let updateId = getUpdateId rawUpdate
- putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate)
+ hasCallback = case rawUpdate of
+ Aeson.Object obj -> isJust (KeyMap.lookup "callback_query" obj)
+ _ -> False
+ if hasCallback
+ then putText <| "Failed to parse callback_query: " <> Text.take 500 (tshow rawUpdate)
+ else putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate)
forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1))
when (null rawUpdates) <| threadDelay 1000000
@@ -588,30 +600,42 @@ handleCallbackQuery tgConfig cq = do
userId = Types.cqFromId cq
userName = Types.cqFromFirstName cq
putText <| "Callback query from " <> userName <> ": " <> callbackData
+ result <- try @SomeException <| handleCallbackQueryInner tgConfig cq chatId userId callbackData
+ case result of
+ Left err -> putText <| "Callback handler error: " <> tshow err
+ Right () -> pure ()
+
+handleCallbackQueryInner :: Types.TelegramConfig -> Types.CallbackQuery -> Int -> Int -> Text -> IO ()
+handleCallbackQueryInner tgConfig cq chatId userId callbackData = do
if not (Types.isUserAllowed tgConfig userId)
then do
answerCallbackQuery tgConfig (Types.cqId cq) (Just "Not authorized")
putText <| "Unauthorized callback from user " <> tshow userId
else case Text.splitOn ":" callbackData of
["subagent_approve", pendingId] -> do
+ putText <| "Approving subagent spawn: " <> pendingId
answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawning subagent...")
let keys =
Subagent.SubagentApiKeys
{ Subagent.subagentOpenRouterKey = Types.tgOpenRouterApiKey tgConfig,
Subagent.subagentKagiKey = Types.tgKagiApiKey tgConfig
}
- result <- Subagent.approveAndSpawnSubagent keys pendingId
- case result of
+ spawnResult <- Subagent.approveAndSpawnSubagent keys pendingId
+ putText <| "Spawn result: " <> tshow spawnResult
+ case spawnResult of
Left err -> do
- sendMessage tgConfig chatId ("❌ Failed to spawn subagent: " <> err)
+ putText <| "Spawn failed: " <> err
+ sendMessage tgConfig chatId ("Failed to spawn subagent: " <> err)
Right subagentId -> do
- sendMessage tgConfig chatId ("🚀 Subagent **" <> subagentId <> "** spawned! Use `check_subagent` to monitor progress.")
+ putText <| "Spawn succeeded, subagent ID: " <> subagentId
+ sendMessage tgConfig chatId ("Subagent " <> subagentId <> " spawned! Use check_subagent to monitor progress.")
["subagent_reject", pendingId] -> do
+ putText <| "Rejecting subagent spawn: " <> pendingId
rejected <- Subagent.rejectPendingSpawn pendingId
if rejected
then do
answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawn cancelled")
- sendMessage tgConfig chatId "❌ Subagent spawn cancelled."
+ sendMessage tgConfig chatId "Subagent spawn cancelled."
else answerCallbackQuery tgConfig (Types.cqId cq) (Just "Already expired")
_ -> do
answerCallbackQuery tgConfig (Types.cqId cq) (Just "Unknown action")