From e2ea8308d74582d5651ed933dea9428ce8982d25 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Wed, 17 Dec 2025 22:05:40 -0500 Subject: feat(ava): subagent hardening with audit logging Based on Anthropic's effective harnesses research. New modules: - Omni/Agent/AuditLog.hs: JSONL audit logging with SubagentId linking - Omni/Agent/Tools/AvaLogs.hs: Tool for Ava to query her own logs - Omni/Agent/Subagent/HARDENING.md: Design documentation Key features: - SubagentHandle with TVar status for async execution and polling - spawnSubagentAsync, querySubagentStatus, waitSubagent, cancelSubagent - User confirmation: spawn_subagent requires confirmed=true after approval - Audit logs stored in $AVA_DATA_ROOT/logs/{ava,subagents}/ - CLI: ava logs [--last=N] [] - read_ava_logs tool for Ava self-diagnosis Tasks: t-267, t-268, t-269, t-270, t-271 --- Omni/Agent/Subagent.hs | 215 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 207 insertions(+), 8 deletions(-) (limited to 'Omni/Agent/Subagent.hs') diff --git a/Omni/Agent/Subagent.hs b/Omni/Agent/Subagent.hs index c8e56d5..39288db 100644 --- a/Omni/Agent/Subagent.hs +++ b/Omni/Agent/Subagent.hs @@ -12,10 +12,14 @@ -- - Per-subagent resource limits (timeout, cost, tokens) -- - Structured result format with confidence scores -- - No sub-subagent spawning (hierarchical control) +-- - Async execution with status polling +-- - Audit logging for all events -- -- : out omni-agent-subagent -- : dep aeson -- : dep async +-- : dep stm +-- : dep uuid module Omni.Agent.Subagent ( -- * Types SubagentRole (..), @@ -23,8 +27,17 @@ module Omni.Agent.Subagent SubagentResult (..), SubagentStatus (..), SubagentCallbacks (..), + SubagentHandle (..), + SubagentRunStatus (..), - -- * Execution + -- * Async Execution + spawnSubagentAsync, + querySubagentStatus, + isSubagentDone, + waitSubagent, + cancelSubagent, + + -- * Sync Execution (legacy) runSubagent, runSubagentWithCallbacks, @@ -48,16 +61,20 @@ module Omni.Agent.Subagent where import Alpha +import Control.Concurrent.STM (TVar, newTVarIO, readTVar, readTVarIO, writeTVar) import Data.Aeson ((.!=), (.:), (.:?), (.=)) import qualified Data.Aeson as Aeson +import qualified Data.Aeson.KeyMap as KeyMap import qualified Data.Text as Text import qualified Data.Time.Clock as Clock +import qualified Omni.Agent.AuditLog as AuditLog import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Provider as Provider import qualified Omni.Agent.Tools as Tools import qualified Omni.Agent.Tools.WebReader as WebReader import qualified Omni.Agent.Tools.WebSearch as WebSearch import qualified Omni.Test as Test +import Text.Printf (printf) main :: IO () main = Test.run test @@ -128,7 +145,17 @@ test = Test.unit "spawnSubagentTool has correct name" <| do let keys = SubagentApiKeys "test-openrouter-key" (Just "test-kagi-key") let tool = spawnSubagentTool keys - Engine.toolName tool Test.@=? "spawn_subagent" + Engine.toolName tool Test.@=? "spawn_subagent", + Test.unit "spawn_subagent returns approval request when not confirmed" <| do + let keys = SubagentApiKeys "test-openrouter-key" (Just "test-kagi-key") + let tool = spawnSubagentTool keys + let args = Aeson.object ["role" .= ("web_crawler" :: Text), "task" .= ("test task" :: Text)] + result <- Engine.toolExecute tool args + case result of + Aeson.Object obj -> do + let status = KeyMap.lookup "status" obj + status Test.@=? Just (Aeson.String "awaiting_approval") + _ -> Test.assertFailure "Expected object response" ] data SubagentRole @@ -248,6 +275,125 @@ defaultCallbacks = onSubagentComplete = \_ -> pure () } +data SubagentHandle = SubagentHandle + { handleId :: AuditLog.SubagentId, + handleAsync :: Async SubagentResult, + handleStartTime :: Clock.UTCTime, + handleConfig :: SubagentConfig, + handleStatus :: TVar SubagentRunStatus + } + +data SubagentRunStatus = SubagentRunStatus + { runIteration :: Int, + runTokensUsed :: Int, + runCostCents :: Double, + runElapsedSeconds :: Int, + runCurrentActivity :: Text, + runLastToolCall :: Maybe (Text, Clock.UTCTime) + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON SubagentRunStatus + +initialRunStatus :: SubagentRunStatus +initialRunStatus = + SubagentRunStatus + { runIteration = 0, + runTokensUsed = 0, + runCostCents = 0.0, + runElapsedSeconds = 0, + runCurrentActivity = "Starting...", + runLastToolCall = Nothing + } + +spawnSubagentAsync :: AuditLog.SessionId -> Maybe Text -> SubagentApiKeys -> SubagentConfig -> IO SubagentHandle +spawnSubagentAsync sessionId userId keys config = do + sid <- AuditLog.newSubagentId + startTime <- Clock.getCurrentTime + statusVar <- newTVarIO initialRunStatus + + let logEntry evType content = do + entry <- + AuditLog.mkLogEntry + sessionId + (AuditLog.AgentId ("subagent-" <> AuditLog.unSubagentId sid)) + userId + evType + content + AuditLog.emptyMetadata + AuditLog.writeSubagentLog sid entry + + logEntry AuditLog.SubagentSpawn + <| Aeson.object + [ "role" .= subagentRole config, + "task" .= subagentTask config, + "subagent_id" .= sid + ] + + let callbacks = + SubagentCallbacks + { onSubagentStart = \msg -> do + logEntry AuditLog.AssistantMessage (Aeson.String msg) + atomically <| writeTVar statusVar <| initialRunStatus {runCurrentActivity = msg}, + onSubagentActivity = \msg -> do + now <- Clock.getCurrentTime + let elapsed = round (Clock.diffUTCTime now startTime) + logEntry AuditLog.AssistantMessage (Aeson.String msg) + atomically <| do + status <- readTVar statusVar + writeTVar statusVar <| status {runCurrentActivity = msg, runElapsedSeconds = elapsed}, + onSubagentToolCall = \tool args -> do + now <- Clock.getCurrentTime + let elapsed = round (Clock.diffUTCTime now startTime) + logEntry AuditLog.ToolCall (Aeson.object ["tool" .= tool, "args" .= args]) + atomically <| do + status <- readTVar statusVar + writeTVar statusVar + <| status + { runCurrentActivity = "Calling " <> tool, + runLastToolCall = Just (tool, now), + runElapsedSeconds = elapsed + }, + onSubagentComplete = \result -> do + logEntry AuditLog.SubagentComplete + <| Aeson.object + [ "status" .= subagentStatus result, + "summary" .= subagentSummary result, + "tokens" .= subagentTokensUsed result, + "cost_cents" .= subagentCostCents result, + "duration" .= subagentDuration result + ] + } + + asyncHandle <- async (runSubagentWithCallbacks keys config callbacks) + + pure + SubagentHandle + { handleId = sid, + handleAsync = asyncHandle, + handleStartTime = startTime, + handleConfig = config, + handleStatus = statusVar + } + +querySubagentStatus :: SubagentHandle -> IO SubagentRunStatus +querySubagentStatus h = do + now <- Clock.getCurrentTime + let elapsed = round (Clock.diffUTCTime now (handleStartTime h)) + status <- readTVarIO (handleStatus h) + pure <| status {runElapsedSeconds = elapsed} + +isSubagentDone :: SubagentHandle -> IO Bool +isSubagentDone h = do + result <- poll (handleAsync h) + pure <| isJust result + +waitSubagent :: SubagentHandle -> IO SubagentResult +waitSubagent h = wait (handleAsync h) + +cancelSubagent :: SubagentHandle -> IO () +cancelSubagent h = cancel (handleAsync h) + defaultSubagentConfig :: SubagentRole -> Text -> SubagentConfig defaultSubagentConfig role task = SubagentConfig @@ -460,9 +606,9 @@ spawnSubagentTool keys = { Engine.toolName = "spawn_subagent", Engine.toolDescription = "Spawn a specialized subagent for a focused task. " - <> "Use for tasks that benefit from deep exploration, parallel execution, " - <> "or specialized tools. The subagent will iterate until task completion " - <> "or resource limits are reached. " + <> "IMPORTANT: First call with confirmed=false to get approval request, " + <> "then present the approval to the user. Only call with confirmed=true " + <> "after the user explicitly approves. " <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), " <> "data_extractor (structured data extraction), researcher (general research).", Engine.toolJsonSchema = @@ -500,6 +646,11 @@ spawnSubagentTool keys = .= Aeson.object [ "type" .= ("number" :: Text), "description" .= ("Maximum cost in cents (default: 50)" :: Text) + ], + "confirmed" + .= Aeson.object + [ "type" .= ("boolean" :: Text), + "description" .= ("Set to true only after user approval. First call should use false." :: Text) ] ], "required" .= (["role", "task"] :: [Text]) @@ -507,10 +658,58 @@ spawnSubagentTool keys = Engine.toolExecute = executeSpawnSubagent keys } +data SpawnRequest = SpawnRequest + { spawnConfig :: SubagentConfig, + spawnConfirmed :: Bool + } + deriving (Show, Eq) + +instance Aeson.FromJSON SpawnRequest where + parseJSON = + Aeson.withObject "SpawnRequest" <| \v -> do + config <- Aeson.parseJSON (Aeson.Object v) + confirmed <- v .:? "confirmed" .!= False + pure SpawnRequest {spawnConfig = config, spawnConfirmed = confirmed} + +formatApprovalRequest :: SubagentConfig -> Aeson.Value +formatApprovalRequest config = + Aeson.object + [ "status" .= ("awaiting_approval" :: Text), + "message" .= approvalMessage, + "estimated_time_minutes" .= estimatedTime, + "max_cost_cents" .= subagentMaxCost config, + "role" .= subagentRole config, + "task" .= subagentTask config + ] + where + approvalMessage :: Text + approvalMessage = + "I'd like to spawn a " + <> roleText + <> " subagent to: " + <> subagentTask config + <> "\n\nEstimated: " + <> tshow estimatedTime + <> " minutes, up to $" + <> costStr + <> "\n\nProceed? (yes/no)" + roleText = case subagentRole config of + WebCrawler -> "WebCrawler" + CodeReviewer -> "CodeReviewer" + DataExtractor -> "DataExtractor" + Researcher -> "Researcher" + CustomRole name -> name + estimatedTime :: Int + estimatedTime = subagentTimeout config `div` 60 + costStr = Text.pack (printf "%.2f" (subagentMaxCost config / 100)) + executeSpawnSubagent :: SubagentApiKeys -> Aeson.Value -> IO Aeson.Value executeSpawnSubagent keys v = case Aeson.fromJSON v of Aeson.Error e -> pure <| Aeson.object ["error" .= ("Invalid arguments: " <> Text.pack e)] - Aeson.Success config -> do - result <- runSubagent keys config - pure (Aeson.toJSON result) + Aeson.Success req -> + if spawnConfirmed req + then do + result <- runSubagent keys (spawnConfig req) + pure (Aeson.toJSON result) + else pure (formatApprovalRequest (spawnConfig req)) -- cgit v1.2.3