diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-18 00:09:05 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-18 00:09:05 -0500 |
| commit | 133df9a099785b5eabb5ad19bcd7daa33eff9afe (patch) | |
| tree | 600ea108ceca9a3aad2579a4b0227c77a68dc632 /Omni/Agent | |
| parent | b5337a6c08b500cd3e603a48f8dfdb4772420929 (diff) | |
Add Telegram button confirmation for subagent spawning
Security improvement: subagents now require explicit user confirmation via
Telegram inline buttons, preventing the agent from bypassing approval.
Changes:
- Add InlineKeyboardMarkup, InlineKeyboardButton, CallbackQuery types
- Add parseCallbackQuery for handling button presses
- Add sendMessageWithKeyboard and answerCallbackQuery API functions
- Add PendingSpawn registry for tracking unconfirmed spawn requests
- Add spawnSubagentToolWithApproval that sends approval buttons
- Add handleCallbackQuery to process approve/reject button clicks
- Add approveAndSpawnSubagent and rejectPendingSpawn functions
Flow:
1. Agent calls spawn_subagent → creates pending request
2. User receives message with ✅ Approve / ❌ Reject buttons
3. Button click (outside agent loop) spawns or cancels
4. Pending requests expire after 10 minutes
Diffstat (limited to 'Omni/Agent')
| -rw-r--r-- | Omni/Agent/Subagent.hs | 169 | ||||
| -rw-r--r-- | Omni/Agent/Telegram.hs | 151 | ||||
| -rw-r--r-- | Omni/Agent/Telegram/Types.hs | 112 |
3 files changed, 417 insertions, 15 deletions
diff --git a/Omni/Agent/Subagent.hs b/Omni/Agent/Subagent.hs index c251e9d..3278e4c 100644 --- a/Omni/Agent/Subagent.hs +++ b/Omni/Agent/Subagent.hs @@ -43,14 +43,26 @@ module Omni.Agent.Subagent -- * Tool spawnSubagentTool, + spawnSubagentToolWithApproval, checkSubagentTool, subagentTools, + subagentToolsWithApproval, + ApprovalCallback, -- * Registry getSubagentHandle, listRunningSubagents, cleanupRegistry, + -- * Pending Spawns (for Telegram button confirmation) + PendingSpawn (..), + createPendingSpawn, + getPendingSpawn, + removePendingSpawn, + approveAndSpawnSubagent, + rejectPendingSpawn, + cleanupExpiredPending, + -- * Role-specific tools SubagentApiKeys (..), toolsForRole, @@ -124,6 +136,53 @@ cleanupRegistry = do stillRunning <- filterM (\(_, h) -> fmap not (isSubagentDone h)) (Map.toList registry) modifyIORef' subagentRegistry (const (Map.fromList stillRunning)) +-- | A pending spawn request awaiting user confirmation +data PendingSpawn = PendingSpawn + { pendingId :: Text, + pendingConfig :: SubagentConfig, + pendingChatId :: Int, + pendingCreatedAt :: Clock.UTCTime + } + deriving (Show, Eq) + +-- | Global registry of pending spawn requests +pendingSpawnRegistry :: IORef (Map.Map Text PendingSpawn) +pendingSpawnRegistry = unsafePerformIO (newIORef Map.empty) +{-# NOINLINE pendingSpawnRegistry #-} + +-- | Create a new pending spawn request +createPendingSpawn :: SubagentConfig -> Int -> IO Text +createPendingSpawn config chatId = do + uuid <- Data.UUID.V4.nextRandom + let pendingId = Text.take 8 (Data.UUID.toText uuid) + now <- Clock.getCurrentTime + let pending = + PendingSpawn + { pendingId = pendingId, + pendingConfig = config, + pendingChatId = chatId, + pendingCreatedAt = now + } + modifyIORef' pendingSpawnRegistry (Map.insert pendingId pending) + pure pendingId + +-- | Get a pending spawn by ID +getPendingSpawn :: Text -> IO (Maybe PendingSpawn) +getPendingSpawn pid = do + registry <- readIORef pendingSpawnRegistry + pure (Map.lookup pid registry) + +-- | Remove a pending spawn (after approval/rejection) +removePendingSpawn :: Text -> IO () +removePendingSpawn pid = modifyIORef' pendingSpawnRegistry (Map.delete pid) + +-- | Clean up expired pending spawns (older than 10 minutes) +cleanupExpiredPending :: IO () +cleanupExpiredPending = do + now <- Clock.getCurrentTime + let isExpired p = Clock.diffUTCTime now (pendingCreatedAt p) > 600 + modifyIORef' pendingSpawnRegistry (Map.filter (not <. isExpired)) + main :: IO () main = Test.run test @@ -847,6 +906,114 @@ executeCheckSubagent v = do "cost_cents" .= runCostCents status ] --- | All subagent-related tools +-- | All subagent-related tools (legacy - agent can bypass approval) subagentTools :: SubagentApiKeys -> [Engine.Tool] subagentTools keys = [spawnSubagentTool keys, checkSubagentTool] + +-- | Callback for sending approval buttons +-- Args: chatId, pendingId, role, task, estimatedMinutes, maxCostCents +type ApprovalCallback = Int -> Text -> Text -> Text -> Int -> Double -> IO () + +-- | Spawn subagent tool that requires external approval via callback +spawnSubagentToolWithApproval :: SubagentApiKeys -> Int -> ApprovalCallback -> Engine.Tool +spawnSubagentToolWithApproval keys chatId onApprovalNeeded = + Engine.Tool + { Engine.toolName = "spawn_subagent", + Engine.toolDescription = + "Spawn a specialized subagent for a focused task. " + <> "The user will receive a confirmation button to approve the spawn. " + <> "Available roles: web_crawler (fast web research), code_reviewer (thorough code analysis), " + <> "data_extractor (structured data extraction), researcher (general research).", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "role" + .= Aeson.object + [ "type" .= ("string" :: Text), + "enum" .= (["web_crawler", "code_reviewer", "data_extractor", "researcher"] :: [Text]), + "description" .= ("Subagent role determining tools and model" :: Text) + ], + "task" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("The specific task for the subagent to accomplish" :: Text) + ], + "context" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Additional context to help the subagent understand the goal" :: Text) + ], + "model" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Override the default model for this role" :: Text) + ], + "timeout" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("Timeout in seconds (default: 600)" :: Text) + ], + "max_cost_cents" + .= Aeson.object + [ "type" .= ("number" :: Text), + "description" .= ("Maximum cost in cents (default: 50)" :: Text) + ] + ], + "required" .= (["role", "task"] :: [Text]) + ], + Engine.toolExecute = executeSpawnWithApproval keys chatId onApprovalNeeded + } + +executeSpawnWithApproval :: SubagentApiKeys -> Int -> ApprovalCallback -> Aeson.Value -> IO Aeson.Value +executeSpawnWithApproval _keys chatId onApprovalNeeded v = + case Aeson.fromJSON v of + Aeson.Error e -> pure <| Aeson.object ["error" .= ("Invalid arguments: " <> Text.pack e)] + Aeson.Success config -> do + pendingId <- createPendingSpawn config chatId + let roleText = case subagentRole config of + WebCrawler -> "web_crawler" + CodeReviewer -> "code_reviewer" + DataExtractor -> "data_extractor" + Researcher -> "researcher" + CustomRole name -> name + estimatedMins = subagentTimeout config `div` 60 + maxCost = subagentMaxCost config + onApprovalNeeded chatId pendingId roleText (subagentTask config) estimatedMins maxCost + pure + <| Aeson.object + [ "status" .= ("pending_approval" :: Text), + "pending_id" .= pendingId, + "message" .= ("Approval button sent to user. Waiting for confirmation." :: Text) + ] + +-- | Approve a pending spawn and start the subagent +approveAndSpawnSubagent :: SubagentApiKeys -> Text -> IO (Either Text Text) +approveAndSpawnSubagent keys pendingId = do + maybePending <- getPendingSpawn pendingId + case maybePending of + Nothing -> pure (Left "Pending spawn not found or expired") + Just pending -> do + removePendingSpawn pendingId + uuid <- Data.UUID.V4.nextRandom + let sessionId = AuditLog.SessionId ("subagent-" <> Text.take 8 (Data.UUID.toText uuid)) + subHandle <- spawnSubagentAsync sessionId Nothing keys (pendingConfig pending) + registerSubagent subHandle + let sid = AuditLog.unSubagentId (handleId subHandle) + pure (Right sid) + +-- | Reject a pending spawn +rejectPendingSpawn :: Text -> IO Bool +rejectPendingSpawn pendingId = do + maybePending <- getPendingSpawn pendingId + case maybePending of + Nothing -> pure False + Just _ -> do + removePendingSpawn pendingId + pure True + +-- | All subagent-related tools with approval callback +subagentToolsWithApproval :: SubagentApiKeys -> Int -> ApprovalCallback -> [Engine.Tool] +subagentToolsWithApproval keys chatId onApproval = + [spawnSubagentToolWithApproval keys chatId onApproval, checkSubagentTool] diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index 59361ac..23a760a 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -33,7 +33,9 @@ module Omni.Agent.Telegram getUpdates, sendMessage, sendMessageReturningId, + sendMessageWithKeyboard, editMessage, + answerCallbackQuery, sendTypingAction, leaveChat, @@ -383,6 +385,63 @@ editMessage cfg chatId messageId text = do let respBody = HTTP.getResponseBody response putText <| "Edit message HTTP " <> tshow status <> ": " <> TE.decodeUtf8 (BL.toStrict respBody) +-- | Send a message with inline keyboard buttons +sendMessageWithKeyboard :: Types.TelegramConfig -> Int -> Text -> Types.InlineKeyboardMarkup -> IO (Maybe Int) +sendMessageWithKeyboard cfg chatId text keyboard = do + let url = + Text.unpack (Types.tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (Types.tgBotToken cfg) + <> "/sendMessage" + body = + Aeson.object + [ "chat_id" .= chatId, + "text" .= text, + "reply_markup" .= keyboard + ] + req0 <- HTTP.parseRequest url + let req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| req0 + result <- try @SomeException (HTTP.httpLBS req) + case result of + Left e -> do + putText <| "sendMessageWithKeyboard failed: " <> tshow e + pure Nothing + Right response -> do + let respBody = HTTP.getResponseBody response + case Aeson.decode respBody of + Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of + Just (Aeson.Object msgObj) -> case KeyMap.lookup "message_id" msgObj of + Just (Aeson.Number n) -> pure (Just (round n)) + _ -> pure Nothing + _ -> pure Nothing + _ -> pure Nothing + +-- | Answer a callback query (acknowledges button press) +answerCallbackQuery :: Types.TelegramConfig -> Text -> Maybe Text -> IO () +answerCallbackQuery cfg callbackId maybeText = do + let url = + Text.unpack (Types.tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (Types.tgBotToken cfg) + <> "/answerCallbackQuery" + baseFields = ["callback_query_id" .= callbackId] + textField = case maybeText of + Just txt -> ["text" .= txt] + Nothing -> [] + body = Aeson.object (baseFields <> textField) + req0 <- HTTP.parseRequest url + let req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| req0 + _ <- try @SomeException (HTTP.httpLBS req) + pure () + sendTypingAction :: Types.TelegramConfig -> Int -> IO () sendTypingAction cfg chatId = do let url = @@ -480,19 +539,24 @@ runTelegramBot tgConfig provider = do offset <- readTVarIO offsetVar rawUpdates <- getRawUpdates tgConfig offset forM_ rawUpdates <| \rawUpdate -> do - case Types.parseBotAddedToGroup botName rawUpdate of - Just addedEvent -> do - atomically (writeTVar offsetVar (Types.bagUpdateId addedEvent + 1)) - handleBotAddedToGroup tgConfig addedEvent - Nothing -> case Types.parseUpdate rawUpdate of - Just msg -> do - putText <| "Received message from " <> Types.tmUserFirstName msg <> " in chat " <> tshow (Types.tmChatId msg) <> " (type: " <> tshow (Types.tmChatType msg) <> "): " <> Text.take 50 (Types.tmText msg) - atomically (writeTVar offsetVar (Types.tmUpdateId msg + 1)) - IncomingQueue.enqueueIncoming incomingQueues IncomingQueue.defaultBatchWindowSeconds msg - Nothing -> do - let updateId = getUpdateId rawUpdate - putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate) - forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1)) + case Types.parseCallbackQuery rawUpdate of + Just cq -> do + let updateId = getUpdateId rawUpdate + forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1)) + handleCallbackQuery tgConfig cq + Nothing -> case Types.parseBotAddedToGroup botName rawUpdate of + Just addedEvent -> do + atomically (writeTVar offsetVar (Types.bagUpdateId addedEvent + 1)) + handleBotAddedToGroup tgConfig addedEvent + Nothing -> case Types.parseUpdate rawUpdate of + Just msg -> do + putText <| "Received message from " <> Types.tmUserFirstName msg <> " in chat " <> tshow (Types.tmChatId msg) <> " (type: " <> tshow (Types.tmChatType msg) <> "): " <> Text.take 50 (Types.tmText msg) + atomically (writeTVar offsetVar (Types.tmUpdateId msg + 1)) + IncomingQueue.enqueueIncoming incomingQueues IncomingQueue.defaultBatchWindowSeconds msg + Nothing -> do + let updateId = getUpdateId rawUpdate + putText <| "Unparsed update: " <> Text.take 200 (tshow rawUpdate) + forM_ updateId <| \uid -> atomically (writeTVar offsetVar (uid + 1)) when (null rawUpdates) <| threadDelay 1000000 getUpdateId :: Aeson.Value -> Maybe Int @@ -516,6 +580,43 @@ handleBotAddedToGroup tgConfig addedEvent = do _ <- Messages.enqueueImmediate Nothing chatId Nothing "sorry, you're not authorized to add me to groups." (Just "system") Nothing leaveChat tgConfig chatId +-- | Handle callback query from inline keyboard button press +handleCallbackQuery :: Types.TelegramConfig -> Types.CallbackQuery -> IO () +handleCallbackQuery tgConfig cq = do + let callbackData = Types.cqData cq + chatId = Types.cqChatId cq + userId = Types.cqFromId cq + userName = Types.cqFromFirstName cq + putText <| "Callback query from " <> userName <> ": " <> callbackData + if not (Types.isUserAllowed tgConfig userId) + then do + answerCallbackQuery tgConfig (Types.cqId cq) (Just "Not authorized") + putText <| "Unauthorized callback from user " <> tshow userId + else case Text.splitOn ":" callbackData of + ["subagent_approve", pendingId] -> do + answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawning subagent...") + let keys = + Subagent.SubagentApiKeys + { Subagent.subagentOpenRouterKey = Types.tgOpenRouterApiKey tgConfig, + Subagent.subagentKagiKey = Types.tgKagiApiKey tgConfig + } + result <- Subagent.approveAndSpawnSubagent keys pendingId + case result of + Left err -> do + sendMessage tgConfig chatId ("❌ Failed to spawn subagent: " <> err) + Right subagentId -> do + sendMessage tgConfig chatId ("🚀 Subagent **" <> subagentId <> "** spawned! Use `check_subagent` to monitor progress.") + ["subagent_reject", pendingId] -> do + rejected <- Subagent.rejectPendingSpawn pendingId + if rejected + then do + answerCallbackQuery tgConfig (Types.cqId cq) (Just "Spawn cancelled") + sendMessage tgConfig chatId "❌ Subagent spawn cancelled." + else answerCallbackQuery tgConfig (Types.cqId cq) (Just "Already expired") + _ -> do + answerCallbackQuery tgConfig (Types.cqId cq) (Just "Unknown action") + putText <| "Unknown callback data: " <> callbackData + handleMessageBatch :: Types.TelegramConfig -> Provider.Provider -> @@ -1023,7 +1124,29 @@ processEngagedMessage tgConfig provider engineCfg msg uid userName chatId userMe { Subagent.subagentOpenRouterKey = Types.tgOpenRouterApiKey tgConfig, Subagent.subagentKagiKey = Types.tgKagiApiKey tgConfig } - in Subagent.subagentTools keys + approvalCallback cid pid role task estMins maxCost = do + let approvalMsg = + "🤖 **Spawn Subagent?**\n\n" + <> "**Role:** " + <> role + <> "\n" + <> "**Task:** " + <> task + <> "\n" + <> "**Est. time:** ~" + <> tshow estMins + <> " min\n" + <> "**Max cost:** $" + <> Text.pack (printf "%.2f" (maxCost / 100)) + keyboard = + Types.InlineKeyboardMarkup + [ [ Types.InlineKeyboardButton "✅ Approve" (Just ("subagent_approve:" <> pid)) Nothing, + Types.InlineKeyboardButton "❌ Reject" (Just ("subagent_reject:" <> pid)) Nothing + ] + ] + _ <- sendMessageWithKeyboard tgConfig cid approvalMsg keyboard + pure () + in Subagent.subagentToolsWithApproval keys chatId approvalCallback else [] auditLogTools = [AvaLogs.readAvaLogsTool | isBenAuthorized userName] diff --git a/Omni/Agent/Telegram/Types.hs b/Omni/Agent/Telegram/Types.hs index 7a91df3..e6d8957 100644 --- a/Omni/Agent/Telegram/Types.hs +++ b/Omni/Agent/Telegram/Types.hs @@ -22,9 +22,15 @@ module Omni.Agent.Telegram.Types BotAddedToGroup (..), ChatType (..), + -- * Inline Keyboard + InlineKeyboardMarkup (..), + InlineKeyboardButton (..), + CallbackQuery (..), + -- * Parsing parseUpdate, parseBotAddedToGroup, + parseCallbackQuery, parseDocument, parseLargestPhoto, parsePhotoSize, @@ -652,3 +658,109 @@ shouldRespondInGroup botUsername msg mention = "@" <> Text.toLower botUsername isMentioned = mention `Text.isInfixOf` msgText isReplyToBot = isJust (tmReplyTo msg) + +-- | Inline keyboard button +data InlineKeyboardButton = InlineKeyboardButton + { ikbText :: Text, + ikbCallbackData :: Maybe Text, + ikbUrl :: Maybe Text + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON InlineKeyboardButton where + toJSON b = + Aeson.object + <| catMaybes + [ Just ("text" .= ikbText b), + ("callback_data" .=) </ ikbCallbackData b, + ("url" .=) </ ikbUrl b + ] + +instance Aeson.FromJSON InlineKeyboardButton where + parseJSON = + Aeson.withObject "InlineKeyboardButton" <| \v -> + (InlineKeyboardButton </ (v .: "text")) + <*> (v .:? "callback_data") + <*> (v .:? "url") + +-- | Inline keyboard markup (grid of buttons) +newtype InlineKeyboardMarkup = InlineKeyboardMarkup + { ikmInlineKeyboard :: [[InlineKeyboardButton]] + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON InlineKeyboardMarkup where + toJSON m = Aeson.object ["inline_keyboard" .= ikmInlineKeyboard m] + +instance Aeson.FromJSON InlineKeyboardMarkup where + parseJSON = + Aeson.withObject "InlineKeyboardMarkup" <| \v -> + InlineKeyboardMarkup </ (v .: "inline_keyboard") + +-- | Callback query from inline keyboard button press +data CallbackQuery = CallbackQuery + { cqId :: Text, + cqFromId :: Int, + cqFromFirstName :: Text, + cqChatId :: Int, + cqMessageId :: Int, + cqData :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON CallbackQuery where + toJSON cq = + Aeson.object + [ "id" .= cqId cq, + "from_id" .= cqFromId cq, + "from_first_name" .= cqFromFirstName cq, + "chat_id" .= cqChatId cq, + "message_id" .= cqMessageId cq, + "data" .= cqData cq + ] + +instance Aeson.FromJSON CallbackQuery where + parseJSON = + Aeson.withObject "CallbackQuery" <| \v -> + (CallbackQuery </ (v .: "id")) + <*> (v .: "from_id") + <*> (v .: "from_first_name") + <*> (v .: "chat_id") + <*> (v .: "message_id") + <*> (v .: "data") + +-- | Parse a callback query from a raw Telegram update +parseCallbackQuery :: Aeson.Value -> Maybe CallbackQuery +parseCallbackQuery val = do + Aeson.Object obj <- pure val + Aeson.Object cqObj <- KeyMap.lookup "callback_query" obj + cqId <- case KeyMap.lookup "id" cqObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + Aeson.Object fromObj <- KeyMap.lookup "from" cqObj + fromId <- case KeyMap.lookup "id" fromObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + fromFirstName <- case KeyMap.lookup "first_name" fromObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + Aeson.Object msgObj <- KeyMap.lookup "message" cqObj + Aeson.Object chatObj <- KeyMap.lookup "chat" msgObj + chatId <- case KeyMap.lookup "id" chatObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + messageId <- case KeyMap.lookup "message_id" msgObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + callbackData <- case KeyMap.lookup "data" cqObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + pure + CallbackQuery + { cqId = cqId, + cqFromId = fromId, + cqFromFirstName = fromFirstName, + cqChatId = chatId, + cqMessageId = messageId, + cqData = callbackData + } |
