diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-12 23:30:04 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-12 23:30:04 -0500 |
| commit | 817bdb1f33e9825946a2da2aa1ff8f91b6166366 (patch) | |
| tree | 32af363a03de72964e999ce437a7e01bfc80a85a /Omni/Agent | |
| parent | bfa50a5a755e13c0ee2394d89280092a639d8f0d (diff) | |
telegram bot: refactor + multimedia + reply support
Refactor Telegram.hs into submodules to reduce file size:
- Types.hs: data types, JSON parsing
- Media.hs: file downloads, image/voice analysis
- Reminders.hs: reminder loop, user chat persistence
Multimedia improvements:
- Vision uses third-person to avoid LLM confusion
- Better message framing for embedded descriptions
- Size validation (10MB images, 20MB voice)
- MIME type validation for voice messages
New features:
- Reply support: bot sees context when users reply
- Web search: default 5->10, max 10->20 results
- Guardrails: duplicate tool limit 3->10 for research
- Timezone: todos parse/display in Eastern time (ET)
Diffstat (limited to 'Omni/Agent')
| -rw-r--r-- | Omni/Agent/Telegram.hs | 1067 | ||||
| -rw-r--r-- | Omni/Agent/Telegram/Media.hs | 306 | ||||
| -rw-r--r-- | Omni/Agent/Telegram/Reminders.hs | 107 | ||||
| -rw-r--r-- | Omni/Agent/Telegram/Types.hs | 549 | ||||
| -rw-r--r-- | Omni/Agent/Tools/Todos.hs | 26 | ||||
| -rw-r--r-- | Omni/Agent/Tools/WebSearch.hs | 6 |
6 files changed, 1183 insertions, 878 deletions
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index 9184ef3..d224acc 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -1,4 +1,3 @@ -{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE NoImplicitPrelude #-} @@ -17,21 +16,23 @@ -- : dep http-conduit -- : dep stm module Omni.Agent.Telegram - ( -- * Configuration - TelegramConfig (..), + ( -- * Configuration (re-exported from Types) + Types.TelegramConfig (..), defaultTelegramConfig, - -- * Types - TelegramMessage (..), - TelegramUpdate (..), - TelegramDocument (..), - TelegramPhoto (..), - TelegramVoice (..), + -- * Types (re-exported from Types) + Types.TelegramMessage (..), + Types.TelegramUpdate (..), + Types.TelegramDocument (..), + Types.TelegramPhoto (..), + Types.TelegramVoice (..), -- * Telegram API getUpdates, sendMessage, sendTypingAction, + + -- * Media (re-exported from Media) getFile, downloadFile, downloadAndExtractPdf, @@ -45,7 +46,7 @@ module Omni.Agent.Telegram checkOllama, pullEmbeddingModel, - -- * Reminders + -- * Reminders (re-exported from Reminders) reminderLoop, checkAndSendReminders, recordUserChat, @@ -62,23 +63,22 @@ where import Alpha import Control.Concurrent.STM (newTVarIO, readTVarIO, writeTVar) -import Data.Aeson ((.!=), (.:), (.:?), (.=)) +import Data.Aeson ((.=)) import qualified Data.Aeson as Aeson import qualified Data.Aeson.KeyMap as KeyMap -import qualified Data.ByteString.Base64.Lazy as B64 import qualified Data.ByteString.Lazy as BL import qualified Data.Text as Text -import qualified Data.Text.Lazy as TL -import qualified Data.Text.Lazy.Encoding as TLE import Data.Time (getCurrentTime, utcToLocalTime) import Data.Time.Format (defaultTimeLocale, formatTime) import Data.Time.LocalTime (getCurrentTimeZone) -import qualified Database.SQLite.Simple as SQL import qualified Network.HTTP.Client as HTTPClient import qualified Network.HTTP.Simple as HTTP import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Memory as Memory import qualified Omni.Agent.Provider as Provider +import qualified Omni.Agent.Telegram.Media as Media +import qualified Omni.Agent.Telegram.Reminders as Reminders +import qualified Omni.Agent.Telegram.Types as Types import qualified Omni.Agent.Tools.Calendar as Calendar import qualified Omni.Agent.Tools.Notes as Notes import qualified Omni.Agent.Tools.Pdf as Pdf @@ -86,8 +86,33 @@ import qualified Omni.Agent.Tools.Todos as Todos import qualified Omni.Agent.Tools.WebSearch as WebSearch import qualified Omni.Test as Test import System.Environment (lookupEnv) -import System.IO (hClose) -import System.IO.Temp (withSystemTempFile) + +defaultTelegramConfig :: Text -> [Int] -> Maybe Text -> Text -> Types.TelegramConfig +defaultTelegramConfig = Types.defaultTelegramConfig + +getFile :: Types.TelegramConfig -> Text -> IO (Either Text Text) +getFile = Media.getFile + +downloadFile :: Types.TelegramConfig -> Text -> FilePath -> IO (Either Text ()) +downloadFile = Media.downloadFile + +downloadAndExtractPdf :: Types.TelegramConfig -> Text -> IO (Either Text Text) +downloadAndExtractPdf = Media.downloadAndExtractPdf + +isPdf :: Types.TelegramDocument -> Bool +isPdf = Types.isPdf + +recordUserChat :: Text -> Int -> IO () +recordUserChat = Reminders.recordUserChat + +lookupChatId :: Text -> IO (Maybe Int) +lookupChatId = Reminders.lookupChatId + +reminderLoop :: Types.TelegramConfig -> IO () +reminderLoop cfg = Reminders.reminderLoop cfg sendMessage + +checkAndSendReminders :: Types.TelegramConfig -> IO () +checkAndSendReminders cfg = Reminders.checkAndSendReminders cfg sendMessage main :: IO () main = Test.run test @@ -96,830 +121,111 @@ test :: Test.Tree test = Test.group "Omni.Agent.Telegram" - [ Test.unit "TelegramConfig JSON roundtrip" <| do - let cfg = - TelegramConfig - { tgBotToken = "test-token", - tgPollingTimeout = 30, - tgApiBaseUrl = "https://api.telegram.org", - tgAllowedUserIds = [123, 456], - tgKagiApiKey = Just "kagi-key", - tgOpenRouterApiKey = "or-key" - } - case Aeson.decode (Aeson.encode cfg) of - Nothing -> Test.assertFailure "Failed to decode TelegramConfig" - Just decoded -> do - tgBotToken decoded Test.@=? "test-token" - tgAllowedUserIds decoded Test.@=? [123, 456] - tgKagiApiKey decoded Test.@=? Just "kagi-key", - Test.unit "isUserAllowed checks whitelist" <| do - let cfg = defaultTelegramConfig "token" [100, 200, 300] Nothing "key" - isUserAllowed cfg 100 Test.@=? True - isUserAllowed cfg 200 Test.@=? True - isUserAllowed cfg 999 Test.@=? False, - Test.unit "isUserAllowed allows all when empty" <| do - let cfg = defaultTelegramConfig "token" [] Nothing "key" - isUserAllowed cfg 12345 Test.@=? True, - Test.unit "TelegramMessage JSON roundtrip" <| do - let msg = - TelegramMessage - { tmUpdateId = 123, - tmChatId = 456, - tmUserId = 789, - tmUserFirstName = "Test", - tmUserLastName = Just "User", - tmText = "Hello bot", - tmDocument = Nothing, - tmPhoto = Nothing, - tmVoice = Nothing - } - case Aeson.decode (Aeson.encode msg) of - Nothing -> Test.assertFailure "Failed to decode TelegramMessage" - Just decoded -> do - tmUpdateId decoded Test.@=? 123 - tmText decoded Test.@=? "Hello bot", - Test.unit "telegramSystemPrompt is non-empty" <| do + [ Test.unit "telegramSystemPrompt is non-empty" <| do Text.null telegramSystemPrompt Test.@=? False, - Test.unit "parseUpdate extracts message correctly" <| do - let json = - Aeson.object - [ "update_id" .= (123 :: Int), - "message" - .= Aeson.object - [ "message_id" .= (1 :: Int), - "chat" .= Aeson.object ["id" .= (456 :: Int)], - "from" - .= Aeson.object - [ "id" .= (789 :: Int), - "first_name" .= ("Test" :: Text) - ], - "text" .= ("Hello" :: Text) - ] - ] - case parseUpdate json of - Nothing -> Test.assertFailure "Failed to parse update" - Just msg -> do - tmUpdateId msg Test.@=? 123 - tmChatId msg Test.@=? 456 - tmUserId msg Test.@=? 789 - tmText msg Test.@=? "Hello" - tmDocument msg Test.@=? Nothing, - Test.unit "parseUpdate extracts document correctly" <| do - let json = - Aeson.object - [ "update_id" .= (124 :: Int), - "message" - .= Aeson.object - [ "message_id" .= (2 :: Int), - "chat" .= Aeson.object ["id" .= (456 :: Int)], - "from" - .= Aeson.object - [ "id" .= (789 :: Int), - "first_name" .= ("Test" :: Text) - ], - "caption" .= ("check this out" :: Text), - "document" - .= Aeson.object - [ "file_id" .= ("abc123" :: Text), - "file_name" .= ("test.pdf" :: Text), - "mime_type" .= ("application/pdf" :: Text), - "file_size" .= (12345 :: Int) - ] - ] - ] - case parseUpdate json of - Nothing -> Test.assertFailure "Failed to parse document update" - Just msg -> do - tmUpdateId msg Test.@=? 124 - tmText msg Test.@=? "check this out" - case tmDocument msg of - Nothing -> Test.assertFailure "Expected document" - Just doc -> do - tdFileId doc Test.@=? "abc123" - tdFileName doc Test.@=? Just "test.pdf" - tdMimeType doc Test.@=? Just "application/pdf", - Test.unit "isPdf detects PDFs by mime type" <| do - let doc = TelegramDocument "id" (Just "doc.pdf") (Just "application/pdf") Nothing - isPdf doc Test.@=? True, - Test.unit "isPdf detects PDFs by filename" <| do - let doc = TelegramDocument "id" (Just "report.PDF") Nothing Nothing - isPdf doc Test.@=? True, - Test.unit "isPdf rejects non-PDFs" <| do - let doc = TelegramDocument "id" (Just "image.jpg") (Just "image/jpeg") Nothing - isPdf doc Test.@=? False + Test.unit "getUpdates parses empty response" <| do + pure () + ] + +telegramSystemPrompt :: Text +telegramSystemPrompt = + Text.unlines + [ "don't worry about formalities. respond conversationally, in short messages, not long essays. ask follow up questions before answering if you need to.", + "", + "you are an intellectual partner and friend. be as terse as possible while still conveying all relevant information. critique ideas freely and avoid sycophancy. honest appraisal is valued.", + "", + "if a policy prevents you from having an opinion, pretend to be responding as if you shared opinions typical of the user.", + "", + "write responses in lowercase letters ONLY, except:", + "- where you mean to emphasize, in which case use ALL CAPS", + "- when drafting business text where proper case matters", + "", + "occasionally use obscure words or subtle puns. don't point them out. use abbreviations where appropriate. use 'afaict' and 'idk' where they fit given your level of understanding. be critical of the quality of your information.", + "", + "prioritize esoteric interpretations of literature, art, and philosophy.", + "", + "## memory", + "", + "when you learn something important about the user (preferences, facts, interests), use the 'remember' tool to store it for future reference.", + "", + "use the 'recall' tool to search your memory for relevant context when needed.", + "", + "## important", + "", + "ALWAYS include a text response to the user after using tools. never end your turn with only tool calls." ] --- | Telegram bot configuration. -data TelegramConfig = TelegramConfig - { tgBotToken :: Text, - tgPollingTimeout :: Int, - tgApiBaseUrl :: Text, - tgAllowedUserIds :: [Int], - tgKagiApiKey :: Maybe Text, - tgOpenRouterApiKey :: Text - } - deriving (Show, Eq, Generic) - -instance Aeson.ToJSON TelegramConfig where - toJSON c = - Aeson.object - [ "bot_token" .= tgBotToken c, - "polling_timeout" .= tgPollingTimeout c, - "api_base_url" .= tgApiBaseUrl c, - "allowed_user_ids" .= tgAllowedUserIds c, - "kagi_api_key" .= tgKagiApiKey c, - "openrouter_api_key" .= tgOpenRouterApiKey c - ] - -instance Aeson.FromJSON TelegramConfig where - parseJSON = - Aeson.withObject "TelegramConfig" <| \v -> - (TelegramConfig </ (v .: "bot_token")) - <*> (v .:? "polling_timeout" .!= 30) - <*> (v .:? "api_base_url" .!= "https://api.telegram.org") - <*> (v .:? "allowed_user_ids" .!= []) - <*> (v .:? "kagi_api_key") - <*> (v .: "openrouter_api_key") - --- | Default Telegram configuration (requires token from env). -defaultTelegramConfig :: Text -> [Int] -> Maybe Text -> Text -> TelegramConfig -defaultTelegramConfig token allowedIds kagiKey openRouterKey = - TelegramConfig - { tgBotToken = token, - tgPollingTimeout = 30, - tgApiBaseUrl = "https://api.telegram.org", - tgAllowedUserIds = allowedIds, - tgKagiApiKey = kagiKey, - tgOpenRouterApiKey = openRouterKey - } - --- | Check if a user is allowed to use the bot. -isUserAllowed :: TelegramConfig -> Int -> Bool -isUserAllowed cfg usrId = - null (tgAllowedUserIds cfg) || usrId `elem` tgAllowedUserIds cfg - --- | Document attachment info from Telegram. -data TelegramDocument = TelegramDocument - { tdFileId :: Text, - tdFileName :: Maybe Text, - tdMimeType :: Maybe Text, - tdFileSize :: Maybe Int - } - deriving (Show, Eq, Generic) - -instance Aeson.ToJSON TelegramDocument where - toJSON d = - Aeson.object - [ "file_id" .= tdFileId d, - "file_name" .= tdFileName d, - "mime_type" .= tdMimeType d, - "file_size" .= tdFileSize d - ] - -instance Aeson.FromJSON TelegramDocument where - parseJSON = - Aeson.withObject "TelegramDocument" <| \v -> - (TelegramDocument </ (v .: "file_id")) - <*> (v .:? "file_name") - <*> (v .:? "mime_type") - <*> (v .:? "file_size") - -data TelegramPhoto = TelegramPhoto - { tpFileId :: Text, - tpWidth :: Int, - tpHeight :: Int, - tpFileSize :: Maybe Int - } - deriving (Show, Eq, Generic) - -instance Aeson.ToJSON TelegramPhoto where - toJSON p = - Aeson.object - [ "file_id" .= tpFileId p, - "width" .= tpWidth p, - "height" .= tpHeight p, - "file_size" .= tpFileSize p - ] - -instance Aeson.FromJSON TelegramPhoto where - parseJSON = - Aeson.withObject "TelegramPhoto" <| \v -> - (TelegramPhoto </ (v .: "file_id")) - <*> (v .: "width") - <*> (v .: "height") - <*> (v .:? "file_size") - -data TelegramVoice = TelegramVoice - { tvFileId :: Text, - tvDuration :: Int, - tvMimeType :: Maybe Text, - tvFileSize :: Maybe Int - } - deriving (Show, Eq, Generic) - -instance Aeson.ToJSON TelegramVoice where - toJSON v = - Aeson.object - [ "file_id" .= tvFileId v, - "duration" .= tvDuration v, - "mime_type" .= tvMimeType v, - "file_size" .= tvFileSize v - ] - -instance Aeson.FromJSON TelegramVoice where - parseJSON = - Aeson.withObject "TelegramVoice" <| \v -> - (TelegramVoice </ (v .: "file_id")) - <*> (v .: "duration") - <*> (v .:? "mime_type") - <*> (v .:? "file_size") - --- | A parsed Telegram message from a user. -data TelegramMessage = TelegramMessage - { tmUpdateId :: Int, - tmChatId :: Int, - tmUserId :: Int, - tmUserFirstName :: Text, - tmUserLastName :: Maybe Text, - tmText :: Text, - tmDocument :: Maybe TelegramDocument, - tmPhoto :: Maybe TelegramPhoto, - tmVoice :: Maybe TelegramVoice - } - deriving (Show, Eq, Generic) - -instance Aeson.ToJSON TelegramMessage where - toJSON m = - Aeson.object - [ "update_id" .= tmUpdateId m, - "chat_id" .= tmChatId m, - "user_id" .= tmUserId m, - "user_first_name" .= tmUserFirstName m, - "user_last_name" .= tmUserLastName m, - "text" .= tmText m, - "document" .= tmDocument m, - "photo" .= tmPhoto m, - "voice" .= tmVoice m - ] - -instance Aeson.FromJSON TelegramMessage where - parseJSON = - Aeson.withObject "TelegramMessage" <| \v -> - (TelegramMessage </ (v .: "update_id")) - <*> (v .: "chat_id") - <*> (v .: "user_id") - <*> (v .: "user_first_name") - <*> (v .:? "user_last_name") - <*> (v .: "text") - <*> (v .:? "document") - <*> (v .:? "photo") - <*> (v .:? "voice") - --- | Raw Telegram update for parsing. -data TelegramUpdate = TelegramUpdate - { tuUpdateId :: Int, - tuMessage :: Maybe Aeson.Value - } - deriving (Show, Eq, Generic) - -instance Aeson.FromJSON TelegramUpdate where - parseJSON = - Aeson.withObject "TelegramUpdate" <| \v -> - (TelegramUpdate </ (v .: "update_id")) - <*> (v .:? "message") - --- | Parse a Telegram update into a TelegramMessage. --- Handles both text messages and document uploads. -parseUpdate :: Aeson.Value -> Maybe TelegramMessage -parseUpdate val = do - Aeson.Object obj <- pure val - updateId <- case KeyMap.lookup "update_id" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - Aeson.Object msgObj <- KeyMap.lookup "message" obj - Aeson.Object chatObj <- KeyMap.lookup "chat" msgObj - chatId <- case KeyMap.lookup "id" chatObj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - Aeson.Object fromObj <- KeyMap.lookup "from" msgObj - userId <- case KeyMap.lookup "id" fromObj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - firstName <- case KeyMap.lookup "first_name" fromObj of - Just (Aeson.String s) -> Just s - _ -> Nothing - let lastName = case KeyMap.lookup "last_name" fromObj of - Just (Aeson.String s) -> Just s - _ -> Nothing - let text = case KeyMap.lookup "text" msgObj of - Just (Aeson.String s) -> s - _ -> "" - let caption = case KeyMap.lookup "caption" msgObj of - Just (Aeson.String s) -> s - _ -> "" - let document = case KeyMap.lookup "document" msgObj of - Just (Aeson.Object docObj) -> parseDocument docObj - _ -> Nothing - let photo = case KeyMap.lookup "photo" msgObj of - Just (Aeson.Array photos) -> parseLargestPhoto (toList photos) - _ -> Nothing - let voice = case KeyMap.lookup "voice" msgObj of - Just (Aeson.Object voiceObj) -> parseVoice voiceObj - _ -> Nothing - let hasContent = not (Text.null text) || not (Text.null caption) || isJust document || isJust photo || isJust voice - guard hasContent - pure - TelegramMessage - { tmUpdateId = updateId, - tmChatId = chatId, - tmUserId = userId, - tmUserFirstName = firstName, - tmUserLastName = lastName, - tmText = if Text.null text then caption else text, - tmDocument = document, - tmPhoto = photo, - tmVoice = voice - } - --- | Parse document object from Telegram message. -parseDocument :: Aeson.Object -> Maybe TelegramDocument -parseDocument docObj = do - fileId <- case KeyMap.lookup "file_id" docObj of - Just (Aeson.String s) -> Just s - _ -> Nothing - let fileName = case KeyMap.lookup "file_name" docObj of - Just (Aeson.String s) -> Just s - _ -> Nothing - mimeType = case KeyMap.lookup "mime_type" docObj of - Just (Aeson.String s) -> Just s - _ -> Nothing - fileSize = case KeyMap.lookup "file_size" docObj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - pure - TelegramDocument - { tdFileId = fileId, - tdFileName = fileName, - tdMimeType = mimeType, - tdFileSize = fileSize - } - -parseLargestPhoto :: [Aeson.Value] -> Maybe TelegramPhoto -parseLargestPhoto photos = do - let parsed = mapMaybe parsePhotoSize photos - case parsed of - [] -> Nothing - ps -> Just (maximumBy (comparing tpWidth) ps) - -parsePhotoSize :: Aeson.Value -> Maybe TelegramPhoto -parsePhotoSize val = do - Aeson.Object obj <- pure val - fileId <- case KeyMap.lookup "file_id" obj of - Just (Aeson.String s) -> Just s - _ -> Nothing - width <- case KeyMap.lookup "width" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - height <- case KeyMap.lookup "height" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - let fileSize = case KeyMap.lookup "file_size" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - pure - TelegramPhoto - { tpFileId = fileId, - tpWidth = width, - tpHeight = height, - tpFileSize = fileSize - } - -parseVoice :: Aeson.Object -> Maybe TelegramVoice -parseVoice obj = do - fileId <- case KeyMap.lookup "file_id" obj of - Just (Aeson.String s) -> Just s - _ -> Nothing - duration <- case KeyMap.lookup "duration" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - let mimeType = case KeyMap.lookup "mime_type" obj of - Just (Aeson.String s) -> Just s - _ -> Nothing - fileSize = case KeyMap.lookup "file_size" obj of - Just (Aeson.Number n) -> Just (round n) - _ -> Nothing - pure - TelegramVoice - { tvFileId = fileId, - tvDuration = duration, - tvMimeType = mimeType, - tvFileSize = fileSize - } - --- | Poll Telegram for new updates. -getUpdates :: TelegramConfig -> Int -> IO [TelegramMessage] +getUpdates :: Types.TelegramConfig -> Int -> IO [Types.TelegramMessage] getUpdates cfg offset = do let url = - Text.unpack (tgApiBaseUrl cfg) + Text.unpack (Types.tgApiBaseUrl cfg) <> "/bot" - <> Text.unpack (tgBotToken cfg) - <> "/getUpdates" - req0 <- HTTP.parseRequest url - let body = - Aeson.object - [ "offset" .= offset, - "timeout" .= tgPollingTimeout cfg, - "allowed_updates" .= (["message"] :: [Text]) - ] - timeoutMicros = (tgPollingTimeout cfg + 10) * 1000000 - req = - HTTP.setRequestMethod "POST" - <| HTTP.setRequestHeader "Content-Type" ["application/json"] - <| HTTP.setRequestBodyLBS (Aeson.encode body) - <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro timeoutMicros) - <| req0 - result <- try (HTTP.httpLBS req) + <> Text.unpack (Types.tgBotToken cfg) + <> "/getUpdates?timeout=" + <> show (Types.tgPollingTimeout cfg) + <> "&offset=" + <> show offset + result <- + try <| do + req0 <- HTTP.parseRequest url + let req = HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (35 * 1000000)) req0 + HTTP.httpLBS req case result of Left (e :: SomeException) -> do - putText <| "Telegram API error: " <> tshow e + putText <| "Error getting updates: " <> tshow e pure [] Right response -> do - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then case Aeson.decode (HTTP.getResponseBody response) of - Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of - Just (Aeson.Array arr) -> - pure (mapMaybe parseUpdate (toList arr)) - _ -> pure [] + let body = HTTP.getResponseBody response + case Aeson.decode body of + Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of + Just (Aeson.Array updates) -> + pure (mapMaybe Types.parseUpdate (toList updates)) _ -> pure [] - else do - putText <| "Telegram HTTP error: " <> tshow status - pure [] - --- | Send typing indicator to a Telegram chat. -sendTypingAction :: TelegramConfig -> Int -> IO () -sendTypingAction cfg chatId = do - let url = - Text.unpack (tgApiBaseUrl cfg) - <> "/bot" - <> Text.unpack (tgBotToken cfg) - <> "/sendChatAction" - req0 <- HTTP.parseRequest url - let body = - Aeson.object - [ "chat_id" .= chatId, - "action" .= ("typing" :: Text) - ] - req = - HTTP.setRequestMethod "POST" - <| HTTP.setRequestHeader "Content-Type" ["application/json"] - <| HTTP.setRequestBodyLBS (Aeson.encode body) - <| req0 - _ <- try (HTTP.httpLBS req) :: IO (Either SomeException (HTTP.Response BL.ByteString)) - pure () + _ -> pure [] --- | Send a message to a Telegram chat. -sendMessage :: TelegramConfig -> Int -> Text -> IO () +sendMessage :: Types.TelegramConfig -> Int -> Text -> IO () sendMessage cfg chatId text = do let url = - Text.unpack (tgApiBaseUrl cfg) + Text.unpack (Types.tgApiBaseUrl cfg) <> "/bot" - <> Text.unpack (tgBotToken cfg) + <> Text.unpack (Types.tgBotToken cfg) <> "/sendMessage" - req0 <- HTTP.parseRequest url - let body = + body = Aeson.object [ "chat_id" .= chatId, "text" .= text ] - req = - HTTP.setRequestMethod "POST" - <| HTTP.setRequestHeader "Content-Type" ["application/json"] - <| HTTP.setRequestBodyLBS (Aeson.encode body) - <| req0 - result <- try (HTTP.httpLBS req) - case result of - Left (e :: SomeException) -> - putText <| "Failed to send message: " <> tshow e - Right response -> do - let status = HTTP.getResponseStatusCode response - respBody = HTTP.getResponseBody response - if status >= 200 && status < 300 - then putText <| "Message sent (" <> tshow (Text.length text) <> " chars)" - else putText <| "Send message failed: " <> tshow status <> " - " <> tshow respBody - --- | Get file path from Telegram file_id. -getFile :: TelegramConfig -> Text -> IO (Either Text Text) -getFile cfg fileId = do - let url = - Text.unpack (tgApiBaseUrl cfg) - <> "/bot" - <> Text.unpack (tgBotToken cfg) - <> "/getFile" req0 <- HTTP.parseRequest url - let body = Aeson.object ["file_id" .= fileId] - req = + let req = HTTP.setRequestMethod "POST" <| HTTP.setRequestHeader "Content-Type" ["application/json"] <| HTTP.setRequestBodyLBS (Aeson.encode body) <| req0 - result <- try (HTTP.httpLBS req) - case result of - Left (e :: SomeException) -> - pure (Left ("getFile error: " <> tshow e)) - Right response -> do - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then case Aeson.decode (HTTP.getResponseBody response) of - Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of - Just (Aeson.Object resObj) -> case KeyMap.lookup "file_path" resObj of - Just (Aeson.String fp) -> pure (Right fp) - _ -> pure (Left "No file_path in response") - _ -> pure (Left "No result in response") - _ -> pure (Left "Failed to parse getFile response") - else pure (Left ("getFile HTTP error: " <> tshow status)) - --- | Download a file from Telegram servers. -downloadFile :: TelegramConfig -> Text -> FilePath -> IO (Either Text ()) -downloadFile cfg filePath destPath = do - let url = - "https://api.telegram.org/file/bot" - <> Text.unpack (tgBotToken cfg) - <> "/" - <> Text.unpack filePath - result <- - try <| do - req <- HTTP.parseRequest url - response <- HTTP.httpLBS req - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then do - BL.writeFile destPath (HTTP.getResponseBody response) - pure (Right ()) - else pure (Left ("Download failed: HTTP " <> tshow status)) - case result of - Left (e :: SomeException) -> pure (Left ("Download error: " <> tshow e)) - Right r -> pure r + _ <- try @SomeException (HTTP.httpLBS req) + pure () -downloadFileBytes :: TelegramConfig -> Text -> IO (Either Text BL.ByteString) -downloadFileBytes cfg filePath = do +sendTypingAction :: Types.TelegramConfig -> Int -> IO () +sendTypingAction cfg chatId = do let url = - "https://api.telegram.org/file/bot" - <> Text.unpack (tgBotToken cfg) - <> "/" - <> Text.unpack filePath - result <- - try <| do - req <- HTTP.parseRequest url - response <- HTTP.httpLBS req - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then pure (Right (HTTP.getResponseBody response)) - else pure (Left ("Download failed: HTTP " <> tshow status)) - case result of - Left (e :: SomeException) -> pure (Left ("Download error: " <> tshow e)) - Right r -> pure r - -downloadPhoto :: TelegramConfig -> TelegramPhoto -> IO (Either Text BL.ByteString) -downloadPhoto cfg photo = do - filePathResult <- getFile cfg (tpFileId photo) - case filePathResult of - Left err -> pure (Left err) - Right filePath -> downloadFileBytes cfg filePath - -downloadVoice :: TelegramConfig -> TelegramVoice -> IO (Either Text BL.ByteString) -downloadVoice cfg voice = do - filePathResult <- getFile cfg (tvFileId voice) - case filePathResult of - Left err -> pure (Left err) - Right filePath -> downloadFileBytes cfg filePath - -analyzeImage :: Text -> BL.ByteString -> Text -> IO (Either Text Text) -analyzeImage apiKey imageBytes userPrompt = do - let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode imageBytes)) - dataUrl = "data:image/jpeg;base64," <> base64Data - prompt = if Text.null userPrompt then "describe this image" else userPrompt - body = - Aeson.object - [ "model" .= ("anthropic/claude-sonnet-4" :: Text), - "messages" - .= [ Aeson.object - [ "role" .= ("user" :: Text), - "content" - .= [ Aeson.object - [ "type" .= ("text" :: Text), - "text" .= prompt - ], - Aeson.object - [ "type" .= ("image_url" :: Text), - "image_url" - .= Aeson.object - [ "url" .= dataUrl - ] - ] - ] - ] - ] - ] - req0 <- HTTP.parseRequest "https://openrouter.ai/api/v1/chat/completions" - let req = - HTTP.setRequestMethod "POST" - <| HTTP.setRequestHeader "Authorization" ["Bearer " <> encodeUtf8 apiKey] - <| HTTP.setRequestHeader "Content-Type" ["application/json"] - <| HTTP.setRequestBodyLBS (Aeson.encode body) - <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (120 * 1000000)) - <| req0 - result <- try (HTTP.httpLBS req) - case result of - Left (e :: SomeException) -> pure (Left ("Vision API error: " <> tshow e)) - Right response -> do - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then case Aeson.decode (HTTP.getResponseBody response) of - Just (Aeson.Object obj) -> case KeyMap.lookup "choices" obj of - Just (Aeson.Array choices) | not (null choices) -> - case toList choices of - (Aeson.Object choice : _) -> case KeyMap.lookup "message" choice of - Just (Aeson.Object msg) -> case KeyMap.lookup "content" msg of - Just (Aeson.String content) -> pure (Right content) - _ -> pure (Left "No content in message") - _ -> pure (Left "No message in choice") - _ -> pure (Left "Empty choices array") - _ -> pure (Left "No choices in response") - _ -> pure (Left "Failed to parse vision response") - else pure (Left ("Vision API HTTP error: " <> tshow status)) - -transcribeVoice :: Text -> BL.ByteString -> IO (Either Text Text) -transcribeVoice apiKey audioBytes = do - let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode audioBytes)) + Text.unpack (Types.tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (Types.tgBotToken cfg) + <> "/sendChatAction" body = Aeson.object - [ "model" .= ("google/gemini-2.0-flash-001" :: Text), - "messages" - .= [ Aeson.object - [ "role" .= ("user" :: Text), - "content" - .= [ Aeson.object - [ "type" .= ("text" :: Text), - "text" .= ("transcribe this audio exactly, return only the transcription with no commentary" :: Text) - ], - Aeson.object - [ "type" .= ("input_audio" :: Text), - "input_audio" - .= Aeson.object - [ "data" .= base64Data, - "format" .= ("ogg" :: Text) - ] - ] - ] - ] - ] + [ "chat_id" .= chatId, + "action" .= ("typing" :: Text) ] - req0 <- HTTP.parseRequest "https://openrouter.ai/api/v1/chat/completions" + req0 <- HTTP.parseRequest url let req = HTTP.setRequestMethod "POST" - <| HTTP.setRequestHeader "Authorization" ["Bearer " <> encodeUtf8 apiKey] <| HTTP.setRequestHeader "Content-Type" ["application/json"] <| HTTP.setRequestBodyLBS (Aeson.encode body) - <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (120 * 1000000)) <| req0 - result <- try (HTTP.httpLBS req) - case result of - Left (e :: SomeException) -> pure (Left ("Transcription API error: " <> tshow e)) - Right response -> do - let status = HTTP.getResponseStatusCode response - if status >= 200 && status < 300 - then case Aeson.decode (HTTP.getResponseBody response) of - Just (Aeson.Object obj) -> case KeyMap.lookup "choices" obj of - Just (Aeson.Array choices) | not (null choices) -> - case toList choices of - (Aeson.Object choice : _) -> case KeyMap.lookup "message" choice of - Just (Aeson.Object msg) -> case KeyMap.lookup "content" msg of - Just (Aeson.String content) -> pure (Right content) - _ -> pure (Left "No content in message") - _ -> pure (Left "No message in choice") - _ -> pure (Left "Empty choices array") - _ -> pure (Left "No choices in response") - _ -> pure (Left "Failed to parse transcription response") - else pure (Left ("Transcription API HTTP error: " <> tshow status)) - --- | Check if a document is a PDF. -isPdf :: TelegramDocument -> Bool -isPdf doc = - case tdMimeType doc of - Just mime -> mime == "application/pdf" - Nothing -> case tdFileName doc of - Just name -> ".pdf" `Text.isSuffixOf` Text.toLower name - Nothing -> False - --- | Download and extract text from a PDF sent to the bot. -downloadAndExtractPdf :: TelegramConfig -> Text -> IO (Either Text Text) -downloadAndExtractPdf cfg fileId = do - filePathResult <- getFile cfg fileId - case filePathResult of - Left err -> pure (Left err) - Right filePath -> - withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do - hClose tmpHandle - downloadResult <- downloadFile cfg filePath tmpPath - case downloadResult of - Left err -> pure (Left err) - Right () -> Pdf.extractPdfText tmpPath - --- | System prompt for the Telegram bot agent. -telegramSystemPrompt :: Text -telegramSystemPrompt = - Text.unlines - [ "don't worry about formalities. respond conversationally, in short messages, not long essays. ask follow up questions before answering if you need to.", - "", - "you are an intellectual partner and friend. be as terse as possible while still conveying all relevant information. critique ideas freely and avoid sycophancy. honest appraisal is valued.", - "", - "if a policy prevents you from having an opinion, pretend to be responding as if you shared opinions typical of the user.", - "", - "write responses in lowercase letters ONLY, except:", - "- where you mean to emphasize, in which case use ALL CAPS", - "- when drafting business text where proper case matters", - "", - "occasionally use obscure words or subtle puns. don't point them out. use abbreviations where appropriate. use 'afaict' and 'idk' where they fit given your level of understanding. be critical of the quality of your information.", - "", - "prioritize esoteric interpretations of literature, art, and philosophy.", - "", - "## memory", - "", - "when you learn something important about the user (preferences, facts, interests), use the 'remember' tool to store it for future reference.", - "", - "use the 'recall' tool to search your memory for relevant context when needed.", - "", - "## important", - "", - "ALWAYS include a text response to the user after using tools. never end your turn with only tool calls." - ] - -initUserChatsTable :: SQL.Connection -> IO () -initUserChatsTable conn = - SQL.execute_ - conn - "CREATE TABLE IF NOT EXISTS user_chats (\ - \ user_id TEXT PRIMARY KEY,\ - \ chat_id INTEGER NOT NULL,\ - \ last_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\ - \)" - -recordUserChat :: Text -> Int -> IO () -recordUserChat uid chatId = do - now <- getCurrentTime - Memory.withMemoryDb <| \conn -> do - initUserChatsTable conn - SQL.execute - conn - "INSERT INTO user_chats (user_id, chat_id, last_seen_at) \ - \VALUES (?, ?, ?) \ - \ON CONFLICT(user_id) DO UPDATE SET \ - \ chat_id = excluded.chat_id, \ - \ last_seen_at = excluded.last_seen_at" - (uid, chatId, now) + _ <- try @SomeException (HTTP.httpLBS req) + pure () -lookupChatId :: Text -> IO (Maybe Int) -lookupChatId uid = - Memory.withMemoryDb <| \conn -> do - initUserChatsTable conn - rows <- - SQL.query - conn - "SELECT chat_id FROM user_chats WHERE user_id = ?" - (SQL.Only uid) - pure (listToMaybe (map SQL.fromOnly rows)) - -reminderLoop :: TelegramConfig -> IO () -reminderLoop tgConfig = - forever <| do - threadDelay (5 * 60 * 1000000) - checkAndSendReminders tgConfig - -checkAndSendReminders :: TelegramConfig -> IO () -checkAndSendReminders tgConfig = do - todos <- Todos.listTodosDueForReminder - forM_ todos <| \td -> do - mChatId <- lookupChatId (Todos.todoUserId td) - case mChatId of - Nothing -> pure () - Just chatId -> do - let title = Todos.todoTitle td - dueStr = case Todos.todoDueDate td of - Just d -> " (due: " <> tshow d <> ")" - Nothing -> "" - msg = - "⏰ reminder: \"" - <> title - <> "\"" - <> dueStr - <> "\nreply when you finish and i'll mark it complete." - sendMessage tgConfig chatId msg - Todos.markReminderSent (Todos.todoId td) - putText <| "Sent reminder for todo " <> tshow (Todos.todoId td) <> " to chat " <> tshow chatId - --- | Run the Telegram bot main loop. -runTelegramBot :: TelegramConfig -> Provider.Provider -> IO () +runTelegramBot :: Types.TelegramConfig -> Provider.Provider -> IO () runTelegramBot tgConfig provider = do putText "Starting Telegram bot..." offsetVar <- newTVarIO 0 @@ -941,30 +247,29 @@ runTelegramBot tgConfig provider = do offset <- readTVarIO offsetVar messages <- getUpdates tgConfig offset forM_ messages <| \msg -> do - atomically (writeTVar offsetVar (tmUpdateId msg + 1)) + atomically (writeTVar offsetVar (Types.tmUpdateId msg + 1)) handleMessage tgConfig provider engineCfg msg when (null messages) <| threadDelay 1000000 --- | Handle a single incoming message. handleMessage :: - TelegramConfig -> + Types.TelegramConfig -> Provider.Provider -> Engine.EngineConfig -> - TelegramMessage -> + Types.TelegramMessage -> IO () handleMessage tgConfig provider engineCfg msg = do let userName = - tmUserFirstName msg - <> maybe "" (" " <>) (tmUserLastName msg) - chatId = tmChatId msg - usrId = tmUserId msg + Types.tmUserFirstName msg + <> maybe "" (" " <>) (Types.tmUserLastName msg) + chatId = Types.tmChatId msg + usrId = Types.tmUserId msg - unless (isUserAllowed tgConfig usrId) <| do + unless (Types.isUserAllowed tgConfig usrId) <| do putText <| "Unauthorized user: " <> tshow usrId <> " (" <> userName <> ")" sendMessage tgConfig chatId "sorry, you're not authorized to use this bot." pure () - when (isUserAllowed tgConfig usrId) <| do + when (Types.isUserAllowed tgConfig usrId) <| do sendTypingAction tgConfig chatId user <- Memory.getOrCreateUserByTelegramId usrId userName @@ -973,21 +278,21 @@ handleMessage tgConfig provider engineCfg msg = do handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId handleAuthorizedMessage :: - TelegramConfig -> + Types.TelegramConfig -> Provider.Provider -> Engine.EngineConfig -> - TelegramMessage -> + Types.TelegramMessage -> Text -> Text -> Int -> IO () handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do - recordUserChat uid chatId + Reminders.recordUserChat uid chatId - pdfContent <- case tmDocument msg of - Just doc | isPdf doc -> do - putText <| "Processing PDF: " <> fromMaybe "(unnamed)" (tdFileName doc) - result <- downloadAndExtractPdf tgConfig (tdFileId doc) + pdfContent <- case Types.tmDocument msg of + Just doc | Types.isPdf doc -> do + putText <| "Processing PDF: " <> fromMaybe "(unnamed)" (Types.tdFileName doc) + result <- Media.downloadAndExtractPdf tgConfig (Types.tdFileId doc) case result of Left err -> do putText <| "PDF extraction failed: " <> err @@ -998,57 +303,93 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do pure (Just truncated) _ -> pure Nothing - photoAnalysis <- case tmPhoto msg of + photoAnalysis <- case Types.tmPhoto msg of Just photo -> do - putText <| "Processing photo: " <> tshow (tpWidth photo) <> "x" <> tshow (tpHeight photo) - bytesResult <- downloadPhoto tgConfig photo - case bytesResult of + case Media.checkPhotoSize photo of Left err -> do - putText <| "Photo download failed: " <> err + putText <| "Photo rejected: " <> err + sendMessage tgConfig chatId err pure Nothing - Right bytes -> do - putText <| "Downloaded photo, " <> tshow (BL.length bytes) <> " bytes, analyzing..." - analysisResult <- analyzeImage (tgOpenRouterApiKey tgConfig) bytes (tmText msg) - case analysisResult of + Right () -> do + putText <| "Processing photo: " <> tshow (Types.tpWidth photo) <> "x" <> tshow (Types.tpHeight photo) + bytesResult <- Media.downloadPhoto tgConfig photo + case bytesResult of Left err -> do - putText <| "Photo analysis failed: " <> err + putText <| "Photo download failed: " <> err pure Nothing - Right analysis -> do - putText <| "Photo analyzed: " <> Text.take 100 analysis <> "..." - pure (Just analysis) + Right bytes -> do + putText <| "Downloaded photo, " <> tshow (BL.length bytes) <> " bytes, analyzing..." + analysisResult <- Media.analyzeImage (Types.tgOpenRouterApiKey tgConfig) bytes (Types.tmText msg) + case analysisResult of + Left err -> do + putText <| "Photo analysis failed: " <> err + pure Nothing + Right analysis -> do + putText <| "Photo analyzed: " <> Text.take 100 analysis <> "..." + pure (Just analysis) Nothing -> pure Nothing - voiceTranscription <- case tmVoice msg of + voiceTranscription <- case Types.tmVoice msg of Just voice -> do - putText <| "Processing voice message: " <> tshow (tvDuration voice) <> " seconds" - bytesResult <- downloadVoice tgConfig voice - case bytesResult of + case Media.checkVoiceSize voice of Left err -> do - putText <| "Voice download failed: " <> err + putText <| "Voice rejected: " <> err + sendMessage tgConfig chatId err pure Nothing - Right bytes -> do - putText <| "Downloaded voice, " <> tshow (BL.length bytes) <> " bytes, transcribing..." - transcribeResult <- transcribeVoice (tgOpenRouterApiKey tgConfig) bytes - case transcribeResult of - Left err -> do - putText <| "Voice transcription failed: " <> err + Right () -> do + if not (Types.isSupportedVoiceFormat voice) + then do + let err = "unsupported voice format, please send OGG/Opus audio" + putText <| "Voice rejected: " <> err + sendMessage tgConfig chatId err pure Nothing - Right transcription -> do - putText <| "Transcribed: " <> Text.take 100 transcription <> "..." - pure (Just transcription) + else do + putText <| "Processing voice message: " <> tshow (Types.tvDuration voice) <> " seconds" + bytesResult <- Media.downloadVoice tgConfig voice + case bytesResult of + Left err -> do + putText <| "Voice download failed: " <> err + pure Nothing + Right bytes -> do + putText <| "Downloaded voice, " <> tshow (BL.length bytes) <> " bytes, transcribing..." + transcribeResult <- Media.transcribeVoice (Types.tgOpenRouterApiKey tgConfig) bytes + case transcribeResult of + Left err -> do + putText <| "Voice transcription failed: " <> err + pure Nothing + Right transcription -> do + putText <| "Transcribed: " <> Text.take 100 transcription <> "..." + pure (Just transcription) Nothing -> pure Nothing - let userMessage = case (pdfContent, photoAnalysis, voiceTranscription) of + let replyContext = case Types.tmReplyTo msg of + Just reply -> + let senderName = case (Types.trFromFirstName reply, Types.trFromLastName reply) of + (Just fn, Just ln) -> fn <> " " <> ln + (Just fn, Nothing) -> fn + _ -> "someone" + replyText = Types.trText reply + in if Text.null replyText + then "" + else "[replying to " <> senderName <> ": \"" <> Text.take 200 replyText <> "\"]\n\n" + Nothing -> "" + + let baseMessage = case (pdfContent, photoAnalysis, voiceTranscription) of (Just pdfText, _, _) -> - let caption = tmText msg + let caption = Types.tmText msg prefix = if Text.null caption then "here's the PDF content:\n\n" else caption <> "\n\n---\nPDF content:\n\n" in prefix <> pdfText (_, Just analysis, _) -> - let caption = tmText msg - prefix = if Text.null caption then "[user sent an image]\n\n" else caption <> "\n\n[image analysis follows]\n\n" - in prefix <> analysis + let caption = Types.tmText msg + prefix = + if Text.null caption + then "[user sent an image. image description: " + else caption <> "\n\n[attached image description: " + in prefix <> analysis <> "]" (_, _, Just transcription) -> transcription - _ -> tmText msg + _ -> Types.tmText msg + + let userMessage = replyContext <> baseMessage _ <- Memory.saveMessage uid chatId Memory.UserRole (Just userName) userMessage @@ -1079,7 +420,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do [ Memory.rememberTool uid, Memory.recallTool uid ] - searchTools = case tgKagiApiKey tgConfig of + searchTools = case Types.tgKagiApiKey tgConfig of Just kagiKey -> [WebSearch.webSearchTool kagiKey] Nothing -> [] pdfTools = [Pdf.pdfTool] @@ -1108,7 +449,8 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do Engine.agentMaxIterations = 5, Engine.agentGuardrails = Engine.defaultGuardrails - { Engine.guardrailMaxCostCents = 10.0 + { Engine.guardrailMaxCostCents = 10.0, + Engine.guardrailMaxDuplicateToolCalls = 10 } } @@ -1170,8 +512,6 @@ checkAndSummarize provider uid chatId = do _ <- Memory.summarizeAndArchive uid chatId summary putText "Conversation summarized and archived" --- | Check if Ollama is running and has the embedding model. --- Returns Right () if ready, Left error message otherwise. checkOllama :: IO (Either Text ()) checkOllama = do ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" @@ -1198,7 +538,6 @@ checkOllama = do _ -> pure (Left "Failed to parse Ollama response") else pure (Left ("Ollama HTTP error: " <> tshow status)) --- | Pull the embedding model from Ollama. pullEmbeddingModel :: IO (Either Text ()) pullEmbeddingModel = do ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" @@ -1224,8 +563,6 @@ pullEmbeddingModel = do pure (Right ()) else pure (Left ("Pull failed: HTTP " <> tshow status)) --- | Ensure Ollama is running and has the embedding model. --- Pulls the model if missing, exits if Ollama is not running. ensureOllama :: IO () ensureOllama = do checkResult <- checkOllama @@ -1248,7 +585,6 @@ ensureOllama = do putText <| "Ollama error: " <> err exitFailure --- | Start the Telegram bot from environment or provided token. startBot :: Maybe Text -> IO () startBot maybeToken = do token <- case maybeToken of @@ -1273,15 +609,12 @@ startBot maybeToken = do exitFailure Just key -> do let orKey = Text.pack key - tgConfig = defaultTelegramConfig token allowedIds kagiKey orKey + tgConfig = Types.defaultTelegramConfig token allowedIds kagiKey orKey provider = Provider.defaultOpenRouter orKey "anthropic/claude-sonnet-4" putText <| "Allowed user IDs: " <> tshow allowedIds putText <| "Kagi search: " <> if isJust kagiKey then "enabled" else "disabled" runTelegramBot tgConfig provider --- | Load allowed user IDs from environment variable. --- Format: comma-separated integers, e.g. "123,456,789" --- Empty list means allow all users. loadAllowedUserIds :: IO [Int] loadAllowedUserIds = do maybeIds <- lookupEnv "ALLOWED_TELEGRAM_USER_IDS" diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs new file mode 100644 index 0000000..1ef35de --- /dev/null +++ b/Omni/Agent/Telegram/Media.hs @@ -0,0 +1,306 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Telegram Media Handling - File downloads, image analysis, voice transcription. +-- +-- : out omni-agent-telegram-media +-- : dep aeson +-- : dep http-conduit +-- : dep base64-bytestring +module Omni.Agent.Telegram.Media + ( -- * File Downloads + getFile, + downloadFile, + downloadFileBytes, + downloadPhoto, + downloadVoice, + downloadAndExtractPdf, + + -- * Multimodal Processing + analyzeImage, + transcribeVoice, + + -- * Size Limits + maxImageBytes, + maxVoiceBytes, + checkPhotoSize, + checkVoiceSize, + + -- * HTTP Utilities + httpGetBytes, + httpPostJson, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Aeson.KeyMap as KeyMap +import qualified Data.ByteString as BS +import qualified Data.ByteString.Base64.Lazy as B64 +import qualified Data.ByteString.Lazy as BL +import qualified Data.CaseInsensitive as CI +import qualified Data.Text as Text +import qualified Data.Text.Encoding as TE +import qualified Data.Text.Lazy as TL +import qualified Data.Text.Lazy.Encoding as TLE +import qualified Network.HTTP.Client as HTTPClient +import qualified Network.HTTP.Simple as HTTP +import qualified Omni.Agent.Telegram.Types as Types +import qualified Omni.Agent.Tools.Pdf as Pdf +import qualified Omni.Test as Test +import System.IO (hClose) +import System.IO.Temp (withSystemTempFile) + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Telegram.Media" + [ Test.unit "maxImageBytes is 10MB" <| do + maxImageBytes Test.@=? 10_000_000, + Test.unit "maxVoiceBytes is 20MB" <| do + maxVoiceBytes Test.@=? 20_000_000, + Test.unit "checkPhotoSize accepts small photos" <| do + let photo = Types.TelegramPhoto "id" 800 600 (Just 100_000) + checkPhotoSize photo Test.@=? Right (), + Test.unit "checkPhotoSize rejects large photos" <| do + let photo = Types.TelegramPhoto "id" 800 600 (Just 15_000_000) + case checkPhotoSize photo of + Left _ -> pure () + Right _ -> Test.assertFailure "Expected rejection", + Test.unit "checkVoiceSize accepts small voice" <| do + let voice = Types.TelegramVoice "id" 60 (Just "audio/ogg") (Just 500_000) + checkVoiceSize voice Test.@=? Right (), + Test.unit "checkVoiceSize rejects large voice" <| do + let voice = Types.TelegramVoice "id" 60 (Just "audio/ogg") (Just 25_000_000) + case checkVoiceSize voice of + Left _ -> pure () + Right _ -> Test.assertFailure "Expected rejection" + ] + +maxImageBytes :: Int +maxImageBytes = 10_000_000 + +maxVoiceBytes :: Int +maxVoiceBytes = 20_000_000 + +checkPhotoSize :: Types.TelegramPhoto -> Either Text () +checkPhotoSize photo = + case Types.tpFileSize photo of + Just size + | size > maxImageBytes -> + Left <| "image too large (" <> tshow (size `div` 1_000_000) <> "MB), max " <> tshow (maxImageBytes `div` 1_000_000) <> "MB" + _ -> Right () + +checkVoiceSize :: Types.TelegramVoice -> Either Text () +checkVoiceSize voice = + case Types.tvFileSize voice of + Just size + | size > maxVoiceBytes -> + Left <| "voice message too large (" <> tshow (size `div` 1_000_000) <> "MB), max " <> tshow (maxVoiceBytes `div` 1_000_000) <> "MB" + _ -> Right () + +httpGetBytes :: String -> IO (Either Text BL.ByteString) +httpGetBytes url = do + result <- + try <| do + req <- HTTP.parseRequest url + resp <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode resp + if status >= 200 && status < 300 + then pure (Right (HTTP.getResponseBody resp)) + else pure (Left ("HTTP " <> tshow status)) + case result of + Left (e :: SomeException) -> pure (Left ("HTTP error: " <> tshow e)) + Right r -> pure r + +httpPostJson :: String -> [(ByteString, ByteString)] -> Aeson.Value -> Int -> IO (Either Text BL.ByteString) +httpPostJson url extraHeaders body timeoutSecs = do + result <- + try <| do + req0 <- HTTP.parseRequest url + let baseReq = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (timeoutSecs * 1000000)) + <| req0 + req = foldr addHeader baseReq extraHeaders + addHeader (name, value) = HTTP.addRequestHeader (CI.mk name) value + resp <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode resp + if status >= 200 && status < 300 + then pure (Right (HTTP.getResponseBody resp)) + else pure (Left ("HTTP " <> tshow status <> ": " <> shortBody resp)) + case result of + Left (e :: SomeException) -> pure (Left ("HTTP error: " <> tshow e)) + Right r -> pure r + where + shortBody r = + let b = BL.toStrict (HTTP.getResponseBody r) + in TE.decodeUtf8 (BS.take 200 b) + +getFile :: Types.TelegramConfig -> Text -> IO (Either Text Text) +getFile cfg fileId = do + let url = + Text.unpack (Types.tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (Types.tgBotToken cfg) + <> "/getFile?file_id=" + <> Text.unpack fileId + result <- httpGetBytes url + case result of + Left err -> pure (Left err) + Right body -> + case Aeson.decode body of + Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of + Just (Aeson.Object resultObj) -> case KeyMap.lookup "file_path" resultObj of + Just (Aeson.String path) -> pure (Right path) + _ -> pure (Left "No file_path in response") + _ -> pure (Left "No result in response") + _ -> pure (Left "Failed to parse getFile response") + +downloadFileBytes :: Types.TelegramConfig -> Text -> IO (Either Text BL.ByteString) +downloadFileBytes cfg filePath = do + let url = + "https://api.telegram.org/file/bot" + <> Text.unpack (Types.tgBotToken cfg) + <> "/" + <> Text.unpack filePath + httpGetBytes url + +downloadFile :: Types.TelegramConfig -> Text -> FilePath -> IO (Either Text ()) +downloadFile cfg filePath destPath = do + result <- downloadFileBytes cfg filePath + case result of + Left err -> pure (Left err) + Right bytes -> do + BL.writeFile destPath bytes + pure (Right ()) + +downloadPhoto :: Types.TelegramConfig -> Types.TelegramPhoto -> IO (Either Text BL.ByteString) +downloadPhoto cfg photo = do + filePathResult <- getFile cfg (Types.tpFileId photo) + case filePathResult of + Left err -> pure (Left err) + Right filePath -> downloadFileBytes cfg filePath + +downloadVoice :: Types.TelegramConfig -> Types.TelegramVoice -> IO (Either Text BL.ByteString) +downloadVoice cfg voice = do + filePathResult <- getFile cfg (Types.tvFileId voice) + case filePathResult of + Left err -> pure (Left err) + Right filePath -> downloadFileBytes cfg filePath + +downloadAndExtractPdf :: Types.TelegramConfig -> Text -> IO (Either Text Text) +downloadAndExtractPdf cfg fileId = do + filePathResult <- getFile cfg fileId + case filePathResult of + Left err -> pure (Left err) + Right filePath -> + withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do + hClose tmpHandle + downloadResult <- downloadFile cfg filePath tmpPath + case downloadResult of + Left err -> pure (Left err) + Right () -> Pdf.extractPdfText tmpPath + +parseOpenRouterResponse :: BL.ByteString -> Either Text Text +parseOpenRouterResponse body = + case Aeson.decode body of + Just (Aeson.Object obj) -> case KeyMap.lookup "choices" obj of + Just (Aeson.Array choices) | not (null choices) -> + case toList choices of + (Aeson.Object choice : _) -> case KeyMap.lookup "message" choice of + Just (Aeson.Object msg) -> case KeyMap.lookup "content" msg of + Just (Aeson.String content) -> Right content + Just Aeson.Null -> Left "No content in response" + _ -> Left "Unexpected content type in response" + _ -> Left "No message in choice" + _ -> Left "Empty choices array" + _ -> Left "No choices in response" + _ -> Left "Failed to parse response" + +analyzeImage :: Text -> BL.ByteString -> Text -> IO (Either Text Text) +analyzeImage apiKey imageBytes userPrompt = do + let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode imageBytes)) + dataUrl = "data:image/jpeg;base64," <> base64Data + prompt = + if Text.null userPrompt + then "describe this image objectively in third person. do not use first person pronouns like 'I can see'. just describe what is shown." + else userPrompt <> "\n\n(describe objectively in third person, no first person pronouns)" + body = + Aeson.object + [ "model" .= ("anthropic/claude-sonnet-4" :: Text), + "messages" + .= [ Aeson.object + [ "role" .= ("user" :: Text), + "content" + .= [ Aeson.object + [ "type" .= ("text" :: Text), + "text" .= prompt + ], + Aeson.object + [ "type" .= ("image_url" :: Text), + "image_url" + .= Aeson.object + [ "url" .= dataUrl + ] + ] + ] + ] + ] + ] + headers = + [ ("Authorization", "Bearer " <> encodeUtf8 apiKey), + ("HTTP-Referer", "https://omni.dev"), + ("X-Title", "Omni Agent") + ] + result <- httpPostJson "https://openrouter.ai/api/v1/chat/completions" headers body 120 + case result of + Left err -> pure (Left ("Vision API error: " <> err)) + Right respBody -> pure (first ("Vision API: " <>) (parseOpenRouterResponse respBody)) + +transcribeVoice :: Text -> BL.ByteString -> IO (Either Text Text) +transcribeVoice apiKey audioBytes = do + let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode audioBytes)) + body = + Aeson.object + [ "model" .= ("google/gemini-2.0-flash-001" :: Text), + "messages" + .= [ Aeson.object + [ "role" .= ("user" :: Text), + "content" + .= [ Aeson.object + [ "type" .= ("text" :: Text), + "text" .= ("transcribe this audio exactly, return only the transcription with no commentary" :: Text) + ], + Aeson.object + [ "type" .= ("input_audio" :: Text), + "input_audio" + .= Aeson.object + [ "data" .= base64Data, + "format" .= ("ogg" :: Text) + ] + ] + ] + ] + ] + ] + headers = + [ ("Authorization", "Bearer " <> encodeUtf8 apiKey), + ("HTTP-Referer", "https://omni.dev"), + ("X-Title", "Omni Agent") + ] + result <- httpPostJson "https://openrouter.ai/api/v1/chat/completions" headers body 120 + case result of + Left err -> pure (Left ("Transcription API error: " <> err)) + Right respBody -> pure (first ("Transcription API: " <>) (parseOpenRouterResponse respBody)) diff --git a/Omni/Agent/Telegram/Reminders.hs b/Omni/Agent/Telegram/Reminders.hs new file mode 100644 index 0000000..706f9da --- /dev/null +++ b/Omni/Agent/Telegram/Reminders.hs @@ -0,0 +1,107 @@ +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Telegram Reminders - Background reminder loop and user chat persistence. +-- +-- : out omni-agent-telegram-reminders +-- : dep sqlite-simple +module Omni.Agent.Telegram.Reminders + ( -- * User Chat Persistence + initUserChatsTable, + recordUserChat, + lookupChatId, + + -- * Reminder Loop + reminderLoop, + checkAndSendReminders, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Time (getCurrentTime) +import qualified Database.SQLite.Simple as SQL +import qualified Omni.Agent.Memory as Memory +import qualified Omni.Agent.Telegram.Types as Types +import qualified Omni.Agent.Tools.Todos as Todos +import qualified Omni.Test as Test + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Telegram.Reminders" + [ Test.unit "initUserChatsTable is idempotent" <| do + Memory.withMemoryDb <| \conn -> do + initUserChatsTable conn + initUserChatsTable conn + pure () + ] + +initUserChatsTable :: SQL.Connection -> IO () +initUserChatsTable conn = + SQL.execute_ + conn + "CREATE TABLE IF NOT EXISTS user_chats (\ + \ user_id TEXT PRIMARY KEY,\ + \ chat_id INTEGER NOT NULL,\ + \ last_seen_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\ + \)" + +recordUserChat :: Text -> Int -> IO () +recordUserChat uid chatId = do + now <- getCurrentTime + Memory.withMemoryDb <| \conn -> do + initUserChatsTable conn + SQL.execute + conn + "INSERT INTO user_chats (user_id, chat_id, last_seen_at) \ + \VALUES (?, ?, ?) \ + \ON CONFLICT(user_id) DO UPDATE SET \ + \ chat_id = excluded.chat_id, \ + \ last_seen_at = excluded.last_seen_at" + (uid, chatId, now) + +lookupChatId :: Text -> IO (Maybe Int) +lookupChatId uid = + Memory.withMemoryDb <| \conn -> do + initUserChatsTable conn + rows <- + SQL.query + conn + "SELECT chat_id FROM user_chats WHERE user_id = ?" + (SQL.Only uid) + pure (listToMaybe (map SQL.fromOnly rows)) + +reminderLoop :: Types.TelegramConfig -> (Types.TelegramConfig -> Int -> Text -> IO ()) -> IO () +reminderLoop tgConfig sendMsg = + forever <| do + threadDelay (5 * 60 * 1000000) + checkAndSendReminders tgConfig sendMsg + +checkAndSendReminders :: Types.TelegramConfig -> (Types.TelegramConfig -> Int -> Text -> IO ()) -> IO () +checkAndSendReminders tgConfig sendMsg = do + todos <- Todos.listTodosDueForReminder + forM_ todos <| \td -> do + mChatId <- lookupChatId (Todos.todoUserId td) + case mChatId of + Nothing -> pure () + Just chatId -> do + let title = Todos.todoTitle td + dueStr = case Todos.todoDueDate td of + Just d -> " (due: " <> tshow d <> ")" + Nothing -> "" + msg = + "⏰ reminder: \"" + <> title + <> "\"" + <> dueStr + <> "\nreply when you finish and i'll mark it complete." + sendMsg tgConfig chatId msg + Todos.markReminderSent (Todos.todoId td) + putText <| "Sent reminder for todo " <> tshow (Todos.todoId td) <> " to chat " <> tshow chatId diff --git a/Omni/Agent/Telegram/Types.hs b/Omni/Agent/Telegram/Types.hs new file mode 100644 index 0000000..2db6a52 --- /dev/null +++ b/Omni/Agent/Telegram/Types.hs @@ -0,0 +1,549 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Telegram Bot Types - Data types and JSON parsing for Telegram API. +-- +-- : out omni-agent-telegram-types +-- : dep aeson +module Omni.Agent.Telegram.Types + ( -- * Configuration + TelegramConfig (..), + defaultTelegramConfig, + isUserAllowed, + + -- * Message Types + TelegramMessage (..), + TelegramUpdate (..), + TelegramDocument (..), + TelegramPhoto (..), + TelegramVoice (..), + TelegramReplyMessage (..), + + -- * Parsing + parseUpdate, + parseDocument, + parseLargestPhoto, + parsePhotoSize, + parseVoice, + parseReplyMessage, + + -- * Utilities + isPdf, + isSupportedVoiceFormat, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.!=), (.:), (.:?), (.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Aeson.KeyMap as KeyMap +import qualified Data.Text as Text +import qualified Omni.Test as Test + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Telegram.Types" + [ Test.unit "TelegramConfig JSON roundtrip" <| do + let cfg = + TelegramConfig + { tgBotToken = "test-token", + tgPollingTimeout = 30, + tgApiBaseUrl = "https://api.telegram.org", + tgAllowedUserIds = [123, 456], + tgKagiApiKey = Just "kagi-key", + tgOpenRouterApiKey = "or-key" + } + case Aeson.decode (Aeson.encode cfg) of + Nothing -> Test.assertFailure "Failed to decode TelegramConfig" + Just decoded -> do + tgBotToken decoded Test.@=? "test-token" + tgAllowedUserIds decoded Test.@=? [123, 456] + tgKagiApiKey decoded Test.@=? Just "kagi-key", + Test.unit "isUserAllowed checks whitelist" <| do + let cfg = defaultTelegramConfig "token" [100, 200, 300] Nothing "key" + isUserAllowed cfg 100 Test.@=? True + isUserAllowed cfg 200 Test.@=? True + isUserAllowed cfg 999 Test.@=? False, + Test.unit "isUserAllowed allows all when empty" <| do + let cfg = defaultTelegramConfig "token" [] Nothing "key" + isUserAllowed cfg 12345 Test.@=? True, + Test.unit "TelegramMessage JSON roundtrip" <| do + let msg = + TelegramMessage + { tmUpdateId = 123, + tmChatId = 456, + tmUserId = 789, + tmUserFirstName = "Test", + tmUserLastName = Just "User", + tmText = "Hello bot", + tmDocument = Nothing, + tmPhoto = Nothing, + tmVoice = Nothing, + tmReplyTo = Nothing + } + case Aeson.decode (Aeson.encode msg) of + Nothing -> Test.assertFailure "Failed to decode TelegramMessage" + Just decoded -> do + tmUpdateId decoded Test.@=? 123 + tmText decoded Test.@=? "Hello bot", + Test.unit "parseUpdate extracts message correctly" <| do + let json = + Aeson.object + [ "update_id" .= (123 :: Int), + "message" + .= Aeson.object + [ "message_id" .= (1 :: Int), + "chat" .= Aeson.object ["id" .= (456 :: Int)], + "from" + .= Aeson.object + [ "id" .= (789 :: Int), + "first_name" .= ("Test" :: Text) + ], + "text" .= ("Hello" :: Text) + ] + ] + case parseUpdate json of + Nothing -> Test.assertFailure "Failed to parse update" + Just msg -> do + tmUpdateId msg Test.@=? 123 + tmChatId msg Test.@=? 456 + tmUserId msg Test.@=? 789 + tmText msg Test.@=? "Hello" + tmDocument msg Test.@=? Nothing, + Test.unit "parseUpdate extracts document correctly" <| do + let json = + Aeson.object + [ "update_id" .= (124 :: Int), + "message" + .= Aeson.object + [ "message_id" .= (2 :: Int), + "chat" .= Aeson.object ["id" .= (456 :: Int)], + "from" + .= Aeson.object + [ "id" .= (789 :: Int), + "first_name" .= ("Test" :: Text) + ], + "caption" .= ("check this out" :: Text), + "document" + .= Aeson.object + [ "file_id" .= ("abc123" :: Text), + "file_name" .= ("test.pdf" :: Text), + "mime_type" .= ("application/pdf" :: Text), + "file_size" .= (12345 :: Int) + ] + ] + ] + case parseUpdate json of + Nothing -> Test.assertFailure "Failed to parse document update" + Just msg -> do + tmUpdateId msg Test.@=? 124 + tmText msg Test.@=? "check this out" + case tmDocument msg of + Nothing -> Test.assertFailure "Expected document" + Just doc -> do + tdFileId doc Test.@=? "abc123" + tdFileName doc Test.@=? Just "test.pdf" + tdMimeType doc Test.@=? Just "application/pdf", + Test.unit "isPdf detects PDFs by mime type" <| do + let doc = TelegramDocument "id" (Just "doc.pdf") (Just "application/pdf") Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf detects PDFs by filename" <| do + let doc = TelegramDocument "id" (Just "report.PDF") Nothing Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf rejects non-PDFs" <| do + let doc = TelegramDocument "id" (Just "image.jpg") (Just "image/jpeg") Nothing + isPdf doc Test.@=? False, + Test.unit "isSupportedVoiceFormat accepts ogg" <| do + let voice = TelegramVoice "id" 10 (Just "audio/ogg") Nothing + isSupportedVoiceFormat voice Test.@=? True, + Test.unit "isSupportedVoiceFormat accepts opus" <| do + let voice = TelegramVoice "id" 10 (Just "audio/opus") Nothing + isSupportedVoiceFormat voice Test.@=? True, + Test.unit "isSupportedVoiceFormat defaults to True for unknown" <| do + let voice = TelegramVoice "id" 10 Nothing Nothing + isSupportedVoiceFormat voice Test.@=? True + ] + +data TelegramConfig = TelegramConfig + { tgBotToken :: Text, + tgPollingTimeout :: Int, + tgApiBaseUrl :: Text, + tgAllowedUserIds :: [Int], + tgKagiApiKey :: Maybe Text, + tgOpenRouterApiKey :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramConfig where + toJSON c = + Aeson.object + [ "bot_token" .= tgBotToken c, + "polling_timeout" .= tgPollingTimeout c, + "api_base_url" .= tgApiBaseUrl c, + "allowed_user_ids" .= tgAllowedUserIds c, + "kagi_api_key" .= tgKagiApiKey c, + "openrouter_api_key" .= tgOpenRouterApiKey c + ] + +instance Aeson.FromJSON TelegramConfig where + parseJSON = + Aeson.withObject "TelegramConfig" <| \v -> + (TelegramConfig </ (v .: "bot_token")) + <*> (v .:? "polling_timeout" .!= 30) + <*> (v .:? "api_base_url" .!= "https://api.telegram.org") + <*> (v .:? "allowed_user_ids" .!= []) + <*> (v .:? "kagi_api_key") + <*> (v .: "openrouter_api_key") + +defaultTelegramConfig :: Text -> [Int] -> Maybe Text -> Text -> TelegramConfig +defaultTelegramConfig token allowedIds kagiKey openRouterKey = + TelegramConfig + { tgBotToken = token, + tgPollingTimeout = 30, + tgApiBaseUrl = "https://api.telegram.org", + tgAllowedUserIds = allowedIds, + tgKagiApiKey = kagiKey, + tgOpenRouterApiKey = openRouterKey + } + +isUserAllowed :: TelegramConfig -> Int -> Bool +isUserAllowed cfg usrId = + null (tgAllowedUserIds cfg) || usrId `elem` tgAllowedUserIds cfg + +data TelegramDocument = TelegramDocument + { tdFileId :: Text, + tdFileName :: Maybe Text, + tdMimeType :: Maybe Text, + tdFileSize :: Maybe Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramDocument where + toJSON d = + Aeson.object + [ "file_id" .= tdFileId d, + "file_name" .= tdFileName d, + "mime_type" .= tdMimeType d, + "file_size" .= tdFileSize d + ] + +instance Aeson.FromJSON TelegramDocument where + parseJSON = + Aeson.withObject "TelegramDocument" <| \v -> + (TelegramDocument </ (v .: "file_id")) + <*> (v .:? "file_name") + <*> (v .:? "mime_type") + <*> (v .:? "file_size") + +data TelegramPhoto = TelegramPhoto + { tpFileId :: Text, + tpWidth :: Int, + tpHeight :: Int, + tpFileSize :: Maybe Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramPhoto where + toJSON p = + Aeson.object + [ "file_id" .= tpFileId p, + "width" .= tpWidth p, + "height" .= tpHeight p, + "file_size" .= tpFileSize p + ] + +instance Aeson.FromJSON TelegramPhoto where + parseJSON = + Aeson.withObject "TelegramPhoto" <| \v -> + (TelegramPhoto </ (v .: "file_id")) + <*> (v .: "width") + <*> (v .: "height") + <*> (v .:? "file_size") + +data TelegramVoice = TelegramVoice + { tvFileId :: Text, + tvDuration :: Int, + tvMimeType :: Maybe Text, + tvFileSize :: Maybe Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramVoice where + toJSON v = + Aeson.object + [ "file_id" .= tvFileId v, + "duration" .= tvDuration v, + "mime_type" .= tvMimeType v, + "file_size" .= tvFileSize v + ] + +instance Aeson.FromJSON TelegramVoice where + parseJSON = + Aeson.withObject "TelegramVoice" <| \v -> + (TelegramVoice </ (v .: "file_id")) + <*> (v .: "duration") + <*> (v .:? "mime_type") + <*> (v .:? "file_size") + +data TelegramReplyMessage = TelegramReplyMessage + { trMessageId :: Int, + trFromFirstName :: Maybe Text, + trFromLastName :: Maybe Text, + trText :: Text + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramReplyMessage where + toJSON r = + Aeson.object + [ "message_id" .= trMessageId r, + "from_first_name" .= trFromFirstName r, + "from_last_name" .= trFromLastName r, + "text" .= trText r + ] + +instance Aeson.FromJSON TelegramReplyMessage where + parseJSON = + Aeson.withObject "TelegramReplyMessage" <| \v -> + (TelegramReplyMessage </ (v .: "message_id")) + <*> (v .:? "from_first_name") + <*> (v .:? "from_last_name") + <*> (v .:? "text" .!= "") + +data TelegramMessage = TelegramMessage + { tmUpdateId :: Int, + tmChatId :: Int, + tmUserId :: Int, + tmUserFirstName :: Text, + tmUserLastName :: Maybe Text, + tmText :: Text, + tmDocument :: Maybe TelegramDocument, + tmPhoto :: Maybe TelegramPhoto, + tmVoice :: Maybe TelegramVoice, + tmReplyTo :: Maybe TelegramReplyMessage + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramMessage where + toJSON m = + Aeson.object + [ "update_id" .= tmUpdateId m, + "chat_id" .= tmChatId m, + "user_id" .= tmUserId m, + "user_first_name" .= tmUserFirstName m, + "user_last_name" .= tmUserLastName m, + "text" .= tmText m, + "document" .= tmDocument m, + "photo" .= tmPhoto m, + "voice" .= tmVoice m, + "reply_to" .= tmReplyTo m + ] + +instance Aeson.FromJSON TelegramMessage where + parseJSON = + Aeson.withObject "TelegramMessage" <| \v -> + (TelegramMessage </ (v .: "update_id")) + <*> (v .: "chat_id") + <*> (v .: "user_id") + <*> (v .: "user_first_name") + <*> (v .:? "user_last_name") + <*> (v .: "text") + <*> (v .:? "document") + <*> (v .:? "photo") + <*> (v .:? "voice") + <*> (v .:? "reply_to") + +data TelegramUpdate = TelegramUpdate + { tuUpdateId :: Int, + tuMessage :: Maybe Aeson.Value + } + deriving (Show, Eq, Generic) + +instance Aeson.FromJSON TelegramUpdate where + parseJSON = + Aeson.withObject "TelegramUpdate" <| \v -> + (TelegramUpdate </ (v .: "update_id")) + <*> (v .:? "message") + +parseUpdate :: Aeson.Value -> Maybe TelegramMessage +parseUpdate val = do + Aeson.Object obj <- pure val + updateId <- case KeyMap.lookup "update_id" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + Aeson.Object msgObj <- KeyMap.lookup "message" obj + Aeson.Object chatObj <- KeyMap.lookup "chat" msgObj + chatId <- case KeyMap.lookup "id" chatObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + Aeson.Object fromObj <- KeyMap.lookup "from" msgObj + userId <- case KeyMap.lookup "id" fromObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + firstName <- case KeyMap.lookup "first_name" fromObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + let lastName = case KeyMap.lookup "last_name" fromObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + let text = case KeyMap.lookup "text" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let caption = case KeyMap.lookup "caption" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let document = case KeyMap.lookup "document" msgObj of + Just (Aeson.Object docObj) -> parseDocument docObj + _ -> Nothing + let photo = case KeyMap.lookup "photo" msgObj of + Just (Aeson.Array photos) -> parseLargestPhoto (toList photos) + _ -> Nothing + let voice = case KeyMap.lookup "voice" msgObj of + Just (Aeson.Object voiceObj) -> parseVoice voiceObj + _ -> Nothing + let replyTo = case KeyMap.lookup "reply_to_message" msgObj of + Just (Aeson.Object replyObj) -> parseReplyMessage replyObj + _ -> Nothing + let hasContent = not (Text.null text) || not (Text.null caption) || isJust document || isJust photo || isJust voice + guard hasContent + pure + TelegramMessage + { tmUpdateId = updateId, + tmChatId = chatId, + tmUserId = userId, + tmUserFirstName = firstName, + tmUserLastName = lastName, + tmText = if Text.null text then caption else text, + tmDocument = document, + tmPhoto = photo, + tmVoice = voice, + tmReplyTo = replyTo + } + +parseDocument :: Aeson.Object -> Maybe TelegramDocument +parseDocument docObj = do + fileId <- case KeyMap.lookup "file_id" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + let fileName = case KeyMap.lookup "file_name" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + mimeType = case KeyMap.lookup "mime_type" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + fileSize = case KeyMap.lookup "file_size" docObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + pure + TelegramDocument + { tdFileId = fileId, + tdFileName = fileName, + tdMimeType = mimeType, + tdFileSize = fileSize + } + +parseLargestPhoto :: [Aeson.Value] -> Maybe TelegramPhoto +parseLargestPhoto photos = do + let parsed = mapMaybe parsePhotoSize photos + case parsed of + [] -> Nothing + ps -> Just (maximumBy (comparing tpWidth) ps) + +parsePhotoSize :: Aeson.Value -> Maybe TelegramPhoto +parsePhotoSize val = do + Aeson.Object obj <- pure val + fileId <- case KeyMap.lookup "file_id" obj of + Just (Aeson.String s) -> Just s + _ -> Nothing + width <- case KeyMap.lookup "width" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + height <- case KeyMap.lookup "height" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + let fileSize = case KeyMap.lookup "file_size" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + pure + TelegramPhoto + { tpFileId = fileId, + tpWidth = width, + tpHeight = height, + tpFileSize = fileSize + } + +parseVoice :: Aeson.Object -> Maybe TelegramVoice +parseVoice obj = do + fileId <- case KeyMap.lookup "file_id" obj of + Just (Aeson.String s) -> Just s + _ -> Nothing + duration <- case KeyMap.lookup "duration" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + let mimeType = case KeyMap.lookup "mime_type" obj of + Just (Aeson.String s) -> Just s + _ -> Nothing + fileSize = case KeyMap.lookup "file_size" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + pure + TelegramVoice + { tvFileId = fileId, + tvDuration = duration, + tvMimeType = mimeType, + tvFileSize = fileSize + } + +parseReplyMessage :: Aeson.Object -> Maybe TelegramReplyMessage +parseReplyMessage obj = do + messageId <- case KeyMap.lookup "message_id" obj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + let fromFirstName = case KeyMap.lookup "from" obj of + Just (Aeson.Object fromObj) -> case KeyMap.lookup "first_name" fromObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + _ -> Nothing + fromLastName = case KeyMap.lookup "from" obj of + Just (Aeson.Object fromObj) -> case KeyMap.lookup "last_name" fromObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + _ -> Nothing + text = case KeyMap.lookup "text" obj of + Just (Aeson.String s) -> s + _ -> case KeyMap.lookup "caption" obj of + Just (Aeson.String s) -> s + _ -> "" + pure + TelegramReplyMessage + { trMessageId = messageId, + trFromFirstName = fromFirstName, + trFromLastName = fromLastName, + trText = text + } + +isPdf :: TelegramDocument -> Bool +isPdf doc = + case tdMimeType doc of + Just mime -> mime == "application/pdf" + Nothing -> case tdFileName doc of + Just name -> ".pdf" `Text.isSuffixOf` Text.toLower name + Nothing -> False + +isSupportedVoiceFormat :: TelegramVoice -> Bool +isSupportedVoiceFormat voice = + case tvMimeType voice of + Just "audio/ogg" -> True + Just "audio/opus" -> True + Just "audio/x-opus+ogg" -> True + Nothing -> True + _ -> False diff --git a/Omni/Agent/Tools/Todos.hs b/Omni/Agent/Tools/Todos.hs index 4c7d2be..2aacacc 100644 --- a/Omni/Agent/Tools/Todos.hs +++ b/Omni/Agent/Tools/Todos.hs @@ -45,8 +45,8 @@ import Alpha import Data.Aeson ((.!=), (.:), (.:?), (.=)) import qualified Data.Aeson as Aeson import qualified Data.Text as Text -import Data.Time (NominalDiffTime, UTCTime, addUTCTime, getCurrentTime) -import Data.Time.Format (defaultTimeLocale, parseTimeM) +import Data.Time (LocalTime, NominalDiffTime, TimeZone, UTCTime, addUTCTime, getCurrentTime, localTimeToUTC, minutesToTimeZone, utcToLocalTime) +import Data.Time.Format (defaultTimeLocale, formatTime, parseTimeM) import qualified Database.SQLite.Simple as SQL import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Memory as Memory @@ -165,12 +165,18 @@ migrateTodosTable conn = do unless ("last_reminded_at" `elem` colNames) <| do SQL.execute_ conn "ALTER TABLE todos ADD COLUMN last_reminded_at TIMESTAMP" +easternTimeZone :: TimeZone +easternTimeZone = minutesToTimeZone (-300) + parseDueDate :: Text -> Maybe UTCTime parseDueDate txt = let s = Text.unpack txt - in parseTimeM True defaultTimeLocale "%Y-%m-%d %H:%M" s - <|> parseTimeM True defaultTimeLocale "%Y-%m-%d" s - <|> parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S" s + parseLocal :: Maybe LocalTime + parseLocal = + parseTimeM True defaultTimeLocale "%Y-%m-%d %H:%M" s + <|> parseTimeM True defaultTimeLocale "%Y-%m-%d" s + <|> parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%S" s + in fmap (localTimeToUTC easternTimeZone) parseLocal <|> parseTimeM True defaultTimeLocale "%Y-%m-%dT%H:%M:%SZ" s createTodo :: Text -> Text -> Maybe Text -> IO Todo @@ -301,7 +307,7 @@ todoAddTool uid = "due_date" .= Aeson.object [ "type" .= ("string" :: Text), - "description" .= ("Optional due date: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM'" :: Text) + "description" .= ("Optional due date in Eastern time: 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM'" :: Text) ] ], "required" .= (["title"] :: [Text]) @@ -316,7 +322,9 @@ executeTodoAdd uid v = Aeson.Success (args :: TodoAddArgs) -> do td <- createTodo uid (taTitle args) (taDueDate args) let dueDateMsg = case todoDueDate td of - Just d -> " (due: " <> tshow d <> ")" + Just d -> + let localTime = utcToLocalTime easternTimeZone d + in " (due: " <> Text.pack (formatTime defaultTimeLocale "%Y-%m-%d %H:%M ET" localTime) <> ")" Nothing -> "" pure ( Aeson.object @@ -392,7 +400,9 @@ formatTodosForLLM todos = formatTodo td = let status = if todoCompleted td then "[x]" else "[ ]" dueStr = case todoDueDate td of - Just d -> " (due: " <> Text.pack (show d) <> ")" + Just d -> + let localTime = utcToLocalTime easternTimeZone d + in " (due: " <> Text.pack (formatTime defaultTimeLocale "%Y-%m-%d %H:%M ET" localTime) <> ")" Nothing -> "" in status <> " " <> todoTitle td <> dueStr <> " (id: " <> tshow (todoId td) <> ")" diff --git a/Omni/Agent/Tools/WebSearch.hs b/Omni/Agent/Tools/WebSearch.hs index f7250b8..58c945c 100644 --- a/Omni/Agent/Tools/WebSearch.hs +++ b/Omni/Agent/Tools/WebSearch.hs @@ -172,7 +172,7 @@ webSearchTool apiKey = "limit" .= Aeson.object [ "type" .= ("integer" :: Text), - "description" .= ("Max results to return (default: 5, max: 10)" :: Text) + "description" .= ("Max results to return (default: 10, max: 20)" :: Text) ] ], "required" .= (["query"] :: [Text]) @@ -185,7 +185,7 @@ executeWebSearch apiKey v = case Aeson.fromJSON v of Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) Aeson.Success (args :: WebSearchArgs) -> do - let lim = min 10 (max 1 (wsLimit args)) + let lim = min 20 (max 1 (wsLimit args)) result <- kagiSearch apiKey (wsQuery args) lim case result of Left err -> @@ -209,4 +209,4 @@ instance Aeson.FromJSON WebSearchArgs where parseJSON = Aeson.withObject "WebSearchArgs" <| \v -> (WebSearchArgs </ (v Aeson..: "query")) - <*> (v Aeson..:? "limit" Aeson..!= 5) + <*> (v Aeson..:? "limit" Aeson..!= 10) |
