diff options
Diffstat (limited to 'Omni/Agent/Telegram.hs')
| -rw-r--r-- | Omni/Agent/Telegram.hs | 334 |
1 files changed, 322 insertions, 12 deletions
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index 1162e25..e7eb659 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -24,16 +24,24 @@ module Omni.Agent.Telegram -- * Types TelegramMessage (..), TelegramUpdate (..), + TelegramDocument (..), -- * Telegram API getUpdates, sendMessage, sendTypingAction, + getFile, + downloadFile, + downloadAndExtractPdf, + isPdf, -- * Bot Loop runTelegramBot, handleMessage, startBot, + ensureOllama, + checkOllama, + pullEmbeddingModel, -- * System Prompt telegramSystemPrompt, @@ -56,9 +64,13 @@ import qualified Network.HTTP.Simple as HTTP import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Memory as Memory import qualified Omni.Agent.Provider as Provider +import qualified Omni.Agent.Tools.Notes as Notes +import qualified Omni.Agent.Tools.Pdf as Pdf import qualified Omni.Agent.Tools.WebSearch as WebSearch import qualified Omni.Test as Test import System.Environment (lookupEnv) +import System.IO (hClose) +import System.IO.Temp (withSystemTempFile) main :: IO () main = Test.run test @@ -98,7 +110,8 @@ test = tmUserId = 789, tmUserFirstName = "Test", tmUserLastName = Just "User", - tmText = "Hello bot" + tmText = "Hello bot", + tmDocument = Nothing } case Aeson.decode (Aeson.encode msg) of Nothing -> Test.assertFailure "Failed to decode TelegramMessage" @@ -130,6 +143,50 @@ test = tmChatId msg Test.@=? 456 tmUserId msg Test.@=? 789 tmText msg Test.@=? "Hello" + tmDocument msg Test.@=? Nothing, + Test.unit "parseUpdate extracts document correctly" <| do + let json = + Aeson.object + [ "update_id" .= (124 :: Int), + "message" + .= Aeson.object + [ "message_id" .= (2 :: Int), + "chat" .= Aeson.object ["id" .= (456 :: Int)], + "from" + .= Aeson.object + [ "id" .= (789 :: Int), + "first_name" .= ("Test" :: Text) + ], + "caption" .= ("check this out" :: Text), + "document" + .= Aeson.object + [ "file_id" .= ("abc123" :: Text), + "file_name" .= ("test.pdf" :: Text), + "mime_type" .= ("application/pdf" :: Text), + "file_size" .= (12345 :: Int) + ] + ] + ] + case parseUpdate json of + Nothing -> Test.assertFailure "Failed to parse document update" + Just msg -> do + tmUpdateId msg Test.@=? 124 + tmText msg Test.@=? "check this out" + case tmDocument msg of + Nothing -> Test.assertFailure "Expected document" + Just doc -> do + tdFileId doc Test.@=? "abc123" + tdFileName doc Test.@=? Just "test.pdf" + tdMimeType doc Test.@=? Just "application/pdf", + Test.unit "isPdf detects PDFs by mime type" <| do + let doc = TelegramDocument "id" (Just "doc.pdf") (Just "application/pdf") Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf detects PDFs by filename" <| do + let doc = TelegramDocument "id" (Just "report.PDF") Nothing Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf rejects non-PDFs" <| do + let doc = TelegramDocument "id" (Just "image.jpg") (Just "image/jpeg") Nothing + isPdf doc Test.@=? False ] -- | Telegram bot configuration. @@ -177,6 +234,32 @@ isUserAllowed :: TelegramConfig -> Int -> Bool isUserAllowed cfg usrId = null (tgAllowedUserIds cfg) || usrId `elem` tgAllowedUserIds cfg +-- | Document attachment info from Telegram. +data TelegramDocument = TelegramDocument + { tdFileId :: Text, + tdFileName :: Maybe Text, + tdMimeType :: Maybe Text, + tdFileSize :: Maybe Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramDocument where + toJSON d = + Aeson.object + [ "file_id" .= tdFileId d, + "file_name" .= tdFileName d, + "mime_type" .= tdMimeType d, + "file_size" .= tdFileSize d + ] + +instance Aeson.FromJSON TelegramDocument where + parseJSON = + Aeson.withObject "TelegramDocument" <| \v -> + (TelegramDocument </ (v .: "file_id")) + <*> (v .:? "file_name") + <*> (v .:? "mime_type") + <*> (v .:? "file_size") + -- | A parsed Telegram message from a user. data TelegramMessage = TelegramMessage { tmUpdateId :: Int, @@ -184,7 +267,8 @@ data TelegramMessage = TelegramMessage tmUserId :: Int, tmUserFirstName :: Text, tmUserLastName :: Maybe Text, - tmText :: Text + tmText :: Text, + tmDocument :: Maybe TelegramDocument } deriving (Show, Eq, Generic) @@ -196,7 +280,8 @@ instance Aeson.ToJSON TelegramMessage where "user_id" .= tmUserId m, "user_first_name" .= tmUserFirstName m, "user_last_name" .= tmUserLastName m, - "text" .= tmText m + "text" .= tmText m, + "document" .= tmDocument m ] instance Aeson.FromJSON TelegramMessage where @@ -208,6 +293,7 @@ instance Aeson.FromJSON TelegramMessage where <*> (v .: "user_first_name") <*> (v .:? "user_last_name") <*> (v .: "text") + <*> (v .:? "document") -- | Raw Telegram update for parsing. data TelegramUpdate = TelegramUpdate @@ -223,6 +309,7 @@ instance Aeson.FromJSON TelegramUpdate where <*> (v .:? "message") -- | Parse a Telegram update into a TelegramMessage. +-- Handles both text messages and document uploads. parseUpdate :: Aeson.Value -> Maybe TelegramMessage parseUpdate val = do Aeson.Object obj <- pure val @@ -244,9 +331,17 @@ parseUpdate val = do let lastName = case KeyMap.lookup "last_name" fromObj of Just (Aeson.String s) -> Just s _ -> Nothing - text <- case KeyMap.lookup "text" msgObj of - Just (Aeson.String s) -> Just s - _ -> Nothing + let text = case KeyMap.lookup "text" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let caption = case KeyMap.lookup "caption" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let document = case KeyMap.lookup "document" msgObj of + Just (Aeson.Object docObj) -> parseDocument docObj + _ -> Nothing + let hasContent = not (Text.null text) || not (Text.null caption) || isJust document + guard hasContent pure TelegramMessage { tmUpdateId = updateId, @@ -254,7 +349,31 @@ parseUpdate val = do tmUserId = userId, tmUserFirstName = firstName, tmUserLastName = lastName, - tmText = text + tmText = if Text.null text then caption else text, + tmDocument = document + } + +-- | Parse document object from Telegram message. +parseDocument :: Aeson.Object -> Maybe TelegramDocument +parseDocument docObj = do + fileId <- case KeyMap.lookup "file_id" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + let fileName = case KeyMap.lookup "file_name" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + mimeType = case KeyMap.lookup "mime_type" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + fileSize = case KeyMap.lookup "file_size" docObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + pure + TelegramDocument + { tdFileId = fileId, + tdFileName = fileName, + tdMimeType = mimeType, + tdFileSize = fileSize } -- | Poll Telegram for new updates. @@ -349,6 +468,82 @@ sendMessage cfg chatId text = do then putText <| "Message sent (" <> tshow (Text.length text) <> " chars)" else putText <| "Send message failed: " <> tshow status <> " - " <> tshow respBody +-- | Get file path from Telegram file_id. +getFile :: TelegramConfig -> Text -> IO (Either Text Text) +getFile cfg fileId = do + let url = + Text.unpack (tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (tgBotToken cfg) + <> "/getFile" + req0 <- HTTP.parseRequest url + let body = Aeson.object ["file_id" .= fileId] + req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| req0 + result <- try (HTTP.httpLBS req) + case result of + Left (e :: SomeException) -> + pure (Left ("getFile error: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then case Aeson.decode (HTTP.getResponseBody response) of + Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of + Just (Aeson.Object resObj) -> case KeyMap.lookup "file_path" resObj of + Just (Aeson.String fp) -> pure (Right fp) + _ -> pure (Left "No file_path in response") + _ -> pure (Left "No result in response") + _ -> pure (Left "Failed to parse getFile response") + else pure (Left ("getFile HTTP error: " <> tshow status)) + +-- | Download a file from Telegram servers. +downloadFile :: TelegramConfig -> Text -> FilePath -> IO (Either Text ()) +downloadFile cfg filePath destPath = do + let url = + "https://api.telegram.org/file/bot" + <> Text.unpack (tgBotToken cfg) + <> "/" + <> Text.unpack filePath + result <- + try <| do + req <- HTTP.parseRequest url + response <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + BL.writeFile destPath (HTTP.getResponseBody response) + pure (Right ()) + else pure (Left ("Download failed: HTTP " <> tshow status)) + case result of + Left (e :: SomeException) -> pure (Left ("Download error: " <> tshow e)) + Right r -> pure r + +-- | Check if a document is a PDF. +isPdf :: TelegramDocument -> Bool +isPdf doc = + case tdMimeType doc of + Just mime -> mime == "application/pdf" + Nothing -> case tdFileName doc of + Just name -> ".pdf" `Text.isSuffixOf` Text.toLower name + Nothing -> False + +-- | Download and extract text from a PDF sent to the bot. +downloadAndExtractPdf :: TelegramConfig -> Text -> IO (Either Text Text) +downloadAndExtractPdf cfg fileId = do + filePathResult <- getFile cfg fileId + case filePathResult of + Left err -> pure (Left err) + Right filePath -> + withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do + hClose tmpHandle + downloadResult <- downloadFile cfg filePath tmpPath + case downloadResult of + Left err -> pure (Left err) + Right () -> Pdf.extractPdfText tmpPath + -- | System prompt for the Telegram bot agent. telegramSystemPrompt :: Text telegramSystemPrompt = @@ -380,7 +575,15 @@ runTelegramBot tgConfig provider = do putText "Starting Telegram bot..." offsetVar <- newTVarIO 0 - let engineCfg = Engine.defaultEngineConfig + let engineCfg = + Engine.defaultEngineConfig + { Engine.engineOnToolCall = \toolName args -> + putText <| "Tool call: " <> toolName <> " " <> Text.take 200 args, + Engine.engineOnToolResult = \toolName success result -> + putText <| "Tool result: " <> toolName <> " " <> (if success then "ok" else "err") <> " " <> Text.take 200 result, + Engine.engineOnActivity = \activity -> + putText <| "Agent: " <> activity + } forever <| do offset <- readTVarIO offsetVar @@ -427,12 +630,33 @@ handleAuthorizedMessage :: Int -> IO () handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do - _ <- Memory.saveMessage uid chatId Memory.UserRole (tmText msg) + pdfContent <- case tmDocument msg of + Just doc | isPdf doc -> do + putText <| "Processing PDF: " <> fromMaybe "(unnamed)" (tdFileName doc) + result <- downloadAndExtractPdf tgConfig (tdFileId doc) + case result of + Left err -> do + putText <| "PDF extraction failed: " <> err + pure Nothing + Right text -> do + let truncated = Text.take 40000 text + putText <| "Extracted " <> tshow (Text.length truncated) <> " chars from PDF" + pure (Just truncated) + _ -> pure Nothing + + let userMessage = case pdfContent of + Just pdfText -> + let caption = tmText msg + prefix = if Text.null caption then "here's the PDF content:\n\n" else caption <> "\n\n---\nPDF content:\n\n" + in prefix <> pdfText + Nothing -> tmText msg + + _ <- Memory.saveMessage uid chatId Memory.UserRole userMessage (conversationContext, contextTokens) <- Memory.getConversationContext uid chatId maxConversationTokens putText <| "Conversation context: " <> tshow contextTokens <> " tokens" - memories <- Memory.recallMemories uid (tmText msg) 5 + memories <- Memory.recallMemories uid userMessage 5 let memoryContext = Memory.formatMemoriesForPrompt memories let systemPrompt = @@ -449,7 +673,13 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do searchTools = case tgKagiApiKey tgConfig of Just kagiKey -> [WebSearch.webSearchTool kagiKey] Nothing -> [] - tools = memoryTools <> searchTools + pdfTools = [Pdf.pdfTool] + notesTools = + [ Notes.noteAddTool uid, + Notes.noteListTool uid, + Notes.noteDeleteTool uid + ] + tools = memoryTools <> searchTools <> pdfTools <> notesTools let agentCfg = Engine.defaultAgentConfig @@ -462,7 +692,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do } } - result <- Engine.runAgentWithProvider engineCfg provider agentCfg (tmText msg) + result <- Engine.runAgentWithProvider engineCfg provider agentCfg userMessage case result of Left err -> do @@ -520,6 +750,84 @@ checkAndSummarize provider uid chatId = do _ <- Memory.summarizeAndArchive uid chatId summary putText "Conversation summarized and archived" +-- | Check if Ollama is running and has the embedding model. +-- Returns Right () if ready, Left error message otherwise. +checkOllama :: IO (Either Text ()) +checkOllama = do + ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" + let url = ollamaUrl <> "/api/tags" + result <- + try <| do + req <- HTTP.parseRequest url + HTTP.httpLBS req + case result of + Left (e :: SomeException) -> + pure (Left ("Ollama not running: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then case Aeson.decode (HTTP.getResponseBody response) of + Just (Aeson.Object obj) -> case KeyMap.lookup "models" obj of + Just (Aeson.Array models) -> + let names = [n | Aeson.Object m <- toList models, Just (Aeson.String n) <- [KeyMap.lookup "name" m]] + hasNomic = any ("nomic-embed-text" `Text.isInfixOf`) names + in if hasNomic + then pure (Right ()) + else pure (Left "nomic-embed-text model not found") + _ -> pure (Left "Invalid Ollama response") + _ -> pure (Left "Failed to parse Ollama response") + else pure (Left ("Ollama HTTP error: " <> tshow status)) + +-- | Pull the embedding model from Ollama. +pullEmbeddingModel :: IO (Either Text ()) +pullEmbeddingModel = do + ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" + let url = ollamaUrl <> "/api/pull" + putText "Pulling nomic-embed-text model (this may take a few minutes)..." + req0 <- HTTP.parseRequest url + let body = Aeson.object ["name" .= ("nomic-embed-text" :: Text)] + req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (600 * 1000000)) + <| req0 + result <- try (HTTP.httpLBS req) + case result of + Left (e :: SomeException) -> + pure (Left ("Failed to pull model: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + putText "nomic-embed-text model ready" + pure (Right ()) + else pure (Left ("Pull failed: HTTP " <> tshow status)) + +-- | Ensure Ollama is running and has the embedding model. +-- Pulls the model if missing, exits if Ollama is not running. +ensureOllama :: IO () +ensureOllama = do + checkResult <- checkOllama + case checkResult of + Right () -> putText "Ollama ready with nomic-embed-text" + Left err + | "not running" `Text.isInfixOf` err -> do + putText <| "Error: " <> err + putText "Please start Ollama: ollama serve" + exitFailure + | "not found" `Text.isInfixOf` err -> do + putText "nomic-embed-text model not found, pulling..." + pullResult <- pullEmbeddingModel + case pullResult of + Right () -> pure () + Left pullErr -> do + putText <| "Error: " <> pullErr + exitFailure + | otherwise -> do + putText <| "Ollama error: " <> err + exitFailure + -- | Start the Telegram bot from environment or provided token. startBot :: Maybe Text -> IO () startBot maybeToken = do @@ -533,6 +841,8 @@ startBot maybeToken = do putText "Error: TELEGRAM_BOT_TOKEN not set and no --token provided" exitFailure + ensureOllama + allowedIds <- loadAllowedUserIds kagiKey <- fmap Text.pack </ lookupEnv "KAGI_API_KEY" |
