summaryrefslogtreecommitdiff
path: root/Omni/Agent/Telegram.hs
diff options
context:
space:
mode:
Diffstat (limited to 'Omni/Agent/Telegram.hs')
-rw-r--r--Omni/Agent/Telegram.hs334
1 files changed, 322 insertions, 12 deletions
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs
index 1162e25..e7eb659 100644
--- a/Omni/Agent/Telegram.hs
+++ b/Omni/Agent/Telegram.hs
@@ -24,16 +24,24 @@ module Omni.Agent.Telegram
-- * Types
TelegramMessage (..),
TelegramUpdate (..),
+ TelegramDocument (..),
-- * Telegram API
getUpdates,
sendMessage,
sendTypingAction,
+ getFile,
+ downloadFile,
+ downloadAndExtractPdf,
+ isPdf,
-- * Bot Loop
runTelegramBot,
handleMessage,
startBot,
+ ensureOllama,
+ checkOllama,
+ pullEmbeddingModel,
-- * System Prompt
telegramSystemPrompt,
@@ -56,9 +64,13 @@ import qualified Network.HTTP.Simple as HTTP
import qualified Omni.Agent.Engine as Engine
import qualified Omni.Agent.Memory as Memory
import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Agent.Tools.Notes as Notes
+import qualified Omni.Agent.Tools.Pdf as Pdf
import qualified Omni.Agent.Tools.WebSearch as WebSearch
import qualified Omni.Test as Test
import System.Environment (lookupEnv)
+import System.IO (hClose)
+import System.IO.Temp (withSystemTempFile)
main :: IO ()
main = Test.run test
@@ -98,7 +110,8 @@ test =
tmUserId = 789,
tmUserFirstName = "Test",
tmUserLastName = Just "User",
- tmText = "Hello bot"
+ tmText = "Hello bot",
+ tmDocument = Nothing
}
case Aeson.decode (Aeson.encode msg) of
Nothing -> Test.assertFailure "Failed to decode TelegramMessage"
@@ -130,6 +143,50 @@ test =
tmChatId msg Test.@=? 456
tmUserId msg Test.@=? 789
tmText msg Test.@=? "Hello"
+ tmDocument msg Test.@=? Nothing,
+ Test.unit "parseUpdate extracts document correctly" <| do
+ let json =
+ Aeson.object
+ [ "update_id" .= (124 :: Int),
+ "message"
+ .= Aeson.object
+ [ "message_id" .= (2 :: Int),
+ "chat" .= Aeson.object ["id" .= (456 :: Int)],
+ "from"
+ .= Aeson.object
+ [ "id" .= (789 :: Int),
+ "first_name" .= ("Test" :: Text)
+ ],
+ "caption" .= ("check this out" :: Text),
+ "document"
+ .= Aeson.object
+ [ "file_id" .= ("abc123" :: Text),
+ "file_name" .= ("test.pdf" :: Text),
+ "mime_type" .= ("application/pdf" :: Text),
+ "file_size" .= (12345 :: Int)
+ ]
+ ]
+ ]
+ case parseUpdate json of
+ Nothing -> Test.assertFailure "Failed to parse document update"
+ Just msg -> do
+ tmUpdateId msg Test.@=? 124
+ tmText msg Test.@=? "check this out"
+ case tmDocument msg of
+ Nothing -> Test.assertFailure "Expected document"
+ Just doc -> do
+ tdFileId doc Test.@=? "abc123"
+ tdFileName doc Test.@=? Just "test.pdf"
+ tdMimeType doc Test.@=? Just "application/pdf",
+ Test.unit "isPdf detects PDFs by mime type" <| do
+ let doc = TelegramDocument "id" (Just "doc.pdf") (Just "application/pdf") Nothing
+ isPdf doc Test.@=? True,
+ Test.unit "isPdf detects PDFs by filename" <| do
+ let doc = TelegramDocument "id" (Just "report.PDF") Nothing Nothing
+ isPdf doc Test.@=? True,
+ Test.unit "isPdf rejects non-PDFs" <| do
+ let doc = TelegramDocument "id" (Just "image.jpg") (Just "image/jpeg") Nothing
+ isPdf doc Test.@=? False
]
-- | Telegram bot configuration.
@@ -177,6 +234,32 @@ isUserAllowed :: TelegramConfig -> Int -> Bool
isUserAllowed cfg usrId =
null (tgAllowedUserIds cfg) || usrId `elem` tgAllowedUserIds cfg
+-- | Document attachment info from Telegram.
+data TelegramDocument = TelegramDocument
+ { tdFileId :: Text,
+ tdFileName :: Maybe Text,
+ tdMimeType :: Maybe Text,
+ tdFileSize :: Maybe Int
+ }
+ deriving (Show, Eq, Generic)
+
+instance Aeson.ToJSON TelegramDocument where
+ toJSON d =
+ Aeson.object
+ [ "file_id" .= tdFileId d,
+ "file_name" .= tdFileName d,
+ "mime_type" .= tdMimeType d,
+ "file_size" .= tdFileSize d
+ ]
+
+instance Aeson.FromJSON TelegramDocument where
+ parseJSON =
+ Aeson.withObject "TelegramDocument" <| \v ->
+ (TelegramDocument </ (v .: "file_id"))
+ <*> (v .:? "file_name")
+ <*> (v .:? "mime_type")
+ <*> (v .:? "file_size")
+
-- | A parsed Telegram message from a user.
data TelegramMessage = TelegramMessage
{ tmUpdateId :: Int,
@@ -184,7 +267,8 @@ data TelegramMessage = TelegramMessage
tmUserId :: Int,
tmUserFirstName :: Text,
tmUserLastName :: Maybe Text,
- tmText :: Text
+ tmText :: Text,
+ tmDocument :: Maybe TelegramDocument
}
deriving (Show, Eq, Generic)
@@ -196,7 +280,8 @@ instance Aeson.ToJSON TelegramMessage where
"user_id" .= tmUserId m,
"user_first_name" .= tmUserFirstName m,
"user_last_name" .= tmUserLastName m,
- "text" .= tmText m
+ "text" .= tmText m,
+ "document" .= tmDocument m
]
instance Aeson.FromJSON TelegramMessage where
@@ -208,6 +293,7 @@ instance Aeson.FromJSON TelegramMessage where
<*> (v .: "user_first_name")
<*> (v .:? "user_last_name")
<*> (v .: "text")
+ <*> (v .:? "document")
-- | Raw Telegram update for parsing.
data TelegramUpdate = TelegramUpdate
@@ -223,6 +309,7 @@ instance Aeson.FromJSON TelegramUpdate where
<*> (v .:? "message")
-- | Parse a Telegram update into a TelegramMessage.
+-- Handles both text messages and document uploads.
parseUpdate :: Aeson.Value -> Maybe TelegramMessage
parseUpdate val = do
Aeson.Object obj <- pure val
@@ -244,9 +331,17 @@ parseUpdate val = do
let lastName = case KeyMap.lookup "last_name" fromObj of
Just (Aeson.String s) -> Just s
_ -> Nothing
- text <- case KeyMap.lookup "text" msgObj of
- Just (Aeson.String s) -> Just s
- _ -> Nothing
+ let text = case KeyMap.lookup "text" msgObj of
+ Just (Aeson.String s) -> s
+ _ -> ""
+ let caption = case KeyMap.lookup "caption" msgObj of
+ Just (Aeson.String s) -> s
+ _ -> ""
+ let document = case KeyMap.lookup "document" msgObj of
+ Just (Aeson.Object docObj) -> parseDocument docObj
+ _ -> Nothing
+ let hasContent = not (Text.null text) || not (Text.null caption) || isJust document
+ guard hasContent
pure
TelegramMessage
{ tmUpdateId = updateId,
@@ -254,7 +349,31 @@ parseUpdate val = do
tmUserId = userId,
tmUserFirstName = firstName,
tmUserLastName = lastName,
- tmText = text
+ tmText = if Text.null text then caption else text,
+ tmDocument = document
+ }
+
+-- | Parse document object from Telegram message.
+parseDocument :: Aeson.Object -> Maybe TelegramDocument
+parseDocument docObj = do
+ fileId <- case KeyMap.lookup "file_id" docObj of
+ Just (Aeson.String s) -> Just s
+ _ -> Nothing
+ let fileName = case KeyMap.lookup "file_name" docObj of
+ Just (Aeson.String s) -> Just s
+ _ -> Nothing
+ mimeType = case KeyMap.lookup "mime_type" docObj of
+ Just (Aeson.String s) -> Just s
+ _ -> Nothing
+ fileSize = case KeyMap.lookup "file_size" docObj of
+ Just (Aeson.Number n) -> Just (round n)
+ _ -> Nothing
+ pure
+ TelegramDocument
+ { tdFileId = fileId,
+ tdFileName = fileName,
+ tdMimeType = mimeType,
+ tdFileSize = fileSize
}
-- | Poll Telegram for new updates.
@@ -349,6 +468,82 @@ sendMessage cfg chatId text = do
then putText <| "Message sent (" <> tshow (Text.length text) <> " chars)"
else putText <| "Send message failed: " <> tshow status <> " - " <> tshow respBody
+-- | Get file path from Telegram file_id.
+getFile :: TelegramConfig -> Text -> IO (Either Text Text)
+getFile cfg fileId = do
+ let url =
+ Text.unpack (tgApiBaseUrl cfg)
+ <> "/bot"
+ <> Text.unpack (tgBotToken cfg)
+ <> "/getFile"
+ req0 <- HTTP.parseRequest url
+ let body = Aeson.object ["file_id" .= fileId]
+ req =
+ HTTP.setRequestMethod "POST"
+ <| HTTP.setRequestHeader "Content-Type" ["application/json"]
+ <| HTTP.setRequestBodyLBS (Aeson.encode body)
+ <| req0
+ result <- try (HTTP.httpLBS req)
+ case result of
+ Left (e :: SomeException) ->
+ pure (Left ("getFile error: " <> tshow e))
+ Right response -> do
+ let status = HTTP.getResponseStatusCode response
+ if status >= 200 && status < 300
+ then case Aeson.decode (HTTP.getResponseBody response) of
+ Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of
+ Just (Aeson.Object resObj) -> case KeyMap.lookup "file_path" resObj of
+ Just (Aeson.String fp) -> pure (Right fp)
+ _ -> pure (Left "No file_path in response")
+ _ -> pure (Left "No result in response")
+ _ -> pure (Left "Failed to parse getFile response")
+ else pure (Left ("getFile HTTP error: " <> tshow status))
+
+-- | Download a file from Telegram servers.
+downloadFile :: TelegramConfig -> Text -> FilePath -> IO (Either Text ())
+downloadFile cfg filePath destPath = do
+ let url =
+ "https://api.telegram.org/file/bot"
+ <> Text.unpack (tgBotToken cfg)
+ <> "/"
+ <> Text.unpack filePath
+ result <-
+ try <| do
+ req <- HTTP.parseRequest url
+ response <- HTTP.httpLBS req
+ let status = HTTP.getResponseStatusCode response
+ if status >= 200 && status < 300
+ then do
+ BL.writeFile destPath (HTTP.getResponseBody response)
+ pure (Right ())
+ else pure (Left ("Download failed: HTTP " <> tshow status))
+ case result of
+ Left (e :: SomeException) -> pure (Left ("Download error: " <> tshow e))
+ Right r -> pure r
+
+-- | Check if a document is a PDF.
+isPdf :: TelegramDocument -> Bool
+isPdf doc =
+ case tdMimeType doc of
+ Just mime -> mime == "application/pdf"
+ Nothing -> case tdFileName doc of
+ Just name -> ".pdf" `Text.isSuffixOf` Text.toLower name
+ Nothing -> False
+
+-- | Download and extract text from a PDF sent to the bot.
+downloadAndExtractPdf :: TelegramConfig -> Text -> IO (Either Text Text)
+downloadAndExtractPdf cfg fileId = do
+ filePathResult <- getFile cfg fileId
+ case filePathResult of
+ Left err -> pure (Left err)
+ Right filePath ->
+ withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do
+ hClose tmpHandle
+ downloadResult <- downloadFile cfg filePath tmpPath
+ case downloadResult of
+ Left err -> pure (Left err)
+ Right () -> Pdf.extractPdfText tmpPath
+
-- | System prompt for the Telegram bot agent.
telegramSystemPrompt :: Text
telegramSystemPrompt =
@@ -380,7 +575,15 @@ runTelegramBot tgConfig provider = do
putText "Starting Telegram bot..."
offsetVar <- newTVarIO 0
- let engineCfg = Engine.defaultEngineConfig
+ let engineCfg =
+ Engine.defaultEngineConfig
+ { Engine.engineOnToolCall = \toolName args ->
+ putText <| "Tool call: " <> toolName <> " " <> Text.take 200 args,
+ Engine.engineOnToolResult = \toolName success result ->
+ putText <| "Tool result: " <> toolName <> " " <> (if success then "ok" else "err") <> " " <> Text.take 200 result,
+ Engine.engineOnActivity = \activity ->
+ putText <| "Agent: " <> activity
+ }
forever <| do
offset <- readTVarIO offsetVar
@@ -427,12 +630,33 @@ handleAuthorizedMessage ::
Int ->
IO ()
handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
- _ <- Memory.saveMessage uid chatId Memory.UserRole (tmText msg)
+ pdfContent <- case tmDocument msg of
+ Just doc | isPdf doc -> do
+ putText <| "Processing PDF: " <> fromMaybe "(unnamed)" (tdFileName doc)
+ result <- downloadAndExtractPdf tgConfig (tdFileId doc)
+ case result of
+ Left err -> do
+ putText <| "PDF extraction failed: " <> err
+ pure Nothing
+ Right text -> do
+ let truncated = Text.take 40000 text
+ putText <| "Extracted " <> tshow (Text.length truncated) <> " chars from PDF"
+ pure (Just truncated)
+ _ -> pure Nothing
+
+ let userMessage = case pdfContent of
+ Just pdfText ->
+ let caption = tmText msg
+ prefix = if Text.null caption then "here's the PDF content:\n\n" else caption <> "\n\n---\nPDF content:\n\n"
+ in prefix <> pdfText
+ Nothing -> tmText msg
+
+ _ <- Memory.saveMessage uid chatId Memory.UserRole userMessage
(conversationContext, contextTokens) <- Memory.getConversationContext uid chatId maxConversationTokens
putText <| "Conversation context: " <> tshow contextTokens <> " tokens"
- memories <- Memory.recallMemories uid (tmText msg) 5
+ memories <- Memory.recallMemories uid userMessage 5
let memoryContext = Memory.formatMemoriesForPrompt memories
let systemPrompt =
@@ -449,7 +673,13 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
searchTools = case tgKagiApiKey tgConfig of
Just kagiKey -> [WebSearch.webSearchTool kagiKey]
Nothing -> []
- tools = memoryTools <> searchTools
+ pdfTools = [Pdf.pdfTool]
+ notesTools =
+ [ Notes.noteAddTool uid,
+ Notes.noteListTool uid,
+ Notes.noteDeleteTool uid
+ ]
+ tools = memoryTools <> searchTools <> pdfTools <> notesTools
let agentCfg =
Engine.defaultAgentConfig
@@ -462,7 +692,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
}
}
- result <- Engine.runAgentWithProvider engineCfg provider agentCfg (tmText msg)
+ result <- Engine.runAgentWithProvider engineCfg provider agentCfg userMessage
case result of
Left err -> do
@@ -520,6 +750,84 @@ checkAndSummarize provider uid chatId = do
_ <- Memory.summarizeAndArchive uid chatId summary
putText "Conversation summarized and archived"
+-- | Check if Ollama is running and has the embedding model.
+-- Returns Right () if ready, Left error message otherwise.
+checkOllama :: IO (Either Text ())
+checkOllama = do
+ ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL"
+ let url = ollamaUrl <> "/api/tags"
+ result <-
+ try <| do
+ req <- HTTP.parseRequest url
+ HTTP.httpLBS req
+ case result of
+ Left (e :: SomeException) ->
+ pure (Left ("Ollama not running: " <> tshow e))
+ Right response -> do
+ let status = HTTP.getResponseStatusCode response
+ if status >= 200 && status < 300
+ then case Aeson.decode (HTTP.getResponseBody response) of
+ Just (Aeson.Object obj) -> case KeyMap.lookup "models" obj of
+ Just (Aeson.Array models) ->
+ let names = [n | Aeson.Object m <- toList models, Just (Aeson.String n) <- [KeyMap.lookup "name" m]]
+ hasNomic = any ("nomic-embed-text" `Text.isInfixOf`) names
+ in if hasNomic
+ then pure (Right ())
+ else pure (Left "nomic-embed-text model not found")
+ _ -> pure (Left "Invalid Ollama response")
+ _ -> pure (Left "Failed to parse Ollama response")
+ else pure (Left ("Ollama HTTP error: " <> tshow status))
+
+-- | Pull the embedding model from Ollama.
+pullEmbeddingModel :: IO (Either Text ())
+pullEmbeddingModel = do
+ ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL"
+ let url = ollamaUrl <> "/api/pull"
+ putText "Pulling nomic-embed-text model (this may take a few minutes)..."
+ req0 <- HTTP.parseRequest url
+ let body = Aeson.object ["name" .= ("nomic-embed-text" :: Text)]
+ req =
+ HTTP.setRequestMethod "POST"
+ <| HTTP.setRequestHeader "Content-Type" ["application/json"]
+ <| HTTP.setRequestBodyLBS (Aeson.encode body)
+ <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (600 * 1000000))
+ <| req0
+ result <- try (HTTP.httpLBS req)
+ case result of
+ Left (e :: SomeException) ->
+ pure (Left ("Failed to pull model: " <> tshow e))
+ Right response -> do
+ let status = HTTP.getResponseStatusCode response
+ if status >= 200 && status < 300
+ then do
+ putText "nomic-embed-text model ready"
+ pure (Right ())
+ else pure (Left ("Pull failed: HTTP " <> tshow status))
+
+-- | Ensure Ollama is running and has the embedding model.
+-- Pulls the model if missing, exits if Ollama is not running.
+ensureOllama :: IO ()
+ensureOllama = do
+ checkResult <- checkOllama
+ case checkResult of
+ Right () -> putText "Ollama ready with nomic-embed-text"
+ Left err
+ | "not running" `Text.isInfixOf` err -> do
+ putText <| "Error: " <> err
+ putText "Please start Ollama: ollama serve"
+ exitFailure
+ | "not found" `Text.isInfixOf` err -> do
+ putText "nomic-embed-text model not found, pulling..."
+ pullResult <- pullEmbeddingModel
+ case pullResult of
+ Right () -> pure ()
+ Left pullErr -> do
+ putText <| "Error: " <> pullErr
+ exitFailure
+ | otherwise -> do
+ putText <| "Ollama error: " <> err
+ exitFailure
+
-- | Start the Telegram bot from environment or provided token.
startBot :: Maybe Text -> IO ()
startBot maybeToken = do
@@ -533,6 +841,8 @@ startBot maybeToken = do
putText "Error: TELEGRAM_BOT_TOKEN not set and no --token provided"
exitFailure
+ ensureOllama
+
allowedIds <- loadAllowedUserIds
kagiKey <- fmap Text.pack </ lookupEnv "KAGI_API_KEY"