diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-12 18:55:15 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-12 18:55:15 -0500 |
| commit | a6863d562a76eff5de36e0faa244e6ae2310bc22 (patch) | |
| tree | 1953a43ecaacbf48355bb0c6b32d7b81e853455d /Omni/Agent | |
| parent | 622786d69393c650d8d5e2b080ba9fad77f901e0 (diff) | |
Add PDF and Notes tools to Telegram bot
- Omni/Agent/Tools/Pdf.hs: Extract text from PDFs using pdftotext
- Omni/Agent/Tools/Notes.hs: Quick notes CRUD with topics
- Add notes table schema to Memory.hs initMemoryDb
- Wire both tools into Telegram bot with logging callbacks
Diffstat (limited to 'Omni/Agent')
| -rw-r--r-- | Omni/Agent/Memory.hs | 15 | ||||
| -rw-r--r-- | Omni/Agent/Telegram.hs | 334 | ||||
| -rw-r--r-- | Omni/Agent/Tools/Notes.hs | 357 | ||||
| -rw-r--r-- | Omni/Agent/Tools/Pdf.hs | 180 |
4 files changed, 874 insertions, 12 deletions
diff --git a/Omni/Agent/Memory.hs b/Omni/Agent/Memory.hs index 461f7ac..8337baf 100644 --- a/Omni/Agent/Memory.hs +++ b/Omni/Agent/Memory.hs @@ -513,6 +513,21 @@ initMemoryDb conn = do SQL.execute_ conn "CREATE INDEX IF NOT EXISTS idx_summary_user_chat ON conversation_summaries(user_id, chat_id)" + SQL.execute_ + conn + "CREATE TABLE IF NOT EXISTS notes (\ + \ id INTEGER PRIMARY KEY AUTOINCREMENT,\ + \ user_id TEXT NOT NULL,\ + \ topic TEXT NOT NULL,\ + \ content TEXT NOT NULL,\ + \ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\ + \)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_user ON notes(user_id)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_topic ON notes(user_id, topic)" -- | Create a new user. createUser :: Text -> Maybe Int -> IO User diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index 1162e25..e7eb659 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -24,16 +24,24 @@ module Omni.Agent.Telegram -- * Types TelegramMessage (..), TelegramUpdate (..), + TelegramDocument (..), -- * Telegram API getUpdates, sendMessage, sendTypingAction, + getFile, + downloadFile, + downloadAndExtractPdf, + isPdf, -- * Bot Loop runTelegramBot, handleMessage, startBot, + ensureOllama, + checkOllama, + pullEmbeddingModel, -- * System Prompt telegramSystemPrompt, @@ -56,9 +64,13 @@ import qualified Network.HTTP.Simple as HTTP import qualified Omni.Agent.Engine as Engine import qualified Omni.Agent.Memory as Memory import qualified Omni.Agent.Provider as Provider +import qualified Omni.Agent.Tools.Notes as Notes +import qualified Omni.Agent.Tools.Pdf as Pdf import qualified Omni.Agent.Tools.WebSearch as WebSearch import qualified Omni.Test as Test import System.Environment (lookupEnv) +import System.IO (hClose) +import System.IO.Temp (withSystemTempFile) main :: IO () main = Test.run test @@ -98,7 +110,8 @@ test = tmUserId = 789, tmUserFirstName = "Test", tmUserLastName = Just "User", - tmText = "Hello bot" + tmText = "Hello bot", + tmDocument = Nothing } case Aeson.decode (Aeson.encode msg) of Nothing -> Test.assertFailure "Failed to decode TelegramMessage" @@ -130,6 +143,50 @@ test = tmChatId msg Test.@=? 456 tmUserId msg Test.@=? 789 tmText msg Test.@=? "Hello" + tmDocument msg Test.@=? Nothing, + Test.unit "parseUpdate extracts document correctly" <| do + let json = + Aeson.object + [ "update_id" .= (124 :: Int), + "message" + .= Aeson.object + [ "message_id" .= (2 :: Int), + "chat" .= Aeson.object ["id" .= (456 :: Int)], + "from" + .= Aeson.object + [ "id" .= (789 :: Int), + "first_name" .= ("Test" :: Text) + ], + "caption" .= ("check this out" :: Text), + "document" + .= Aeson.object + [ "file_id" .= ("abc123" :: Text), + "file_name" .= ("test.pdf" :: Text), + "mime_type" .= ("application/pdf" :: Text), + "file_size" .= (12345 :: Int) + ] + ] + ] + case parseUpdate json of + Nothing -> Test.assertFailure "Failed to parse document update" + Just msg -> do + tmUpdateId msg Test.@=? 124 + tmText msg Test.@=? "check this out" + case tmDocument msg of + Nothing -> Test.assertFailure "Expected document" + Just doc -> do + tdFileId doc Test.@=? "abc123" + tdFileName doc Test.@=? Just "test.pdf" + tdMimeType doc Test.@=? Just "application/pdf", + Test.unit "isPdf detects PDFs by mime type" <| do + let doc = TelegramDocument "id" (Just "doc.pdf") (Just "application/pdf") Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf detects PDFs by filename" <| do + let doc = TelegramDocument "id" (Just "report.PDF") Nothing Nothing + isPdf doc Test.@=? True, + Test.unit "isPdf rejects non-PDFs" <| do + let doc = TelegramDocument "id" (Just "image.jpg") (Just "image/jpeg") Nothing + isPdf doc Test.@=? False ] -- | Telegram bot configuration. @@ -177,6 +234,32 @@ isUserAllowed :: TelegramConfig -> Int -> Bool isUserAllowed cfg usrId = null (tgAllowedUserIds cfg) || usrId `elem` tgAllowedUserIds cfg +-- | Document attachment info from Telegram. +data TelegramDocument = TelegramDocument + { tdFileId :: Text, + tdFileName :: Maybe Text, + tdMimeType :: Maybe Text, + tdFileSize :: Maybe Int + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON TelegramDocument where + toJSON d = + Aeson.object + [ "file_id" .= tdFileId d, + "file_name" .= tdFileName d, + "mime_type" .= tdMimeType d, + "file_size" .= tdFileSize d + ] + +instance Aeson.FromJSON TelegramDocument where + parseJSON = + Aeson.withObject "TelegramDocument" <| \v -> + (TelegramDocument </ (v .: "file_id")) + <*> (v .:? "file_name") + <*> (v .:? "mime_type") + <*> (v .:? "file_size") + -- | A parsed Telegram message from a user. data TelegramMessage = TelegramMessage { tmUpdateId :: Int, @@ -184,7 +267,8 @@ data TelegramMessage = TelegramMessage tmUserId :: Int, tmUserFirstName :: Text, tmUserLastName :: Maybe Text, - tmText :: Text + tmText :: Text, + tmDocument :: Maybe TelegramDocument } deriving (Show, Eq, Generic) @@ -196,7 +280,8 @@ instance Aeson.ToJSON TelegramMessage where "user_id" .= tmUserId m, "user_first_name" .= tmUserFirstName m, "user_last_name" .= tmUserLastName m, - "text" .= tmText m + "text" .= tmText m, + "document" .= tmDocument m ] instance Aeson.FromJSON TelegramMessage where @@ -208,6 +293,7 @@ instance Aeson.FromJSON TelegramMessage where <*> (v .: "user_first_name") <*> (v .:? "user_last_name") <*> (v .: "text") + <*> (v .:? "document") -- | Raw Telegram update for parsing. data TelegramUpdate = TelegramUpdate @@ -223,6 +309,7 @@ instance Aeson.FromJSON TelegramUpdate where <*> (v .:? "message") -- | Parse a Telegram update into a TelegramMessage. +-- Handles both text messages and document uploads. parseUpdate :: Aeson.Value -> Maybe TelegramMessage parseUpdate val = do Aeson.Object obj <- pure val @@ -244,9 +331,17 @@ parseUpdate val = do let lastName = case KeyMap.lookup "last_name" fromObj of Just (Aeson.String s) -> Just s _ -> Nothing - text <- case KeyMap.lookup "text" msgObj of - Just (Aeson.String s) -> Just s - _ -> Nothing + let text = case KeyMap.lookup "text" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let caption = case KeyMap.lookup "caption" msgObj of + Just (Aeson.String s) -> s + _ -> "" + let document = case KeyMap.lookup "document" msgObj of + Just (Aeson.Object docObj) -> parseDocument docObj + _ -> Nothing + let hasContent = not (Text.null text) || not (Text.null caption) || isJust document + guard hasContent pure TelegramMessage { tmUpdateId = updateId, @@ -254,7 +349,31 @@ parseUpdate val = do tmUserId = userId, tmUserFirstName = firstName, tmUserLastName = lastName, - tmText = text + tmText = if Text.null text then caption else text, + tmDocument = document + } + +-- | Parse document object from Telegram message. +parseDocument :: Aeson.Object -> Maybe TelegramDocument +parseDocument docObj = do + fileId <- case KeyMap.lookup "file_id" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + let fileName = case KeyMap.lookup "file_name" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + mimeType = case KeyMap.lookup "mime_type" docObj of + Just (Aeson.String s) -> Just s + _ -> Nothing + fileSize = case KeyMap.lookup "file_size" docObj of + Just (Aeson.Number n) -> Just (round n) + _ -> Nothing + pure + TelegramDocument + { tdFileId = fileId, + tdFileName = fileName, + tdMimeType = mimeType, + tdFileSize = fileSize } -- | Poll Telegram for new updates. @@ -349,6 +468,82 @@ sendMessage cfg chatId text = do then putText <| "Message sent (" <> tshow (Text.length text) <> " chars)" else putText <| "Send message failed: " <> tshow status <> " - " <> tshow respBody +-- | Get file path from Telegram file_id. +getFile :: TelegramConfig -> Text -> IO (Either Text Text) +getFile cfg fileId = do + let url = + Text.unpack (tgApiBaseUrl cfg) + <> "/bot" + <> Text.unpack (tgBotToken cfg) + <> "/getFile" + req0 <- HTTP.parseRequest url + let body = Aeson.object ["file_id" .= fileId] + req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| req0 + result <- try (HTTP.httpLBS req) + case result of + Left (e :: SomeException) -> + pure (Left ("getFile error: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then case Aeson.decode (HTTP.getResponseBody response) of + Just (Aeson.Object obj) -> case KeyMap.lookup "result" obj of + Just (Aeson.Object resObj) -> case KeyMap.lookup "file_path" resObj of + Just (Aeson.String fp) -> pure (Right fp) + _ -> pure (Left "No file_path in response") + _ -> pure (Left "No result in response") + _ -> pure (Left "Failed to parse getFile response") + else pure (Left ("getFile HTTP error: " <> tshow status)) + +-- | Download a file from Telegram servers. +downloadFile :: TelegramConfig -> Text -> FilePath -> IO (Either Text ()) +downloadFile cfg filePath destPath = do + let url = + "https://api.telegram.org/file/bot" + <> Text.unpack (tgBotToken cfg) + <> "/" + <> Text.unpack filePath + result <- + try <| do + req <- HTTP.parseRequest url + response <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + BL.writeFile destPath (HTTP.getResponseBody response) + pure (Right ()) + else pure (Left ("Download failed: HTTP " <> tshow status)) + case result of + Left (e :: SomeException) -> pure (Left ("Download error: " <> tshow e)) + Right r -> pure r + +-- | Check if a document is a PDF. +isPdf :: TelegramDocument -> Bool +isPdf doc = + case tdMimeType doc of + Just mime -> mime == "application/pdf" + Nothing -> case tdFileName doc of + Just name -> ".pdf" `Text.isSuffixOf` Text.toLower name + Nothing -> False + +-- | Download and extract text from a PDF sent to the bot. +downloadAndExtractPdf :: TelegramConfig -> Text -> IO (Either Text Text) +downloadAndExtractPdf cfg fileId = do + filePathResult <- getFile cfg fileId + case filePathResult of + Left err -> pure (Left err) + Right filePath -> + withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do + hClose tmpHandle + downloadResult <- downloadFile cfg filePath tmpPath + case downloadResult of + Left err -> pure (Left err) + Right () -> Pdf.extractPdfText tmpPath + -- | System prompt for the Telegram bot agent. telegramSystemPrompt :: Text telegramSystemPrompt = @@ -380,7 +575,15 @@ runTelegramBot tgConfig provider = do putText "Starting Telegram bot..." offsetVar <- newTVarIO 0 - let engineCfg = Engine.defaultEngineConfig + let engineCfg = + Engine.defaultEngineConfig + { Engine.engineOnToolCall = \toolName args -> + putText <| "Tool call: " <> toolName <> " " <> Text.take 200 args, + Engine.engineOnToolResult = \toolName success result -> + putText <| "Tool result: " <> toolName <> " " <> (if success then "ok" else "err") <> " " <> Text.take 200 result, + Engine.engineOnActivity = \activity -> + putText <| "Agent: " <> activity + } forever <| do offset <- readTVarIO offsetVar @@ -427,12 +630,33 @@ handleAuthorizedMessage :: Int -> IO () handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do - _ <- Memory.saveMessage uid chatId Memory.UserRole (tmText msg) + pdfContent <- case tmDocument msg of + Just doc | isPdf doc -> do + putText <| "Processing PDF: " <> fromMaybe "(unnamed)" (tdFileName doc) + result <- downloadAndExtractPdf tgConfig (tdFileId doc) + case result of + Left err -> do + putText <| "PDF extraction failed: " <> err + pure Nothing + Right text -> do + let truncated = Text.take 40000 text + putText <| "Extracted " <> tshow (Text.length truncated) <> " chars from PDF" + pure (Just truncated) + _ -> pure Nothing + + let userMessage = case pdfContent of + Just pdfText -> + let caption = tmText msg + prefix = if Text.null caption then "here's the PDF content:\n\n" else caption <> "\n\n---\nPDF content:\n\n" + in prefix <> pdfText + Nothing -> tmText msg + + _ <- Memory.saveMessage uid chatId Memory.UserRole userMessage (conversationContext, contextTokens) <- Memory.getConversationContext uid chatId maxConversationTokens putText <| "Conversation context: " <> tshow contextTokens <> " tokens" - memories <- Memory.recallMemories uid (tmText msg) 5 + memories <- Memory.recallMemories uid userMessage 5 let memoryContext = Memory.formatMemoriesForPrompt memories let systemPrompt = @@ -449,7 +673,13 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do searchTools = case tgKagiApiKey tgConfig of Just kagiKey -> [WebSearch.webSearchTool kagiKey] Nothing -> [] - tools = memoryTools <> searchTools + pdfTools = [Pdf.pdfTool] + notesTools = + [ Notes.noteAddTool uid, + Notes.noteListTool uid, + Notes.noteDeleteTool uid + ] + tools = memoryTools <> searchTools <> pdfTools <> notesTools let agentCfg = Engine.defaultAgentConfig @@ -462,7 +692,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do } } - result <- Engine.runAgentWithProvider engineCfg provider agentCfg (tmText msg) + result <- Engine.runAgentWithProvider engineCfg provider agentCfg userMessage case result of Left err -> do @@ -520,6 +750,84 @@ checkAndSummarize provider uid chatId = do _ <- Memory.summarizeAndArchive uid chatId summary putText "Conversation summarized and archived" +-- | Check if Ollama is running and has the embedding model. +-- Returns Right () if ready, Left error message otherwise. +checkOllama :: IO (Either Text ()) +checkOllama = do + ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" + let url = ollamaUrl <> "/api/tags" + result <- + try <| do + req <- HTTP.parseRequest url + HTTP.httpLBS req + case result of + Left (e :: SomeException) -> + pure (Left ("Ollama not running: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then case Aeson.decode (HTTP.getResponseBody response) of + Just (Aeson.Object obj) -> case KeyMap.lookup "models" obj of + Just (Aeson.Array models) -> + let names = [n | Aeson.Object m <- toList models, Just (Aeson.String n) <- [KeyMap.lookup "name" m]] + hasNomic = any ("nomic-embed-text" `Text.isInfixOf`) names + in if hasNomic + then pure (Right ()) + else pure (Left "nomic-embed-text model not found") + _ -> pure (Left "Invalid Ollama response") + _ -> pure (Left "Failed to parse Ollama response") + else pure (Left ("Ollama HTTP error: " <> tshow status)) + +-- | Pull the embedding model from Ollama. +pullEmbeddingModel :: IO (Either Text ()) +pullEmbeddingModel = do + ollamaUrl <- fromMaybe "http://localhost:11434" </ lookupEnv "OLLAMA_URL" + let url = ollamaUrl <> "/api/pull" + putText "Pulling nomic-embed-text model (this may take a few minutes)..." + req0 <- HTTP.parseRequest url + let body = Aeson.object ["name" .= ("nomic-embed-text" :: Text)] + req = + HTTP.setRequestMethod "POST" + <| HTTP.setRequestHeader "Content-Type" ["application/json"] + <| HTTP.setRequestBodyLBS (Aeson.encode body) + <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (600 * 1000000)) + <| req0 + result <- try (HTTP.httpLBS req) + case result of + Left (e :: SomeException) -> + pure (Left ("Failed to pull model: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + putText "nomic-embed-text model ready" + pure (Right ()) + else pure (Left ("Pull failed: HTTP " <> tshow status)) + +-- | Ensure Ollama is running and has the embedding model. +-- Pulls the model if missing, exits if Ollama is not running. +ensureOllama :: IO () +ensureOllama = do + checkResult <- checkOllama + case checkResult of + Right () -> putText "Ollama ready with nomic-embed-text" + Left err + | "not running" `Text.isInfixOf` err -> do + putText <| "Error: " <> err + putText "Please start Ollama: ollama serve" + exitFailure + | "not found" `Text.isInfixOf` err -> do + putText "nomic-embed-text model not found, pulling..." + pullResult <- pullEmbeddingModel + case pullResult of + Right () -> pure () + Left pullErr -> do + putText <| "Error: " <> pullErr + exitFailure + | otherwise -> do + putText <| "Ollama error: " <> err + exitFailure + -- | Start the Telegram bot from environment or provided token. startBot :: Maybe Text -> IO () startBot maybeToken = do @@ -533,6 +841,8 @@ startBot maybeToken = do putText "Error: TELEGRAM_BOT_TOKEN not set and no --token provided" exitFailure + ensureOllama + allowedIds <- loadAllowedUserIds kagiKey <- fmap Text.pack </ lookupEnv "KAGI_API_KEY" diff --git a/Omni/Agent/Tools/Notes.hs b/Omni/Agent/Tools/Notes.hs new file mode 100644 index 0000000..e3cef5d --- /dev/null +++ b/Omni/Agent/Tools/Notes.hs @@ -0,0 +1,357 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Quick notes tool for agents. +-- +-- Provides simple CRUD for tagged notes stored in memory.db. +-- +-- : out omni-agent-tools-notes +-- : dep aeson +-- : dep sqlite-simple +module Omni.Agent.Tools.Notes + ( -- * Tools + noteAddTool, + noteListTool, + noteDeleteTool, + + -- * Direct API + Note (..), + createNote, + listNotes, + listNotesByTopic, + deleteNote, + + -- * Database + initNotesTable, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.!=), (.:), (.:?), (.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Text as Text +import Data.Time (UTCTime, getCurrentTime) +import qualified Database.SQLite.Simple as SQL +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Agent.Memory as Memory +import qualified Omni.Test as Test + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Tools.Notes" + [ Test.unit "noteAddTool has correct schema" <| do + let tool = noteAddTool "test-user-id" + Engine.toolName tool Test.@=? "note_add", + Test.unit "noteListTool has correct schema" <| do + let tool = noteListTool "test-user-id" + Engine.toolName tool Test.@=? "note_list", + Test.unit "noteDeleteTool has correct schema" <| do + let tool = noteDeleteTool "test-user-id" + Engine.toolName tool Test.@=? "note_delete", + Test.unit "Note JSON roundtrip" <| do + now <- getCurrentTime + let n = + Note + { noteId = 1, + noteUserId = "user-123", + noteTopic = "groceries", + noteContent = "Buy milk", + noteCreatedAt = now + } + case Aeson.decode (Aeson.encode n) of + Nothing -> Test.assertFailure "Failed to decode Note" + Just decoded -> do + noteContent decoded Test.@=? "Buy milk" + noteTopic decoded Test.@=? "groceries" + ] + +data Note = Note + { noteId :: Int, + noteUserId :: Text, + noteTopic :: Text, + noteContent :: Text, + noteCreatedAt :: UTCTime + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON Note where + toJSON n = + Aeson.object + [ "id" .= noteId n, + "user_id" .= noteUserId n, + "topic" .= noteTopic n, + "content" .= noteContent n, + "created_at" .= noteCreatedAt n + ] + +instance Aeson.FromJSON Note where + parseJSON = + Aeson.withObject "Note" <| \v -> + (Note </ (v .: "id")) + <*> (v .: "user_id") + <*> (v .: "topic") + <*> (v .: "content") + <*> (v .: "created_at") + +instance SQL.FromRow Note where + fromRow = + (Note </ SQL.field) + <*> SQL.field + <*> SQL.field + <*> SQL.field + <*> SQL.field + +initNotesTable :: SQL.Connection -> IO () +initNotesTable conn = do + SQL.execute_ + conn + "CREATE TABLE IF NOT EXISTS notes (\ + \ id INTEGER PRIMARY KEY AUTOINCREMENT,\ + \ user_id TEXT NOT NULL,\ + \ topic TEXT NOT NULL,\ + \ content TEXT NOT NULL,\ + \ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\ + \)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_user ON notes(user_id)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_topic ON notes(user_id, topic)" + +createNote :: Text -> Text -> Text -> IO Note +createNote uid topic content = do + now <- getCurrentTime + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.execute + conn + "INSERT INTO notes (user_id, topic, content, created_at) VALUES (?, ?, ?, ?)" + (uid, topic, content, now) + rowId <- SQL.lastInsertRowId conn + pure + Note + { noteId = fromIntegral rowId, + noteUserId = uid, + noteTopic = topic, + noteContent = content, + noteCreatedAt = now + } + +listNotes :: Text -> Int -> IO [Note] +listNotes uid limit = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.query + conn + "SELECT id, user_id, topic, content, created_at \ + \FROM notes WHERE user_id = ? \ + \ORDER BY created_at DESC LIMIT ?" + (uid, limit) + +listNotesByTopic :: Text -> Text -> Int -> IO [Note] +listNotesByTopic uid topic limit = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.query + conn + "SELECT id, user_id, topic, content, created_at \ + \FROM notes WHERE user_id = ? AND topic = ? \ + \ORDER BY created_at DESC LIMIT ?" + (uid, topic, limit) + +deleteNote :: Text -> Int -> IO Bool +deleteNote uid nid = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.execute + conn + "DELETE FROM notes WHERE id = ? AND user_id = ?" + (nid, uid) + changes <- SQL.changes conn + pure (changes > 0) + +noteAddTool :: Text -> Engine.Tool +noteAddTool uid = + Engine.Tool + { Engine.toolName = "note_add", + Engine.toolDescription = + "Add a quick note on a topic. Use for reminders, lists, ideas, or anything " + <> "the user wants to jot down. Topics help organize notes (e.g., 'groceries', " + <> "'ideas', 'todo', 'recipes').", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "topic" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Topic/category for the note (e.g., 'groceries', 'todo')" :: Text) + ], + "content" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("The note content" :: Text) + ] + ], + "required" .= (["topic", "content"] :: [Text]) + ], + Engine.toolExecute = executeNoteAdd uid + } + +executeNoteAdd :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteAdd uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteAddArgs) -> do + newNote <- createNote uid (naTopic args) (naContent args) + pure + ( Aeson.object + [ "success" .= True, + "note_id" .= noteId newNote, + "message" .= ("Added note to '" <> noteTopic newNote <> "': " <> noteContent newNote) + ] + ) + +data NoteAddArgs = NoteAddArgs + { naTopic :: Text, + naContent :: Text + } + deriving (Generic) + +instance Aeson.FromJSON NoteAddArgs where + parseJSON = + Aeson.withObject "NoteAddArgs" <| \v -> + (NoteAddArgs </ (v .: "topic")) + <*> (v .: "content") + +noteListTool :: Text -> Engine.Tool +noteListTool uid = + Engine.Tool + { Engine.toolName = "note_list", + Engine.toolDescription = + "List notes, optionally filtered by topic. Use to show the user their " + <> "saved notes or check what's on a specific list.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "topic" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Filter by topic (optional, omit to list all)" :: Text) + ], + "limit" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("Max notes to return (default: 20)" :: Text) + ] + ], + "required" .= ([] :: [Text]) + ], + Engine.toolExecute = executeNoteList uid + } + +executeNoteList :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteList uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteListArgs) -> do + let lim = min 50 (max 1 (nlLimit args)) + notes <- case nlTopic args of + Just topic -> listNotesByTopic uid topic lim + Nothing -> listNotes uid lim + pure + ( Aeson.object + [ "success" .= True, + "count" .= length notes, + "notes" .= formatNotesForLLM notes + ] + ) + +formatNotesForLLM :: [Note] -> Text +formatNotesForLLM [] = "No notes found." +formatNotesForLLM notes = + Text.unlines (map formatNote notes) + where + formatNote n = + "[" <> noteTopic n <> "] " <> noteContent n <> " (id: " <> tshow (noteId n) <> ")" + +data NoteListArgs = NoteListArgs + { nlTopic :: Maybe Text, + nlLimit :: Int + } + deriving (Generic) + +instance Aeson.FromJSON NoteListArgs where + parseJSON = + Aeson.withObject "NoteListArgs" <| \v -> + (NoteListArgs </ (v .:? "topic")) + <*> (v .:? "limit" .!= 20) + +noteDeleteTool :: Text -> Engine.Tool +noteDeleteTool uid = + Engine.Tool + { Engine.toolName = "note_delete", + Engine.toolDescription = + "Delete a note by its ID. Use after the user says they've completed an item " + <> "or no longer need a note.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "note_id" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("The ID of the note to delete" :: Text) + ] + ], + "required" .= (["note_id"] :: [Text]) + ], + Engine.toolExecute = executeNoteDelete uid + } + +executeNoteDelete :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteDelete uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteDeleteArgs) -> do + deleted <- deleteNote uid (ndNoteId args) + if deleted + then + pure + ( Aeson.object + [ "success" .= True, + "message" .= ("Note deleted" :: Text) + ] + ) + else + pure + ( Aeson.object + [ "success" .= False, + "error" .= ("Note not found or already deleted" :: Text) + ] + ) + +newtype NoteDeleteArgs = NoteDeleteArgs + { ndNoteId :: Int + } + deriving (Generic) + +instance Aeson.FromJSON NoteDeleteArgs where + parseJSON = + Aeson.withObject "NoteDeleteArgs" <| \v -> + NoteDeleteArgs </ (v .: "note_id") diff --git a/Omni/Agent/Tools/Pdf.hs b/Omni/Agent/Tools/Pdf.hs new file mode 100644 index 0000000..7687234 --- /dev/null +++ b/Omni/Agent/Tools/Pdf.hs @@ -0,0 +1,180 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | PDF extraction tool using poppler-utils (pdftotext). +-- +-- Extracts text from PDF files for LLM consumption. +-- +-- : out omni-agent-tools-pdf +-- : dep aeson +-- : dep http-conduit +-- : dep directory +-- : dep process +module Omni.Agent.Tools.Pdf + ( -- * Tool + pdfTool, + + -- * Direct API + extractPdfText, + downloadAndExtract, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as Text +import qualified Network.HTTP.Simple as HTTP +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Test as Test +import System.IO (hClose) +import System.IO.Temp (withSystemTempFile) +import System.Process (readProcessWithExitCode) + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Tools.Pdf" + [ Test.unit "pdfTool has correct schema" <| do + let tool = pdfTool + Engine.toolName tool Test.@=? "read_pdf", + Test.unit "extractPdfText handles missing file" <| do + result <- extractPdfText "/nonexistent/file.pdf" + case result of + Left err -> ("No such file" `Text.isInfixOf` err || "pdftotext" `Text.isInfixOf` err) Test.@=? True + Right _ -> Test.assertFailure "Expected error for missing file", + Test.unit "chunkText splits correctly" <| do + let text = Text.replicate 5000 "a" + chunks = chunkText 1000 text + length chunks Test.@=? 5 + all (\c -> Text.length c <= 1000) chunks Test.@=? True, + Test.unit "chunkText handles small text" <| do + let text = "small text" + chunks = chunkText 1000 text + chunks Test.@=? ["small text"] + ] + +data PdfArgs = PdfArgs + { pdfPath :: Text, + pdfMaxChars :: Maybe Int + } + deriving (Generic) + +instance Aeson.FromJSON PdfArgs where + parseJSON = + Aeson.withObject "PdfArgs" <| \v -> + (PdfArgs </ (v Aeson..: "path")) + <*> (v Aeson..:? "max_chars") + +pdfTool :: Engine.Tool +pdfTool = + Engine.Tool + { Engine.toolName = "read_pdf", + Engine.toolDescription = + "Extract text from a PDF file. Use this when you receive a PDF document " + <> "and need to read its contents. Returns the extracted text.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "path" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Path to the PDF file" :: Text) + ], + "max_chars" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("Maximum characters to return (default: 50000)" :: Text) + ] + ], + "required" .= (["path"] :: [Text]) + ], + Engine.toolExecute = executePdf + } + +executePdf :: Aeson.Value -> IO Aeson.Value +executePdf v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: PdfArgs) -> do + let maxChars = maybe 50000 (min 100000 <. max 1000) (pdfMaxChars args) + result <- extractPdfText (Text.unpack (pdfPath args)) + case result of + Left err -> + pure (Aeson.object ["error" .= err]) + Right text -> do + let truncated = Text.take maxChars text + wasTruncated = Text.length text > maxChars + pure + ( Aeson.object + [ "success" .= True, + "text" .= truncated, + "chars" .= Text.length truncated, + "truncated" .= wasTruncated + ] + ) + +extractPdfText :: FilePath -> IO (Either Text Text) +extractPdfText path = do + result <- + try <| readProcessWithExitCode "pdftotext" ["-layout", path, "-"] "" + case result of + Left (e :: SomeException) -> + pure (Left ("pdftotext error: " <> tshow e)) + Right (exitCode, stdoutStr, stderrStr) -> + case exitCode of + ExitSuccess -> pure (Right (Text.pack stdoutStr)) + ExitFailure code -> + pure (Left ("pdftotext failed (" <> tshow code <> "): " <> Text.pack stderrStr)) + +downloadAndExtract :: Text -> Text -> Text -> IO (Either Text Text) +downloadAndExtract botToken filePath maxCharsText = do + let url = + "https://api.telegram.org/file/bot" + <> Text.unpack botToken + <> "/" + <> Text.unpack filePath + maxChars = maybe 50000 identity (readMaybe (Text.unpack maxCharsText) :: Maybe Int) + withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do + hClose tmpHandle + downloadResult <- + try <| do + req <- HTTP.parseRequest url + response <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + BL.writeFile tmpPath (HTTP.getResponseBody response) + pure (Right ()) + else pure (Left ("Download failed: HTTP " <> tshow status)) + case downloadResult of + Left (e :: SomeException) -> + pure (Left ("Download error: " <> tshow e)) + Right (Left err) -> pure (Left err) + Right (Right ()) -> do + result <- extractPdfText tmpPath + case result of + Left err -> pure (Left err) + Right text -> do + let truncated = Text.take maxChars text + pure (Right truncated) + +chunkText :: Int -> Text -> [Text] +chunkText chunkSize text + | Text.null text = [] + | Text.length text <= chunkSize = [text] + | otherwise = + let (chunk, rest) = Text.splitAt chunkSize text + in chunk : chunkText chunkSize rest |
