diff options
Diffstat (limited to 'Omni/Agent/Tools')
| -rw-r--r-- | Omni/Agent/Tools/Notes.hs | 357 | ||||
| -rw-r--r-- | Omni/Agent/Tools/Pdf.hs | 180 |
2 files changed, 537 insertions, 0 deletions
diff --git a/Omni/Agent/Tools/Notes.hs b/Omni/Agent/Tools/Notes.hs new file mode 100644 index 0000000..e3cef5d --- /dev/null +++ b/Omni/Agent/Tools/Notes.hs @@ -0,0 +1,357 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Quick notes tool for agents. +-- +-- Provides simple CRUD for tagged notes stored in memory.db. +-- +-- : out omni-agent-tools-notes +-- : dep aeson +-- : dep sqlite-simple +module Omni.Agent.Tools.Notes + ( -- * Tools + noteAddTool, + noteListTool, + noteDeleteTool, + + -- * Direct API + Note (..), + createNote, + listNotes, + listNotesByTopic, + deleteNote, + + -- * Database + initNotesTable, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.!=), (.:), (.:?), (.=)) +import qualified Data.Aeson as Aeson +import qualified Data.Text as Text +import Data.Time (UTCTime, getCurrentTime) +import qualified Database.SQLite.Simple as SQL +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Agent.Memory as Memory +import qualified Omni.Test as Test + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Tools.Notes" + [ Test.unit "noteAddTool has correct schema" <| do + let tool = noteAddTool "test-user-id" + Engine.toolName tool Test.@=? "note_add", + Test.unit "noteListTool has correct schema" <| do + let tool = noteListTool "test-user-id" + Engine.toolName tool Test.@=? "note_list", + Test.unit "noteDeleteTool has correct schema" <| do + let tool = noteDeleteTool "test-user-id" + Engine.toolName tool Test.@=? "note_delete", + Test.unit "Note JSON roundtrip" <| do + now <- getCurrentTime + let n = + Note + { noteId = 1, + noteUserId = "user-123", + noteTopic = "groceries", + noteContent = "Buy milk", + noteCreatedAt = now + } + case Aeson.decode (Aeson.encode n) of + Nothing -> Test.assertFailure "Failed to decode Note" + Just decoded -> do + noteContent decoded Test.@=? "Buy milk" + noteTopic decoded Test.@=? "groceries" + ] + +data Note = Note + { noteId :: Int, + noteUserId :: Text, + noteTopic :: Text, + noteContent :: Text, + noteCreatedAt :: UTCTime + } + deriving (Show, Eq, Generic) + +instance Aeson.ToJSON Note where + toJSON n = + Aeson.object + [ "id" .= noteId n, + "user_id" .= noteUserId n, + "topic" .= noteTopic n, + "content" .= noteContent n, + "created_at" .= noteCreatedAt n + ] + +instance Aeson.FromJSON Note where + parseJSON = + Aeson.withObject "Note" <| \v -> + (Note </ (v .: "id")) + <*> (v .: "user_id") + <*> (v .: "topic") + <*> (v .: "content") + <*> (v .: "created_at") + +instance SQL.FromRow Note where + fromRow = + (Note </ SQL.field) + <*> SQL.field + <*> SQL.field + <*> SQL.field + <*> SQL.field + +initNotesTable :: SQL.Connection -> IO () +initNotesTable conn = do + SQL.execute_ + conn + "CREATE TABLE IF NOT EXISTS notes (\ + \ id INTEGER PRIMARY KEY AUTOINCREMENT,\ + \ user_id TEXT NOT NULL,\ + \ topic TEXT NOT NULL,\ + \ content TEXT NOT NULL,\ + \ created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP\ + \)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_user ON notes(user_id)" + SQL.execute_ + conn + "CREATE INDEX IF NOT EXISTS idx_notes_topic ON notes(user_id, topic)" + +createNote :: Text -> Text -> Text -> IO Note +createNote uid topic content = do + now <- getCurrentTime + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.execute + conn + "INSERT INTO notes (user_id, topic, content, created_at) VALUES (?, ?, ?, ?)" + (uid, topic, content, now) + rowId <- SQL.lastInsertRowId conn + pure + Note + { noteId = fromIntegral rowId, + noteUserId = uid, + noteTopic = topic, + noteContent = content, + noteCreatedAt = now + } + +listNotes :: Text -> Int -> IO [Note] +listNotes uid limit = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.query + conn + "SELECT id, user_id, topic, content, created_at \ + \FROM notes WHERE user_id = ? \ + \ORDER BY created_at DESC LIMIT ?" + (uid, limit) + +listNotesByTopic :: Text -> Text -> Int -> IO [Note] +listNotesByTopic uid topic limit = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.query + conn + "SELECT id, user_id, topic, content, created_at \ + \FROM notes WHERE user_id = ? AND topic = ? \ + \ORDER BY created_at DESC LIMIT ?" + (uid, topic, limit) + +deleteNote :: Text -> Int -> IO Bool +deleteNote uid nid = + Memory.withMemoryDb <| \conn -> do + initNotesTable conn + SQL.execute + conn + "DELETE FROM notes WHERE id = ? AND user_id = ?" + (nid, uid) + changes <- SQL.changes conn + pure (changes > 0) + +noteAddTool :: Text -> Engine.Tool +noteAddTool uid = + Engine.Tool + { Engine.toolName = "note_add", + Engine.toolDescription = + "Add a quick note on a topic. Use for reminders, lists, ideas, or anything " + <> "the user wants to jot down. Topics help organize notes (e.g., 'groceries', " + <> "'ideas', 'todo', 'recipes').", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "topic" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Topic/category for the note (e.g., 'groceries', 'todo')" :: Text) + ], + "content" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("The note content" :: Text) + ] + ], + "required" .= (["topic", "content"] :: [Text]) + ], + Engine.toolExecute = executeNoteAdd uid + } + +executeNoteAdd :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteAdd uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteAddArgs) -> do + newNote <- createNote uid (naTopic args) (naContent args) + pure + ( Aeson.object + [ "success" .= True, + "note_id" .= noteId newNote, + "message" .= ("Added note to '" <> noteTopic newNote <> "': " <> noteContent newNote) + ] + ) + +data NoteAddArgs = NoteAddArgs + { naTopic :: Text, + naContent :: Text + } + deriving (Generic) + +instance Aeson.FromJSON NoteAddArgs where + parseJSON = + Aeson.withObject "NoteAddArgs" <| \v -> + (NoteAddArgs </ (v .: "topic")) + <*> (v .: "content") + +noteListTool :: Text -> Engine.Tool +noteListTool uid = + Engine.Tool + { Engine.toolName = "note_list", + Engine.toolDescription = + "List notes, optionally filtered by topic. Use to show the user their " + <> "saved notes or check what's on a specific list.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "topic" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Filter by topic (optional, omit to list all)" :: Text) + ], + "limit" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("Max notes to return (default: 20)" :: Text) + ] + ], + "required" .= ([] :: [Text]) + ], + Engine.toolExecute = executeNoteList uid + } + +executeNoteList :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteList uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteListArgs) -> do + let lim = min 50 (max 1 (nlLimit args)) + notes <- case nlTopic args of + Just topic -> listNotesByTopic uid topic lim + Nothing -> listNotes uid lim + pure + ( Aeson.object + [ "success" .= True, + "count" .= length notes, + "notes" .= formatNotesForLLM notes + ] + ) + +formatNotesForLLM :: [Note] -> Text +formatNotesForLLM [] = "No notes found." +formatNotesForLLM notes = + Text.unlines (map formatNote notes) + where + formatNote n = + "[" <> noteTopic n <> "] " <> noteContent n <> " (id: " <> tshow (noteId n) <> ")" + +data NoteListArgs = NoteListArgs + { nlTopic :: Maybe Text, + nlLimit :: Int + } + deriving (Generic) + +instance Aeson.FromJSON NoteListArgs where + parseJSON = + Aeson.withObject "NoteListArgs" <| \v -> + (NoteListArgs </ (v .:? "topic")) + <*> (v .:? "limit" .!= 20) + +noteDeleteTool :: Text -> Engine.Tool +noteDeleteTool uid = + Engine.Tool + { Engine.toolName = "note_delete", + Engine.toolDescription = + "Delete a note by its ID. Use after the user says they've completed an item " + <> "or no longer need a note.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "note_id" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("The ID of the note to delete" :: Text) + ] + ], + "required" .= (["note_id"] :: [Text]) + ], + Engine.toolExecute = executeNoteDelete uid + } + +executeNoteDelete :: Text -> Aeson.Value -> IO Aeson.Value +executeNoteDelete uid v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: NoteDeleteArgs) -> do + deleted <- deleteNote uid (ndNoteId args) + if deleted + then + pure + ( Aeson.object + [ "success" .= True, + "message" .= ("Note deleted" :: Text) + ] + ) + else + pure + ( Aeson.object + [ "success" .= False, + "error" .= ("Note not found or already deleted" :: Text) + ] + ) + +newtype NoteDeleteArgs = NoteDeleteArgs + { ndNoteId :: Int + } + deriving (Generic) + +instance Aeson.FromJSON NoteDeleteArgs where + parseJSON = + Aeson.withObject "NoteDeleteArgs" <| \v -> + NoteDeleteArgs </ (v .: "note_id") diff --git a/Omni/Agent/Tools/Pdf.hs b/Omni/Agent/Tools/Pdf.hs new file mode 100644 index 0000000..7687234 --- /dev/null +++ b/Omni/Agent/Tools/Pdf.hs @@ -0,0 +1,180 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | PDF extraction tool using poppler-utils (pdftotext). +-- +-- Extracts text from PDF files for LLM consumption. +-- +-- : out omni-agent-tools-pdf +-- : dep aeson +-- : dep http-conduit +-- : dep directory +-- : dep process +module Omni.Agent.Tools.Pdf + ( -- * Tool + pdfTool, + + -- * Direct API + extractPdfText, + downloadAndExtract, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as Text +import qualified Network.HTTP.Simple as HTTP +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Test as Test +import System.IO (hClose) +import System.IO.Temp (withSystemTempFile) +import System.Process (readProcessWithExitCode) + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Tools.Pdf" + [ Test.unit "pdfTool has correct schema" <| do + let tool = pdfTool + Engine.toolName tool Test.@=? "read_pdf", + Test.unit "extractPdfText handles missing file" <| do + result <- extractPdfText "/nonexistent/file.pdf" + case result of + Left err -> ("No such file" `Text.isInfixOf` err || "pdftotext" `Text.isInfixOf` err) Test.@=? True + Right _ -> Test.assertFailure "Expected error for missing file", + Test.unit "chunkText splits correctly" <| do + let text = Text.replicate 5000 "a" + chunks = chunkText 1000 text + length chunks Test.@=? 5 + all (\c -> Text.length c <= 1000) chunks Test.@=? True, + Test.unit "chunkText handles small text" <| do + let text = "small text" + chunks = chunkText 1000 text + chunks Test.@=? ["small text"] + ] + +data PdfArgs = PdfArgs + { pdfPath :: Text, + pdfMaxChars :: Maybe Int + } + deriving (Generic) + +instance Aeson.FromJSON PdfArgs where + parseJSON = + Aeson.withObject "PdfArgs" <| \v -> + (PdfArgs </ (v Aeson..: "path")) + <*> (v Aeson..:? "max_chars") + +pdfTool :: Engine.Tool +pdfTool = + Engine.Tool + { Engine.toolName = "read_pdf", + Engine.toolDescription = + "Extract text from a PDF file. Use this when you receive a PDF document " + <> "and need to read its contents. Returns the extracted text.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "path" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("Path to the PDF file" :: Text) + ], + "max_chars" + .= Aeson.object + [ "type" .= ("integer" :: Text), + "description" .= ("Maximum characters to return (default: 50000)" :: Text) + ] + ], + "required" .= (["path"] :: [Text]) + ], + Engine.toolExecute = executePdf + } + +executePdf :: Aeson.Value -> IO Aeson.Value +executePdf v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: PdfArgs) -> do + let maxChars = maybe 50000 (min 100000 <. max 1000) (pdfMaxChars args) + result <- extractPdfText (Text.unpack (pdfPath args)) + case result of + Left err -> + pure (Aeson.object ["error" .= err]) + Right text -> do + let truncated = Text.take maxChars text + wasTruncated = Text.length text > maxChars + pure + ( Aeson.object + [ "success" .= True, + "text" .= truncated, + "chars" .= Text.length truncated, + "truncated" .= wasTruncated + ] + ) + +extractPdfText :: FilePath -> IO (Either Text Text) +extractPdfText path = do + result <- + try <| readProcessWithExitCode "pdftotext" ["-layout", path, "-"] "" + case result of + Left (e :: SomeException) -> + pure (Left ("pdftotext error: " <> tshow e)) + Right (exitCode, stdoutStr, stderrStr) -> + case exitCode of + ExitSuccess -> pure (Right (Text.pack stdoutStr)) + ExitFailure code -> + pure (Left ("pdftotext failed (" <> tshow code <> "): " <> Text.pack stderrStr)) + +downloadAndExtract :: Text -> Text -> Text -> IO (Either Text Text) +downloadAndExtract botToken filePath maxCharsText = do + let url = + "https://api.telegram.org/file/bot" + <> Text.unpack botToken + <> "/" + <> Text.unpack filePath + maxChars = maybe 50000 identity (readMaybe (Text.unpack maxCharsText) :: Maybe Int) + withSystemTempFile "telegram_pdf.pdf" <| \tmpPath tmpHandle -> do + hClose tmpHandle + downloadResult <- + try <| do + req <- HTTP.parseRequest url + response <- HTTP.httpLBS req + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + BL.writeFile tmpPath (HTTP.getResponseBody response) + pure (Right ()) + else pure (Left ("Download failed: HTTP " <> tshow status)) + case downloadResult of + Left (e :: SomeException) -> + pure (Left ("Download error: " <> tshow e)) + Right (Left err) -> pure (Left err) + Right (Right ()) -> do + result <- extractPdfText tmpPath + case result of + Left err -> pure (Left err) + Right text -> do + let truncated = Text.take maxChars text + pure (Right truncated) + +chunkText :: Int -> Text -> [Text] +chunkText chunkSize text + | Text.null text = [] + | Text.length text <= chunkSize = [text] + | otherwise = + let (chunk, rest) = Text.splitAt chunkSize text + in chunk : chunkText chunkSize rest |
