diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-13 00:35:24 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-13 00:35:24 -0500 |
| commit | 4ff40843e7a6801b7785bfff7f4e9e8fff4e27d4 (patch) | |
| tree | 6b41438d0726f96746697af0584ab2f2542ffabf /Omni/Agent | |
| parent | 817bdb1f33e9825946a2da2aa1ff8f91b6166366 (diff) | |
telegram: fix parsing, add webpage reader, use gemini
- Fix Provider.hs to strip leading whitespace from OpenRouter responses
- Fix FunctionCall parser to handle missing 'arguments' field
- Use eitherDecode for better error messages on parse failures
- Switch to claude-sonnet-4.5 for main agent
- Use gemini-2.0-flash for conversation summarization (cheaper)
- Add read_webpage tool for fetching and summarizing URLs
- Add tagsoup to Haskell deps (unused, kept for future)
Diffstat (limited to 'Omni/Agent')
| -rw-r--r-- | Omni/Agent/Provider.hs | 14 | ||||
| -rw-r--r-- | Omni/Agent/Telegram.hs | 17 | ||||
| -rw-r--r-- | Omni/Agent/Telegram/Media.hs | 2 | ||||
| -rw-r--r-- | Omni/Agent/Tools/WebReader.hs | 210 |
4 files changed, 230 insertions, 13 deletions
diff --git a/Omni/Agent/Provider.hs b/Omni/Agent/Provider.hs index a8a5381..2ad6ea8 100644 --- a/Omni/Agent/Provider.hs +++ b/Omni/Agent/Provider.hs @@ -200,7 +200,7 @@ instance Aeson.FromJSON FunctionCall where parseJSON = Aeson.withObject "FunctionCall" <| \v -> (FunctionCall </ (v Aeson..: "name")) - <*> (v Aeson..: "arguments") + <*> (v Aeson..:? "arguments" Aeson..!= "{}") data Usage = Usage { usagePromptTokens :: Int, @@ -322,14 +322,18 @@ chatOpenAI cfg tools messages = do response <- HTTP.httpLBS req let status = HTTP.getResponseStatusCode response + respBody = HTTP.getResponseBody response + cleanedBody = BL.dropWhile (\b -> b `elem` [0x0a, 0x0d, 0x20]) respBody if status >= 200 && status < 300 - then case Aeson.decode (HTTP.getResponseBody response) of - Just resp -> + then case Aeson.eitherDecode cleanedBody of + Right resp -> case respChoices resp of (c : _) -> pure (Right (ChatResult (choiceMessage c) (respUsage resp))) [] -> pure (Left "No choices in response") - Nothing -> pure (Left "Failed to parse response") - else pure (Left ("HTTP error: " <> tshow status <> " - " <> TE.decodeUtf8 (BL.toStrict (HTTP.getResponseBody response)))) + Left err -> do + let bodyPreview = TE.decodeUtf8 (BL.toStrict (BL.take 500 cleanedBody)) + pure (Left ("Failed to parse response: " <> Text.pack err <> " | Body: " <> bodyPreview)) + else pure (Left ("HTTP error: " <> tshow status <> " - " <> TE.decodeUtf8 (BL.toStrict respBody))) chatOllama :: ProviderConfig -> [ToolApi] -> [Message] -> IO (Either Text ChatResult) chatOllama cfg tools messages = do diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs index d224acc..c55dc5a 100644 --- a/Omni/Agent/Telegram.hs +++ b/Omni/Agent/Telegram.hs @@ -83,6 +83,7 @@ import qualified Omni.Agent.Tools.Calendar as Calendar import qualified Omni.Agent.Tools.Notes as Notes import qualified Omni.Agent.Tools.Pdf as Pdf import qualified Omni.Agent.Tools.Todos as Todos +import qualified Omni.Agent.Tools.WebReader as WebReader import qualified Omni.Agent.Tools.WebSearch as WebSearch import qualified Omni.Test as Test import System.Environment (lookupEnv) @@ -423,6 +424,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do searchTools = case Types.tgKagiApiKey tgConfig of Just kagiKey -> [WebSearch.webSearchTool kagiKey] Nothing -> [] + webReaderTools = [WebReader.webReaderTool (Types.tgOpenRouterApiKey tgConfig)] pdfTools = [Pdf.pdfTool] notesTools = [ Notes.noteAddTool uid, @@ -440,7 +442,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do Todos.todoCompleteTool uid, Todos.todoDeleteTool uid ] - tools = memoryTools <> searchTools <> pdfTools <> notesTools <> calendarTools <> todoTools + tools = memoryTools <> searchTools <> webReaderTools <> pdfTools <> notesTools <> calendarTools <> todoTools let agentCfg = Engine.defaultAgentConfig @@ -472,7 +474,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do sendMessage tgConfig chatId "hmm, i don't have a response for that" else sendMessage tgConfig chatId response - checkAndSummarize provider uid chatId + checkAndSummarize (Types.tgOpenRouterApiKey tgConfig) uid chatId putText <| "Responded to " @@ -487,8 +489,8 @@ maxConversationTokens = 4000 summarizationThreshold :: Int summarizationThreshold = 3000 -checkAndSummarize :: Provider.Provider -> Text -> Int -> IO () -checkAndSummarize provider uid chatId = do +checkAndSummarize :: Text -> Text -> Int -> IO () +checkAndSummarize openRouterKey uid chatId = do (_, currentTokens) <- Memory.getConversationContext uid chatId maxConversationTokens when (currentTokens > summarizationThreshold) <| do putText <| "Context at " <> tshow currentTokens <> " tokens, summarizing..." @@ -498,9 +500,10 @@ checkAndSummarize provider uid chatId = do [ (if Memory.cmRole m == Memory.UserRole then "User: " else "Assistant: ") <> Memory.cmContent m | m <- reverse recentMsgs ] + gemini = Provider.defaultOpenRouter openRouterKey "google/gemini-2.0-flash-001" summaryResult <- Provider.chat - provider + gemini [] [ Provider.Message Provider.System "You are a conversation summarizer. Summarize the key points, decisions, and context from this conversation in 2-3 paragraphs. Focus on information that would be useful for continuing the conversation later." Nothing Nothing, Provider.Message Provider.User ("Summarize this conversation:\n\n" <> conversationText) Nothing Nothing @@ -510,7 +513,7 @@ checkAndSummarize provider uid chatId = do Right summaryMsg -> do let summary = Provider.msgContent summaryMsg _ <- Memory.summarizeAndArchive uid chatId summary - putText "Conversation summarized and archived" + putText "Conversation summarized and archived (gemini)" checkOllama :: IO (Either Text ()) checkOllama = do @@ -610,7 +613,7 @@ startBot maybeToken = do Just key -> do let orKey = Text.pack key tgConfig = Types.defaultTelegramConfig token allowedIds kagiKey orKey - provider = Provider.defaultOpenRouter orKey "anthropic/claude-sonnet-4" + provider = Provider.defaultOpenRouter orKey "anthropic/claude-sonnet-4.5" putText <| "Allowed user IDs: " <> tshow allowedIds putText <| "Kagi search: " <> if isJust kagiKey then "enabled" else "disabled" runTelegramBot tgConfig provider diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs index 1ef35de..137d7d3 100644 --- a/Omni/Agent/Telegram/Media.hs +++ b/Omni/Agent/Telegram/Media.hs @@ -239,7 +239,7 @@ analyzeImage apiKey imageBytes userPrompt = do else userPrompt <> "\n\n(describe objectively in third person, no first person pronouns)" body = Aeson.object - [ "model" .= ("anthropic/claude-sonnet-4" :: Text), + [ "model" .= ("anthropic/claude-sonnet-4.5" :: Text), "messages" .= [ Aeson.object [ "role" .= ("user" :: Text), diff --git a/Omni/Agent/Tools/WebReader.hs b/Omni/Agent/Tools/WebReader.hs new file mode 100644 index 0000000..9b776ad --- /dev/null +++ b/Omni/Agent/Tools/WebReader.hs @@ -0,0 +1,210 @@ +{-# LANGUAGE DeriveGeneric #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE ScopedTypeVariables #-} +{-# LANGUAGE NoImplicitPrelude #-} + +-- | Web page reader tool - fetches and summarizes web pages. +-- +-- : out omni-agent-tools-webreader +-- : dep aeson +-- : dep http-conduit +module Omni.Agent.Tools.WebReader + ( -- * Tool + webReaderTool, + + -- * Direct API + fetchWebpage, + extractText, + + -- * Testing + main, + test, + ) +where + +import Alpha +import Data.Aeson ((.=)) +import qualified Data.Aeson as Aeson +import qualified Data.ByteString.Lazy as BL +import qualified Data.Text as Text +import qualified Data.Text.Encoding as TE +import qualified Network.HTTP.Client as HTTPClient +import qualified Network.HTTP.Simple as HTTP +import qualified Omni.Agent.Engine as Engine +import qualified Omni.Agent.Provider as Provider +import qualified Omni.Test as Test + +main :: IO () +main = Test.run test + +test :: Test.Tree +test = + Test.group + "Omni.Agent.Tools.WebReader" + [ Test.unit "extractText removes HTML tags" <| do + let html = "<html><body><p>Hello world</p></body></html>" + result = extractText html + ("Hello world" `Text.isInfixOf` result) Test.@=? True, + Test.unit "extractText removes script tags" <| do + let html = "<html><script>alert('hi')</script><p>Content</p></html>" + result = extractText html + ("alert" `Text.isInfixOf` result) Test.@=? False + ("Content" `Text.isInfixOf` result) Test.@=? True, + Test.unit "webReaderTool has correct schema" <| do + let tool = webReaderTool "test-key" + Engine.toolName tool Test.@=? "read_webpage" + ] + +fetchWebpage :: Text -> IO (Either Text Text) +fetchWebpage url = do + result <- + try <| do + req0 <- HTTP.parseRequest (Text.unpack url) + let req = + HTTP.setRequestMethod "GET" + <| HTTP.setRequestHeader "User-Agent" ["Mozilla/5.0 (compatible; OmniBot/1.0)"] + <| HTTP.setRequestHeader "Accept" ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"] + <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (30 * 1000000)) + <| req0 + HTTP.httpLBS req + case result of + Left (e :: SomeException) -> + pure (Left ("Failed to fetch URL: " <> tshow e)) + Right response -> do + let status = HTTP.getResponseStatusCode response + if status >= 200 && status < 300 + then do + let body = HTTP.getResponseBody response + text = TE.decodeUtf8With (\_ _ -> Just '?') (BL.toStrict body) + pure (Right text) + else pure (Left ("HTTP error: " <> tshow status)) + +extractText :: Text -> Text +extractText html = + let noScript = removeTagContent "script" html + noStyle = removeTagContent "style" noScript + noNoscript = removeTagContent "noscript" noStyle + noTags = stripTags noNoscript + in collapseWhitespace noTags + where + removeTagContent :: Text -> Text -> Text + removeTagContent tag txt = + let openTag = "<" <> tag + closeTag = "</" <> tag <> ">" + in removeMatches openTag closeTag txt + + removeMatches :: Text -> Text -> Text -> Text + removeMatches open close txt = + case Text.breakOn open (Text.toLower txt) of + (_, "") -> txt + (before, _) -> + let actualBefore = Text.take (Text.length before) txt + rest = Text.drop (Text.length before) txt + in case Text.breakOn close (Text.toLower rest) of + (_, "") -> actualBefore + (_, afterClose) -> + let skipLen = Text.length close + remaining = Text.drop (Text.length rest - Text.length afterClose + skipLen) txt + in actualBefore <> removeMatches open close remaining + + stripTags :: Text -> Text + stripTags txt = go txt "" + where + go :: Text -> Text -> Text + go remaining acc = + case Text.breakOn "<" remaining of + (before, "") -> acc <> before + (before, rest) -> + case Text.breakOn ">" rest of + (_, "") -> acc <> before + (_, afterTag) -> go (Text.drop 1 afterTag) (acc <> before <> " ") + + collapseWhitespace = Text.unwords <. Text.words + +summarizeContent :: Text -> Text -> Text -> IO (Either Text Text) +summarizeContent apiKey url content = do + let truncatedContent = Text.take 50000 content + gemini = Provider.defaultOpenRouter apiKey "google/gemini-2.0-flash-001" + result <- + Provider.chat + gemini + [] + [ Provider.Message + Provider.System + "You are a webpage summarizer. Provide a concise summary of the webpage content. Focus on the main points and key information. Be brief but comprehensive." + Nothing + Nothing, + Provider.Message + Provider.User + ("Summarize this webpage (" <> url <> "):\n\n" <> truncatedContent) + Nothing + Nothing + ] + case result of + Left err -> pure (Left ("Summarization failed: " <> err)) + Right msg -> pure (Right (Provider.msgContent msg)) + +webReaderTool :: Text -> Engine.Tool +webReaderTool apiKey = + Engine.Tool + { Engine.toolName = "read_webpage", + Engine.toolDescription = + "Fetch and summarize a webpage. Use this when the user shares a URL or link " + <> "and wants to know what it contains. Returns a summary of the page content.", + Engine.toolJsonSchema = + Aeson.object + [ "type" .= ("object" :: Text), + "properties" + .= Aeson.object + [ "url" + .= Aeson.object + [ "type" .= ("string" :: Text), + "description" .= ("The URL of the webpage to read" :: Text) + ] + ], + "required" .= (["url"] :: [Text]) + ], + Engine.toolExecute = executeWebReader apiKey + } + +executeWebReader :: Text -> Aeson.Value -> IO Aeson.Value +executeWebReader apiKey v = + case Aeson.fromJSON v of + Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e]) + Aeson.Success (args :: WebReaderArgs) -> do + fetchResult <- fetchWebpage (wrUrl args) + case fetchResult of + Left err -> + pure (Aeson.object ["error" .= err]) + Right html -> do + let textContent = extractText html + if Text.null (Text.strip textContent) + then pure (Aeson.object ["error" .= ("Page appears to be empty or JavaScript-only" :: Text)]) + else do + summaryResult <- summarizeContent apiKey (wrUrl args) textContent + case summaryResult of + Left err -> + pure + ( Aeson.object + [ "error" .= err, + "raw_content" .= Text.take 2000 textContent + ] + ) + Right summary -> + pure + ( Aeson.object + [ "success" .= True, + "url" .= wrUrl args, + "summary" .= summary + ] + ) + +newtype WebReaderArgs = WebReaderArgs + { wrUrl :: Text + } + deriving (Generic) + +instance Aeson.FromJSON WebReaderArgs where + parseJSON = + Aeson.withObject "WebReaderArgs" <| \v -> + WebReaderArgs </ (v Aeson..: "url") |
