summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorBen Sima <ben@bensima.com>2025-12-13 00:35:24 -0500
committerBen Sima <ben@bensima.com>2025-12-13 00:35:24 -0500
commit4ff40843e7a6801b7785bfff7f4e9e8fff4e27d4 (patch)
tree6b41438d0726f96746697af0584ab2f2542ffabf
parent817bdb1f33e9825946a2da2aa1ff8f91b6166366 (diff)
telegram: fix parsing, add webpage reader, use gemini
- Fix Provider.hs to strip leading whitespace from OpenRouter responses - Fix FunctionCall parser to handle missing 'arguments' field - Use eitherDecode for better error messages on parse failures - Switch to claude-sonnet-4.5 for main agent - Use gemini-2.0-flash for conversation summarization (cheaper) - Add read_webpage tool for fetching and summarizing URLs - Add tagsoup to Haskell deps (unused, kept for future)
-rw-r--r--Omni/Agent/Provider.hs14
-rw-r--r--Omni/Agent/Telegram.hs17
-rw-r--r--Omni/Agent/Telegram/Media.hs2
-rw-r--r--Omni/Agent/Tools/WebReader.hs210
-rw-r--r--Omni/Bild/Deps/Haskell.nix1
5 files changed, 231 insertions, 13 deletions
diff --git a/Omni/Agent/Provider.hs b/Omni/Agent/Provider.hs
index a8a5381..2ad6ea8 100644
--- a/Omni/Agent/Provider.hs
+++ b/Omni/Agent/Provider.hs
@@ -200,7 +200,7 @@ instance Aeson.FromJSON FunctionCall where
parseJSON =
Aeson.withObject "FunctionCall" <| \v ->
(FunctionCall </ (v Aeson..: "name"))
- <*> (v Aeson..: "arguments")
+ <*> (v Aeson..:? "arguments" Aeson..!= "{}")
data Usage = Usage
{ usagePromptTokens :: Int,
@@ -322,14 +322,18 @@ chatOpenAI cfg tools messages = do
response <- HTTP.httpLBS req
let status = HTTP.getResponseStatusCode response
+ respBody = HTTP.getResponseBody response
+ cleanedBody = BL.dropWhile (\b -> b `elem` [0x0a, 0x0d, 0x20]) respBody
if status >= 200 && status < 300
- then case Aeson.decode (HTTP.getResponseBody response) of
- Just resp ->
+ then case Aeson.eitherDecode cleanedBody of
+ Right resp ->
case respChoices resp of
(c : _) -> pure (Right (ChatResult (choiceMessage c) (respUsage resp)))
[] -> pure (Left "No choices in response")
- Nothing -> pure (Left "Failed to parse response")
- else pure (Left ("HTTP error: " <> tshow status <> " - " <> TE.decodeUtf8 (BL.toStrict (HTTP.getResponseBody response))))
+ Left err -> do
+ let bodyPreview = TE.decodeUtf8 (BL.toStrict (BL.take 500 cleanedBody))
+ pure (Left ("Failed to parse response: " <> Text.pack err <> " | Body: " <> bodyPreview))
+ else pure (Left ("HTTP error: " <> tshow status <> " - " <> TE.decodeUtf8 (BL.toStrict respBody)))
chatOllama :: ProviderConfig -> [ToolApi] -> [Message] -> IO (Either Text ChatResult)
chatOllama cfg tools messages = do
diff --git a/Omni/Agent/Telegram.hs b/Omni/Agent/Telegram.hs
index d224acc..c55dc5a 100644
--- a/Omni/Agent/Telegram.hs
+++ b/Omni/Agent/Telegram.hs
@@ -83,6 +83,7 @@ import qualified Omni.Agent.Tools.Calendar as Calendar
import qualified Omni.Agent.Tools.Notes as Notes
import qualified Omni.Agent.Tools.Pdf as Pdf
import qualified Omni.Agent.Tools.Todos as Todos
+import qualified Omni.Agent.Tools.WebReader as WebReader
import qualified Omni.Agent.Tools.WebSearch as WebSearch
import qualified Omni.Test as Test
import System.Environment (lookupEnv)
@@ -423,6 +424,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
searchTools = case Types.tgKagiApiKey tgConfig of
Just kagiKey -> [WebSearch.webSearchTool kagiKey]
Nothing -> []
+ webReaderTools = [WebReader.webReaderTool (Types.tgOpenRouterApiKey tgConfig)]
pdfTools = [Pdf.pdfTool]
notesTools =
[ Notes.noteAddTool uid,
@@ -440,7 +442,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
Todos.todoCompleteTool uid,
Todos.todoDeleteTool uid
]
- tools = memoryTools <> searchTools <> pdfTools <> notesTools <> calendarTools <> todoTools
+ tools = memoryTools <> searchTools <> webReaderTools <> pdfTools <> notesTools <> calendarTools <> todoTools
let agentCfg =
Engine.defaultAgentConfig
@@ -472,7 +474,7 @@ handleAuthorizedMessage tgConfig provider engineCfg msg uid userName chatId = do
sendMessage tgConfig chatId "hmm, i don't have a response for that"
else sendMessage tgConfig chatId response
- checkAndSummarize provider uid chatId
+ checkAndSummarize (Types.tgOpenRouterApiKey tgConfig) uid chatId
putText
<| "Responded to "
@@ -487,8 +489,8 @@ maxConversationTokens = 4000
summarizationThreshold :: Int
summarizationThreshold = 3000
-checkAndSummarize :: Provider.Provider -> Text -> Int -> IO ()
-checkAndSummarize provider uid chatId = do
+checkAndSummarize :: Text -> Text -> Int -> IO ()
+checkAndSummarize openRouterKey uid chatId = do
(_, currentTokens) <- Memory.getConversationContext uid chatId maxConversationTokens
when (currentTokens > summarizationThreshold) <| do
putText <| "Context at " <> tshow currentTokens <> " tokens, summarizing..."
@@ -498,9 +500,10 @@ checkAndSummarize provider uid chatId = do
[ (if Memory.cmRole m == Memory.UserRole then "User: " else "Assistant: ") <> Memory.cmContent m
| m <- reverse recentMsgs
]
+ gemini = Provider.defaultOpenRouter openRouterKey "google/gemini-2.0-flash-001"
summaryResult <-
Provider.chat
- provider
+ gemini
[]
[ Provider.Message Provider.System "You are a conversation summarizer. Summarize the key points, decisions, and context from this conversation in 2-3 paragraphs. Focus on information that would be useful for continuing the conversation later." Nothing Nothing,
Provider.Message Provider.User ("Summarize this conversation:\n\n" <> conversationText) Nothing Nothing
@@ -510,7 +513,7 @@ checkAndSummarize provider uid chatId = do
Right summaryMsg -> do
let summary = Provider.msgContent summaryMsg
_ <- Memory.summarizeAndArchive uid chatId summary
- putText "Conversation summarized and archived"
+ putText "Conversation summarized and archived (gemini)"
checkOllama :: IO (Either Text ())
checkOllama = do
@@ -610,7 +613,7 @@ startBot maybeToken = do
Just key -> do
let orKey = Text.pack key
tgConfig = Types.defaultTelegramConfig token allowedIds kagiKey orKey
- provider = Provider.defaultOpenRouter orKey "anthropic/claude-sonnet-4"
+ provider = Provider.defaultOpenRouter orKey "anthropic/claude-sonnet-4.5"
putText <| "Allowed user IDs: " <> tshow allowedIds
putText <| "Kagi search: " <> if isJust kagiKey then "enabled" else "disabled"
runTelegramBot tgConfig provider
diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs
index 1ef35de..137d7d3 100644
--- a/Omni/Agent/Telegram/Media.hs
+++ b/Omni/Agent/Telegram/Media.hs
@@ -239,7 +239,7 @@ analyzeImage apiKey imageBytes userPrompt = do
else userPrompt <> "\n\n(describe objectively in third person, no first person pronouns)"
body =
Aeson.object
- [ "model" .= ("anthropic/claude-sonnet-4" :: Text),
+ [ "model" .= ("anthropic/claude-sonnet-4.5" :: Text),
"messages"
.= [ Aeson.object
[ "role" .= ("user" :: Text),
diff --git a/Omni/Agent/Tools/WebReader.hs b/Omni/Agent/Tools/WebReader.hs
new file mode 100644
index 0000000..9b776ad
--- /dev/null
+++ b/Omni/Agent/Tools/WebReader.hs
@@ -0,0 +1,210 @@
+{-# LANGUAGE DeriveGeneric #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE ScopedTypeVariables #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | Web page reader tool - fetches and summarizes web pages.
+--
+-- : out omni-agent-tools-webreader
+-- : dep aeson
+-- : dep http-conduit
+module Omni.Agent.Tools.WebReader
+ ( -- * Tool
+ webReaderTool,
+
+ -- * Direct API
+ fetchWebpage,
+ extractText,
+
+ -- * Testing
+ main,
+ test,
+ )
+where
+
+import Alpha
+import Data.Aeson ((.=))
+import qualified Data.Aeson as Aeson
+import qualified Data.ByteString.Lazy as BL
+import qualified Data.Text as Text
+import qualified Data.Text.Encoding as TE
+import qualified Network.HTTP.Client as HTTPClient
+import qualified Network.HTTP.Simple as HTTP
+import qualified Omni.Agent.Engine as Engine
+import qualified Omni.Agent.Provider as Provider
+import qualified Omni.Test as Test
+
+main :: IO ()
+main = Test.run test
+
+test :: Test.Tree
+test =
+ Test.group
+ "Omni.Agent.Tools.WebReader"
+ [ Test.unit "extractText removes HTML tags" <| do
+ let html = "<html><body><p>Hello world</p></body></html>"
+ result = extractText html
+ ("Hello world" `Text.isInfixOf` result) Test.@=? True,
+ Test.unit "extractText removes script tags" <| do
+ let html = "<html><script>alert('hi')</script><p>Content</p></html>"
+ result = extractText html
+ ("alert" `Text.isInfixOf` result) Test.@=? False
+ ("Content" `Text.isInfixOf` result) Test.@=? True,
+ Test.unit "webReaderTool has correct schema" <| do
+ let tool = webReaderTool "test-key"
+ Engine.toolName tool Test.@=? "read_webpage"
+ ]
+
+fetchWebpage :: Text -> IO (Either Text Text)
+fetchWebpage url = do
+ result <-
+ try <| do
+ req0 <- HTTP.parseRequest (Text.unpack url)
+ let req =
+ HTTP.setRequestMethod "GET"
+ <| HTTP.setRequestHeader "User-Agent" ["Mozilla/5.0 (compatible; OmniBot/1.0)"]
+ <| HTTP.setRequestHeader "Accept" ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"]
+ <| HTTP.setRequestResponseTimeout (HTTPClient.responseTimeoutMicro (30 * 1000000))
+ <| req0
+ HTTP.httpLBS req
+ case result of
+ Left (e :: SomeException) ->
+ pure (Left ("Failed to fetch URL: " <> tshow e))
+ Right response -> do
+ let status = HTTP.getResponseStatusCode response
+ if status >= 200 && status < 300
+ then do
+ let body = HTTP.getResponseBody response
+ text = TE.decodeUtf8With (\_ _ -> Just '?') (BL.toStrict body)
+ pure (Right text)
+ else pure (Left ("HTTP error: " <> tshow status))
+
+extractText :: Text -> Text
+extractText html =
+ let noScript = removeTagContent "script" html
+ noStyle = removeTagContent "style" noScript
+ noNoscript = removeTagContent "noscript" noStyle
+ noTags = stripTags noNoscript
+ in collapseWhitespace noTags
+ where
+ removeTagContent :: Text -> Text -> Text
+ removeTagContent tag txt =
+ let openTag = "<" <> tag
+ closeTag = "</" <> tag <> ">"
+ in removeMatches openTag closeTag txt
+
+ removeMatches :: Text -> Text -> Text -> Text
+ removeMatches open close txt =
+ case Text.breakOn open (Text.toLower txt) of
+ (_, "") -> txt
+ (before, _) ->
+ let actualBefore = Text.take (Text.length before) txt
+ rest = Text.drop (Text.length before) txt
+ in case Text.breakOn close (Text.toLower rest) of
+ (_, "") -> actualBefore
+ (_, afterClose) ->
+ let skipLen = Text.length close
+ remaining = Text.drop (Text.length rest - Text.length afterClose + skipLen) txt
+ in actualBefore <> removeMatches open close remaining
+
+ stripTags :: Text -> Text
+ stripTags txt = go txt ""
+ where
+ go :: Text -> Text -> Text
+ go remaining acc =
+ case Text.breakOn "<" remaining of
+ (before, "") -> acc <> before
+ (before, rest) ->
+ case Text.breakOn ">" rest of
+ (_, "") -> acc <> before
+ (_, afterTag) -> go (Text.drop 1 afterTag) (acc <> before <> " ")
+
+ collapseWhitespace = Text.unwords <. Text.words
+
+summarizeContent :: Text -> Text -> Text -> IO (Either Text Text)
+summarizeContent apiKey url content = do
+ let truncatedContent = Text.take 50000 content
+ gemini = Provider.defaultOpenRouter apiKey "google/gemini-2.0-flash-001"
+ result <-
+ Provider.chat
+ gemini
+ []
+ [ Provider.Message
+ Provider.System
+ "You are a webpage summarizer. Provide a concise summary of the webpage content. Focus on the main points and key information. Be brief but comprehensive."
+ Nothing
+ Nothing,
+ Provider.Message
+ Provider.User
+ ("Summarize this webpage (" <> url <> "):\n\n" <> truncatedContent)
+ Nothing
+ Nothing
+ ]
+ case result of
+ Left err -> pure (Left ("Summarization failed: " <> err))
+ Right msg -> pure (Right (Provider.msgContent msg))
+
+webReaderTool :: Text -> Engine.Tool
+webReaderTool apiKey =
+ Engine.Tool
+ { Engine.toolName = "read_webpage",
+ Engine.toolDescription =
+ "Fetch and summarize a webpage. Use this when the user shares a URL or link "
+ <> "and wants to know what it contains. Returns a summary of the page content.",
+ Engine.toolJsonSchema =
+ Aeson.object
+ [ "type" .= ("object" :: Text),
+ "properties"
+ .= Aeson.object
+ [ "url"
+ .= Aeson.object
+ [ "type" .= ("string" :: Text),
+ "description" .= ("The URL of the webpage to read" :: Text)
+ ]
+ ],
+ "required" .= (["url"] :: [Text])
+ ],
+ Engine.toolExecute = executeWebReader apiKey
+ }
+
+executeWebReader :: Text -> Aeson.Value -> IO Aeson.Value
+executeWebReader apiKey v =
+ case Aeson.fromJSON v of
+ Aeson.Error e -> pure (Aeson.object ["error" .= Text.pack e])
+ Aeson.Success (args :: WebReaderArgs) -> do
+ fetchResult <- fetchWebpage (wrUrl args)
+ case fetchResult of
+ Left err ->
+ pure (Aeson.object ["error" .= err])
+ Right html -> do
+ let textContent = extractText html
+ if Text.null (Text.strip textContent)
+ then pure (Aeson.object ["error" .= ("Page appears to be empty or JavaScript-only" :: Text)])
+ else do
+ summaryResult <- summarizeContent apiKey (wrUrl args) textContent
+ case summaryResult of
+ Left err ->
+ pure
+ ( Aeson.object
+ [ "error" .= err,
+ "raw_content" .= Text.take 2000 textContent
+ ]
+ )
+ Right summary ->
+ pure
+ ( Aeson.object
+ [ "success" .= True,
+ "url" .= wrUrl args,
+ "summary" .= summary
+ ]
+ )
+
+newtype WebReaderArgs = WebReaderArgs
+ { wrUrl :: Text
+ }
+ deriving (Generic)
+
+instance Aeson.FromJSON WebReaderArgs where
+ parseJSON =
+ Aeson.withObject "WebReaderArgs" <| \v ->
+ WebReaderArgs </ (v Aeson..: "url")
diff --git a/Omni/Bild/Deps/Haskell.nix b/Omni/Bild/Deps/Haskell.nix
index 7e3650a..21325ec 100644
--- a/Omni/Bild/Deps/Haskell.nix
+++ b/Omni/Bild/Deps/Haskell.nix
@@ -53,6 +53,7 @@
"sqids"
"sqlite-simple"
"stm"
+ "tagsoup"
"tasty"
"tasty-hunit"
"tasty-quickcheck"