summaryrefslogtreecommitdiff
path: root/Omni/Agent/Tools/WebReaderTest.hs
diff options
context:
space:
mode:
author: Ben Sima <ben@bensima.com> 2025-12-17 13:29:40 -0500
committer: Ben Sima <ben@bensima.com> 2025-12-17 13:29:40 -0500
commit: ab01b34bf563990e0f491ada646472aaade97610 (patch)
tree: 5e46a1a157bb846b0c3a090a83153c788da2b977 /Omni/Agent/Tools/WebReaderTest.hs
parent: e112d3ce07fa24f31a281e521a554cc881a76c7b (diff)
parent: 337648981cc5a55935116141341521f4fce83214 (diff)
Merge Ava deployment changes
Diffstat (limited to 'Omni/Agent/Tools/WebReaderTest.hs')
-rw-r--r-- Omni/Agent/Tools/WebReaderTest.hs 53
1 files changed, 53 insertions, 0 deletions
diff --git a/Omni/Agent/Tools/WebReaderTest.hs b/Omni/Agent/Tools/WebReaderTest.hs
new file mode 100644
index 0000000..ca4c119
--- /dev/null
+++ b/Omni/Agent/Tools/WebReaderTest.hs
@@ -0,0 +1,53 @@
+{-# LANGUAGE BangPatterns #-}
+{-# LANGUAGE OverloadedStrings #-}
+{-# LANGUAGE NoImplicitPrelude #-}
+
+-- | Quick test for WebReader to debug hangs
+--
+-- : out webreader-test
+-- : dep http-conduit
+-- : run trafilatura
+module Omni.Agent.Tools.WebReaderTest where
+
+import Alpha
+import qualified Data.Text as Text
+import qualified Data.Text.IO as TIO
+import Data.Time.Clock (diffUTCTime, getCurrentTime)
+import qualified Omni.Agent.Tools.WebReader as WebReader
+
+-- | Print a banner, run 'testUrl' on three fixed URLs, then print "Done".
+main :: IO ()
+main = do
+  TIO.putStrLn "=== WebReader Debug Test ==="
+  probe "\n--- Test 1: Small page (httpbin) ---" "https://httpbin.org/html"
+  probe "\n--- Test 2: Medium page (example.com) ---" "https://example.com"
+  probe "\n--- Test 3: Large page (github) ---" "https://github.com/anthropics/skills"
+  TIO.putStrLn "\n=== Done ==="
+  where
+    -- Print a section heading, then run the check for that URL.
+    probe :: Text -> Text -> IO ()
+    probe heading target = do
+      TIO.putStrLn heading
+      testUrl target
+
+-- | Fetch one URL, print how long it took, then time text extraction on it.
+testUrl :: Text -> IO ()
+testUrl url = do
+  TIO.putStrLn ("Fetching: " <> url)
+  t0 <- getCurrentTime
+  fetched <- WebReader.fetchWebpage url
+  t1 <- getCurrentTime
+  TIO.putStrLn ("Fetch took: " <> elapsed t0 t1)
+  case fetched of
+    Left problem -> TIO.putStrLn ("Fetch error: " <> problem)
+    Right page -> do -- success: report size, then time extraction separately
+      TIO.putStrLn ("HTML size: " <> tshow (Text.length page) <> " chars")
+      TIO.putStrLn "Extracting text (naive, 100k truncated)..."
+      t2 <- getCurrentTime
+      let !body = WebReader.extractText (Text.take 100000 page) -- bang forces it here
+      t3 <- getCurrentTime
+      TIO.putStrLn ("Extract took: " <> elapsed t2 t3)
+      TIO.putStrLn ("Text size: " <> tshow (Text.length body) <> " chars")
+      TIO.putStrLn ("Preview: " <> Text.take 200 body)
+  where
+    elapsed startedAt stoppedAt = tshow (diffUTCTime stoppedAt startedAt)