diff options
| author | Ben Sima <ben@bensima.com> | 2025-12-17 13:29:40 -0500 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-12-17 13:29:40 -0500 |
| commit | ab01b34bf563990e0f491ada646472aaade97610 (patch) | |
| tree | 5e46a1a157bb846b0c3a090a83153c788da2b977 /Omni/Agent/Tools/WebReaderTest.hs | |
| parent | e112d3ce07fa24f31a281e521a554cc881a76c7b (diff) | |
| parent | 337648981cc5a55935116141341521f4fce83214 (diff) | |
Merge Ava deployment changes
Diffstat (limited to 'Omni/Agent/Tools/WebReaderTest.hs')
| -rw-r--r-- | Omni/Agent/Tools/WebReaderTest.hs | 53 |
1 files changed, 53 insertions, 0 deletions
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}

-- | Quick test for WebReader to debug hangs
--
-- Fetches a few fixed URLs, runs text extraction on each, and prints
-- sizes and wall-clock timings for both steps so that a slow or hanging
-- stage can be identified from the last line printed.
--
-- : out webreader-test
-- : dep http-conduit
-- : run trafilatura
module Omni.Agent.Tools.WebReaderTest where

import Alpha
import qualified Data.Text as Text
import qualified Data.Text.IO as TIO
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import qualified Omni.Agent.Tools.WebReader as WebReader
import qualified System.IO as IO

-- | Run 'testUrl' against three fixed pages, printing a banner before
-- each one.
main :: IO ()
main = do
  -- Flush stdout at every newline. Without this, stdout is block-buffered
  -- whenever it is not a terminal (piped or redirected), so the progress
  -- line printed just before a hang could stay in the buffer and never be
  -- seen -- which defeats the purpose of this tool.
  IO.hSetBuffering IO.stdout IO.LineBuffering

  TIO.putStrLn "=== WebReader Debug Test ==="

  TIO.putStrLn "\n--- Test 1: Small page (httpbin) ---"
  testUrl "https://httpbin.org/html"

  TIO.putStrLn "\n--- Test 2: Medium page (example.com) ---"
  testUrl "https://example.com"

  TIO.putStrLn "\n--- Test 3: Large page (github) ---"
  testUrl "https://github.com/anthropics/skills"

  TIO.putStrLn "\n=== Done ==="

-- | Fetch one URL with 'WebReader.fetchWebpage' and report on stdout how
-- long the fetch took.
--
-- On a 'Left' result the error text is printed and nothing else is done.
-- On a 'Right' result the HTML length is printed, then
-- 'WebReader.extractText' is run on at most the first 100000 characters
-- and its duration, output length and a 200-character preview are printed.
testUrl :: Text -> IO ()
testUrl url = do
  TIO.putStrLn ("Fetching: " <> url)

  startFetch <- getCurrentTime
  result <- WebReader.fetchWebpage url
  endFetch <- getCurrentTime
  TIO.putStrLn ("Fetch took: " <> tshow (diffUTCTime endFetch startFetch))

  case result of
    Left err -> TIO.putStrLn ("Fetch error: " <> err)
    Right html -> do
      TIO.putStrLn ("HTML size: " <> tshow (Text.length html) <> " chars")

      TIO.putStrLn "Extracting text (naive, 100k truncated)..."
      startExtract <- getCurrentTime
      -- The bang pattern forces the extraction here, between the two
      -- timestamps; a lazy binding would defer the work to the first use
      -- of 'text' below and the measured duration would be meaningless.
      let !text = WebReader.extractText (Text.take 100000 html)
      endExtract <- getCurrentTime
      TIO.putStrLn ("Extract took: " <> tshow (diffUTCTime endExtract startExtract))
      TIO.putStrLn ("Text size: " <> tshow (Text.length text) <> " chars")
      TIO.putStrLn ("Preview: " <> Text.take 200 text)
