diff options
Diffstat (limited to 'Omni/Agent/Tools/WebReaderTest.hs')
| -rw-r--r-- | Omni/Agent/Tools/WebReaderTest.hs | 53 |
1 files changed, 53 insertions, 0 deletions
{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE NoImplicitPrelude #-}

-- | Quick test for WebReader to debug hangs
--
-- : out webreader-test
-- : dep http-conduit
-- : run trafilatura
module Omni.Agent.Tools.WebReaderTest where

import Alpha
import qualified Data.Text as Text
import qualified Data.Text.IO as TIO
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import qualified Omni.Agent.Tools.WebReader as WebReader

-- | Run 'testUrl' against three fixed URLs in sequence, printing a banner
-- line before each one and a header/footer line around the whole run.
main :: IO ()
main = do
  TIO.putStrLn "=== WebReader Debug Test ==="
  runCase "\n--- Test 1: Small page (httpbin) ---" "https://httpbin.org/html"
  runCase "\n--- Test 2: Medium page (example.com) ---" "https://example.com"
  runCase "\n--- Test 3: Large page (github) ---" "https://github.com/anthropics/skills"
  TIO.putStrLn "\n=== Done ==="
  where
    -- Print the banner for one case, then exercise the URL.
    runCase :: Text -> Text -> IO ()
    runCase banner url = do
      TIO.putStrLn banner
      testUrl url

-- | Fetch @url@ with 'WebReader.fetchWebpage' and print how long the call
-- took.
--
-- On a 'Left' result the error text is printed and nothing else happens.
-- On a 'Right' result the HTML length is printed, then
-- 'WebReader.extractText' is applied to the first 100000 characters of the
-- HTML; the time between the surrounding clock reads, the extracted text's
-- length and its first 200 characters are printed.
testUrl :: Text -> IO ()
testUrl url = do
  logLine ("Fetching: " <> url)

  fetchStarted <- getCurrentTime
  outcome <- WebReader.fetchWebpage url
  fetchFinished <- getCurrentTime
  logLine ("Fetch took: " <> elapsedBetween fetchStarted fetchFinished)

  case outcome of
    Right html -> reportExtraction html
    Left err -> logLine ("Fetch error: " <> err)
  where
    -- Single place through which every line of output goes.
    logLine :: Text -> IO ()
    logLine = TIO.putStrLn

    -- Render the interval between two clock readings.
    elapsedBetween begun ended = tshow (diffUTCTime ended begun)

    -- Render a text's length in the "<n> chars" form used by the report.
    charCount body = tshow (Text.length body) <> " chars"

    -- Time the extraction step on a successfully fetched page and print
    -- the resulting sizes and a short preview.
    reportExtraction html = do
      logLine ("HTML size: " <> charCount html)

      logLine "Extracting text (naive, 100k truncated)..."
      extractStarted <- getCurrentTime
      -- The bang pattern forces the binding here, before the second clock
      -- read, so the evaluation is not deferred past the measured interval.
      let !text = WebReader.extractText (Text.take 100000 html)
      extractFinished <- getCurrentTime
      logLine ("Extract took: " <> elapsedBetween extractStarted extractFinished)
      logLine ("Text size: " <> charCount text)
      logLine ("Preview: " <> Text.take 200 text)
