{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Quick test for WebReader to debug hangs
--
-- : out webreader-test
-- : dep http-conduit
-- : run trafilatura
module Omni.Agent.Tools.WebReaderTest where

import Alpha
import qualified Data.Text as Text
import qualified Data.Text.IO as TIO
import Data.Time.Clock (diffUTCTime, getCurrentTime)
import qualified Omni.Agent.Tools.WebReader as WebReader
import qualified System.IO as IO

-- | Run the fetch-and-extract timing check against three fixed URLs,
-- printing a heading before each one.
main :: IO ()
main = do
  -- When stdout is not a terminal (piped or redirected) GHC block-buffers
  -- it, so a hang would leave the most recent progress lines unprinted.
  -- Line buffering makes every message visible as soon as it is written.
  IO.hSetBuffering IO.stdout IO.LineBuffering
  TIO.putStrLn "=== WebReader Debug Test ==="
  runCase "\n--- Test 1: Small page (httpbin) ---" "https://httpbin.org/html"
  runCase "\n--- Test 2: Medium page (example.com) ---" "https://example.com"
  runCase "\n--- Test 3: Large page (github) ---" "https://github.com/anthropics/skills"
  TIO.putStrLn "\n=== Done ==="
  where
    -- Print a section heading, then exercise one URL.
    runCase :: Text -> Text -> IO ()
    runCase heading url = do
      TIO.putStrLn heading
      testUrl url

-- | Fetch one URL with 'WebReader.fetchWebpage' and report how long it took.
--
-- On a @Left@ result the error text is printed and nothing else happens.
-- On a @Right@ result the HTML length is printed, then
-- 'WebReader.extractText' is run on at most the first 100000 characters and
-- its duration, output length, and a 200-character preview are printed.
testUrl :: Text -> IO ()
testUrl url = do
  TIO.putStrLn ("Fetching: " <> url)
  startFetch <- getCurrentTime
  result <- WebReader.fetchWebpage url
  endFetch <- getCurrentTime
  TIO.putStrLn ("Fetch took: " <> tshow (diffUTCTime endFetch startFetch))
  case result of
    Left err -> TIO.putStrLn ("Fetch error: " <> err)
    Right html -> do
      TIO.putStrLn ("HTML size: " <> tshow (Text.length html) <> " chars")
      TIO.putStrLn "Extracting text (naive, 100k truncated)..."
      startExtract <- getCurrentTime
      -- The bang forces the extraction here, between the two timestamps,
      -- rather than lazily when the result is first printed.
      let !text = WebReader.extractText (Text.take 100000 html)
      endExtract <- getCurrentTime
      TIO.putStrLn ("Extract took: " <> tshow (diffUTCTime endExtract startExtract))
      TIO.putStrLn ("Text size: " <> tshow (Text.length text) <> " chars")
      TIO.putStrLn ("Preview: " <> Text.take 200 text)