From c3c2a96e45f41193e608fe4b693ce587f6607b96 Mon Sep 17 00:00:00 2001
From: Ben Sima
Date: Sat, 13 Dec 2025 01:01:47 -0500
Subject: fix: correct cost estimation formulas

- Update to Dec 2024 OpenRouter pricing
- Use blended input/output rates
- Add gemini-flash, claude-sonnet-4.5 specific rates
- Fix math: was off by ~30x for Claude models
---
 Omni/Agent/Engine.hs | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/Omni/Agent/Engine.hs b/Omni/Agent/Engine.hs
index dab1329..f9b0355 100644
--- a/Omni/Agent/Engine.hs
+++ b/Omni/Agent/Engine.hs
@@ -819,14 +819,20 @@ executeToolCallsWithTracking engineCfg toolMap tcs initialTestFailures initialEd
         _ -> False
     isOldStrNotFoundError _ = False
 
--- | Estimate cost in cents from token count
+-- | Estimate cost in cents from token count.
+-- Uses blended input/output rates (roughly 2:1 output:input ratio).
+-- Prices as of Dec 2024 from OpenRouter.
 estimateCost :: Text -> Int -> Double
 estimateCost model tokens
-  | "gpt-4o-mini" `Text.isInfixOf` model = fromIntegral tokens * 15 / 1000000
-  | "gpt-4o" `Text.isInfixOf` model = fromIntegral tokens * 250 / 100000
-  | "gpt-4" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
-  | "claude" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
-  | otherwise = fromIntegral tokens / 100000
+  | "gpt-4o-mini" `Text.isInfixOf` model = fromIntegral tokens * 0.04 / 1000
+  | "gpt-4o" `Text.isInfixOf` model = fromIntegral tokens * 0.7 / 1000
+  | "gemini-2.0-flash" `Text.isInfixOf` model = fromIntegral tokens * 0.15 / 1000
+  | "gemini-2.5-flash" `Text.isInfixOf` model = fromIntegral tokens * 0.15 / 1000
+  | "claude-sonnet-4.5" `Text.isInfixOf` model = fromIntegral tokens * 0.9 / 1000
+  | "claude-sonnet-4" `Text.isInfixOf` model = fromIntegral tokens * 0.9 / 1000
+  | "claude-3-haiku" `Text.isInfixOf` model = fromIntegral tokens * 0.1 / 1000
+  | "claude" `Text.isInfixOf` model = fromIntegral tokens * 0.9 / 1000
+  | otherwise = fromIntegral tokens * 0.5 / 1000
 
 -- | Run agent with a Provider instead of LLM.
 -- This is the new preferred way to run agents with multiple backend support.
-- 
cgit v1.2.3
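
Note: a quick sanity check of the "~30x for Claude models" figure, as a
standalone Haskell sketch. It is not part of the patched module; the helper
names and the 100k-token session size are illustrative assumptions, and only
the two Claude rates from the diff above are reproduced.

-- Old rate applied to all Claude models: 3 cents per 100k tokens.
oldClaudeCost :: Int -> Double
oldClaudeCost tokens = fromIntegral tokens * 3 / 100000

-- New blended Sonnet rate from the patch: 0.9 cents per 1k tokens.
newClaudeCost :: Int -> Double
newClaudeCost tokens = fromIntegral tokens * 0.9 / 1000

main :: IO ()
main = do
  let tokens = 100000 :: Int  -- a 100k-token session, chosen for illustration
  putStrLn ("old estimate: " ++ show (oldClaudeCost tokens) ++ " cents")  -- 3.0
  putStrLn ("new estimate: " ++ show (newClaudeCost tokens) ++ " cents")  -- 90.0
  -- 90 / 3 = 30, which matches the ~30x correction noted in the commit message.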