summary refs log tree commit diff
path: root/Omni/Agent/Engine.hs
diff options
context:
space:
mode:
author Ben Sima <ben@bensima.com> 2025-12-01 04:29:51 -0500
committer Ben Sima <ben@bensima.com> 2025-12-01 04:29:51 -0500
commit 3945b6fad4f1620612beb259e8601d165b9f4f12 (patch)
tree 04958c178b59e92022c44567ebef071e13998199 /Omni/Agent/Engine.hs
parent 7df031715fabd6eb84ef315e8fe78dc7d0b7344d (diff)
Fix cost reporting - parse actual cost from OpenRouter API response
I have successfully completed task t-197.8 to fix cost reporting by pars…

**Omni/Agent/Engine.hs:**
1. Added `usageCost :: Maybe Double` field to the `Usage` type to captur…
2. Updated `FromJSON` instance to parse the optional `"cost"` field
3. Modified `ChatCompletionRequest` ToJSON instance to include `"usage":…
4. Changed cost types from `Int` to `Double` throughout (engineOnCost ca…
5. Updated `estimateCost` to use floating-point division instead of inte…
6. Modified `runAgent` to use actual cost from API when available, conve…
7. Added new test case for parsing usage with cost field

**Omni/Agent/Worker.hs:**
1. Updated `runWithEngine` signature to return `Double` for cost
2. Changed `totalCostRef` from `IORef Int` to `IORef Double`
3. Added rounding when storing cost in DB metrics to maintain backward c…

✅ **All tests pass:**
- Omni/Agent/Engine.hs - 16 unit tests pass
- Omni/Agent/Worker.hs - Builds successfully
- Omni/Agent.hs - All integration tests pass
- Omni/Jr.hs - All 12 tests pass

✅ **All lint checks pass:**
- No hlint issues
- No ormolu formatting issues

The implementation correctly handles OpenRouter's cost format (credits w…

Task-Id: t-197.8
Diffstat (limited to 'Omni/Agent/Engine.hs')
-rw-r--r-- Omni/Agent/Engine.hs 47
1 files changed, 33 insertions, 14 deletions
diff --git a/Omni/Agent/Engine.hs b/Omni/Agent/Engine.hs
index 01a04e9..2da7722 100644
--- a/Omni/Agent/Engine.hs
+++ b/Omni/Agent/Engine.hs
@@ -111,7 +111,17 @@ test =
Just usage -> do
usagePromptTokens usage Test.@=? 100
usageCompletionTokens usage Test.@=? 50
- usageTotalTokens usage Test.@=? 150,
+ usageTotalTokens usage Test.@=? 150
+ usageCost usage Test.@=? Nothing,
+ Test.unit "Usage JSON parsing with cost" <| do
+ let json = "{\"prompt_tokens\":194,\"completion_tokens\":2,\"total_tokens\":196,\"cost\":0.95}"
+ case Aeson.decode json of
+ Nothing -> Test.assertFailure "Failed to decode usage with cost"
+ Just usage -> do
+ usagePromptTokens usage Test.@=? 194
+ usageCompletionTokens usage Test.@=? 2
+ usageTotalTokens usage Test.@=? 196
+ usageCost usage Test.@=? Just 0.95,
Test.unit "AgentResult JSON roundtrip" <| do
let result =
AgentResult
@@ -130,7 +140,8 @@ test =
Test.unit "estimateCost calculates correctly" <| do
let gpt4oCost = estimateCost "gpt-4o" 1000
gpt4oMiniCost = estimateCost "gpt-4o-mini" 1000
- (gpt4oCost >= gpt4oMiniCost) Test.@=? True,
+ (gpt4oCost >= gpt4oMiniCost) Test.@=? True
+ (gpt4oCost > 0) Test.@=? True,
Test.unit "ToolCall JSON roundtrip" <| do
let tc =
ToolCall
@@ -252,7 +263,7 @@ defaultAgentConfig =
data EngineConfig = EngineConfig
{ engineLLM :: LLM,
- engineOnCost :: Int -> Int -> IO (),
+ engineOnCost :: Int -> Double -> IO (),
engineOnActivity :: Text -> IO (),
engineOnToolCall :: Text -> Text -> IO (),
engineOnAssistant :: Text -> IO (),
@@ -278,7 +289,7 @@ data AgentResult = AgentResult
{ resultFinalMessage :: Text,
resultToolCallCount :: Int,
resultIterations :: Int,
- resultTotalCost :: Int,
+ resultTotalCost :: Double,
resultTotalTokens :: Int
}
deriving (Show, Eq, Generic)
@@ -395,7 +406,8 @@ instance Aeson.ToJSON ChatCompletionRequest where
<| catMaybes
[ Just ("model" .= reqModel r),
Just ("messages" .= reqMessages r),
- ("tools" .=) </ reqTools r
+ ("tools" .=) </ reqTools r,
+ Just ("usage" .= Aeson.object ["include" .= True])
]
data Choice = Choice
@@ -415,7 +427,8 @@ instance Aeson.FromJSON Choice where
data Usage = Usage
{ usagePromptTokens :: Int,
usageCompletionTokens :: Int,
- usageTotalTokens :: Int
+ usageTotalTokens :: Int,
+ usageCost :: Maybe Double
}
deriving (Show, Eq, Generic)
@@ -425,6 +438,7 @@ instance Aeson.FromJSON Usage where
(Usage </ (v .: "prompt_tokens"))
<*> (v .: "completion_tokens")
<*> (v .: "total_tokens")
+ <*> (v .:? "cost")
data ChatCompletionResponse = ChatCompletionResponse
{ respId :: Text,
@@ -517,7 +531,11 @@ runAgent engineCfg agentCfg userPrompt = do
Right chatRes -> do
let msg = chatMessage chatRes
tokens = maybe 0 usageTotalTokens (chatUsage chatRes)
- cost = estimateCost (llmModel llm) tokens
+ -- Use actual cost from API response when available (OpenRouter reports cost in credits; 1 credit = $1.00)
+ -- Convert from credits (dollars) to cents by multiplying by 100
+ cost = case chatUsage chatRes +> usageCost of
+ Just actualCost -> actualCost * 100
+ Nothing -> estimateCost (llmModel llm) tokens
engineOnCost engineCfg tokens cost
let newTokens = totalTokens + tokens
let assistantText = msgContent msg
@@ -583,13 +601,14 @@ executeToolCalls engineCfg toolMap = traverse executeSingle
engineOnToolResult engineCfg name True resultText
pure <| Message ToolRole resultText Nothing (Just callId)
-estimateCost :: Text -> Int -> Int
+-- | Estimate cost in cents from token count
+estimateCost :: Text -> Int -> Double
estimateCost model tokens
- | "gpt-4o-mini" `Text.isInfixOf` model = tokens * 15 `div` 1000000
- | "gpt-4o" `Text.isInfixOf` model = tokens * 250 `div` 100000
- | "gpt-4" `Text.isInfixOf` model = tokens * 3 `div` 100000
- | "claude" `Text.isInfixOf` model = tokens * 3 `div` 100000
- | otherwise = tokens `div` 100000
+ | "gpt-4o-mini" `Text.isInfixOf` model = fromIntegral tokens * 15 / 1000000
+ | "gpt-4o" `Text.isInfixOf` model = fromIntegral tokens * 250 / 100000
+ | "gpt-4" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
+ | "claude" `Text.isInfixOf` model = fromIntegral tokens * 3 / 100000
+ | otherwise = fromIntegral tokens / 100000
-estimateTotalCost :: Text -> Int -> Int
+estimateTotalCost :: Text -> Int -> Double
estimateTotalCost = estimateCost