summaryrefslogtreecommitdiff
path: root/Omni/Agent/Telegram
diff options
context:
space:
mode:
author: Ben Sima <ben@bensima.com> 2025-12-13 13:28:59 -0500
committer: Ben Sima <ben@bensima.com> 2025-12-13 13:28:59 -0500
commit: a14881ddcdd6ce83250c978d9df825c29e8d93c6 (patch)
tree: 68b355d078c05e37e0ac3267f41fdf8656f22b43 /Omni/Agent/Telegram
parent: 4d21f170cd1d1df239d7ad00fbf79427769a140f (diff)
telegram: fix audio transcription model and prompt order
- Switch from gemini-2.0-flash-001 to gemini-2.5-flash
- Put audio content before text prompt (model was ignoring audio)
- Strengthen prompt to return only transcription
Diffstat (limited to 'Omni/Agent/Telegram')
-rw-r--r-- Omni/Agent/Telegram/Media.hs | 10
1 files changed, 5 insertions, 5 deletions
diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs
index 137d7d3..6539b79 100644
--- a/Omni/Agent/Telegram/Media.hs
+++ b/Omni/Agent/Telegram/Media.hs
@@ -274,22 +274,22 @@ transcribeVoice apiKey audioBytes = do
let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode audioBytes))
body =
Aeson.object
- [ "model" .= ("google/gemini-2.0-flash-001" :: Text),
+ [ "model" .= ("google/gemini-2.5-flash" :: Text),
"messages"
.= [ Aeson.object
[ "role" .= ("user" :: Text),
"content"
.= [ Aeson.object
- [ "type" .= ("text" :: Text),
- "text" .= ("transcribe this audio exactly, return only the transcription with no commentary" :: Text)
- ],
- Aeson.object
[ "type" .= ("input_audio" :: Text),
"input_audio"
.= Aeson.object
[ "data" .= base64Data,
"format" .= ("ogg" :: Text)
]
+ ],
+ Aeson.object
+ [ "type" .= ("text" :: Text),
+ "text" .= ("transcribe this audio exactly. return ONLY the transcription, no commentary or preamble." :: Text)
]
]
]