From a14881ddcdd6ce83250c978d9df825c29e8d93c6 Mon Sep 17 00:00:00 2001 From: Ben Sima Date: Sat, 13 Dec 2025 13:28:59 -0500 Subject: telegram: fix audio transcription model and prompt order - Switch from gemini-2.0-flash-001 to gemini-2.5-flash - Put audio content before text prompt (model was ignoring audio) - Strengthen prompt to return only transcription --- Omni/Agent/Telegram/Media.hs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Omni/Agent') diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs index 137d7d3..6539b79 100644 --- a/Omni/Agent/Telegram/Media.hs +++ b/Omni/Agent/Telegram/Media.hs @@ -274,22 +274,22 @@ transcribeVoice apiKey audioBytes = do let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode audioBytes)) body = Aeson.object - [ "model" .= ("google/gemini-2.0-flash-001" :: Text), + [ "model" .= ("google/gemini-2.5-flash" :: Text), "messages" .= [ Aeson.object [ "role" .= ("user" :: Text), "content" .= [ Aeson.object - [ "type" .= ("text" :: Text), - "text" .= ("transcribe this audio exactly, return only the transcription with no commentary" :: Text) - ], - Aeson.object [ "type" .= ("input_audio" :: Text), "input_audio" .= Aeson.object [ "data" .= base64Data, "format" .= ("ogg" :: Text) ] + ], + Aeson.object + [ "type" .= ("text" :: Text), + "text" .= ("transcribe this audio exactly. return ONLY the transcription, no commentary or preamble." :: Text) ] ] ] -- cgit v1.2.3