From a14881ddcdd6ce83250c978d9df825c29e8d93c6 Mon Sep 17 00:00:00 2001
From: Ben Sima <ben@bensima.com>
Date: Sat, 13 Dec 2025 13:28:59 -0500
Subject: telegram: fix audio transcription model and prompt order

- Switch from gemini-2.0-flash-001 to gemini-2.5-flash
- Put audio content before text prompt (model was ignoring audio)
- Strengthen prompt to return only transcription
---
 Omni/Agent/Telegram/Media.hs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'Omni/Agent')

diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs
index 137d7d3..6539b79 100644
--- a/Omni/Agent/Telegram/Media.hs
+++ b/Omni/Agent/Telegram/Media.hs
@@ -274,22 +274,22 @@ transcribeVoice apiKey audioBytes = do
   let base64Data = TL.toStrict (TLE.decodeUtf8 (B64.encode audioBytes))
       body =
         Aeson.object
-          [ "model" .= ("google/gemini-2.0-flash-001" :: Text),
+          [ "model" .= ("google/gemini-2.5-flash" :: Text),
             "messages"
               .= [ Aeson.object
                      [ "role" .= ("user" :: Text),
                        "content"
                          .= [ Aeson.object
-                                [ "type" .= ("text" :: Text),
-                                  "text" .= ("transcribe this audio exactly, return only the transcription with no commentary" :: Text)
-                                ],
-                              Aeson.object
                                 [ "type" .= ("input_audio" :: Text),
                                   "input_audio"
                                     .= Aeson.object
                                       [ "data" .= base64Data,
                                         "format" .= ("ogg" :: Text)
                                       ]
+                                ],
+                              Aeson.object
+                                [ "type" .= ("text" :: Text),
+                                  "text" .= ("transcribe this audio exactly. return ONLY the transcription, no commentary or preamble." :: Text)
                                 ]
                             ]
                      ]
-- 
cgit v1.2.3