From b384667997140a5e561572e41fe924d10ea7a660 Mon Sep 17 00:00:00 2001
From: Ben Sima
Date: Wed, 17 Dec 2025 20:51:02 -0500
Subject: ava: add Python/CLI tools and local whisper fallback

- Wrap ava binary with Python (requests, httpx, pandas, etc.)
- Add CLI tools: curl, pandoc, ffmpeg, imagemagick, csvkit
- Add local whisper-cli fallback when OPENAI_API_KEY unavailable

Amp-Thread-ID: https://ampcode.com/threads/T-019b2dc2-36e0-75e1-90c1-622901fc9847
Co-authored-by: Amp
---
 Omni/Agent/Telegram/Media.hs | 45 +++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

(limited to 'Omni/Agent/Telegram')

diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs
index 47fbf91..0d62edd 100644
--- a/Omni/Agent/Telegram/Media.hs
+++ b/Omni/Agent/Telegram/Media.hs
@@ -54,9 +54,11 @@ import qualified Network.HTTP.Simple as HTTP
 import qualified Omni.Agent.Telegram.Types as Types
 import qualified Omni.Agent.Tools.Pdf as Pdf
 import qualified Omni.Test as Test
 import System.Environment (lookupEnv)
+import qualified System.Exit as Exit
 import System.IO (hClose)
 import System.IO.Temp (withSystemTempFile)
+import qualified System.Process as Process
 
 main :: IO ()
 main = Test.run test
@@ -274,8 +276,47 @@ transcribeVoice :: Text -> BL.ByteString -> IO (Either Text Text)
 transcribeVoice _unusedApiKey audioBytes = do
   maybeKey <- lookupEnv "OPENAI_API_KEY"
   case maybeKey of
-    Nothing -> pure (Left "OPENAI_API_KEY not set - required for voice transcription")
-    Just key -> transcribeWithWhisper (Text.pack key) audioBytes
+    -- No API key configured: go straight to the local whisper-cli.
+    Nothing -> transcribeWithWhisperLocal audioBytes
+    Just key -> do
+      apiResult <- transcribeWithWhisper (Text.pack key) audioBytes
+      case apiResult of
+        Right text -> pure (Right text)
+        Left apiErr -> do
+          -- The API call failed, so retry locally. If that fails as well,
+          -- report both errors so the API failure is not silently lost.
+          localResult <- transcribeWithWhisperLocal audioBytes
+          case localResult of
+            Right text -> pure (Right text)
+            Left localErr ->
+              pure (Left ("Whisper API failed: " <> apiErr <> "; " <> localErr))
+
+-- | Transcribe audio by running a locally installed @whisper-cli@.
+--
+-- The audio is written to a uniquely named temporary file that
+-- 'withSystemTempFile' removes again when the action finishes or throws.
+-- Returns the stripped stdout of @whisper-cli@ on success, its stderr when it
+-- exits with a failure code, and a "Local whisper failed" message when an
+-- exception is raised (for example when @whisper-cli@ cannot be started).
+transcribeWithWhisperLocal :: BL.ByteString -> IO (Either Text Text)
+transcribeWithWhisperLocal audioBytes = do
+  result <- try (withSystemTempFile "ava_voice.ogg" runWhisper)
+  case result of
+    Left (e :: SomeException) -> pure (Left ("Local whisper failed: " <> tshow e))
+    Right r -> pure r
+  where
+    runWhisper audioFile handle = do
+      BL.hPut handle audioBytes
+      -- Close the handle so the data is flushed before whisper-cli reads it.
+      hClose handle
+      (exitCode, stdoutStr, stderrStr) <-
+        Process.readProcessWithExitCode
+          "whisper-cli"
+          ["--model", "/home/ava/models/ggml-base.en.bin", "--file", audioFile, "--no-timestamps"]
+          ""
+      case exitCode of
+        Exit.ExitSuccess -> pure (Right (Text.strip (Text.pack stdoutStr)))
+        Exit.ExitFailure _ -> pure (Left (Text.pack stderrStr))
 
 transcribeWithWhisper :: Text -> BL.ByteString -> IO (Either Text Text)
 transcribeWithWhisper apiKey audioBytes = do
-- 
cgit v1.2.3