summary | refs | log | tree | commit | diff
path: root/Omni/Agent
diff options
context:
space:
mode:
author: Ben Sima <ben@bensima.com>, 2025-12-17 20:51:02 -0500
committer: Ben Sima <ben@bensima.com>, 2025-12-17 20:51:02 -0500
commit: b384667997140a5e561572e41fe924d10ea7a660 (patch)
tree: 2920df90ffb140bb5427a423d4e44bd891730ace /Omni/Agent
parent: f44a7f7fb29077b97af56219b906a1867aa7dc6d (diff)
ava: add Python/CLI tools and local whisper fallback
- Wrap ava binary with Python (requests, httpx, pandas, etc.)
- Add CLI tools: curl, pandoc, ffmpeg, imagemagick, csvkit
- Add local whisper-cli fallback when OPENAI_API_KEY unavailable

Amp-Thread-ID: https://ampcode.com/threads/T-019b2dc2-36e0-75e1-90c1-622901fc9847
Co-authored-by: Amp <amp@ampcode.com>
Diffstat (limited to 'Omni/Agent')
-rw-r--r-- Omni/Agent/Telegram/Media.hs 33
1 files changed, 31 insertions, 2 deletions
diff --git a/Omni/Agent/Telegram/Media.hs b/Omni/Agent/Telegram/Media.hs
index 47fbf91..0d62edd 100644
--- a/Omni/Agent/Telegram/Media.hs
+++ b/Omni/Agent/Telegram/Media.hs
@@ -54,9 +54,12 @@ import qualified Network.HTTP.Simple as HTTP
import qualified Omni.Agent.Telegram.Types as Types
import qualified Omni.Agent.Tools.Pdf as Pdf
import qualified Omni.Test as Test
+import qualified System.Directory as Directory
import System.Environment (lookupEnv)
+import qualified System.Exit as Exit
import System.IO (hClose)
import System.IO.Temp (withSystemTempFile)
+import qualified System.Process as Process
main :: IO ()
main = Test.run test
@@ -274,8 +277,34 @@ transcribeVoice :: Text -> BL.ByteString -> IO (Either Text Text)
transcribeVoice _unusedApiKey audioBytes = do
maybeKey <- lookupEnv "OPENAI_API_KEY"
case maybeKey of
- Nothing -> pure (Left "OPENAI_API_KEY not set - required for voice transcription")
- Just key -> transcribeWithWhisper (Text.pack key) audioBytes
+ Nothing -> transcribeWithWhisperLocal audioBytes
+ Just key -> do
+ result <- transcribeWithWhisper (Text.pack key) audioBytes
+ case result of
+ Left _ -> transcribeWithWhisperLocal audioBytes
+ Right text -> pure (Right text)
+
+-- | Transcribe audio offline with @whisper-cli@. 'Left' carries a missing-model
+-- notice, the tool's stderr, or the text of any exception raised while running it.
+transcribeWithWhisperLocal :: BL.ByteString -> IO (Either Text Text)
+transcribeWithWhisperLocal audioBytes = do
+  let model = "/home/ava/models/ggml-base.en.bin"
+      noModel = "Local whisper model not found: " <> Text.pack model
+      args audioFile = ["--model", model, "--file", audioFile, "--no-timestamps"]
+      -- A unique temp file (deleted on every exit path) replaces the old
+      -- length-derived name, which collided for clips of equal size.
+      runWhisper audioFile handle = do
+        BL.hPut handle audioBytes
+        hClose handle -- flush to disk before whisper-cli opens the file
+        (exitCode, stdoutStr, stderrStr) <- Process.readProcessWithExitCode "whisper-cli" (args audioFile) ""
+        pure <| case exitCode of
+          Exit.ExitSuccess -> Right (Text.strip (Text.pack stdoutStr))
+          Exit.ExitFailure _ -> Left (Text.pack stderrStr)
+  modelExists <- Directory.doesFileExist model
+  result <- try (if modelExists then withSystemTempFile "ava_voice.ogg" runWhisper else pure (Left noModel))
+  case result of
+    Left (e :: SomeException) -> pure (Left ("Local whisper failed: " <> tshow e))
+    Right r -> pure r
transcribeWithWhisper :: Text -> BL.ByteString -> IO (Either Text Text)
transcribeWithWhisper apiKey audioBytes = do