diff options
| author | Ben Sima <ben@bensima.com> | 2025-09-18 11:42:17 -0400 |
|---|---|---|
| committer | Ben Sima <ben@bensima.com> | 2025-09-18 11:42:17 -0400 |
| commit | f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch) | |
| tree | 3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate/whisper_dictate_daemon.py | |
| parent | 1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff) | |
Diffstat (limited to 'whisper-dictate/whisper_dictate_daemon.py')
| -rw-r--r-- | whisper-dictate/whisper_dictate_daemon.py | 121 |
1 files changed, 121 insertions, 0 deletions
#!/usr/bin/env python3
# whisper-dictate-daemon.py
"""Tray daemon that records speech, transcribes it with whisper and types it.

A tray indicator exposes a "Toggle Recording" menu entry; sending SIGUSR1 to
the process toggles recording as well.  Audio is captured with ``arecord``,
transcribed with the ``whisper`` command line tool, copied to the clipboard
with ``xsel`` and typed into the focused window with ``xdotool``.
"""
import os
import signal
import subprocess
import tempfile
import threading
import time

import gi
gi.require_version('Gtk', '3.0')
gi.require_version('AppIndicator3', '0.1')
from gi.repository import Gtk, AppIndicator3, GLib

# Maximum number of transcript characters shown in the status menu entry.
PREVIEW_CHARS = 30
# Seconds to wait for arecord to exit after SIGTERM before killing it.
STOP_TIMEOUT_SECONDS = 5


class WhisperDictate:
    """Tray indicator that toggles between recording and transcribing."""

    def __init__(self):
        """Build the indicator and its menu, and install the SIGUSR1 handler."""
        self.indicator = AppIndicator3.Indicator.new(
            "whisper-dictate",
            "audio-input-microphone",
            AppIndicator3.IndicatorCategory.APPLICATION_STATUS,
        )
        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
        self.recording = False
        self.recording_process = None
        # Path of the WAV file the current recording is written to, if any.
        self.audio_file = None
        self.work_dir = "/tmp/whisper-dictate"
        # Recordings may contain private speech: keep the directory private.
        os.makedirs(self.work_dir, mode=0o700, exist_ok=True)

        # menu
        menu = Gtk.Menu()
        self.status_item = Gtk.MenuItem(label="Ready")
        self.status_item.set_sensitive(False)
        menu.append(self.status_item)
        menu.append(Gtk.SeparatorMenuItem())

        toggle_item = Gtk.MenuItem(label="Toggle Recording")
        toggle_item.connect("activate", self.toggle_recording)
        menu.append(toggle_item)

        quit_item = Gtk.MenuItem(label="Quit")
        quit_item.connect("activate", Gtk.main_quit)
        menu.append(quit_item)

        menu.show_all()
        self.indicator.set_menu(menu)
        # Defer the toggle to the GTK main loop instead of running it inside
        # the signal handler.
        signal.signal(
            signal.SIGUSR1,
            lambda sig, frame: GLib.idle_add(self.toggle_recording),
        )

    def update_status(self, text):
        """Show *text* in the status menu entry.

        The label change is scheduled with ``GLib.idle_add`` so this method
        can be called from the transcription thread.
        """
        def apply_label():
            self.status_item.set_label(text)
            return False  # one-shot: do not reschedule the idle callback

        GLib.idle_add(apply_label)

    def toggle_recording(self, widget=None):
        """Start recording if idle, otherwise stop and transcribe.

        *widget* is the menu item when invoked from the menu and ``None``
        when invoked from the SIGUSR1 handler; it is not used.
        """
        if self.recording:
            self.stop_recording()
        else:
            self.start_recording()

    def start_recording(self):
        """Launch ``arecord`` writing 16 kHz mono S16_LE audio to a new file.

        The recording state is only entered once ``arecord`` has actually
        been started; on failure the error is shown in the status entry.
        """
        audio_file = None
        try:
            # The directory may have been removed since start-up.
            os.makedirs(self.work_dir, mode=0o700, exist_ok=True)
            # One file per recording, so a transcription that is still
            # running cannot read or delete the next recording's audio.
            fd, audio_file = tempfile.mkstemp(
                prefix="recording-", suffix=".wav", dir=self.work_dir
            )
            os.close(fd)
            process = subprocess.Popen([
                "arecord", "-f", "S16_LE",
                "-r", "16000", "-c", "1", audio_file,
            ])
        except OSError as err:
            if audio_file is not None:
                self._remove_files(audio_file)
            self.update_status(f"ERROR: could not start recording: {err}")
            return

        self.recording_process = process
        self.audio_file = audio_file
        self.recording = True
        self.indicator.set_icon("media-record")
        self.update_status("🔴 Recording...")

    def stop_recording(self):
        """Stop ``arecord`` and transcribe the recording in a background thread."""
        self.recording = False
        self.indicator.set_icon("audio-input-microphone")

        process, self.recording_process = self.recording_process, None
        audio_file, self.audio_file = self.audio_file, None

        if process is not None:
            process.terminate()
            try:
                # Bounded wait: this runs on the GTK main thread.
                process.wait(timeout=STOP_TIMEOUT_SECONDS)
            except subprocess.TimeoutExpired:
                process.kill()
                process.wait()

        if audio_file is None:
            # Nothing was recorded, so there is nothing to transcribe.
            self.update_status("Ready")
            return

        self.update_status("⏳ Transcribing...")
        # transcribe in background thread
        threading.Thread(
            target=self.transcribe, args=(audio_file,), daemon=True
        ).start()

    def transcribe(self, audio_file=None):
        """Transcribe *audio_file* with whisper, then copy and type the text.

        *audio_file* defaults to ``recording.wav`` inside the work directory.
        The audio file and the transcript file are removed afterwards, also
        when transcription fails.  Failures are reported in the status entry
        only; nothing is copied or typed in that case.
        """
        if audio_file is None:
            audio_file = os.path.join(self.work_dir, "recording.wav")
        # Only swap the extension, never an earlier ".wav" inside the path.
        txt_file = os.path.splitext(audio_file)[0] + ".txt"

        try:
            text = self._run_whisper(audio_file, txt_file)
            if text is None:
                self.update_status("ERROR: transcription failed")
            elif not text:
                self.update_status("✓ Done: (no text)")
            else:
                self.update_status(f"✓ Done: {self._preview(text)}")
                self._emit_text(text)
        finally:
            # cleanup
            self._remove_files(audio_file, txt_file)

    def _run_whisper(self, audio_file, txt_file):
        """Run whisper on *audio_file*; return the stripped text or ``None``."""
        try:
            subprocess.run([
                "whisper", audio_file,
                "--model", "base",
                "--language", "en",
                "--output_format", "txt",
                "--output_dir", self.work_dir,
            ], capture_output=True)
            with open(txt_file, "r", encoding="utf-8") as f:
                return f.read().strip()
        except (OSError, UnicodeDecodeError):
            # whisper could not be started, or it produced no readable output
            return None

    def _emit_text(self, text):
        """Copy *text* to the clipboard and type it into the focused window."""
        # use xsel instead of xclip - more reliable with systemd
        # systemd units may lack DISPLAY; keep an existing value otherwise
        env = os.environ.copy()
        env.setdefault("DISPLAY", ":0")

        try:
            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
        except OSError as err:
            self.update_status(f"ERROR: could not run xsel: {err}")

        # type it - small delay to ensure focus
        time.sleep(0.1)
        try:
            subprocess.run(
                ["xdotool", "type", "--clearmodifiers", "--", text], env=env
            )
        except OSError as err:
            self.update_status(f"ERROR: could not run xdotool: {err}")

    @staticmethod
    def _preview(text, limit=PREVIEW_CHARS):
        """Return *text* cut to *limit* characters, with "..." if truncated."""
        if len(text) <= limit:
            return text
        return text[:limit] + "..."

    @staticmethod
    def _remove_files(*paths):
        """Delete each of *paths*, ignoring files that cannot be removed."""
        for path in paths:
            try:
                os.unlink(path)
            except OSError:
                # best-effort cleanup; a missing file is not an error
                pass


def main():
    """Create the indicator and run the GTK main loop."""
    app = WhisperDictate()  # keep a reference for the lifetime of the loop
    Gtk.main()


if __name__ == "__main__":
    main()
