add whisper-dictate subservice (HEAD, master)

author: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
committer: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
commit: f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch)
tree: 3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate/whisper_dictate_daemon.py
parent: 1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff)
1 files changed, 198 insertions, 0 deletions
diff --git a/whisper-dictate/whisper_dictate_daemon.py b/whisper-dictate/whisper_dictate_daemon.py
new file mode 100644
index 0000000..77d458b
--- /dev/null
+++ b/whisper-dictate/whisper_dictate_daemon.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+# whisper-dictate-daemon.py
+"""Tray daemon that records speech and types the Whisper transcription.
+
+Toggle recording from the indicator menu or by sending SIGUSR1 to the
+process. Audio is captured with ``arecord`` and transcribed with the
+``whisper`` CLI; the text is copied to the clipboard with ``xsel`` and typed
+into the focused window with ``xdotool``.
+"""
+import os
+import signal
+import subprocess
+import tempfile
+import threading
+import time
+
+import gi
+gi.require_version('Gtk', '3.0')
+gi.require_version('AppIndicator3', '0.1')
+from gi.repository import Gtk, AppIndicator3, GLib
+
+
+class WhisperDictate:
+    """AppIndicator tray icon that drives the record/transcribe/type cycle."""
+
+    def __init__(self):
+        """Build the indicator and its menu and install the SIGUSR1 toggle."""
+        self.indicator = AppIndicator3.Indicator.new(
+            "whisper-dictate",
+            "audio-input-microphone",
+            AppIndicator3.IndicatorCategory.APPLICATION_STATUS
+        )
+        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
+        self.recording = False
+        self.recording_process = None
+        # path of the take currently being captured (None while idle)
+        self.audio_file = None
+        self.work_dir = "/tmp/whisper-dictate"
+        os.makedirs(self.work_dir, exist_ok=True)
+
+        # menu
+        menu = Gtk.Menu()
+        self.status_item = Gtk.MenuItem(label="Ready")
+        self.status_item.set_sensitive(False)
+        menu.append(self.status_item)
+        menu.append(Gtk.SeparatorMenuItem())
+
+        toggle_item = Gtk.MenuItem(label="Toggle Recording")
+        toggle_item.connect("activate", self.toggle_recording)
+        menu.append(toggle_item)
+
+        quit_item = Gtk.MenuItem(label="Quit")
+        quit_item.connect("activate", self.shutdown)
+        menu.append(quit_item)
+
+        menu.show_all()
+        self.indicator.set_menu(menu)
+        # SIGUSR1 toggles recording; the handler only queues the toggle so the
+        # widgets are touched from the GTK main loop. toggle_recording returns
+        # None, so the idle callback runs once per signal.
+        signal.signal(
+            signal.SIGUSR1,
+            lambda sig, frame: GLib.idle_add(self.toggle_recording),
+        )
+
+    def update_status(self, text):
+        """Schedule setting the status label to *text* on the main loop."""
+        # set_label returns None, so the idle source is removed after one run
+        GLib.idle_add(self.status_item.set_label, text)
+
+    def toggle_recording(self, widget=None):
+        """Start recording when idle, stop and transcribe when recording.
+
+        *widget* is the menu item when called from the menu and is unused.
+        """
+        if self.recording:
+            self.stop_recording()
+        else:
+            self.start_recording()
+
+    def start_recording(self):
+        """Start capturing 16 kHz mono S16_LE audio with arecord."""
+        # Each take gets its own file so the cleanup of an earlier, still
+        # running transcription cannot delete the take being captured.
+        os.makedirs(self.work_dir, exist_ok=True)
+        fd, audio_file = tempfile.mkstemp(
+            prefix="recording-", suffix=".wav", dir=self.work_dir
+        )
+        os.close(fd)  # arecord writes to the path itself
+        try:
+            self.recording_process = subprocess.Popen([
+                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file
+            ])
+        except OSError:
+            # arecord could not be launched: stay idle instead of getting
+            # stuck in the "recording" state
+            os.unlink(audio_file)
+            self.update_status("ERROR: could not start arecord")
+            return
+
+        self.audio_file = audio_file
+        self.recording = True
+        self.indicator.set_icon("media-record")
+        self.update_status("🔴 Recording...")
+
+    def stop_recording(self):
+        """Stop arecord and transcribe the finished take in the background."""
+        self.recording = False
+        self.indicator.set_icon("audio-input-microphone")
+        self.update_status("⏳ Transcribing...")
+
+        if self.recording_process:
+            self.recording_process.terminate()
+            self.recording_process.wait()
+            self.recording_process = None
+
+        audio_file, self.audio_file = self.audio_file, None
+        # transcribe in background thread
+        threading.Thread(
+            target=self.transcribe, args=(audio_file,), daemon=True
+        ).start()
+
+    def transcribe(self, audio_file=None):
+        """Transcribe *audio_file* with whisper, then copy and type the text.
+
+        Meant to run on a worker thread. *audio_file* defaults to
+        ``recording.wav`` in the work directory. When no transcript can be
+        read, only the status label is updated; nothing is copied or typed.
+        The audio and transcript files are removed in every case.
+        """
+        if audio_file is None:
+            audio_file = os.path.join(self.work_dir, "recording.wav")
+        # whisper names its output after the audio file, inside --output_dir
+        stem = os.path.splitext(os.path.basename(audio_file))[0]
+        txt_file = os.path.join(self.work_dir, stem + ".txt")
+
+        try:
+            # run whisper
+            subprocess.run([
+                "whisper", audio_file,
+                "--model", "base",
+                "--language", "en",
+                "--output_format", "txt",
+                "--output_dir", self.work_dir
+            ], capture_output=True)
+
+            # get text
+            with open(txt_file, 'r', encoding='utf-8') as f:
+                text = f.read().strip()
+        except (OSError, UnicodeDecodeError):
+            # whisper missing, or it produced no readable transcript
+            self.update_status("ERROR: transcription failed")
+            return
+        finally:
+            # cleanup
+            for path in (audio_file, txt_file):
+                if os.path.exists(path):
+                    os.unlink(path)
+
+        if not text:
+            # nothing recognised: leave clipboard and focused window alone
+            self.update_status("No speech detected")
+            return
+
+        # update UI
+        self.update_status(f"✓ Done: {text[:30]}...")
+
+        # use xsel instead of xclip - more reliable with systemd
+        # fall back to :0 only when the environment provides no DISPLAY
+        env = os.environ.copy()
+        if not env.get('DISPLAY'):
+            env['DISPLAY'] = ':0'
+        try:
+            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
+
+            # type it - small delay to ensure focus
+            time.sleep(0.1)
+            subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], env=env)
+        except OSError:
+            self.update_status("ERROR: could not run xsel/xdotool")
+
+    def shutdown(self, widget=None):
+        """Stop a running arecord process, then leave the GTK main loop."""
+        process = self.recording_process
+        if process is not None and process.poll() is None:
+            process.terminate()
+            process.wait()
+        Gtk.main_quit()
+
+
+def main():
+    """Create the tray indicator and run the GTK main loop."""
+    app = WhisperDictate()  # keep a reference while the loop runs
+    Gtk.main()
+
+
+if __name__ == "__main__":
+    main()
author	Ben Sima <ben@bensima.com>	2025-09-18 11:42:17 -0400
committer	Ben Sima <ben@bensima.com>	2025-09-18 11:42:17 -0400
commit	f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch)
tree	3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate/whisper_dictate_daemon.py
parent	1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff)