summary refs log tree commit diff
path: root/whisper-dictate/whisper_dictate_daemon.py
diff options
context:
space:
mode:
Diffstat (limited to 'whisper-dictate/whisper_dictate_daemon.py')
-rw-r--r-- whisper-dictate/whisper_dictate_daemon.py 121
1 files changed, 121 insertions, 0 deletions
diff --git a/whisper-dictate/whisper_dictate_daemon.py b/whisper-dictate/whisper_dictate_daemon.py
new file mode 100644
index 0000000..77d458b
--- /dev/null
+++ b/whisper-dictate/whisper_dictate_daemon.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
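+# whisper_dictate_daemon.py - tray daemon: record speech with arecord,
+# transcribe it with whisper, then copy and type the resulting text.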
+import gi
+gi.require_version('Gtk', '3.0')
+gi.require_version('AppIndicator3', '0.1')
+from gi.repository import Gtk, AppIndicator3, GLib
+import subprocess
+import tempfile
+import threading
+import os
+import time
+import signal
+
+class WhisperDictate:
+    """Tray-icon dictation daemon.
+
+    Recording is toggled from the indicator menu or by sending SIGUSR1 to the
+    process. Audio is captured with ``arecord``, transcribed with the
+    ``whisper`` command-line tool, then copied to the clipboard with ``xsel``
+    and typed into the focused window with ``xdotool``.
+    """
+
+    def __init__(self):
+        """Build the indicator and its menu, and install the SIGUSR1 toggle."""
+        self.indicator = AppIndicator3.Indicator.new(
+            "whisper-dictate",
+            "audio-input-microphone",
+            AppIndicator3.IndicatorCategory.APPLICATION_STATUS,
+        )
+        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
+        self.recording = False
+        self.recording_process = None
+        # Path of the take currently being recorded (None when idle).
+        self.audio_file = None
+        # NOTE(review): a fixed directory under /tmp is predictable on a
+        # shared machine; per-take files are created with mkstemp, but a
+        # per-user runtime directory would be safer - confirm deployment.
+        self.work_dir = "/tmp/whisper-dictate"
+        os.makedirs(self.work_dir, exist_ok=True)
+
+        # menu
+        menu = Gtk.Menu()
+        self.status_item = Gtk.MenuItem(label="Ready")
+        self.status_item.set_sensitive(False)
+        menu.append(self.status_item)
+        menu.append(Gtk.SeparatorMenuItem())
+
+        toggle_item = Gtk.MenuItem(label="Toggle Recording")
+        toggle_item.connect("activate", self.toggle_recording)
+        menu.append(toggle_item)
+
+        quit_item = Gtk.MenuItem(label="Quit")
+        quit_item.connect("activate", self._quit)
+        menu.append(quit_item)
+
+        menu.show_all()
+        self.indicator.set_menu(menu)
+        # The handler only schedules the toggle; the real work then runs as
+        # an idle callback inside the GTK main loop.
+        signal.signal(
+            signal.SIGUSR1,
+            lambda sig, frame: GLib.idle_add(self.toggle_recording),
+        )
+
+    def update_status(self, text):
+        """Show *text* in the (insensitive) status menu item.
+
+        The label change is queued with ``GLib.idle_add`` so this method can
+        also be called from the transcription worker thread.
+        """
+        def _apply():
+            self.status_item.set_label(text)
+            return False  # one-shot: do not reschedule the idle callback
+
+        GLib.idle_add(_apply)
+
+    def toggle_recording(self, widget=None):
+        """Start recording if idle, otherwise stop and transcribe.
+
+        Args:
+            widget: The menu item when triggered by its "activate" signal;
+                None when scheduled from the SIGUSR1 handler. Unused.
+        """
+        if self.recording:
+            self.stop_recording()
+        else:
+            self.start_recording()
+
+    def start_recording(self):
+        """Launch ``arecord`` (16 kHz, mono, S16_LE) writing to a fresh file.
+
+        Each take gets its own file so a transcription that is still running
+        never shares or deletes the file of the next recording. The recording
+        state is only switched on once ``arecord`` has actually started.
+        """
+        audio_file = None
+        try:
+            # The directory may have been removed while the daemon was idle.
+            os.makedirs(self.work_dir, exist_ok=True)
+            fd, audio_file = tempfile.mkstemp(
+                prefix="recording-", suffix=".wav", dir=self.work_dir
+            )
+            os.close(fd)
+            process = subprocess.Popen([
+                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file
+            ])
+        except OSError as err:
+            if audio_file is not None:
+                self._remove_quietly(audio_file)
+            self.update_status(f"ERROR: could not start recording: {err}")
+            return
+
+        self.audio_file = audio_file
+        self.recording_process = process
+        self.recording = True
+        self.indicator.set_icon("media-record")
+        self.update_status("🔴 Recording...")
+
+    def stop_recording(self):
+        """Stop ``arecord`` and transcribe the finished take in a worker thread."""
+        self.recording = False
+        self.indicator.set_icon("audio-input-microphone")
+
+        process, self.recording_process = self.recording_process, None
+        audio_file, self.audio_file = self.audio_file, None
+
+        if process:
+            process.terminate()
+            process.wait()
+
+        if audio_file is None:
+            # Nothing was recorded, so there is nothing to transcribe.
+            self.update_status("Ready")
+            return
+
+        self.update_status("⏳ Transcribing...")
+        # transcribe in background thread
+        threading.Thread(
+            target=self.transcribe, args=(audio_file,), daemon=True
+        ).start()
+
+    def transcribe(self, audio_file=None):
+        """Transcribe *audio_file*, then copy and type the recognised text.
+
+        Runs in a worker thread; the UI is only touched via update_status().
+        On failure the error is shown in the status item and nothing is
+        copied or typed. The audio and transcript files are always removed.
+
+        Args:
+            audio_file: Path of the WAV file to transcribe. Defaults to
+                ``recording.wav`` inside the work directory.
+        """
+        if audio_file is None:
+            audio_file = os.path.join(self.work_dir, "recording.wav")
+        # Assumes whisper names its output after the audio file's stem inside
+        # --output_dir (as the original code did) - TODO confirm for the
+        # installed whisper version.
+        stem = os.path.splitext(os.path.basename(audio_file))[0]
+        txt_file = os.path.join(self.work_dir, stem + ".txt")
+
+        try:
+            text = self._run_whisper(audio_file, txt_file)
+            if text is None:
+                self.update_status("ERROR: transcription failed")
+                return
+            if not text:
+                # Leave the clipboard alone when nothing was recognised.
+                self.update_status("✓ Done: (no speech detected)")
+                return
+
+            preview = text[:30] + ("..." if len(text) > 30 else "")
+            self.update_status(f"✓ Done: {preview}")
+            self._deliver(text)
+        finally:
+            # cleanup
+            for path in (audio_file, txt_file):
+                self._remove_quietly(path)
+
+    def _run_whisper(self, audio_file, txt_file):
+        """Run whisper on *audio_file*; return the stripped text or None on failure."""
+        try:
+            # capture_output keeps whisper's console output out of the
+            # daemon's own stdout/stderr.
+            subprocess.run([
+                "whisper", audio_file,
+                "--model", "base",
+                "--language", "en",
+                "--output_format", "txt",
+                "--output_dir", self.work_dir
+            ], capture_output=True)
+            with open(txt_file, "r", encoding="utf-8") as f:
+                return f.read().strip()
+        except (OSError, UnicodeError):
+            # whisper is not installed, or it produced no readable transcript.
+            return None
+
+    def _deliver(self, text):
+        """Copy *text* to the clipboard and type it into the focused window."""
+        # use xsel instead of xclip - more reliable with systemd
+        # Fall back to :0 only when no display is configured (e.g. when
+        # started by systemd); never override a display that is already set.
+        env = os.environ.copy()
+        if not env.get("DISPLAY"):
+            env["DISPLAY"] = ":0"
+
+        try:
+            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
+        except OSError as err:
+            self.update_status(f"ERROR: clipboard copy failed: {err}")
+
+        # type it - small delay to ensure focus
+        time.sleep(0.1)
+        try:
+            subprocess.run(
+                ["xdotool", "type", "--clearmodifiers", "--", text], env=env
+            )
+        except OSError as err:
+            self.update_status(f"ERROR: typing failed: {err}")
+
+    def _quit(self, widget=None):
+        """Stop any recording still in progress, then leave the GTK main loop."""
+        process, self.recording_process = self.recording_process, None
+        if process is not None and process.poll() is None:
+            process.terminate()
+            process.wait()
+        if self.audio_file is not None:
+            self._remove_quietly(self.audio_file)
+            self.audio_file = None
+        Gtk.main_quit()
+
+    @staticmethod
+    def _remove_quietly(path):
+        """Delete *path*, ignoring errors (best-effort temp-file cleanup)."""
+        try:
+            os.unlink(path)
+        except OSError:
+            pass
+
+def main():
+    """Create the dictation daemon and run the GTK main loop until it exits."""
+    # Bound to a name so the instance stays referenced while the loop runs.
+    daemon = WhisperDictate()
+    Gtk.main()
+
+
+# Run the daemon only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()