#!/usr/bin/env python3
# whisper-dictate-daemon.py
"""Tray daemon for push-to-toggle dictation.

Shows an AppIndicator icon with a small menu.  Toggling (from the menu or by
sending SIGUSR1 to the process) starts an ``arecord`` capture; toggling again
stops the capture, runs the ``whisper`` command-line tool on the recording in
a background thread, copies the resulting text to the clipboard with ``xsel``
and types it into the focused window with ``xdotool``.
"""
import os
import signal
import subprocess
import tempfile
import threading
import time

import gi

gi.require_version('Gtk', '3.0')
gi.require_version('AppIndicator3', '0.1')
from gi.repository import Gtk, AppIndicator3, GLib  # noqa: E402


class WhisperDictate:
    """Tray indicator that records audio and types the Whisper transcript."""

    def __init__(self):
        """Create the indicator, its menu and the SIGUSR1 toggle hook."""
        self.indicator = AppIndicator3.Indicator.new(
            "whisper-dictate",
            "audio-input-microphone",
            AppIndicator3.IndicatorCategory.APPLICATION_STATUS,
        )
        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)

        self.recording = False
        # True while a background transcription owns the recording files.
        self.transcribing = False
        self.recording_process = None
        self.work_dir = "/tmp/whisper-dictate"
        os.makedirs(self.work_dir, exist_ok=True)

        # menu
        menu = Gtk.Menu()

        self.status_item = Gtk.MenuItem(label="Ready")
        self.status_item.set_sensitive(False)
        menu.append(self.status_item)

        menu.append(Gtk.SeparatorMenuItem())

        toggle_item = Gtk.MenuItem(label="Toggle Recording")
        toggle_item.connect("activate", self.toggle_recording)
        menu.append(toggle_item)

        quit_item = Gtk.MenuItem(label="Quit")
        quit_item.connect("activate", self.quit)
        menu.append(quit_item)

        menu.show_all()
        self.indicator.set_menu(menu)

        # Let the GLib main loop dispatch SIGUSR1 itself.  A handler installed
        # with signal.signal() only runs once the Python interpreter regains
        # control, which can be delayed while Gtk.main() sits idle.
        GLib.unix_signal_add(
            GLib.PRIORITY_DEFAULT, signal.SIGUSR1, self._on_sigusr1
        )

    def _on_sigusr1(self):
        """Toggle recording on SIGUSR1; return True to keep the source."""
        self.toggle_recording()
        return True

    def update_status(self, text):
        """Set the status menu label to *text* from any thread.

        The label change is scheduled on the GLib main loop because this is
        also called from the transcription worker thread.
        """
        GLib.idle_add(lambda: self.status_item.set_label(text))

    def toggle_recording(self, widget=None):
        """Start recording if idle, otherwise stop and transcribe.

        *widget* is the menu item when invoked as a GTK "activate" handler
        and is ignored.  The toggle is ignored while a transcription is in
        progress, because the recorder and the transcriber share one file.
        """
        if self.transcribing:
            return
        if self.recording:
            self.stop_recording()
        else:
            self.start_recording()

    def start_recording(self):
        """Launch ``arecord`` writing 16 kHz mono S16_LE audio to the work dir."""
        audio_file = os.path.join(self.work_dir, "recording.wav")
        try:
            self.recording_process = subprocess.Popen([
                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1",
                audio_file,
            ])
        except OSError as err:
            # e.g. arecord is not installed; stay in the idle state.
            self.recording_process = None
            self.update_status(f"⚠ Cannot start arecord: {err}")
            return

        # Only flip the state once the recorder is actually running.
        self.recording = True
        self.indicator.set_icon("media-record")
        self.update_status("🔴 Recording...")

    def _stop_recorder(self):
        """Terminate the ``arecord`` child, if any, and wait for it to exit."""
        process = self.recording_process
        self.recording_process = None
        if process is not None:
            process.terminate()
            process.wait()

    def stop_recording(self):
        """Stop the recorder and transcribe the capture in a worker thread."""
        self.recording = False
        self.indicator.set_icon("audio-input-microphone")
        self.update_status("⏳ Transcribing...")

        self._stop_recorder()

        # transcribe in background thread so the GTK main loop stays responsive
        self.transcribing = True
        threading.Thread(target=self.transcribe, daemon=True).start()

    def _run_whisper(self, audio_file, txt_file):
        """Run ``whisper`` on *audio_file* and return the stripped transcript.

        Returns None when whisper cannot be launched or *txt_file* (where the
        transcript is expected to be written) cannot be read.
        """
        try:
            subprocess.run([
                "whisper", audio_file,
                "--model", "base",
                "--language", "en",
                "--output_format", "txt",
                "--output_dir", self.work_dir,
            ], capture_output=True)
            with open(txt_file, 'r', encoding='utf-8') as f:
                return f.read().strip()
        except (OSError, UnicodeDecodeError):
            return None

    def _deliver(self, text):
        """Copy *text* to the clipboard and type it into the focused window."""
        # use xsel instead of xclip - more reliable with systemd
        # fall back to :0 only when DISPLAY is missing (e.g. under systemd),
        # so a session running on another display keeps working
        env = os.environ.copy()
        env.setdefault('DISPLAY', ':0')
        subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)

        # type it - small delay to ensure focus
        time.sleep(0.1)
        subprocess.run(
            ["xdotool", "type", "--clearmodifiers", "--", text], env=env
        )

    def transcribe(self):
        """Transcribe the last recording, deliver the text and clean up.

        Runs in a worker thread.  On failure, or when the transcript is
        empty, only the status label is updated; nothing is typed or copied.
        The recording and transcript files are removed in every case and the
        ``transcribing`` guard is released.
        """
        audio_file = os.path.join(self.work_dir, "recording.wav")
        txt_file = os.path.splitext(audio_file)[0] + '.txt'

        try:
            text = self._run_whisper(audio_file, txt_file)
            if text is None:
                self.update_status("⚠ Transcription failed")
                return
            if not text:
                self.update_status("⚠ No speech detected")
                return

            # update UI
            self.update_status(f"✓ Done: {text[:30]}...")

            try:
                self._deliver(text)
            except OSError as err:
                # e.g. xsel or xdotool is not installed
                self.update_status(f"⚠ Could not type text: {err}")
        finally:
            # cleanup
            for path in (audio_file, txt_file):
                try:
                    os.unlink(path)
                except FileNotFoundError:
                    pass
            self.transcribing = False

    def quit(self, widget=None):
        """Stop any running recorder and leave the GTK main loop."""
        self.recording = False
        self._stop_recorder()
        Gtk.main_quit()


def main():
    """Create the tray application and run the GTK main loop."""
    app = WhisperDictate()  # keep a reference for the lifetime of the loop
    Gtk.main()


if __name__ == "__main__":
    main()