diff options
| -rw-r--r-- | lib/xmonad.hs | 2 | ||||
| -rw-r--r-- | overlay.nix | 2 | ||||
| -rw-r--r-- | profiles/beryllium.nix | 1 | ||||
| -rw-r--r-- | whisper-dictate/default.nix | 35 | ||||
| -rw-r--r-- | whisper-dictate/service.nix | 18 | ||||
| -rw-r--r-- | whisper-dictate/setup.py | 17 | ||||
| -rw-r--r-- | whisper-dictate/whisper_dictate_daemon.py | 121 |
7 files changed, 196 insertions, 0 deletions
diff --git a/lib/xmonad.hs b/lib/xmonad.hs index 575ad43..e58ab09 100644 --- a/lib/xmonad.hs +++ b/lib/xmonad.hs @@ -130,6 +130,8 @@ insKeys conf@(XConfig {modMask = modMask}) = -- run my script ~/bin/open-webui-ask ((meh, xK_a), spawn "open-webui-ask"), + -- toggle whisper-dictate + ((meh, xK_t), spawn "pkill -USR1 -f whisper-dictate-daemon"), -- refresh display via autorandr ( (modMask, xK_r), diff --git a/overlay.nix b/overlay.nix index 1545d9d..53a5086 100644 --- a/overlay.nix +++ b/overlay.nix @@ -5,4 +5,6 @@ self: super: cmdtree = super.callPackage ./pkgs/cmdtree.nix {}; forgit = super.callPackage ./pkgs/forgit.nix {}; zebra = super.callPackage ./pkgs/zebra.nix {}; + + whisper-dictate = super.callPackage ./whisper-dictate {}; } diff --git a/profiles/beryllium.nix b/profiles/beryllium.nix index 03d7d03..1087cbb 100644 --- a/profiles/beryllium.nix +++ b/profiles/beryllium.nix @@ -17,6 +17,7 @@ in { ../lib/polybar.nix ../lib/ssh.nix ../lib/urxvt.nix + ../whisper-dictate/service.nix ]; home = { diff --git a/whisper-dictate/default.nix b/whisper-dictate/default.nix new file mode 100644 index 0000000..cbb52f5 --- /dev/null +++ b/whisper-dictate/default.nix @@ -0,0 +1,35 @@ +{pkgs ? import <nixpkgs> {}}: +pkgs.python3Packages.buildPythonApplication { + pname = "whisper-dictate"; + version = "0.1.0"; + src = ./.; + + format = "setuptools"; + + nativeBuildInputs = with pkgs; [ + wrapGAppsHook + gobject-introspection + ]; + + buildInputs = with pkgs; [ + gtk3 + libappindicator-gtk3 + glib + ]; + + propagatedBuildInputs = with pkgs; [ + python3Packages.pygobject3 + python3Packages.openai-whisper + alsa-utils + xdotool + xsel # more reliable than xclip + ]; + + strictDeps = false; + + dontWrapGApps = false; + + preFixup = '' + makeWrapperArgs+=("''${gappsWrapperArgs[@]}") + ''; +} diff --git a/whisper-dictate/service.nix b/whisper-dictate/service.nix new file mode 100644 index 0000000..f0f394c --- /dev/null +++ b/whisper-dictate/service.nix @@ -0,0 +1,18 @@ +{ pkgs, ... }: + +{ + home.packages = [ pkgs.whisper-dictate ]; + + systemd.user.services.whisper-dictate = { + Unit = { + Description = "Whisper dictation daemon"; + After = [ "graphical-session.target" ]; + }; + Service = { + ExecStart = "${pkgs.whisper-dictate}/bin/whisper-dictate-daemon"; + Restart = "on-failure"; + Environment = "DISPLAY=:0"; + }; + Install.WantedBy = [ "default.target" ]; + }; +} diff --git a/whisper-dictate/setup.py b/whisper-dictate/setup.py new file mode 100644 index 0000000..516c0ca --- /dev/null +++ b/whisper-dictate/setup.py @@ -0,0 +1,17 @@ +# setup.py +from setuptools import setup, find_packages + +setup( + name="whisper-dictate", + version="0.1.0", + py_modules=["whisper_dictate_daemon"], + entry_points={ + "console_scripts": [ + "whisper-dictate-daemon=whisper_dictate_daemon:main", + ], + }, + install_requires=[ + "PyGObject", + "openai-whisper", + ], +) diff --git a/whisper-dictate/whisper_dictate_daemon.py b/whisper-dictate/whisper_dictate_daemon.py new file mode 100644 index 0000000..77d458b --- /dev/null +++ b/whisper-dictate/whisper_dictate_daemon.py @@ -0,0 +1,121 @@ +#!/usr/bin/env python3 +# whisper-dictate-daemon.py +import gi +gi.require_version('Gtk', '3.0') +gi.require_version('AppIndicator3', '0.1') +from gi.repository import Gtk, AppIndicator3, GLib +import subprocess +import tempfile +import threading +import os +import time +import signal + +class WhisperDictate: + def __init__(self): + self.indicator = AppIndicator3.Indicator.new( + "whisper-dictate", + "audio-input-microphone", + AppIndicator3.IndicatorCategory.APPLICATION_STATUS + ) + self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE) + self.recording = False + self.recording_process = None + self.work_dir = "/tmp/whisper-dictate" + os.makedirs(self.work_dir, exist_ok=True) + + # menu + menu = Gtk.Menu() + self.status_item = Gtk.MenuItem(label="Ready") + self.status_item.set_sensitive(False) + menu.append(self.status_item) + menu.append(Gtk.SeparatorMenuItem()) + + toggle_item = Gtk.MenuItem(label=f"Toggle Recording") + toggle_item.connect("activate", self.toggle_recording) + menu.append(toggle_item) + + quit_item = Gtk.MenuItem(label="Quit") + quit_item.connect("activate", Gtk.main_quit) + menu.append(quit_item) + + menu.show_all() + self.indicator.set_menu(menu) + signal.signal(signal.SIGUSR1, lambda sig, frame: GLib.idle_add(self.toggle_recording)) + + def update_status(self, text): + GLib.idle_add(lambda: self.status_item.set_label(text)) + + def toggle_recording(self, widget=None): + if self.recording: + self.stop_recording() + else: + self.start_recording() + + def start_recording(self): + self.recording = True + self.indicator.set_icon("media-record") + self.update_status("🔴 Recording...") + + audio_file = os.path.join(self.work_dir, "recording.wav") + self.recording_process = subprocess.Popen([ + "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file + ]) + + def stop_recording(self): + self.recording = False + self.indicator.set_icon("audio-input-microphone") + self.update_status("⏳ Transcribing...") + + if self.recording_process: + self.recording_process.terminate() + self.recording_process.wait() + + # transcribe in background thread + threading.Thread(target=self.transcribe, daemon=True).start() + + def transcribe(self): + audio_file = os.path.join(self.work_dir, "recording.wav") + + # run whisper + result = subprocess.run([ + "whisper", audio_file, + "--model", "base", + "--language", "en", + "--output_format", "txt", + "--output_dir", self.work_dir + ], capture_output=True) + + # get text + txt_file = audio_file.replace('.wav', '.txt') + try: + with open(txt_file, 'r') as f: + text = f.read().strip() + except: + text = "ERROR: transcription failed" + + # update UI + self.update_status(f"✓ Done: {text[:30]}...") + + # use xsel instead of xclip - more reliable with systemd + # also set DISPLAY explicitly + env = os.environ.copy() + env['DISPLAY'] = ':0' + subprocess.run(["xsel", "-bi"], input=text.encode(), env=env) + + # type it - small delay to ensure focus + time.sleep(0.1) + subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], env=env) + + # cleanup + for f in [audio_file, txt_file]: + if os.path.exists(f): + os.unlink(f) + +def main(): + app = WhisperDictate() + Gtk.main() + + +if __name__ == "__main__": + main() |
