summary | refs | log | tree | commit | diff
path: root/whisper-dictate
diff options
context:
space:
mode:
author: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
committer: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
commit: f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch)
tree: 3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate
parent: 1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff)
add whisper-dictate subservice (HEAD, master)
Diffstat (limited to 'whisper-dictate')
-rw-r--r--  whisper-dictate/default.nix  35
-rw-r--r--  whisper-dictate/service.nix  18
-rw-r--r--  whisper-dictate/setup.py  17
-rw-r--r--  whisper-dictate/whisper_dictate_daemon.py  121
4 files changed, 191 insertions, 0 deletions
diff --git a/whisper-dictate/default.nix b/whisper-dictate/default.nix
new file mode 100644
index 0000000..cbb52f5
--- /dev/null
+++ b/whisper-dictate/default.nix
@@ -0,0 +1,35 @@
+# Nix derivation for the whisper-dictate tray daemon (built from setup.py).
+{pkgs ? import <nixpkgs> {}}:
+pkgs.python3Packages.buildPythonApplication {
+  pname = "whisper-dictate";
+  version = "0.1.0";
+  src = ./.;
+
+  format = "setuptools";
+
+  # Build-time hooks: GTK wrapper generation and typelib discovery.
+  nativeBuildInputs = with pkgs; [
+    wrapGAppsHook
+    gobject-introspection
+  ];
+
+  # Libraries whose typelibs the daemon loads via gi.repository.
+  buildInputs = with pkgs; [
+    gtk3
+    libappindicator-gtk3
+    glib
+  ];
+
+  # Python dependencies plus the command-line tools the daemon shells out to
+  # (arecord from alsa-utils, xdotool, xsel, and the whisper CLI).
+  propagatedBuildInputs = with pkgs; [
+    python3Packages.pygobject3
+    python3Packages.openai-whisper
+    alsa-utils
+    xdotool
+    xsel # more reliable than xclip
+  ];
+
+  # NOTE(review): presumably relaxed so gobject-introspection is picked up at
+  # build time -- confirm whether this is still required.
+  strictDeps = false;
+
+  # buildPythonApplication already wraps the executables. Let it do the only
+  # wrapping and hand it the GApps arguments below; leaving wrapGAppsHook's
+  # own wrapping enabled as well would wrap every binary twice.
+  dontWrapGApps = true;
+
+  preFixup = ''
+    makeWrapperArgs+=("''${gappsWrapperArgs[@]}")
+  '';
+}
diff --git a/whisper-dictate/service.nix b/whisper-dictate/service.nix
new file mode 100644
index 0000000..f0f394c
--- /dev/null
+++ b/whisper-dictate/service.nix
@@ -0,0 +1,18 @@
+# Home-manager module: installs whisper-dictate and runs its daemon as a
+# systemd user service.
+{ pkgs, ... }:
+
+let
+  daemon = pkgs.whisper-dictate;
+in
+{
+  home.packages = [ daemon ];
+
+  systemd.user.services.whisper-dictate = {
+    Unit.Description = "Whisper dictation daemon";
+    # Ordered after the graphical session target.
+    Unit.After = [ "graphical-session.target" ];
+
+    Service.ExecStart = "${daemon}/bin/whisper-dictate-daemon";
+    Service.Restart = "on-failure";
+    # NOTE(review): hard-codes the first X display -- confirm this matches
+    # the session the daemon is expected to type into.
+    Service.Environment = "DISPLAY=:0";
+
+    Install.WantedBy = [ "default.target" ];
+  };
+}
diff --git a/whisper-dictate/setup.py b/whisper-dictate/setup.py
new file mode 100644
index 0000000..516c0ca
--- /dev/null
+++ b/whisper-dictate/setup.py
@@ -0,0 +1,17 @@
+# setup.py
+from setuptools import setup, find_packages
+
+# Console command exposed by the package, mapped to the daemon's main().
+CONSOLE_SCRIPTS = [
+    "whisper-dictate-daemon=whisper_dictate_daemon:main",
+]
+
+# Python distributions the daemon imports at runtime.
+RUNTIME_REQUIREMENTS = [
+    "PyGObject",
+    "openai-whisper",
+]
+
+# The project is a single module, so it is listed via py_modules rather
+# than discovered as a package.
+setup(
+    name="whisper-dictate",
+    version="0.1.0",
+    py_modules=["whisper_dictate_daemon"],
+    entry_points={"console_scripts": CONSOLE_SCRIPTS},
+    install_requires=RUNTIME_REQUIREMENTS,
+)
diff --git a/whisper-dictate/whisper_dictate_daemon.py b/whisper-dictate/whisper_dictate_daemon.py
new file mode 100644
index 0000000..77d458b
--- /dev/null
+++ b/whisper-dictate/whisper_dictate_daemon.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+# whisper-dictate-daemon.py
+import gi
+gi.require_version('Gtk', '3.0')
+gi.require_version('AppIndicator3', '0.1')
+from gi.repository import Gtk, AppIndicator3, GLib
+import subprocess
+import tempfile
+import threading
+import os
+import time
+import signal
+
+class WhisperDictate:
+    """Tray-icon daemon that records speech and types the Whisper transcript.
+
+    Recording is toggled from the indicator menu or by sending SIGUSR1 to the
+    process. Audio is captured with ``arecord``, transcribed by the
+    ``whisper`` command-line tool on a background thread, copied to the
+    clipboard with ``xsel`` and typed into the focused window with
+    ``xdotool``.
+    """
+
+    def __init__(self):
+        """Create the indicator, its menu, and the SIGUSR1 toggle hook."""
+        self.indicator = AppIndicator3.Indicator.new(
+            "whisper-dictate",
+            "audio-input-microphone",
+            AppIndicator3.IndicatorCategory.APPLICATION_STATUS,
+        )
+        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
+        self.recording = False
+        self.recording_process = None
+        # Path of the recording currently being captured, if any.
+        self.audio_file = None
+
+        # Recordings hold the user's speech, so keep them in a private
+        # directory: the per-user runtime dir when there is one, /tmp
+        # otherwise, created with owner-only permissions.
+        runtime_dir = os.environ.get("XDG_RUNTIME_DIR") or "/tmp"
+        self.work_dir = os.path.join(runtime_dir, "whisper-dictate")
+        os.makedirs(self.work_dir, mode=0o700, exist_ok=True)
+
+        # menu
+        menu = Gtk.Menu()
+        self.status_item = Gtk.MenuItem(label="Ready")
+        self.status_item.set_sensitive(False)
+        menu.append(self.status_item)
+        menu.append(Gtk.SeparatorMenuItem())
+
+        toggle_item = Gtk.MenuItem(label="Toggle Recording")
+        toggle_item.connect("activate", self.toggle_recording)
+        menu.append(toggle_item)
+
+        quit_item = Gtk.MenuItem(label="Quit")
+        quit_item.connect("activate", Gtk.main_quit)
+        menu.append(quit_item)
+
+        menu.show_all()
+        self.indicator.set_menu(menu)
+
+        # A handler installed with signal.signal() only runs once the
+        # interpreter gets control back, which does not happen while
+        # Gtk.main() sits idle in C. Registering the signal as a GLib source
+        # lets the main loop dispatch it straight away.
+        GLib.unix_signal_add(
+            GLib.PRIORITY_DEFAULT, signal.SIGUSR1, self._on_sigusr1
+        )
+
+    def _on_sigusr1(self, *_args):
+        """Toggle recording on SIGUSR1; return True to stay registered."""
+        self.toggle_recording()
+        return True
+
+    def update_status(self, text):
+        """Schedule a status-label update on the GTK main loop.
+
+        Going through ``GLib.idle_add`` makes this usable from the
+        transcription thread as well as from the main thread.
+        """
+        GLib.idle_add(self.status_item.set_label, text)
+
+    def toggle_recording(self, widget=None):
+        """Start recording if idle, otherwise stop and transcribe.
+
+        ``widget`` is the menu item passed by the "activate" signal; it is
+        unused.
+        """
+        if self.recording:
+            self.stop_recording()
+        else:
+            self.start_recording()
+
+    def start_recording(self):
+        """Launch ``arecord`` capturing 16 kHz mono 16-bit audio."""
+        # Reserve a unique file name per recording so a transcription that
+        # is still running cannot read or delete the new capture.
+        fd, audio_file = tempfile.mkstemp(
+            prefix="recording-", suffix=".wav", dir=self.work_dir
+        )
+        os.close(fd)
+
+        try:
+            self.recording_process = subprocess.Popen([
+                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1",
+                audio_file,
+            ])
+        except OSError as err:
+            # arecord missing or not executable: stay in the idle state.
+            os.unlink(audio_file)
+            self.update_status(f"ERROR: cannot start arecord ({err.strerror})")
+            return
+
+        self.audio_file = audio_file
+        self.recording = True
+        self.indicator.set_icon("media-record")
+        self.update_status("🔴 Recording...")
+
+    def stop_recording(self):
+        """Stop ``arecord`` and transcribe the capture in the background."""
+        self.recording = False
+        self.indicator.set_icon("audio-input-microphone")
+
+        process, self.recording_process = self.recording_process, None
+        audio_file, self.audio_file = self.audio_file, None
+
+        if process is not None:
+            process.terminate()
+            try:
+                # Bounded wait so a stuck arecord cannot freeze the UI.
+                process.wait(timeout=5)
+            except subprocess.TimeoutExpired:
+                process.kill()
+                process.wait()
+
+        if audio_file is None:
+            self.update_status("Ready")
+            return
+
+        self.update_status("⏳ Transcribing...")
+        # transcribe in background thread
+        threading.Thread(
+            target=self.transcribe, args=(audio_file,), daemon=True
+        ).start()
+
+    def transcribe(self, audio_file=None):
+        """Run Whisper on ``audio_file`` and type the resulting text.
+
+        Args:
+            audio_file: Path of the WAV file to transcribe. Defaults to
+                ``recording.wav`` inside the work directory.
+
+        The audio file and the transcript Whisper writes next to it are
+        removed afterwards, whether or not transcription succeeded.
+        Failures are reported in the status label only; nothing is typed
+        or copied in that case.
+        """
+        if audio_file is None:
+            audio_file = os.path.join(self.work_dir, "recording.wav")
+        # Whisper names its output after the audio file's base name.
+        txt_file = os.path.splitext(audio_file)[0] + ".txt"
+
+        try:
+            # run whisper
+            subprocess.run([
+                "whisper", audio_file,
+                "--model", "base",
+                "--language", "en",
+                "--output_format", "txt",
+                "--output_dir", self.work_dir,
+            ], capture_output=True)
+
+            # get text
+            with open(txt_file, "r", encoding="utf-8") as f:
+                text = f.read().strip()
+        except OSError:
+            # whisper is missing, or it failed and produced no transcript.
+            self.update_status("ERROR: transcription failed")
+            return
+        finally:
+            # cleanup (best effort)
+            for path in (audio_file, txt_file):
+                try:
+                    os.unlink(path)
+                except OSError:
+                    pass
+
+        if not text:
+            self.update_status("✓ Done: (no speech detected)")
+            return
+
+        # update UI
+        self.update_status(f"✓ Done: {text[:30]}...")
+
+        # use xsel instead of xclip - more reliable with systemd
+        # fall back to the first display only when none is configured
+        env = os.environ.copy()
+        env.setdefault("DISPLAY", ":0")
+        try:
+            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
+
+            # type it - small delay to ensure focus
+            time.sleep(0.1)
+            subprocess.run(
+                ["xdotool", "type", "--clearmodifiers", "--", text], env=env
+            )
+        except OSError as err:
+            self.update_status(f"ERROR: cannot type text ({err.strerror})")
+
+def main():
+    """Build the tray daemon and run the GTK main loop until it quits."""
+    # Hold a reference for as long as the loop runs so the indicator
+    # object is not garbage-collected.
+    daemon = WhisperDictate()
+    Gtk.main()
+
+
+# Direct execution; the installed console script calls main() itself.
+if __name__ == "__main__":
+    main()