add whisper-dictate subservice

author: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
committer: Ben Sima <ben@bensima.com> 2025-09-18 11:42:17 -0400
commit: f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch)
tree: 3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate
parent: 1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff)
4 files changed, 191 insertions, 0 deletions
diff --git a/whisper-dictate/default.nix b/whisper-dictate/default.nix
new file mode 100644
index 0000000..cbb52f5
--- /dev/null
+++ b/whisper-dictate/default.nix
@@ -0,0 +1,45 @@
+# Nix package for the whisper-dictate tray daemon.
+#
+# Builds the setuptools project in this directory and wraps the resulting
+# `whisper-dictate-daemon` script so that the GTK/GObject-introspection
+# environment collected by wrapGAppsHook is applied to it.
+{pkgs ? import <nixpkgs> {}}:
+pkgs.python3Packages.buildPythonApplication {
+  pname = "whisper-dictate";
+  version = "0.1.0";
+  src = ./.;
+
+  format = "setuptools";
+
+  nativeBuildInputs = with pkgs; [
+    wrapGAppsHook
+    gobject-introspection
+  ];
+
+  buildInputs = with pkgs; [
+    gtk3
+    libappindicator-gtk3
+    glib
+  ];
+
+  # Python dependencies plus the command-line tools the daemon runs by name
+  # (arecord from alsa-utils, xdotool, xsel).
+  propagatedBuildInputs = with pkgs; [
+    python3Packages.pygobject3
+    python3Packages.openai-whisper
+    alsa-utils
+    xdotool
+    xsel  # more reliable than xclip
+  ];
+
+  strictDeps = false;
+
+  # buildPythonApplication already wraps the entry point; hand the GApps
+  # arguments to that single wrapper (see preFixup) instead of letting
+  # wrapGAppsHook wrap the script a second time.
+  dontWrapGApps = true;
+
+  preFixup = ''
+    makeWrapperArgs+=("''${gappsWrapperArgs[@]}")
+  '';
+}
diff --git a/whisper-dictate/service.nix b/whisper-dictate/service.nix
new file mode 100644
index 0000000..f0f394c
--- /dev/null
+++ b/whisper-dictate/service.nix
@@ -0,0 +1,25 @@
+# Home Manager module: installs whisper-dictate and runs its tray daemon as a
+# systemd user service.
+{ pkgs, ... }:
+
+{
+  home.packages = [ pkgs.whisper-dictate ];
+
+  systemd.user.services.whisper-dictate = {
+    Unit = {
+      Description = "Whisper dictation daemon";
+      After = [ "graphical-session.target" ];
+      # Stop the daemon together with the graphical session it depends on.
+      PartOf = [ "graphical-session.target" ];
+    };
+    Service = {
+      ExecStart = "${pkgs.whisper-dictate}/bin/whisper-dictate-daemon";
+      Restart = "on-failure";
+      Environment = "DISPLAY=:0";
+    };
+    # Start with the graphical session rather than default.target: the daemon
+    # needs a display, and `After=` alone only orders units, it does not delay
+    # startup until the session exists.
+    Install.WantedBy = [ "graphical-session.target" ];
+  };
+}
diff --git a/whisper-dictate/setup.py b/whisper-dictate/setup.py
new file mode 100644
index 0000000..516c0ca
--- /dev/null
+++ b/whisper-dictate/setup.py
@@ -0,0 +1,22 @@
+# setup.py
+"""Packaging script for the whisper-dictate daemon (single-module project)."""
+from setuptools import setup, find_packages
+
+# Console command -> "module:callable" that implements it.
+CONSOLE_SCRIPTS = [
+    "whisper-dictate-daemon=whisper_dictate_daemon:main",
+]
+
+# Python distributions required at runtime.
+REQUIREMENTS = [
+    "PyGObject",
+    "openai-whisper",
+]
+
+setup(
+    name="whisper-dictate",
+    version="0.1.0",
+    py_modules=["whisper_dictate_daemon"],
+    entry_points={"console_scripts": CONSOLE_SCRIPTS},
+    install_requires=REQUIREMENTS,
+)
diff --git a/whisper-dictate/whisper_dictate_daemon.py b/whisper-dictate/whisper_dictate_daemon.py
new file mode 100644
index 0000000..77d458b
--- /dev/null
+++ b/whisper-dictate/whisper_dictate_daemon.py
@@ -0,0 +1,211 @@
+#!/usr/bin/env python3
+# whisper-dictate-daemon.py
+"""Tray daemon: record speech, transcribe it with whisper, type the result.
+
+A tray icon offers a "Toggle Recording" menu entry, and SIGUSR1 toggles
+recording as well.  ``arecord`` captures the audio; when recording stops the
+``whisper`` CLI transcribes it in a background thread and the text is copied
+to the clipboard with ``xsel`` and typed with ``xdotool``.
+"""
+import gi
+gi.require_version('Gtk', '3.0')
+gi.require_version('AppIndicator3', '0.1')
+from gi.repository import Gtk, AppIndicator3, GLib
+import subprocess
+import tempfile
+import threading
+import os
+import time
+import signal
+
+
+def _make_work_dir():
+    """Return a private directory for recordings and transcripts.
+
+    Uses ``$XDG_RUNTIME_DIR/whisper-dictate`` when available, otherwise a
+    fresh ``tempfile.mkdtemp`` directory.  A fixed name directly under the
+    shared /tmp could be pre-created or symlinked by another local user.
+    """
+    runtime_dir = os.environ.get("XDG_RUNTIME_DIR")
+    if runtime_dir:
+        path = os.path.join(runtime_dir, "whisper-dictate")
+        try:
+            os.makedirs(path, mode=0o700, exist_ok=True)
+        except OSError:
+            pass  # unusable runtime dir: fall back to mkdtemp below
+        else:
+            return path
+    return tempfile.mkdtemp(prefix="whisper-dictate-")
+
+
+class WhisperDictate:
+    """Tray application that drives the record -> transcribe -> type cycle."""
+
+    def __init__(self):
+        """Create the indicator and its menu and install the SIGUSR1 handler."""
+        self.indicator = AppIndicator3.Indicator.new(
+            "whisper-dictate",
+            "audio-input-microphone",
+            AppIndicator3.IndicatorCategory.APPLICATION_STATUS
+        )
+        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
+        self.recording = False
+        self.recording_process = None
+        self.audio_file = None  # file of the current/most recent recording
+        self._recording_count = 0  # makes each recording's file name unique
+        self.work_dir = _make_work_dir()
+
+        # menu
+        menu = Gtk.Menu()
+        self.status_item = Gtk.MenuItem(label="Ready")
+        self.status_item.set_sensitive(False)
+        menu.append(self.status_item)
+        menu.append(Gtk.SeparatorMenuItem())
+
+        toggle_item = Gtk.MenuItem(label="Toggle Recording")
+        toggle_item.connect("activate", self.toggle_recording)
+        menu.append(toggle_item)
+
+        quit_item = Gtk.MenuItem(label="Quit")
+        quit_item.connect("activate", self._on_quit)
+        menu.append(quit_item)
+
+        menu.show_all()
+        self.indicator.set_menu(menu)
+        # The handler only queues the toggle; it then runs from the main loop.
+        signal.signal(signal.SIGUSR1, lambda sig, frame: GLib.idle_add(self.toggle_recording))
+
+    def update_status(self, text):
+        """Queue a change of the status menu label to *text* on the main loop."""
+        GLib.idle_add(lambda: self.status_item.set_label(text))
+
+    def toggle_recording(self, widget=None):
+        """Start recording when idle, stop (and transcribe) when recording."""
+        if self.recording:
+            self.stop_recording()
+        else:
+            self.start_recording()
+
+    def start_recording(self):
+        """Launch ``arecord`` (S16_LE, 16 kHz, mono) writing to a new file."""
+        # One file per recording: the transcription of the previous recording
+        # may still be reading its file and deletes it when it finishes.
+        self._recording_count += 1
+        audio_file = os.path.join(
+            self.work_dir,
+            f"recording-{os.getpid()}-{self._recording_count}.wav",
+        )
+        try:
+            process = subprocess.Popen([
+                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file
+            ])
+        except OSError:
+            # arecord could not be started: stay idle instead of claiming to record
+            self.update_status("ERROR: could not start arecord")
+            return
+
+        self.recording_process = process
+        self.audio_file = audio_file
+        self.recording = True
+        self.indicator.set_icon("media-record")
+        self.update_status("🔴 Recording...")
+
+    def stop_recording(self):
+        """Stop ``arecord`` and transcribe its file in a background thread."""
+        self.recording = False
+        self.indicator.set_icon("audio-input-microphone")
+        self.update_status("⏳ Transcribing...")
+        self._stop_arecord()
+
+        # transcribe in background thread
+        threading.Thread(
+            target=self.transcribe, args=(self.audio_file,), daemon=True
+        ).start()
+
+    def _stop_arecord(self):
+        """Terminate the running ``arecord`` process, if any, and wait for it."""
+        process, self.recording_process = self.recording_process, None
+        if process:
+            process.terminate()
+            process.wait()
+
+    def _on_quit(self, widget=None):
+        """Stop a recording still in progress, then leave the GTK main loop."""
+        self.recording = False
+        self._stop_arecord()
+        Gtk.main_quit()
+
+    def transcribe(self, audio_file=None):
+        """Transcribe *audio_file*, then copy the text and type it.
+
+        *audio_file* defaults to the most recent recording.  When no
+        transcript can be produced only the status label reports the error;
+        nothing is copied or typed.  The audio and transcript files are
+        removed in either case.
+        """
+        if audio_file is None:
+            audio_file = self.audio_file
+        if audio_file is None:
+            self.update_status("ERROR: transcription failed")
+            return
+        txt_file = os.path.splitext(audio_file)[0] + ".txt"
+
+        try:
+            text = self._run_whisper(audio_file, txt_file)
+        finally:
+            # cleanup
+            for path in (audio_file, txt_file):
+                if os.path.exists(path):
+                    os.unlink(path)
+
+        if text is None:
+            self.update_status("ERROR: transcription failed")
+            return
+
+        # update UI
+        self.update_status(f"✓ Done: {text[:30]}...")
+        if text:
+            self._deliver(text)
+
+    def _run_whisper(self, audio_file, txt_file):
+        """Run the whisper CLI on *audio_file*; return its text, or None on failure."""
+        try:
+            # run whisper
+            subprocess.run([
+                "whisper", audio_file,
+                "--model", "base",
+                "--language", "en",
+                "--output_format", "txt",
+                "--output_dir", self.work_dir
+            ], capture_output=True)
+            # get text
+            with open(txt_file, 'r', encoding="utf-8") as f:
+                return f.read().strip()
+        except (OSError, UnicodeDecodeError):
+            # whisper could not be run, or it left no readable transcript
+            return None
+
+    def _deliver(self, text):
+        """Copy *text* to the clipboard (xsel) and type it (xdotool)."""
+        # use xsel instead of xclip - more reliable with systemd
+        # keep the session's DISPLAY; fall back to :0 only when it is unset
+        env = os.environ.copy()
+        env.setdefault('DISPLAY', ':0')
+        try:
+            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
+
+            # type it - small delay to ensure focus
+            time.sleep(0.1)
+            subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], env=env)
+        except OSError:
+            self.update_status("ERROR: could not run xsel/xdotool")
+
+
+def main():
+    """Create the tray application and run the GTK main loop."""
+    app = WhisperDictate()
+    Gtk.main()
+
+
+if __name__ == "__main__":
+    main()
author	Ben Sima <ben@bensima.com>	2025-09-18 11:42:17 -0400
committer	Ben Sima <ben@bensima.com>	2025-09-18 11:42:17 -0400
commit	f8d26da7c054c0a4c89ab12993e00f74ea9260a1 (patch)
tree	3d73580d38a1296c512433b32fa43ada0dfa6d10 /whisper-dictate
parent	1c6de10ac4f9808e3a480ed7b7b11577d13ad005 (diff)