summaryrefslogtreecommitdiff
path: root/whisper-dictate/whisper_dictate_daemon.py
blob: 77d458ba7624745ec933875261b0d1038c525c98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
#!/usr/bin/env python3
# whisper_dictate_daemon.py
import gi
gi.require_version('Gtk', '3.0')
gi.require_version('AppIndicator3', '0.1')
from gi.repository import Gtk, AppIndicator3, GLib
import subprocess
import tempfile
import threading
import os
import time
import signal

class WhisperDictate:
    """Tray-icon dictation daemon.

    A recording is captured with ``arecord``, transcribed with the
    ``whisper`` command-line tool, copied to the clipboard with ``xsel``
    and typed into the focused window with ``xdotool``.  Recording is
    toggled from the indicator menu or by sending ``SIGUSR1`` to the
    process.
    """

    def __init__(self):
        self.indicator = AppIndicator3.Indicator.new(
            "whisper-dictate",
            "audio-input-microphone",
            AppIndicator3.IndicatorCategory.APPLICATION_STATUS
        )
        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)
        self.recording = False
        self.recording_process = None
        # Path of the WAV file being written by the current recording.
        self.audio_file = None
        self.work_dir = "/tmp/whisper-dictate"
        # The mode only applies when the directory is created here; an
        # already existing directory keeps whatever permissions it has.
        os.makedirs(self.work_dir, mode=0o700, exist_ok=True)

        # menu
        menu = Gtk.Menu()
        self.status_item = Gtk.MenuItem(label="Ready")
        self.status_item.set_sensitive(False)
        menu.append(self.status_item)
        menu.append(Gtk.SeparatorMenuItem())

        toggle_item = Gtk.MenuItem(label="Toggle Recording")
        toggle_item.connect("activate", self.toggle_recording)
        menu.append(toggle_item)

        quit_item = Gtk.MenuItem(label="Quit")
        quit_item.connect("activate", self.quit_app)
        menu.append(quit_item)

        menu.show_all()
        self.indicator.set_menu(menu)
        # The handler only schedules the toggle; the actual work runs from
        # the GLib main loop.  toggle_recording returns None, so the idle
        # source fires once per signal.
        signal.signal(signal.SIGUSR1, lambda sig, frame: GLib.idle_add(self.toggle_recording))

    def update_status(self, text):
        """Schedule *text* as the new label of the status menu item.

        The label is set from a GLib idle callback, so this may be called
        from the transcription thread as well as from the main loop.
        """
        GLib.idle_add(self.status_item.set_label, text)

    def toggle_recording(self, widget=None):
        """Start recording if idle, otherwise stop and transcribe.

        *widget* is the menu item when invoked from the menu and is unused.
        """
        if self.recording:
            self.stop_recording()
        else:
            self.start_recording()

    def quit_app(self, widget=None):
        """Stop a running recorder, discard its file and leave the main loop."""
        self._stop_recorder()
        if self.audio_file is not None:
            self._remove_files(self.audio_file)
            self.audio_file = None
        Gtk.main_quit()

    def start_recording(self):
        """Launch ``arecord`` writing 16 kHz mono S16_LE audio to a fresh file.

        State and icon are only switched to "recording" once the recorder
        process has actually been started; on failure the error is shown in
        the status item and the daemon stays idle.
        """
        audio_file = None
        try:
            # The work directory lives under /tmp and may have been removed
            # since start-up.
            os.makedirs(self.work_dir, mode=0o700, exist_ok=True)
            # A unique file per recording keeps a new recording from
            # colliding with a transcription that is still running.
            fd, audio_file = tempfile.mkstemp(
                prefix="recording-", suffix=".wav", dir=self.work_dir
            )
            os.close(fd)
            process = subprocess.Popen([
                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file
            ])
        except OSError as exc:
            if audio_file is not None:
                self._remove_files(audio_file)
            self.update_status(f"ERROR: cannot start recording ({exc})")
            return

        self.recording_process = process
        self.audio_file = audio_file
        self.recording = True
        self.indicator.set_icon("media-record")
        self.update_status("🔴 Recording...")

    def stop_recording(self):
        """Stop the recorder and transcribe its file in a background thread."""
        self.recording = False
        self.indicator.set_icon("audio-input-microphone")
        self._stop_recorder()

        audio_file, self.audio_file = self.audio_file, None
        if audio_file is None:
            # Nothing was recorded, so there is nothing to transcribe.
            self.update_status("Ready")
            return

        self.update_status("⏳ Transcribing...")
        # transcribe in background thread
        threading.Thread(
            target=self.transcribe, args=(audio_file,), daemon=True
        ).start()

    def transcribe(self, audio_file=None):
        """Run whisper on *audio_file* and deliver the recognised text.

        Args:
            audio_file: WAV file to transcribe.  Defaults to
                ``recording.wav`` inside the work directory.

        On success the text is copied to the clipboard and typed into the
        focused window.  On failure only the status item is updated; no
        error text is typed or copied.  The audio file and the transcript
        are removed afterwards in either case.
        """
        if audio_file is None:
            audio_file = os.path.join(self.work_dir, "recording.wav")
        txt_file = self._transcript_path(audio_file)

        text = None
        try:
            # run whisper
            subprocess.run([
                "whisper", audio_file,
                "--model", "base",
                "--language", "en",
                "--output_format", "txt",
                "--output_dir", self.work_dir
            ], capture_output=True)
            # get text; a missing transcript means the run failed
            with open(txt_file, 'r', encoding='utf-8') as f:
                text = f.read().strip()
        except (OSError, UnicodeDecodeError):
            # whisper is not installed, or the transcript is missing/unreadable
            text = None

        if text is None:
            self.update_status("ERROR: transcription failed")
        elif not text:
            self.update_status("No speech detected")
        else:
            self.update_status(f"✓ Done: {text[:30]}...")
            self._deliver(text)

        # cleanup
        self._remove_files(audio_file, txt_file)

    def _deliver(self, text):
        """Copy *text* to the clipboard and type it into the focused window."""
        # use xsel instead of xclip - more reliable with systemd
        # DISPLAY is only filled in when the environment does not provide
        # one, so a session on another display keeps working.
        env = os.environ.copy()
        if not env.get('DISPLAY'):
            env['DISPLAY'] = ':0'

        try:
            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
        except OSError as exc:
            self.update_status(f"ERROR: clipboard copy failed ({exc})")

        # type it - small delay to ensure focus
        time.sleep(0.1)
        try:
            subprocess.run(["xdotool", "type", "--clearmodifiers", "--", text], env=env)
        except OSError as exc:
            self.update_status(f"ERROR: typing failed ({exc})")

    def _stop_recorder(self):
        """Terminate the recorder process, if any, and wait for it to exit."""
        process, self.recording_process = self.recording_process, None
        if process is None:
            return
        process.terminate()
        try:
            # Bounded wait: this runs on the main loop and must not hang it.
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

    @staticmethod
    def _transcript_path(audio_file):
        """Return *audio_file* with its extension replaced by ``.txt``."""
        return os.path.splitext(audio_file)[0] + ".txt"

    @staticmethod
    def _remove_files(*paths):
        """Delete each of *paths*; cleanup is best-effort, so errors are ignored."""
        for path in paths:
            try:
                os.unlink(path)
            except OSError:
                pass

def main():
    """Build the tray application and run the GTK main loop."""
    # Bound to a local so the instance stays referenced while the loop runs.
    daemon = WhisperDictate()
    Gtk.main()


# Start the daemon only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()