1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
#!/usr/bin/env python3
# whisper-dictate-daemon.py
import gi
gi.require_version('Gtk', '3.0')
gi.require_version('AppIndicator3', '0.1')
from gi.repository import Gtk, AppIndicator3, GLib
import subprocess
import tempfile
import threading
import os
import time
import signal
class WhisperDictate:
    """Tray-icon dictation daemon.

    Recording is toggled from the indicator menu or by sending SIGUSR1 to
    the process.  Audio is captured with ``arecord`` (16 kHz, mono, S16_LE),
    transcribed with the ``whisper`` command-line tool, then copied to the
    clipboard with ``xsel`` and typed into the focused window with
    ``xdotool``.
    """

    def __init__(self):
        """Create the indicator, build its menu and hook up SIGUSR1."""
        self.indicator = AppIndicator3.Indicator.new(
            "whisper-dictate",
            "audio-input-microphone",
            AppIndicator3.IndicatorCategory.APPLICATION_STATUS,
        )
        self.indicator.set_status(AppIndicator3.IndicatorStatus.ACTIVE)

        self.recording = False
        self.recording_process = None
        # Path of the recording currently being captured (None when idle).
        self._audio_file = None
        # Counter used to give every recording its own file name, so a new
        # recording never collides with one that is still being transcribed.
        self._recording_seq = 0
        self.work_dir = "/tmp/whisper-dictate"
        # Recordings are private speech; keep a freshly created dir owner-only.
        os.makedirs(self.work_dir, mode=0o700, exist_ok=True)

        # menu
        menu = Gtk.Menu()
        self.status_item = Gtk.MenuItem(label="Ready")
        self.status_item.set_sensitive(False)
        menu.append(self.status_item)
        menu.append(Gtk.SeparatorMenuItem())

        toggle_item = Gtk.MenuItem(label="Toggle Recording")
        toggle_item.connect("activate", self.toggle_recording)
        menu.append(toggle_item)

        quit_item = Gtk.MenuItem(label="Quit")
        quit_item.connect("activate", self._quit)
        menu.append(quit_item)

        menu.show_all()
        self.indicator.set_menu(menu)

        # Let GLib dispatch the signal from its own main loop instead of
        # relying on a Python-level handler being run while Gtk.main() is
        # executing C code.
        GLib.unix_signal_add(
            GLib.PRIORITY_DEFAULT, signal.SIGUSR1, self._on_sigusr1
        )

    def _on_sigusr1(self):
        """Toggle recording on SIGUSR1 and keep the signal source installed."""
        self.toggle_recording()
        return True  # a falsy return would remove the GLib signal source

    def _quit(self, widget=None):
        """Stop a running ``arecord`` child, then leave the GTK main loop."""
        process = self.recording_process
        self.recording_process = None
        if process is not None:
            self._terminate(process)
        Gtk.main_quit()

    def update_status(self, text):
        """Show *text* in the status menu item.

        The label change is scheduled with ``GLib.idle_add`` so the method
        can also be called from the transcription thread.
        """
        GLib.idle_add(self.status_item.set_label, text)

    def toggle_recording(self, widget=None):
        """Start recording when idle, stop (and transcribe) when recording.

        *widget* is the menu item passed by the "activate" signal; it is
        not used.
        """
        if self.recording:
            self.stop_recording()
        else:
            self.start_recording()

    def start_recording(self):
        """Launch ``arecord`` writing to a new WAV file in the work dir.

        If ``arecord`` cannot be started, the error is shown in the status
        item and the daemon stays idle.
        """
        self._recording_seq += 1
        audio_file = os.path.join(
            self.work_dir, f"recording-{self._recording_seq}.wav"
        )
        try:
            process = subprocess.Popen([
                "arecord", "-f", "S16_LE", "-r", "16000", "-c", "1", audio_file
            ])
        except OSError as exc:
            self.update_status(f"ERROR: could not start arecord ({exc})")
            return

        # Only flip the state once the recorder is actually running.
        self.recording_process = process
        self._audio_file = audio_file
        self.recording = True
        self.indicator.set_icon("media-record")
        self.update_status("🔴 Recording...")

    def stop_recording(self):
        """Stop ``arecord`` and transcribe the recording in the background."""
        self.recording = False
        self.indicator.set_icon("audio-input-microphone")

        process = self.recording_process
        audio_file = self._audio_file
        self.recording_process = None
        self._audio_file = None
        if process is None or audio_file is None:
            # Nothing was recorded, so there is nothing to transcribe.
            self.update_status("Ready")
            return

        self.update_status("⏳ Transcribing...")
        self._terminate(process)
        # transcribe in background thread
        threading.Thread(
            target=self.transcribe, args=(audio_file,), daemon=True
        ).start()

    def transcribe(self, audio_file=None):
        """Transcribe *audio_file*, then copy and type the resulting text.

        Args:
            audio_file: Path of the WAV file to transcribe.  Defaults to
                ``recording.wav`` inside the work dir.

        On failure only the status item is updated; nothing is copied or
        typed.  The audio and transcript files are removed in every case.
        """
        if audio_file is None:
            audio_file = os.path.join(self.work_dir, "recording.wav")
        txt_file = self._transcript_path(audio_file, self.work_dir)
        try:
            text = self._run_whisper(audio_file, txt_file)
            if text is None:
                self.update_status("ERROR: transcription failed")
                return
            # update UI
            self.update_status(f"✓ Done: {text[:30]}...")
            if text:
                # An empty transcript would only wipe the clipboard.
                self._deliver(text)
        finally:
            # cleanup
            for path in (audio_file, txt_file):
                try:
                    os.unlink(path)
                except FileNotFoundError:
                    pass  # already gone or never created

    def _run_whisper(self, audio_file, txt_file):
        """Run ``whisper`` on *audio_file*; return the text or ``None``.

        The presence of a readable *txt_file* is treated as the success
        signal, so ``None`` means the tool could not be run or produced no
        transcript.
        """
        try:
            subprocess.run(
                self._whisper_command(audio_file, self.work_dir),
                capture_output=True,
            )
        except OSError:
            return None
        try:
            with open(txt_file, "r", encoding="utf-8") as handle:
                return handle.read().strip()
        except (OSError, UnicodeDecodeError):
            return None

    def _deliver(self, text):
        """Copy *text* to the clipboard and type it into the focused window."""
        # use xsel instead of xclip - more reliable with systemd
        env = os.environ.copy()
        # Fall back to :0 only when no DISPLAY was inherited.
        env.setdefault("DISPLAY", ":0")
        try:
            subprocess.run(["xsel", "-bi"], input=text.encode(), env=env)
        except OSError as exc:
            self.update_status(f"ERROR: could not run xsel ({exc})")
        # type it - small delay to ensure focus
        time.sleep(0.1)
        try:
            subprocess.run(
                ["xdotool", "type", "--clearmodifiers", "--", text], env=env
            )
        except OSError as exc:
            self.update_status(f"ERROR: could not run xdotool ({exc})")

    @staticmethod
    def _terminate(process):
        """Terminate *process*, killing it if it has not exited after 5 s."""
        process.terminate()
        try:
            process.wait(timeout=5)
        except subprocess.TimeoutExpired:
            process.kill()
            process.wait()

    @staticmethod
    def _transcript_path(audio_file, output_dir):
        """Return the ``.txt`` path in *output_dir* matching *audio_file*.

        Only the file name's own extension is swapped, so a ``.wav``
        elsewhere in the path is left alone.
        """
        stem = os.path.splitext(os.path.basename(audio_file))[0]
        return os.path.join(output_dir, stem + ".txt")

    @staticmethod
    def _whisper_command(audio_file, output_dir):
        """Return the ``whisper`` argument list for *audio_file*."""
        return [
            "whisper", audio_file,
            "--model", "base",
            "--language", "en",
            "--output_format", "txt",
            "--output_dir", output_dir,
        ]
def main():
    """Create the tray application and run the GTK main loop."""
    # Bind the instance to a local so it stays referenced while Gtk.main() runs.
    app = WhisperDictate()  # noqa: F841
    Gtk.main()
# Run the daemon only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|