Files
text-to-mic/text-to-mic.py
2024-05-01 16:23:54 +01:00

247 lines
9.4 KiB
Python

import tkinter as tk
from tkinter import ttk, messagebox, simpledialog, Menu
import os
import pyaudio
import wave
from openai import OpenAI
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
class Application(tk.Tk):
def __init__(self):
super().__init__()
self.title("Scorchsoft Text to Mic")
self.style = ttk.Style(self)
self.style.theme_use('clam') # Using a theme for a better look
# Ensure API Key is loaded or prompted for before initializing GUI components
self.api_key = self.get_api_key()
if not self.api_key:
messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
self.destroy()
return
self.client = OpenAI(api_key=self.api_key)
# Initializing device index variables before they are used
self.device_index = tk.StringVar(self)
self.device_index_2 = tk.StringVar(self)
self.available_devices = self.get_audio_devices() # Load audio devices
self.create_menu()
self.initialize_gui()
def create_menu(self):
self.menubar = Menu(self)
self.config(menu=self.menubar)
# File or settings menu
settings_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Settings", menu=settings_menu)
settings_menu.add_command(label="Change API Key", command=self.change_api_key)
# Playback menu
playback_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Playback", menu=playback_menu)
playback_menu.add_command(label="Play Last Audio", command=self.play_last_audio)
def initialize_gui(self):
self.device_index = tk.StringVar(self)
self.device_index_2 = tk.StringVar(self)
self.device_index.set("Select Device")
self.device_index_2.set("None")
# Fetching available devices
available_devices = self.get_audio_devices()
device_names = list(available_devices.keys())
main_frame = ttk.Frame(self, padding="10")
main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
self.columnconfigure(0, weight=1)
self.rowconfigure(0, weight=1)
ttk.Label(main_frame, text="Scorchsoft Text to Mic").grid(column=0, row=0, columnspan=2, pady=(0, 10))
ttk.Label(main_frame, text="This tool uses OpenAI's text-to-speech to stream audio.").grid(column=0, row=1, columnspan=2)
ttk.Label(main_frame, text="Please select primary audio device:").grid(column=0, row=2, sticky=tk.W)
primary_device_menu = ttk.OptionMenu(main_frame, self.device_index, *self.available_devices.keys())
primary_device_menu.grid(column=1, row=2, sticky=tk.W, padx=(10, 0))
ttk.Label(main_frame, text="Please select secondary audio device (optional):").grid(column=0, row=3, sticky=tk.W)
secondary_device_menu = ttk.OptionMenu(main_frame, self.device_index_2, "None", *self.available_devices.keys())
secondary_device_menu.grid(column=1, row=3, sticky=tk.W, padx=(10, 0))
ttk.Label(main_frame, text="Text to read:").grid(column=0, row=4, sticky=tk.W, pady=(10, 0))
self.text_input = tk.Text(main_frame, height=10, width=50)
self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 10))
submit_button = ttk.Button(main_frame, text="Submit", command=self.submit_text)
submit_button.grid(column=0, row=6, columnspan=2)
def get_api_key(self):
api_key = os.getenv("OPENAI_API_KEY")
if not api_key: # Only ask if .env has no API key
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:")
if api_key:
print(f"\nAPI Key: {api_key }\n")
self.save_api_key(api_key)
return api_key
def save_api_key(self, api_key):
with open('.env', 'w') as f:
f.write(f"OPENAI_API_KEY={api_key}\n")
load_dotenv()
def get_audio_devices(self):
p = pyaudio.PyAudio()
devices = {}
for i in range(p.get_device_count()):
info = p.get_device_info_by_index(i)
if info['maxOutputChannels'] > 0: # Filter for output-capable devices
devices[info['name']] = i
p.terminate()
return devices
def submit_text(self):
text = self.text_input.get("1.0", tk.END).strip()
if not text:
messagebox.showinfo("Error", "Please enter some text to synthesize.")
return
# Convert device names to indices
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
if primary_index is None:
messagebox.showerror("Error", "Primary device not selected or unavailable.")
return
print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")
try:
response = self.client.audio.speech.create(
model="tts-1",
voice="fable",
input=text,
response_format='wav'
)
self.last_audio_file = "last_output.wav"
response.stream_to_file(self.last_audio_file)
#Play to either two or a single stream
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
else:
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
except Exception as e:
messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
def play_audio_multiplexed(self, file_paths, device_indices):
p = pyaudio.PyAudio()
streams = []
try:
# Open all files and start all streams
for file_path, device_index in zip(file_paths, device_indices):
wf = wave.open(file_path, 'rb')
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=int(device_index))
streams.append((stream, wf))
# Play interleaved
active_streams = len(streams)
while active_streams > 0:
for stream, wf in streams:
data = wf.readframes(1024)
if data:
stream.write(data)
else:
stream.stop_stream()
stream.close()
wf.close()
streams.remove((stream, wf))
active_streams -= 1
except Exception as e:
messagebox.showerror("Playback Error", f"Error during multiplexed playback: {e}")
finally:
p.terminate()
def play_last_audio(self):
if hasattr(self, 'last_audio_file'):
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
# Check if a secondary device is selected
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
else:
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
else:
messagebox.showinfo("No Audio", "No audio has been generated yet.")
def play_saved_audio(self, file_path, device_name):
device_index = self.available_devices.get(device_name, None)
if device_index is None:
messagebox.showerror("Error", "Selected audio device is not available.")
return
wf = wave.open(file_path, 'rb')
p = pyaudio.PyAudio()
try:
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
data = wf.readframes(1024)
while data:
stream.write(data)
data = wf.readframes(1024)
finally:
stream.stop_stream()
stream.close()
wf.close()
p.terminate()
def change_api_key(self):
new_key = simpledialog.askstring("API Key", "Enter new OpenAI API Key:", parent=self)
if new_key:
self.save_api_key(new_key)
self.api_key = new_key
self.client = OpenAI(api_key=self.api_key)
messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
if __name__ == "__main__":
app = Application()
app.mainloop()