Create a basic GUI

This commit is contained in:
Andrew Ward
2024-05-01 16:23:54 +01:00
parent 862d73d96b
commit 1dc15bc3b0
4 changed files with 362 additions and 113 deletions

1
.gitignore vendored
View File

@@ -1,3 +1,4 @@
.env
.env_backup
*.wav
*.mp3

View File

@@ -28,7 +28,7 @@ python text-to-mic.py "Text you'd like to speak" 8 5
To get this script working you will need to install the following on the relevant operating system
### Windows
pip install tk
pip install pyaudio
pip install python-dotenv
pip install wave

133
text-to-mic-cli.py Normal file
View File

@@ -0,0 +1,133 @@
import openai
from openai import OpenAI
import pyaudio
import wave
import threading
from dotenv import load_dotenv
import os
# Load environment variables from .env file
load_dotenv()
# Set up your OpenAI API key from the environment variable
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
def list_audio_devices():
p = pyaudio.PyAudio()
print("Available audio devices:")
info = p.get_host_api_info_by_index(0)
num_devices = info.get('deviceCount')
# List all available devices, and mark output devices
for i in range(0, num_devices):
if p.get_device_info_by_index(i).get('maxOutputChannels') > 0:
print(f"Device index {i}: {p.get_device_info_by_index(i).get('name')}")
p.terminate()
def play_saved_audio(file_path, device_index=None):
# Open the saved audio file
wf = wave.open(file_path, 'rb')
print(f"Playing audio to device {device_index}")
# Setup PyAudio
p = pyaudio.PyAudio()
try:
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
data = wf.readframes(1024)
while data:
stream.write(data)
data = wf.readframes(1024)
except Exception as e:
print(f"Error playing audio on device {device_index}: {e}")
finally:
stream.stop_stream()
stream.close()
wf.close()
p.terminate()
#Plays to multiple device indexes at the same time
def play_audio_multiplexed(file_paths, device_indices):
p = pyaudio.PyAudio()
streams = []
# Open all files and start all streams
for file_path, device_index in zip(file_paths, device_indices):
wf = wave.open(file_path, 'rb')
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
streams.append((stream, wf))
# Play interleaved
active_streams = len(streams)
while active_streams > 0:
for stream, wf in streams:
data = wf.readframes(1024)
if data:
stream.write(data)
else:
stream.stop_stream()
stream.close()
wf.close()
active_streams -= 1
p.terminate()
def stream_audio_to_virtual_mic(text, voice="fable", device_index=None, device_index_2=None):
response = client.audio.speech.create(
model="tts-1",
voice=voice,
input=text,
response_format='wav'
)
#This can either stream to one device index at a time, or, via multiplexing
#it can stream to two similtaneously to prevent lag playing in sequence
if device_index_2 is not None:
file_path_1 = "output1.wav"
file_path_2 = "output2.wav"
response.stream_to_file(file_path_1)
response.stream_to_file(file_path_2)
play_audio_multiplexed([file_path_1, file_path_2], [device_index, device_index_2])
else:
file_path_1 = "output1.wav"
response.stream_to_file(file_path_1)
play_saved_audio(file_path_1, device_index)
return "";
if __name__ == "__main__":
import sys
arglen = len(sys.argv)
if arglen < 2:
print("Usage: python script.py 'text to convert'")
sys.exit(1)
print(f"arg count {arglen}")
if arglen == 4:
device_index = int(sys.argv[2])
device_index_2 = int(sys.argv[3])
elif arglen == 3:
device_index = int(sys.argv[2])
device_index_2 = None
else:
list_audio_devices()
device_index = int(input("Enter the device index: "))
device_index_2 = None
stream_audio_to_virtual_mic(sys.argv[1], voice="fable", device_index=device_index,device_index_2=device_index_2)

View File

@@ -1,132 +1,247 @@
import openai
from openai import OpenAI
import tkinter as tk
from tkinter import ttk, messagebox, simpledialog, Menu
import os
import pyaudio
import wave
import threading
from openai import OpenAI
from dotenv import load_dotenv
import os
# Load environment variables from .env file
# Load environment variables
load_dotenv()
# Set up your OpenAI API key from the environment variable
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
class Application(tk.Tk):
def list_audio_devices():
p = pyaudio.PyAudio()
print("Available audio devices:")
info = p.get_host_api_info_by_index(0)
num_devices = info.get('deviceCount')
# List all available devices, and mark output devices
for i in range(0, num_devices):
if p.get_device_info_by_index(i).get('maxOutputChannels') > 0:
print(f"Device index {i}: {p.get_device_info_by_index(i).get('name')}")
p.terminate()
def __init__(self):
super().__init__()
self.title("Scorchsoft Text to Mic")
self.style = ttk.Style(self)
self.style.theme_use('clam') # Using a theme for a better look
# Ensure API Key is loaded or prompted for before initializing GUI components
self.api_key = self.get_api_key()
if not self.api_key:
messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
self.destroy()
return
self.client = OpenAI(api_key=self.api_key)
# Initializing device index variables before they are used
self.device_index = tk.StringVar(self)
self.device_index_2 = tk.StringVar(self)
self.available_devices = self.get_audio_devices() # Load audio devices
self.create_menu()
self.initialize_gui()
def create_menu(self):
self.menubar = Menu(self)
self.config(menu=self.menubar)
# File or settings menu
settings_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Settings", menu=settings_menu)
settings_menu.add_command(label="Change API Key", command=self.change_api_key)
# Playback menu
playback_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Playback", menu=playback_menu)
playback_menu.add_command(label="Play Last Audio", command=self.play_last_audio)
def initialize_gui(self):
self.device_index = tk.StringVar(self)
self.device_index_2 = tk.StringVar(self)
self.device_index.set("Select Device")
self.device_index_2.set("None")
# Fetching available devices
available_devices = self.get_audio_devices()
device_names = list(available_devices.keys())
def play_saved_audio(file_path, device_index=None):
# Open the saved audio file
wf = wave.open(file_path, 'rb')
print(f"Playing audio to device {device_index}")
main_frame = ttk.Frame(self, padding="10")
main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
self.columnconfigure(0, weight=1)
self.rowconfigure(0, weight=1)
# Setup PyAudio
p = pyaudio.PyAudio()
ttk.Label(main_frame, text="Scorchsoft Text to Mic").grid(column=0, row=0, columnspan=2, pady=(0, 10))
ttk.Label(main_frame, text="This tool uses OpenAI's text-to-speech to stream audio.").grid(column=0, row=1, columnspan=2)
try:
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
data = wf.readframes(1024)
while data:
stream.write(data)
data = wf.readframes(1024)
except Exception as e:
print(f"Error playing audio on device {device_index}: {e}")
finally:
stream.stop_stream()
stream.close()
wf.close()
ttk.Label(main_frame, text="Please select primary audio device:").grid(column=0, row=2, sticky=tk.W)
primary_device_menu = ttk.OptionMenu(main_frame, self.device_index, *self.available_devices.keys())
primary_device_menu.grid(column=1, row=2, sticky=tk.W, padx=(10, 0))
ttk.Label(main_frame, text="Please select secondary audio device (optional):").grid(column=0, row=3, sticky=tk.W)
secondary_device_menu = ttk.OptionMenu(main_frame, self.device_index_2, "None", *self.available_devices.keys())
secondary_device_menu.grid(column=1, row=3, sticky=tk.W, padx=(10, 0))
ttk.Label(main_frame, text="Text to read:").grid(column=0, row=4, sticky=tk.W, pady=(10, 0))
self.text_input = tk.Text(main_frame, height=10, width=50)
self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 10))
submit_button = ttk.Button(main_frame, text="Submit", command=self.submit_text)
submit_button.grid(column=0, row=6, columnspan=2)
def get_api_key(self):
api_key = os.getenv("OPENAI_API_KEY")
if not api_key: # Only ask if .env has no API key
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:")
if api_key:
print(f"\nAPI Key: {api_key }\n")
self.save_api_key(api_key)
return api_key
def save_api_key(self, api_key):
with open('.env', 'w') as f:
f.write(f"OPENAI_API_KEY={api_key}\n")
load_dotenv()
def get_audio_devices(self):
p = pyaudio.PyAudio()
devices = {}
for i in range(p.get_device_count()):
info = p.get_device_info_by_index(i)
if info['maxOutputChannels'] > 0: # Filter for output-capable devices
devices[info['name']] = i
p.terminate()
return devices
#Plays to multiple device indexes at the same time
def play_audio_multiplexed(file_paths, device_indices):
p = pyaudio.PyAudio()
streams = []
# Open all files and start all streams
for file_path, device_index in zip(file_paths, device_indices):
wf = wave.open(file_path, 'rb')
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
streams.append((stream, wf))
# Play interleaved
active_streams = len(streams)
while active_streams > 0:
for stream, wf in streams:
data = wf.readframes(1024)
if data:
stream.write(data)
def submit_text(self):
text = self.text_input.get("1.0", tk.END).strip()
if not text:
messagebox.showinfo("Error", "Please enter some text to synthesize.")
return
# Convert device names to indices
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
if primary_index is None:
messagebox.showerror("Error", "Primary device not selected or unavailable.")
return
print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")
try:
response = self.client.audio.speech.create(
model="tts-1",
voice="fable",
input=text,
response_format='wav'
)
self.last_audio_file = "last_output.wav"
response.stream_to_file(self.last_audio_file)
#Play to either two or a single stream
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
else:
stream.stop_stream()
stream.close()
wf.close()
active_streams -= 1
p.terminate()
def stream_audio_to_virtual_mic(text, voice="fable", device_index=None, device_index_2=None):
response = client.audio.speech.create(
model="tts-1",
voice=voice,
input=text,
response_format='wav'
)
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
#This can either stream to one device index at a time, or, via multiplexing
#it can stream to two similtaneously to prevent lag playing in sequence
if device_index_2 is not None:
file_path_1 = "output1.wav"
file_path_2 = "output2.wav"
response.stream_to_file(file_path_1)
response.stream_to_file(file_path_2)
play_audio_multiplexed([file_path_1, file_path_2], [device_index, device_index_2])
else:
file_path_1 = "output1.wav"
response.stream_to_file(file_path_1)
play_saved_audio(file_path_1, device_index)
return "";
except Exception as e:
messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
def play_audio_multiplexed(self, file_paths, device_indices):
p = pyaudio.PyAudio()
streams = []
try:
# Open all files and start all streams
for file_path, device_index in zip(file_paths, device_indices):
wf = wave.open(file_path, 'rb')
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=int(device_index))
streams.append((stream, wf))
# Play interleaved
active_streams = len(streams)
while active_streams > 0:
for stream, wf in streams:
data = wf.readframes(1024)
if data:
stream.write(data)
else:
stream.stop_stream()
stream.close()
wf.close()
streams.remove((stream, wf))
active_streams -= 1
except Exception as e:
messagebox.showerror("Playback Error", f"Error during multiplexed playback: {e}")
finally:
p.terminate()
def play_last_audio(self):
if hasattr(self, 'last_audio_file'):
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
# Check if a secondary device is selected
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
else:
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
else:
messagebox.showinfo("No Audio", "No audio has been generated yet.")
def play_saved_audio(self, file_path, device_name):
device_index = self.available_devices.get(device_name, None)
if device_index is None:
messagebox.showerror("Error", "Selected audio device is not available.")
return
wf = wave.open(file_path, 'rb')
p = pyaudio.PyAudio()
try:
stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=wf.getframerate(),
output=True,
output_device_index=device_index)
data = wf.readframes(1024)
while data:
stream.write(data)
data = wf.readframes(1024)
finally:
stream.stop_stream()
stream.close()
wf.close()
p.terminate()
def change_api_key(self):
new_key = simpledialog.askstring("API Key", "Enter new OpenAI API Key:", parent=self)
if new_key:
self.save_api_key(new_key)
self.api_key = new_key
self.client = OpenAI(api_key=self.api_key)
messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
if __name__ == "__main__":
import sys
arglen = len(sys.argv)
if arglen < 2:
print("Usage: python script.py 'text to convert'")
sys.exit(1)
print(f"arg count {arglen}")
if arglen == 4:
device_index = int(sys.argv[2])
device_index_2 = int(sys.argv[3])
elif arglen == 3:
device_index = int(sys.argv[2])
device_index_2 = None
else:
list_audio_devices()
device_index = int(input("Enter the device index: "))
device_index_2 = None
stream_audio_to_virtual_mic(sys.argv[1], voice="fable", device_index=device_index,device_index_2=device_index_2)
app = Application()
app.mainloop()