Create a basic GUI
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,3 +1,4 @@
|
|||||||
.env
|
.env
|
||||||
|
.env_backup
|
||||||
*.wav
|
*.wav
|
||||||
*.mp3
|
*.mp3
|
||||||
@@ -28,7 +28,7 @@ python text-to-mic.py "Text you'd like to speak" 8 5
|
|||||||
To get this script working you will need to install the following on the relevant operating system
|
To get this script working you will need to install the following on the relevant operating system
|
||||||
|
|
||||||
### Windows
|
### Windows
|
||||||
|
pip install tk
|
||||||
pip install pyaudio
|
pip install pyaudio
|
||||||
pip install python-dotenv
|
pip install python-dotenv
|
||||||
pip install wave
|
pip install wave
|
||||||
|
|||||||
133
text-to-mic-cli.py
Normal file
133
text-to-mic-cli.py
Normal file
@@ -0,0 +1,133 @@
|
|||||||
|
import openai
|
||||||
|
from openai import OpenAI
|
||||||
|
import pyaudio
|
||||||
|
import wave
|
||||||
|
import threading
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
|
||||||
|
# Load environment variables from .env file
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Set up your OpenAI API key from the environment variable
|
||||||
|
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
||||||
|
|
||||||
|
def list_audio_devices():
    """Print the index and name of every output-capable audio device.

    The printed index is the value expected by ``output_device_index`` when
    opening a stream, so the enumeration walks PyAudio's global device table
    (``get_device_count``) rather than the device count of a single host API.
    """
    p = pyaudio.PyAudio()
    try:
        print("Available audio devices:")
        for i in range(p.get_device_count()):
            # Look each device up once and reuse the info dict.
            info = p.get_device_info_by_index(i)
            if info.get('maxOutputChannels', 0) > 0:
                print(f"Device index {i}: {info.get('name')}")
    finally:
        # Release PortAudio even if a device lookup fails.
        p.terminate()
|
||||||
|
|
||||||
|
|
||||||
|
def play_saved_audio(file_path, device_index=None):
    """Play a WAV file to one output device in 1024-frame chunks.

    Args:
        file_path: Path of the WAV file to play.
        device_index: PyAudio output device index, passed through unchanged
            to ``PyAudio.open`` (``None`` included).

    Errors raised while opening the stream or writing audio are printed and
    swallowed; a failure to open the WAV file itself still propagates.
    """
    with wave.open(file_path, 'rb') as wf:
        print(f"Playing audio to device {device_index}")

        p = pyaudio.PyAudio()
        # Stays None if p.open() fails, so cleanup below cannot hit an
        # unbound name and mask the real error.
        stream = None
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True,
                            output_device_index=device_index)
            data = wf.readframes(1024)
            while data:
                stream.write(data)
                data = wf.readframes(1024)
        except Exception as e:
            print(f"Error playing audio on device {device_index}: {e}")
        finally:
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
|
||||||
|
|
||||||
|
#Plays to multiple device indexes at the same time
|
||||||
|
def play_audio_multiplexed(file_paths, device_indices):
    """Play several WAV files to several output devices, interleaving chunks.

    Files and devices are paired positionally (``zip``). Each pass writes one
    1024-frame chunk from every file that still has data to its device; a
    file that runs out is closed and dropped from the rotation, so files of
    different lengths are handled.

    Args:
        file_paths: Paths of the WAV files to play.
        device_indices: PyAudio output device index for each file.
    """
    p = pyaudio.PyAudio()
    active = []  # (stream, wave_file) pairs that are still open
    try:
        # Open all files and start all streams
        for file_path, device_index in zip(file_paths, device_indices):
            wf = wave.open(file_path, 'rb')
            try:
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                output=True,
                                output_device_index=device_index)
            except Exception:
                # The file is not tracked in `active` yet; close it here.
                wf.close()
                raise
            active.append((stream, wf))

        # Play interleaved
        while active:
            # Iterate over a snapshot so finished pairs can be removed safely.
            for pair in list(active):
                stream, wf = pair
                data = wf.readframes(1024)
                if data:
                    stream.write(data)
                else:
                    active.remove(pair)
                    stream.stop_stream()
                    stream.close()
                    wf.close()
    finally:
        try:
            # Anything still listed was left open by an error mid-playback.
            for stream, wf in active:
                wf.close()
                stream.close()
        finally:
            p.terminate()
|
||||||
|
|
||||||
|
def stream_audio_to_virtual_mic(text, voice="fable", device_index=None, device_index_2=None):
    """Synthesize ``text`` to WAV via the OpenAI client and play it back.

    With only ``device_index`` given, the audio is saved to ``output1.wav``
    and played on that device. When ``device_index_2`` is also given, the
    audio is saved a second time to ``output2.wav`` and both files are played
    through ``play_audio_multiplexed`` so the two devices are fed
    simultaneously instead of one after the other.

    Returns:
        An empty string.
    """
    speech = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text,
        response_format='wav'
    )

    primary_wav = "output1.wav"
    speech.stream_to_file(primary_wav)

    # Single-device case: plain sequential playback.
    if device_index_2 is None:
        play_saved_audio(primary_wav, device_index)
        return ""

    # Two-device case: second copy of the audio, then interleaved playback.
    secondary_wav = "output2.wav"
    speech.stream_to_file(secondary_wav)
    play_audio_multiplexed([primary_wav, secondary_wav], [device_index, device_index_2])
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point:
    #   argv[1]  text to speak (required)
    #   argv[2]  primary output device index (optional; prompted for if absent)
    #   argv[3]  secondary output device index (optional)
    import sys

    arglen = len(sys.argv)

    if arglen < 2:
        print("Usage: python text-to-mic-cli.py 'text to convert' [device_index] [device_index_2]")
        sys.exit(1)

    print(f"arg count {arglen}")

    device_index_2 = None
    try:
        if arglen == 4:
            device_index = int(sys.argv[2])
            device_index_2 = int(sys.argv[3])
        elif arglen == 3:
            device_index = int(sys.argv[2])
        else:
            # No usable device arguments: show the choices and ask.
            list_audio_devices()
            device_index = int(input("Enter the device index: "))
    except ValueError:
        # A non-numeric index would otherwise surface as a raw traceback.
        print("Device index must be an integer.")
        sys.exit(1)

    stream_audio_to_virtual_mic(sys.argv[1], voice="fable", device_index=device_index, device_index_2=device_index_2)
|
||||||
339
text-to-mic.py
339
text-to-mic.py
@@ -1,132 +1,247 @@
|
|||||||
import openai
|
import tkinter as tk
|
||||||
from openai import OpenAI
|
from tkinter import ttk, messagebox, simpledialog, Menu
|
||||||
|
import os
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import wave
|
import wave
|
||||||
import threading
|
from openai import OpenAI
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import os
|
|
||||||
|
|
||||||
# Load environment variables from .env file
|
# Load environment variables
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# Set up your OpenAI API key from the environment variable
|
class Application(tk.Tk):
|
||||||
client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
|
|
||||||
|
|
||||||
def list_audio_devices():
|
def __init__(self):
    """Build the main window: theme, API key, OpenAI client, devices, menu, widgets.

    If no API key can be obtained from ``get_api_key``, an informational
    dialog is shown, the window is destroyed, and construction stops before
    the client or any widgets are created.
    """
    super().__init__()
    self.title("Scorchsoft Text to Mic")

    # Using a theme for a better look
    self.style = ttk.Style(self)
    self.style.theme_use('clam')

    # The key must be resolved (from the environment or a prompt) before any
    # GUI components are initialized.
    key = self.get_api_key()
    self.api_key = key
    if not key:
        messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
        self.destroy()
        return

    self.client = OpenAI(api_key=self.api_key)

    # Device selection variables exist before anything reads them.
    self.device_index = tk.StringVar(self)
    self.device_index_2 = tk.StringVar(self)

    # Mapping returned by get_audio_devices(), used by the device menus.
    self.available_devices = self.get_audio_devices()

    self.create_menu()
    self.initialize_gui()
|
||||||
|
|
||||||
|
def create_menu(self):
    """Attach the menu bar with its "Settings" and "Playback" cascades.

    "Settings" holds "Change API Key" (``change_api_key``); "Playback" holds
    "Play Last Audio" (``play_last_audio``).
    """
    self.menubar = Menu(self)
    self.config(menu=self.menubar)

    # (cascade label, item label, callback), in display order
    menu_layout = (
        ("Settings", "Change API Key", self.change_api_key),
        ("Playback", "Play Last Audio", self.play_last_audio),
    )
    for cascade_label, item_label, callback in menu_layout:
        submenu = Menu(self.menubar, tearoff=0)
        self.menubar.add_cascade(label=cascade_label, menu=submenu)
        submenu.add_command(label=item_label, command=callback)
|
||||||
|
|
||||||
|
def initialize_gui(self):
    """Create the main form: device pickers, text box and Submit button.

    The device menus list the names in ``self.available_devices``. The
    primary picker starts on the placeholder "Select Device"; the secondary
    picker starts on "None" and keeps "None" as a selectable entry so a
    secondary device can be deselected again.
    """
    self.device_index = tk.StringVar(self)
    self.device_index_2 = tk.StringVar(self)
    self.device_index.set("Select Device")
    self.device_index_2.set("None")

    device_names = list(self.available_devices)

    main_frame = ttk.Frame(self, padding="10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
    self.columnconfigure(0, weight=1)
    self.rowconfigure(0, weight=1)

    ttk.Label(main_frame, text="Scorchsoft Text to Mic").grid(column=0, row=0, columnspan=2, pady=(0, 10))
    ttk.Label(main_frame, text="This tool uses OpenAI's text-to-speech to stream audio.").grid(column=0, row=1, columnspan=2)

    ttk.Label(main_frame, text="Please select primary audio device:").grid(column=0, row=2, sticky=tk.W)
    # ttk.OptionMenu's third positional argument is the *default* value, not
    # a menu entry. Pass the placeholder explicitly so the first device name
    # is not consumed as the default and dropped from the list.
    primary_device_menu = ttk.OptionMenu(main_frame, self.device_index, "Select Device", *device_names)
    primary_device_menu.grid(column=1, row=2, sticky=tk.W, padx=(10, 0))

    ttk.Label(main_frame, text="Please select secondary audio device (optional):").grid(column=0, row=3, sticky=tk.W)
    # "None" appears twice on purpose: once as the default, once as an entry.
    secondary_device_menu = ttk.OptionMenu(main_frame, self.device_index_2, "None", "None", *device_names)
    secondary_device_menu.grid(column=1, row=3, sticky=tk.W, padx=(10, 0))

    ttk.Label(main_frame, text="Text to read:").grid(column=0, row=4, sticky=tk.W, pady=(10, 0))
    self.text_input = tk.Text(main_frame, height=10, width=50)
    self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 10))

    submit_button = ttk.Button(main_frame, text="Submit", command=self.submit_text)
    submit_button.grid(column=0, row=6, columnspan=2)
|
||||||
stream.close()
|
|
||||||
wf.close()
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_api_key(self):
    """Return the OpenAI API key, prompting for one if none is configured.

    The ``OPENAI_API_KEY`` environment variable is used when set. Otherwise
    the user is asked through a dialog, and a non-empty answer is persisted
    with ``save_api_key``.

    Returns:
        The key string, or a falsy value (``None`` or ``""``) when no key is
        configured and the prompt was cancelled or left empty.
    """
    api_key = os.getenv("OPENAI_API_KEY")

    if not api_key:  # Only ask if the environment has no API key
        api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:")
        if api_key:
            # The key is a secret: never echo it to stdout or logs.
            self.save_api_key(api_key)

    return api_key
|
||||||
|
|
||||||
|
def save_api_key(self, api_key):
    """Persist ``api_key`` as ``OPENAI_API_KEY`` in the local ``.env`` file.

    Any other lines already in ``.env`` are kept; only existing
    ``OPENAI_API_KEY`` assignments are replaced. The environment is then
    reloaded with ``override=True`` so the new value takes effect.

    Args:
        api_key: The key to store.
    """
    env_path = '.env'

    try:
        with open(env_path, 'r') as f:
            existing_lines = f.readlines()
    except FileNotFoundError:
        existing_lines = []

    kept_lines = []
    for line in existing_lines:
        candidate = line.strip()
        if candidate.startswith("export "):
            candidate = candidate[len("export "):]
        # Drop previous assignments of this key; keep everything else.
        if candidate.partition("=")[0].strip() == "OPENAI_API_KEY":
            continue
        kept_lines.append(line)

    # Make sure the new entry starts on its own line.
    if kept_lines and not kept_lines[-1].endswith("\n"):
        kept_lines[-1] += "\n"
    kept_lines.append(f"OPENAI_API_KEY={api_key}\n")

    with open(env_path, 'w') as f:
        f.writelines(kept_lines)

    load_dotenv(override=True)
|
||||||
|
|
||||||
|
def get_audio_devices(self):
    """Return a dict mapping output device name to PyAudio device index.

    Only devices reporting ``maxOutputChannels > 0`` are included.
    """
    p = pyaudio.PyAudio()
    try:
        devices = {}
        for i in range(p.get_device_count()):
            info = p.get_device_info_by_index(i)
            if info['maxOutputChannels'] > 0:  # Filter for output-capable devices
                # NOTE: devices sharing a name overwrite each other here, so
                # the highest index wins for a duplicated name.
                devices[info['name']] = i
        return devices
    finally:
        # Release PortAudio even if enumeration fails part-way.
        p.terminate()
|
||||||
|
|
||||||
#Plays to multiple device indexes at the same time
|
def submit_text(self):
    """Synthesize the text box contents and play the result on the chosen devices.

    Shows a dialog and returns early when the text is empty or no valid
    primary device is selected. Otherwise the text is sent to the OpenAI
    speech endpoint, the WAV is saved as ``last_output.wav``, and it is
    played on the primary device, plus the secondary device when one is
    selected. Any exception from synthesis or playback is reported in an
    error dialog.
    """
    text = self.text_input.get("1.0", tk.END).strip()
    if not text:
        messagebox.showinfo("Error", "Please enter some text to synthesize.")
        return

    # Convert device names to indices
    primary_index = self.available_devices.get(self.device_index.get(), None)
    secondary_name = self.device_index_2.get()
    secondary_index = self.available_devices.get(secondary_name, None) if secondary_name != "None" else None

    if primary_index is None:
        messagebox.showerror("Error", "Primary device not selected or unavailable.")
        return

    print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")

    try:
        response = self.client.audio.speech.create(
            model="tts-1",
            voice="fable",
            input=text,
            response_format='wav'
        )
        audio_file = "last_output.wav"
        response.stream_to_file(audio_file)
        # Record the file only after it has been written successfully.
        self.last_audio_file = audio_file

        # Play to either two or a single stream. Compare against None rather
        # than relying on truthiness: device index 0 is a valid device.
        if secondary_index is not None:
            self.play_audio_multiplexed([audio_file, audio_file],
                                        [primary_index, secondary_index])
        else:
            self.play_audio_multiplexed([audio_file],
                                        [primary_index])
    except Exception as e:
        messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
|
||||||
|
|
||||||
|
def play_audio_multiplexed(self, file_paths, device_indices):
    """Play WAV files to output devices, interleaving 1024-frame chunks.

    Files and devices are paired positionally (``zip``). A file that runs
    out of data is closed and dropped from the rotation. Any exception is
    reported in a "Playback Error" dialog instead of propagating.

    Args:
        file_paths: Paths of the WAV files to play.
        device_indices: Output device index for each file (converted with
            ``int`` before use).
    """
    p = pyaudio.PyAudio()
    active = []  # (stream, wave_file) pairs that are still open

    try:
        # Open all files and start all streams
        for file_path, device_index in zip(file_paths, device_indices):
            wf = wave.open(file_path, 'rb')
            try:
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                output=True,
                                output_device_index=int(device_index))
            except Exception:
                # The file is not tracked in `active` yet; close it here.
                wf.close()
                raise
            active.append((stream, wf))

        # Play interleaved
        while active:
            # Iterate over a snapshot: removing from the list being iterated
            # would skip the entry that follows the removed one.
            for pair in list(active):
                stream, wf = pair
                data = wf.readframes(1024)
                if data:
                    stream.write(data)
                else:
                    active.remove(pair)
                    stream.stop_stream()
                    stream.close()
                    wf.close()
    except Exception as e:
        messagebox.showerror("Playback Error", f"Error during multiplexed playback: {e}")
    finally:
        try:
            # Anything still listed was left open by an error mid-playback.
            for stream, wf in active:
                wf.close()
                stream.close()
        finally:
            p.terminate()
|
||||||
|
|
||||||
|
|
||||||
|
def play_last_audio(self):
    """Replay the most recently generated audio file on the selected devices.

    Shows an informational dialog when no audio has been generated yet, and
    an error dialog when no valid primary device is selected.
    """
    if not hasattr(self, 'last_audio_file'):
        messagebox.showinfo("No Audio", "No audio has been generated yet.")
        return

    primary_index = self.available_devices.get(self.device_index.get(), None)
    secondary_name = self.device_index_2.get()
    secondary_index = self.available_devices.get(secondary_name, None) if secondary_name != "None" else None

    if primary_index is None:
        messagebox.showerror("Error", "Primary device not selected or unavailable.")
        return

    # Check if a secondary device is selected. Compare against None rather
    # than relying on truthiness: device index 0 is a valid device.
    if secondary_index is not None:
        self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
                                    [primary_index, secondary_index])
    else:
        self.play_audio_multiplexed([self.last_audio_file],
                                    [primary_index])
|
||||||
|
|
||||||
|
def play_saved_audio(self, file_path, device_name):
    """Play a WAV file on the output device with the given name.

    Args:
        file_path: Path of the WAV file to play.
        device_name: Key into ``self.available_devices``; an error dialog is
            shown and nothing is played when the name is unknown.

    Exceptions raised while opening the file or stream, or while writing
    audio, propagate to the caller after cleanup.
    """
    device_index = self.available_devices.get(device_name, None)
    if device_index is None:
        messagebox.showerror("Error", "Selected audio device is not available.")
        return

    with wave.open(file_path, 'rb') as wf:
        p = pyaudio.PyAudio()
        # Stays None if p.open() fails, so cleanup below cannot raise an
        # UnboundLocalError that hides the original exception.
        stream = None
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True,
                            output_device_index=device_index)
            data = wf.readframes(1024)
            while data:
                stream.write(data)
                data = wf.readframes(1024)
        finally:
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
            finally:
                p.terminate()
|
||||||
|
|
||||||
|
|
||||||
|
def change_api_key(self):
    """Ask for a replacement API key and switch the application over to it.

    Nothing happens when the dialog is cancelled or left empty. Otherwise the
    key is saved with ``save_api_key``, stored on the instance, a new
    ``OpenAI`` client is created with it, and a confirmation is shown.
    """
    entered = simpledialog.askstring("API Key", "Enter new OpenAI API Key:", parent=self)
    if not entered:
        return

    self.save_api_key(entered)
    self.api_key = entered
    self.client = OpenAI(api_key=self.api_key)
    messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
    # Script entry point: construct the main window, then run Tk's event loop
    # until the window is closed.
    app = Application()
    app.mainloop()
|
|
||||||
Reference in New Issue
Block a user