Create a basic GUI

2024-05-01 16:23:54 +01:00
parent 862d73d96b
commit 1dc15bc3b0
4 changed files with 362 additions and 113 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 .env
 .env_backup
 *.wav
 *.mp3
--- a/Readme.md
+++ b/Readme.md
@@ -28,7 +28,7 @@ python text-to-mic.py "Text you'd like to speak" 8 5
 To get this script working you will need to install the following on the relevant operating system
 ### Windows
-
+pip install tk
 pip install pyaudio
 pip install python-dotenv
 pip install wave
--- a/text-to-mic-cli.py
+++ b/text-to-mic-cli.py
@@ -0,0 +1,133 @@
 import openai
 from openai import OpenAI
 import pyaudio
 import wave
 import threading
 from dotenv import load_dotenv
 import os
 # Load environment variables from .env file so OPENAI_API_KEY can be read
 # with os.getenv below.
 load_dotenv()
 # Set up your OpenAI API key from the environment variable.
 # Module-level client used by stream_audio_to_virtual_mic(); os.getenv
 # returns None when OPENAI_API_KEY is not set.
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
 def list_audio_devices():
    """Print the index and name of each output-capable audio device.

    The number of indices examined is the ``deviceCount`` reported by
    host API 0; every index below that count is looked up and printed when
    the device has at least one output channel.

    The PyAudio instance is always terminated, even when a device query
    raises.
    """
    p = pyaudio.PyAudio()
    try:
        print("Available audio devices:")
        num_devices = p.get_host_api_info_by_index(0).get('deviceCount')
        # List all available devices, keeping only those that can output audio
        for i in range(num_devices):
            # Query each device once and reuse the result for both checks.
            info = p.get_device_info_by_index(i)
            if info.get('maxOutputChannels') > 0:
                print(f"Device index {i}: {info.get('name')}")
    finally:
        p.terminate()
 def play_saved_audio(file_path, device_index=None):
    """Play a WAV file on a single output device.

    Args:
        file_path: Path of the WAV file to play.
        device_index: PyAudio output device index, passed through as
            ``output_device_index``.

    Errors raised while opening the output stream or writing audio are
    caught and printed; errors from opening the file itself propagate to
    the caller.
    """
    # Open the saved audio file
    wf = wave.open(file_path, 'rb')
    print(f"Playing audio to device {device_index}")
    # Setup PyAudio
    p = pyaudio.PyAudio()
    # Stays None if p.open() fails, so the cleanup below never touches an
    # unbound name.
    stream = None
    try:
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True,
                        output_device_index=device_index)
        data = wf.readframes(1024)
        while data:
            stream.write(data)
            data = wf.readframes(1024)
    except Exception as e:
        print(f"Error playing audio on device {device_index}: {e}")
    finally:
        try:
            if stream is not None:
                stream.stop_stream()
                stream.close()
        finally:
            # Release the file and PortAudio even if closing the stream fails.
            wf.close()
            p.terminate()
 #Plays to multiple device indexes at the same time
 def play_audio_multiplexed(file_paths, device_indices):
    """Play several WAV files at once, one per output device.

    Args:
        file_paths: Paths of the WAV files to play.
        device_indices: PyAudio output device indices, paired with
            ``file_paths`` by position (via ``zip``).

    Chunks of 1024 frames are written to each stream in turn until every
    file is exhausted. Exceptions propagate to the caller, but any stream
    or file still open is closed and PyAudio is terminated first.
    """
    p = pyaudio.PyAudio()
    streams = []
    try:
        # Open all files and start all streams
        for file_path, device_index in zip(file_paths, device_indices):
            wf = wave.open(file_path, 'rb')
            try:
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=wf.getframerate(),
                                output=True,
                                output_device_index=device_index)
            except Exception:
                # This file is not tracked in `streams` yet; close it here.
                wf.close()
                raise
            streams.append((stream, wf))

        # Play interleaved
        while streams:
            # Iterate over a snapshot: finished pairs are removed from
            # `streams`, so a closed file is never read again when the
            # inputs have different lengths.
            for pair in list(streams):
                stream, wf = pair
                data = wf.readframes(1024)
                if data:
                    stream.write(data)
                else:
                    stream.stop_stream()
                    stream.close()
                    wf.close()
                    streams.remove(pair)
    finally:
        try:
            # Only pairs that did not finish normally are still listed.
            for stream, wf in streams:
                wf.close()
                stream.close()
        finally:
            p.terminate()
 def stream_audio_to_virtual_mic(text, voice="fable", device_index=None, device_index_2=None):
    """Synthesize ``text`` to WAV audio and play it on one or two devices.

    Args:
        text: Text sent to the speech endpoint as ``input``.
        voice: Voice name passed to the speech endpoint.
        device_index: Output device index for playback.
        device_index_2: Optional second output device index. When given,
            the audio is written to two files and both are played together
            via ``play_audio_multiplexed``.

    Returns:
        An empty string.
    """
    speech = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text,
        response_format='wav'
    )
    primary_path = "output1.wav"
    speech.stream_to_file(primary_path)

    # Single device: play the one file directly.
    if device_index_2 is None:
        play_saved_audio(primary_path, device_index)
        return ""

    # Two devices: multiplex so both play simultaneously rather than in
    # sequence, which would introduce lag.
    secondary_path = "output2.wav"
    speech.stream_to_file(secondary_path)
    play_audio_multiplexed([primary_path, secondary_path],
                           [device_index, device_index_2])
    return ""
 if __name__ == "__main__":
    # Command-line entry point:
    #   <script> "text"                   -> list devices, prompt for an index
    #   <script> "text" <index>           -> play on one device
    #   <script> "text" <index> <index2>  -> play on two devices at once
    import sys
    arglen = len(sys.argv)
    if arglen < 2:
        print("Usage: python script.py 'text to convert'")
        sys.exit(1)
    print(f"arg count {arglen}")
    # The second device is only set when exactly two indices were supplied.
    device_index_2 = None
    if arglen in (3, 4):
        device_index = int(sys.argv[2])
        if arglen == 4:
            device_index_2 = int(sys.argv[3])
    else:
        # Any other argument count falls back to the interactive prompt.
        list_audio_devices()
        device_index = int(input("Enter the device index: "))
    stream_audio_to_virtual_mic(sys.argv[1], voice="fable",
                                device_index=device_index,
                                device_index_2=device_index_2)
--- a/text-to-mic.py
+++ b/text-to-mic.py
@@ -1,132 +1,247 @@
-import openai
+import tkinter as tk
-from openai import OpenAI
+from tkinter import ttk, messagebox, simpledialog, Menu
 import os
 import pyaudio
 import wave
-import threading
+from openai import OpenAI
 from dotenv import load_dotenv
 import os
-# Load environment variables from .env file
+# Load environment variables
 load_dotenv()
-# Set up your OpenAI API key from the environment variable
+class Application(tk.Tk):
 client = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))
-def list_audio_devices():
+    def __init__(self):
-    p = pyaudio.PyAudio()
+        super().__init__()
-    print("Available audio devices:")
+        self.title("Scorchsoft Text to Mic")
-    info = p.get_host_api_info_by_index(0)
+        self.style = ttk.Style(self)
-    num_devices = info.get('deviceCount')
+        self.style.theme_use('clam')  # Using a theme for a better look
-    # List all available devices, and mark output devices
+        
-    for i in range(0, num_devices):
+        # Ensure API Key is loaded or prompted for before initializing GUI components
-        if p.get_device_info_by_index(i).get('maxOutputChannels') > 0:
+        self.api_key = self.get_api_key()
-            print(f"Device index {i}: {p.get_device_info_by_index(i).get('name')}")
+        if not self.api_key:
-    p.terminate()
+            messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
            self.destroy()
            return
        self.client = OpenAI(api_key=self.api_key)
        # Initializing device index variables before they are used
        self.device_index = tk.StringVar(self)
        self.device_index_2 = tk.StringVar(self)
        self.available_devices = self.get_audio_devices()  # Load audio devices
        self.create_menu()
        self.initialize_gui()
    def create_menu(self):
        """Attach a menu bar with a Settings menu and a Playback menu.

        Settings offers "Change API Key" (``change_api_key``); Playback
        offers "Play Last Audio" (``play_last_audio``).
        """
        menubar = Menu(self)
        self.menubar = menubar
        self.config(menu=menubar)

        # (cascade label, item label, callback), in display order.
        menu_specs = (
            ("Settings", "Change API Key", self.change_api_key),
            ("Playback", "Play Last Audio", self.play_last_audio),
        )
        for cascade_label, item_label, callback in menu_specs:
            submenu = Menu(menubar, tearoff=0)
            menubar.add_cascade(label=cascade_label, menu=submenu)
            submenu.add_command(label=item_label, command=callback)
    def initialize_gui(self):
        self.device_index = tk.StringVar(self)
        self.device_index_2 = tk.StringVar(self)
        self.device_index.set("Select Device")
        self.device_index_2.set("None")
        # Fetching available devices
        available_devices = self.get_audio_devices()
        device_names = list(available_devices.keys())
 def play_saved_audio(file_path, device_index=None):
    # Open the saved audio file
    wf = wave.open(file_path, 'rb')
-    print(f"Playing audio to device {device_index}")
+        main_frame = ttk.Frame(self, padding="10")
        main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
        self.columnconfigure(0, weight=1)
        self.rowconfigure(0, weight=1)
-    # Setup PyAudio
+        ttk.Label(main_frame, text="Scorchsoft Text to Mic").grid(column=0, row=0, columnspan=2, pady=(0, 10))
-    p = pyaudio.PyAudio()
+        ttk.Label(main_frame, text="This tool uses OpenAI's text-to-speech to stream audio.").grid(column=0, row=1, columnspan=2)
-    try:
+        ttk.Label(main_frame, text="Please select primary audio device:").grid(column=0, row=2, sticky=tk.W)
-        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
+        primary_device_menu = ttk.OptionMenu(main_frame, self.device_index, *self.available_devices.keys())
-                        channels=wf.getnchannels(),
+        primary_device_menu.grid(column=1, row=2, sticky=tk.W, padx=(10, 0))
-                        rate=wf.getframerate(),
+
-                        output=True,
+        ttk.Label(main_frame, text="Please select secondary audio device (optional):").grid(column=0, row=3, sticky=tk.W)
-                        output_device_index=device_index)
+        secondary_device_menu = ttk.OptionMenu(main_frame, self.device_index_2, "None", *self.available_devices.keys())
-        data = wf.readframes(1024)
+        secondary_device_menu.grid(column=1, row=3, sticky=tk.W, padx=(10, 0))
-        while data:
+
-            stream.write(data)
+        ttk.Label(main_frame, text="Text to read:").grid(column=0, row=4, sticky=tk.W, pady=(10, 0))
-            data = wf.readframes(1024)
+        self.text_input = tk.Text(main_frame, height=10, width=50)
-    except Exception as e:
+        self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 10))
-        print(f"Error playing audio on device {device_index}: {e}")
+
-    finally:
+        submit_button = ttk.Button(main_frame, text="Submit", command=self.submit_text)
-        stream.stop_stream()
+        submit_button.grid(column=0, row=6, columnspan=2)
-        stream.close()
+
-        wf.close()
+
    def get_api_key(self):
        """Return the OpenAI API key, prompting the user only when needed.

        The ``OPENAI_API_KEY`` environment variable is used when it is set
        and non-empty. Otherwise the user is asked for a key, and a key
        entered that way is persisted through ``save_api_key``.

        The key is never printed: it is a secret and must not end up in
        console output. A key that already came from the environment is
        not written back to ``.env``.

        Returns:
            The API key string, or a falsy value (``None`` or ``""``) when
            no key is configured and the dialog was cancelled or left
            empty.
        """
        api_key = os.getenv("OPENAI_API_KEY")
        if api_key:
            return api_key

        # Only ask if the environment has no API key
        api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:")
        if api_key:
            self.save_api_key(api_key)
        return api_key
    def save_api_key(self, api_key):
        """Store ``api_key`` in the local ``.env`` file and in ``os.environ``.

        Existing ``.env`` lines are preserved. Only a previous
        ``OPENAI_API_KEY`` assignment (optionally prefixed with ``export``)
        is replaced, so other settings in the file are not lost. The
        process environment is updated directly so that a changed key
        takes effect even when the variable was already set.

        Args:
            api_key: The OpenAI API key to persist.
        """
        env_path = '.env'
        try:
            with open(env_path, 'r') as f:
                existing_lines = f.readlines()
        except FileNotFoundError:
            existing_lines = []

        kept_lines = []
        for line in existing_lines:
            name = line.partition('=')[0].strip()
            if name.startswith('export '):
                name = name[len('export '):].strip()
            if name == 'OPENAI_API_KEY':
                continue  # superseded by the new entry appended below
            # Ensure the last kept line is newline-terminated before appending.
            kept_lines.append(line if line.endswith('\n') else line + '\n')
        kept_lines.append(f"OPENAI_API_KEY={api_key}\n")

        with open(env_path, 'w') as f:
            f.writelines(kept_lines)
        os.environ['OPENAI_API_KEY'] = api_key
    def get_audio_devices(self):
        """Return a mapping of output-capable device names to device indices.

        Every PyAudio device index is inspected and kept when the device
        reports at least one output channel. If two devices share a name,
        the later index replaces the earlier one in the mapping.

        The PyAudio instance is terminated even if a device query raises.
        """
        p = pyaudio.PyAudio()
        try:
            devices = {}
            for i in range(p.get_device_count()):
                info = p.get_device_info_by_index(i)
                if info['maxOutputChannels'] > 0:  # Filter for output-capable devices
                    devices[info['name']] = i
            return devices
        finally:
            p.terminate()
-#Plays to multiple device indexes at the same time
+    def submit_text(self):
-def play_audio_multiplexed(file_paths, device_indices):
+        text = self.text_input.get("1.0", tk.END).strip()
-    p = pyaudio.PyAudio()
+        if not text:
-    streams = []
+            messagebox.showinfo("Error", "Please enter some text to synthesize.")
-    
+            return
-    # Open all files and start all streams
+        
-    for file_path, device_index in zip(file_paths, device_indices):
+        # Convert device names to indices
-        wf = wave.open(file_path, 'rb')
+        primary_index = self.available_devices.get(self.device_index.get(), None)
-        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
+        secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
-                        channels=wf.getnchannels(),
+
-                        rate=wf.getframerate(),
+        if primary_index is None:
-                        output=True,
+            messagebox.showerror("Error", "Primary device not selected or unavailable.")
-                        output_device_index=device_index)
+            return
-        streams.append((stream, wf))
+        
-    
+        print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")
-    # Play interleaved
+
-    active_streams = len(streams)
+        try:
-    while active_streams > 0:
+
-        for stream, wf in streams:
+            response = self.client.audio.speech.create(
-            data = wf.readframes(1024)
+                model="tts-1",
-            if data:
+                voice="fable",
-                stream.write(data)
+                input=text,
                response_format='wav'
            )
            self.last_audio_file = "last_output.wav"
            response.stream_to_file(self.last_audio_file)
            #Play to either two or a single stream
            if primary_index and secondary_index != "None" and secondary_index is not None:
                self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
                                            [primary_index, secondary_index])
            else:
-                stream.stop_stream()
+                self.play_audio_multiplexed([self.last_audio_file],
-                stream.close()
+                                            [primary_index])
                wf.close()
                active_streams -= 1
    p.terminate()
 def stream_audio_to_virtual_mic(text, voice="fable", device_index=None, device_index_2=None):
    response = client.audio.speech.create(
        model="tts-1",
        voice=voice,
        input=text,
        response_format='wav'
    )
    #This can either stream to one device index at a time, or, via multiplexing
    #it can stream to two similtaneously to prevent lag playing in sequence
    if device_index_2 is not None:
        file_path_1 = "output1.wav"
        file_path_2 = "output2.wav"
        response.stream_to_file(file_path_1)
        response.stream_to_file(file_path_2)
        play_audio_multiplexed([file_path_1, file_path_2], [device_index, device_index_2])
    else:
        file_path_1 = "output1.wav"
        response.stream_to_file(file_path_1)
        play_saved_audio(file_path_1, device_index)
-    return "";
+        except Exception as e:
            messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
    def play_audio_multiplexed(self, file_paths, device_indices):
        """Play WAV files at once, one per output device.

        Args:
            file_paths: Paths of the WAV files to play.
            device_indices: Output device indices, paired with
                ``file_paths`` by position; each is converted with ``int``.

        Chunks of 1024 frames are written to each stream in turn until all
        files are exhausted. Any error is reported in a "Playback Error"
        message box rather than raised. Streams and files that are still
        open are closed, and PyAudio is terminated, in every case.
        """
        p = pyaudio.PyAudio()
        streams = []
        try:
            # Open all files and start all streams
            for file_path, device_index in zip(file_paths, device_indices):
                wf = wave.open(file_path, 'rb')
                try:
                    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                    channels=wf.getnchannels(),
                                    rate=wf.getframerate(),
                                    output=True,
                                    output_device_index=int(device_index))
                except Exception:
                    # Not tracked in `streams` yet, so close the file here.
                    wf.close()
                    raise
                streams.append((stream, wf))

            # Play interleaved
            while streams:
                # Iterate over a snapshot: removing from the list being
                # iterated would skip the entry after a finished stream.
                for pair in list(streams):
                    stream, wf = pair
                    data = wf.readframes(1024)
                    if data:
                        stream.write(data)
                    else:
                        stream.stop_stream()
                        stream.close()
                        wf.close()
                        streams.remove(pair)
        except Exception as e:
            messagebox.showerror("Playback Error", f"Error during multiplexed playback: {e}")
        finally:
            try:
                # Only pairs that did not finish normally are still listed.
                for stream, wf in streams:
                    wf.close()
                    stream.close()
            finally:
                p.terminate()
    def play_last_audio(self):
        """Replay the most recently generated audio on the selected devices.

        Shows an information box when no audio has been generated yet, and
        an error box when the selected primary device name is not in
        ``available_devices``. When a secondary device is selected the file
        is played on both devices together; otherwise it is played on the
        primary device only.
        """
        if not hasattr(self, 'last_audio_file'):
            messagebox.showinfo("No Audio", "No audio has been generated yet.")
            return

        primary_index = self.available_devices.get(self.device_index.get(), None)
        secondary_name = self.device_index_2.get()
        secondary_index = None
        if secondary_name != "None":
            secondary_index = self.available_devices.get(secondary_name, None)

        # Compare against None explicitly: device index 0 is a valid device
        # and must not be treated as "no device selected".
        if primary_index is None:
            messagebox.showerror("Error", "Primary device not selected or unavailable.")
            return

        if secondary_index is not None:
            self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
                                        [primary_index, secondary_index])
        else:
            self.play_audio_multiplexed([self.last_audio_file],
                                        [primary_index])
    def play_saved_audio(self, file_path, device_name):
        """Play a WAV file on the output device with the given name.

        Args:
            file_path: Path of the WAV file to play.
            device_name: Key into ``available_devices``; an error box is
                shown and nothing is played when the name is not present.

        Exceptions raised while opening the stream or writing audio
        propagate to the caller after the file, the stream (if it was
        opened) and PyAudio have been released.
        """
        device_index = self.available_devices.get(device_name, None)
        if device_index is None:
            messagebox.showerror("Error", "Selected audio device is not available.")
            return

        wf = wave.open(file_path, 'rb')
        p = pyaudio.PyAudio()
        # Stays None if p.open() fails, so cleanup never hits an unbound
        # name and masks the real error.
        stream = None
        try:
            stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                            channels=wf.getnchannels(),
                            rate=wf.getframerate(),
                            output=True,
                            output_device_index=device_index)
            data = wf.readframes(1024)
            while data:
                stream.write(data)
                data = wf.readframes(1024)
        finally:
            try:
                if stream is not None:
                    stream.stop_stream()
                    stream.close()
            finally:
                wf.close()
                p.terminate()
    def change_api_key(self):
        """Ask for a new OpenAI API key and switch the application to it.

        When the dialog returns a non-empty key, it is saved through
        ``save_api_key``, stored on ``self.api_key``, used to build a new
        ``OpenAI`` client, and confirmed with a message box. A cancelled or
        empty dialog changes nothing.
        """
        entered_key = simpledialog.askstring(
            "API Key", "Enter new OpenAI API Key:", parent=self
        )
        if not entered_key:
            return

        self.save_api_key(entered_key)
        self.api_key = entered_key
        self.client = OpenAI(api_key=self.api_key)
        messagebox.showinfo(
            "API Key Updated",
            "The OpenAI API Key has been updated successfully.",
        )
 if __name__ == "__main__":
-    import sys
+    app = Application()
-
+    app.mainloop()
    arglen = len(sys.argv)
    if arglen < 2:
        print("Usage: python script.py 'text to convert'")
        sys.exit(1)
    print(f"arg count {arglen}")
    if arglen == 4:
        device_index = int(sys.argv[2])
        device_index_2 = int(sys.argv[3])
    elif arglen == 3:
        device_index = int(sys.argv[2])
        device_index_2 = None
    else:
        list_audio_devices()
        device_index = int(input("Enter the device index: "))
        device_index_2 = None
    stream_audio_to_virtual_mic(sys.argv[1], voice="fable", device_index=device_index,device_index_2=device_index_2)