999 lines
41 KiB
Python
999 lines
41 KiB
Python
import tkinter as tk
|
|
import platform
|
|
import os
|
|
import threading
|
|
import pyaudio
|
|
import wave
|
|
import webbrowser
|
|
import json
|
|
import keyboard
|
|
import sys
|
|
|
|
from pystray import Icon as icon, MenuItem as item, Menu as menu
|
|
from PIL import Image, ImageDraw
|
|
from tkinter import ttk, messagebox, simpledialog, Menu
|
|
from openai import OpenAI
|
|
from dotenv import load_dotenv
|
|
from pathlib import Path
|
|
from pydub import AudioSegment
|
|
from audioplayer import AudioPlayer
|
|
|
|
|
|
# Load environment variables from config/.env
|
|
def load_env_file():
    """Load environment variables from ``config/.env`` (relative to the working directory)."""
    load_dotenv(dotenv_path=Path("config", ".env"))
|
|
|
|
class TextToMic(tk.Tk):
|
|
|
|
def __init__(self):
    """Build the main window.

    Order matters: theme and styles, config directory and environment,
    API key (the window is destroyed and construction aborted when no key
    is available), OpenAI client, audio device lists, then menu, widgets
    and global hotkeys.
    """
    super().__init__()
    self.title("Scorchsoft Text to Mic")

    # Use the native theme on macOS ('aqua' windowing system), 'clam' elsewhere.
    self.style = ttk.Style(self)
    on_aqua = self.tk.call('tk', 'windowingsystem') == 'aqua'
    self.style.theme_use('aqua' if on_aqua else 'clam')

    # Define button styles used by the record / play buttons.
    self.style.configure('Recording.TButton', background='red', foreground='white')
    self.style.configure("Green.TButton", background="green", foreground="white")

    # The config directory must exist before the .env file is read.
    self.ensure_config_directory()
    load_env_file()

    # The API key has to be known before any GUI component is created.
    self.api_key = self.get_api_key()
    if not self.api_key:
        messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
        self.destroy()
        return

    self.client = OpenAI(api_key=self.api_key)

    # Device selection variables exist before anything reads them.
    self.device_index = tk.StringVar(self)
    self.device_index_2 = tk.StringVar(self)

    # Name -> index maps for output and input devices.
    self.available_devices = self.get_audio_devices()
    self.available_input_devices = self.get_input_devices()

    self.create_menu()
    self.initialize_gui()
    self.setup_hotkeys()
|
|
|
|
def ensure_config_directory(self):
    """Create the ``config`` directory under the working directory if it is missing."""
    Path("config").mkdir(parents=True, exist_ok=True)
|
|
|
|
def show_version(self):
    """Open a small window that shows the app version and author."""
    version_text = """Version 1.1.0\n\n App by Scorchsoft.com"""

    popup = tk.Toplevel(self)
    popup.title("App Version")
    popup.geometry("300x150")  # Width x Height

    version_label = tk.Label(popup, text=version_text, justify=tk.LEFT, wraplength=280)
    version_label.pack(padx=10, pady=10)

    # Button that closes the window
    close_button = ttk.Button(popup, text="Close", command=popup.destroy)
    close_button.pack(pady=(10, 0))
|
|
|
|
def create_menu(self):
    """Create the menu bar (Settings, Playback, Input, Help) and attach it to the window."""
    self.menubar = Menu(self)
    self.config(menu=self.menubar)

    # (cascade label, ((entry label, callback), ...)) in display order.
    menu_layout = (
        ("Settings", (
            ("Change API Key", self.change_api_key),
            ("ChatGPT Manipulation", self.chat_gpt_settings),
            ("Hotkey Settings", self.hotkey_settings),
        )),
        ("Playback", (
            ("Play Last Audio", self.play_last_audio),
        )),
        ("Input", (
            ("Apply AI Manipulation to Input Text", self.apply_ai),
        )),
        ("Help", (
            ("How to Use", self.show_instructions),
            ("Terms of Use and Licence", self.show_terms_of_use),
            ("Version", self.show_version),
            ("Hotkey Instructions", self.show_hotkey_instructions),
        )),
    )

    for cascade_label, entries in menu_layout:
        submenu = Menu(self.menubar, tearoff=0)
        self.menubar.add_cascade(label=cascade_label, menu=submenu)
        for entry_label, callback in entries:
            submenu.add_command(label=entry_label, command=callback)
|
|
|
|
|
|
def hotkey_settings(self):
    """Open a modal window for editing the three global hotkeys.

    Each hotkey is edited as up to three parts: two modifier dropdowns and
    one main-key dropdown. Pressing "Save" passes the selected parts to
    ``save_hotkey_settings``.
    """
    settings = self.load_settings()
    hotkey_window = tk.Toplevel(self)
    hotkey_window.title("Hotkey Settings")
    hotkey_window.grab_set()  # Grab the focus on this toplevel window

    main_frame = ttk.Frame(hotkey_window, padding="10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    # Choices for the dropdowns of each hotkey
    keys = ["", "ctrl", "shift", "alt", "tab", "altgr"]
    main_keys = list("abcdefghijklmnopqrstuvwxyz1234567890[];'#,./`") + \
        [f"f{i}" for i in range(1, 13)]  # Add function keys F1 to F12
    column_choices = (keys, keys, main_keys)

    def create_hotkey_row(row, label_text, key_combo):
        """Add one labelled row of three dropdowns; return their StringVars."""
        ttk.Label(main_frame, text=label_text).grid(row=row, column=0, sticky=tk.W, pady=2)

        # Pad to exactly three parts so a short or empty stored combo
        # cannot raise IndexError (the first column used to be unguarded).
        parts = (list(key_combo) + ["", "", ""])[:3]

        row_vars = []
        for column, (current, choices) in enumerate(zip(parts, column_choices), start=1):
            var = tk.StringVar(value=current)
            option_menu = ttk.OptionMenu(main_frame, var, current, *choices)
            option_menu.grid(row=row, column=column, sticky=tk.W, pady=2)
            row_vars.append(var)
        return row_vars

    row_specs = (
        ("record_start_stop", "Record Start/Stop:"),
        ("stop_recording", "Stop Recording:"),
        ("play_last_audio", "Play Last Audio:"),
    )
    rows = {}
    for row, (setting_name, label_text) in enumerate(row_specs):
        rows[setting_name] = create_hotkey_row(row, label_text, settings["hotkeys"][setting_name])

    def collect_hotkeys():
        """Read the current dropdown selections at click time."""
        return {name: [var.get() for var in row_vars] for name, row_vars in rows.items()}

    # Save Button
    save_btn = ttk.Button(main_frame, text="Save",
                          command=lambda: self.save_hotkey_settings(collect_hotkeys()))
    save_btn.grid(row=len(row_specs), column=1, sticky=tk.W + tk.E, pady=10)
|
|
|
|
|
|
def save_hotkey_settings(self, hotkeys):
    """Store the hotkeys in the settings file, re-register them and confirm to the user.

    ``hotkeys`` replaces the whole "hotkeys" entry of the stored settings.
    """
    current = self.load_settings()
    current["hotkeys"] = hotkeys
    self.save_settings_to_JSON(current)

    # Re-register so the new combinations take effect immediately.
    self.setup_hotkeys()

    messagebox.showinfo("Settings Updated", "Your hotkey settings have been saved successfully.")
|
|
|
|
def setup_hotkeys(self):
    """Remove existing keyboard hooks and register the hotkeys from the settings.

    Each stored combo is a list of key names; empty entries are dropped and
    the rest are joined with '+' to form the hotkey string.
    """
    try:
        # Clear whatever was registered before.
        keyboard.unhook_all()
    except AttributeError:
        pass  # Ignore if the method isn't supported

    hotkeys = self.load_settings()["hotkeys"]

    def parse_hotkey(combo):
        return '+'.join(part for part in combo if part)

    bindings = (
        ("record_start_stop", lambda: self.hotkey_record_trigger()),
        ("stop_recording", lambda: self.hotkey_stop_trigger()),
        ("play_last_audio", lambda: self.hotkey_play_last_audio_trigger()),
    )
    for setting_name, handler in bindings:
        keyboard.add_hotkey(parse_hotkey(hotkeys[setting_name]), handler)
|
|
|
|
|
|
def hotkey_play_last_audio_trigger(self):
    """Hotkey handler: replay the last audio, or play a notice sound if there is none yet."""
    if not hasattr(self, 'last_audio_file'):
        self.play_sound('assets/no-last-audio.wav')
        return
    self.play_last_audio()
|
|
|
|
|
|
def hotkey_stop_trigger(self):
    """Hotkey handler: play the stop sound and, if recording, stop without auto-play."""
    self.play_sound('assets/wrong-short.wav')
    if not self.recording:
        return
    self.stop_recording(auto_play=False)
    self.recording = False
|
|
|
|
# Sounds from https://mixkit.co/free-sound-effects/notification/
|
|
def hotkey_record_trigger(self):
    """Hotkey handler that toggles recording.

    While recording: play the confirmation sound and call ``submit_text``.
    Otherwise: start a new recording with the confirmation sound enabled.
    """
    if self.recording:
        self.play_sound('assets/pop.wav')
        self.submit_text()
    else:
        # The former nested check of self.recording here could never take
        # its else branch, so it was removed.
        self.start_recording(play_confirm_sound=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def play_sound(self, sound_file):
    """Play a bundled sound file, blocking until playback ends.

    ``sound_file`` is a path relative to the application's resource root.
    """
    sound_path = self.resource_path(sound_file)
    AudioPlayer(sound_path).play(block=True)
|
|
|
|
def resource_path(self, relative_path):
    """Return the absolute path of a bundled resource.

    The base directory is ``sys._MEIPASS`` when that attribute exists (the
    PyInstaller bundle case), otherwise the directory of the main script.
    """
    if hasattr(sys, '_MEIPASS'):
        # Running from a PyInstaller bundle
        base_path = sys._MEIPASS
    else:
        # Running normally: resolve relative to the launched script
        script_location = os.path.abspath(sys.argv[0])
        base_path = os.path.dirname(script_location)

    abs_path = os.path.join(base_path, relative_path)

    # Debugging aid: show what the relative path resolved to
    print(f"Resolved path for {relative_path}: {abs_path}")
    return abs_path
|
|
|
|
|
|
|
|
|
|
def initialize_gui(self):
    """Create the widgets of the main window.

    Builds the voice, input-device and playback-device dropdowns, the text
    area, the record/play buttons and the credits link. Also resets
    ``self.recording`` to False.
    """
    self.input_device_index = tk.StringVar(self)
    self.device_index = tk.StringVar(self)
    self.device_index_2 = tk.StringVar(self)

    self.input_device_index.set("Default")
    self.device_index.set("Select Device")
    self.device_index_2.set("None")

    main_frame = ttk.Frame(self, padding="20")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))
    self.columnconfigure(0, weight=1)
    self.rowconfigure(0, weight=1)

    # Voice Selection
    self.voice_var = tk.StringVar()
    voices = ['alloy', 'echo', 'fable', 'onyx', 'nova', 'shimmer']
    ttk.Label(main_frame, text="Voice").grid(column=0, row=0, sticky=tk.W, pady=(10, 10))
    voice_menu = ttk.OptionMenu(main_frame, self.voice_var, 'fable', *voices)
    voice_menu.grid(column=1, row=0, sticky=tk.W)

    # Microphone Selection Setup
    ttk.Label(main_frame, text="Input Device (optional):").grid(column=0, row=1, sticky=tk.W, pady=(5, 10))
    input_device_menu = ttk.OptionMenu(main_frame, self.input_device_index, "None", *self.available_input_devices.keys())
    input_device_menu.grid(column=1, row=1, sticky=tk.W)

    # Select Primary audio device.
    # ttk.OptionMenu treats the first argument after the variable as the
    # *default*, not as a menu entry. Passing only the device names made
    # the first device the default but left it out of the dropdown. The
    # default is now given explicitly, so the first device is still
    # pre-selected and every device is listed.
    output_device_names = list(self.available_devices.keys())
    primary_default = output_device_names[0] if output_device_names else "Select Device"
    ttk.Label(main_frame, text="Primary Playback Device:").grid(column=0, row=2, sticky=tk.W, pady=(10, 10))
    primary_device_menu = ttk.OptionMenu(main_frame, self.device_index, primary_default, *output_device_names)
    primary_device_menu.grid(column=1, row=2, sticky=tk.W)

    # Select Secondary audio device
    ttk.Label(main_frame, text="Secondary Playback Device (optional):").grid(column=0, row=3, sticky=tk.W, pady=(5, 10))
    secondary_device_menu = ttk.OptionMenu(main_frame, self.device_index_2, "None", *output_device_names)
    secondary_device_menu.grid(column=1, row=3, sticky=tk.W)

    # Specify the text to read
    ttk.Label(main_frame, text="Text to Read:").grid(column=0, row=4, sticky=tk.W, pady=(10, 0))
    self.text_input = tk.Text(main_frame, height=10, width=50)
    self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 20))

    # Button configuration
    self.recording = False  # State to check if currently recording
    self.record_button = ttk.Button(main_frame, text="Record Mic", command=self.toggle_recording)
    self.record_button.grid(column=0, row=6, sticky=tk.W + tk.E, pady=(0, 20), padx=(0, 10))

    self.submit_button = ttk.Button(main_frame, text="Play Audio", style="Green.TButton", command=self.submit_text)
    self.submit_button.grid(column=1, row=6, sticky=tk.W + tk.E, pady=(0, 20), padx=(10, 0))

    # Credits (clickable link)
    info_label = tk.Label(main_frame, text="Created by Scorchsoft.com App Development", fg="blue", cursor="hand2")
    info_label.grid(column=0, row=7, columnspan=2, pady=(0, 0))
    info_label.bind("<Button-1>", lambda e: self.open_scorchsoft())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def open_scorchsoft(self, event=None):
    """Open the Scorchsoft website in the web browser; ``event`` is accepted but unused."""
    homepage = 'https://www.scorchsoft.com'
    webbrowser.open(homepage)
|
|
|
|
|
|
|
|
|
|
def show_hotkey_instructions(self):
    """Open a window explaining what each global hotkey does.

    The key combinations shown come from the stored settings, so the help
    text stays correct after the user changes them in "Hotkey Settings".
    The built-in defaults are used for any combination that cannot be read.
    """
    default_hotkeys = {
        "record_start_stop": ["ctrl", "shift", "0"],
        "stop_recording": ["ctrl", "shift", "9"],
        "play_last_audio": ["ctrl", "shift", "8"],
    }
    try:
        configured = self.load_settings().get("hotkeys", {})
    except (OSError, ValueError):
        # Unreadable settings file: fall back to the defaults for display.
        configured = {}

    def combo_text(setting_name):
        # Same formatting as hotkey registration: drop empty parts, join with '+'.
        combo = configured.get(setting_name) or default_hotkeys[setting_name]
        return '+'.join(filter(None, combo))

    instruction_window = tk.Toplevel(self)
    instruction_window.title("Hotkey Instructions")
    instruction_window.geometry("400x300")  # Width x Height

    instructions = f"""How to use Hotkeys
{combo_text("record_start_stop")}
This starts a recording, then converts to text and plays when you press this hotkey again.

{combo_text("stop_recording")}
If you are recording, you can press this hotkey to stop recording without playing

{combo_text("play_last_audio")}
This replays the last audio clip played

"""
    tk.Label(instruction_window, text=instructions, justify=tk.LEFT, wraplength=380).pack(padx=10, pady=10)

    # Add a button to close the window
    ttk.Button(instruction_window, text="Close", command=instruction_window.destroy).pack(pady=(10, 0))
|
|
|
|
def show_instructions(self):
    """Open the "How to Use" window with setup and usage instructions.

    The step numbering is sequential and the control names match the
    widgets and menus created elsewhere in this class.
    """
    instruction_window = tk.Toplevel(self)
    instruction_window.title("How to Use")
    instruction_window.geometry("600x680")  # Width x Height

    instructions = """How to Use Scorchsoft Text to Mic:

1. Install VB-Cable if you haven't already
https://vb-audio.com/Cable/
This tool creates a virtual microphone on your Windows computer or Mac. Once installed you can then trigger audio to be played on this virtual cable.

2. Open the Text to Mic app by Scorchsoft, and input your OpenAI API key. How to set up an API key:
https://platform.openai.com/docs/quickstart/account-setup
(note that this may require you to add your billing details to OpenAI's playground before a key can be generated)
In short, you sign up, go to playground, add billing details, go to API keys, add one, copy it, paste into Text to Mic.

WARNING: This will use your OpenAI key to generate audio via the OpenAI API, which will incur charges per use. So please make sure to carefully monitor use.
OpenAI pricing: openai.com/pricing

3. Choose a voice that you prefer for the speech synthesis.

4. Select a playback device. I recommend you select one device to be your headphones, and the other the virtual microphone installed above (which is usually labelled "Cable Input (VB-Audio)")

5. Enter the text in the provided text area that you want to convert to speech.

6. Click 'Play Audio' to hear the spoken version of your text.

7. The 'Play Last Audio' option in the 'Playback' menu can be used to replay the last generated speech output.

8. You can change the API key at any time under the 'Settings' menu.

This tool was brought to you by Scorchsoft - We build custom apps to your requirements. Please contact us if you have a requirement for a custom app project.

If you like this tool then please help us out and give us a backlink to help others find it at:
https://www.scorchsoft.com/blog/text-to-mic-for-meetings/

Please also make sure you read the Terms of use and licence statement before using this app."""

    tk.Label(instruction_window, text=instructions, justify=tk.LEFT, wraplength=580).pack(padx=10, pady=10)

    # Add a button to close the window
    ttk.Button(instruction_window, text="Close", command=instruction_window.destroy).pack(pady=(10, 0))
|
|
|
|
|
|
def show_terms_of_use(self):
    """Show the contents of LICENSE.md in a scrollable, read-only window.

    When the file cannot be read, an explanatory message is displayed in
    place of the licence text.
    """
    license_path = self.resource_path("LICENSE.md")

    # Read the licence; every failure becomes the displayed text.
    try:
        with open(license_path, "r", encoding="utf-8") as license_file:
            license_content = license_file.read()
    except FileNotFoundError:
        license_content = "License file not found. Please ensure the LICENSE.md file exists in the application directory."
    except PermissionError:
        license_content = "Permission denied. Please ensure the script has read access to LICENSE.md."
    except UnicodeDecodeError as e:
        license_content = f"Error reading license file due to encoding issue: {e}"
    except Exception as e:
        license_content = f"An unexpected error occurred while reading the license file: {e}"

    terms_window = tk.Toplevel(self)
    terms_window.title("Terms of Use")
    terms_window.geometry("800x700")  # Width x Height

    # Container holding the text area and its scrollbar side by side
    container = ttk.Frame(terms_window)
    container.pack(fill=tk.BOTH, expand=True)

    license_view = tk.Text(container, wrap=tk.WORD)
    license_view.insert(tk.END, license_content)
    license_view.config(state=tk.DISABLED)  # read-only
    license_view.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    vertical_bar = ttk.Scrollbar(container, orient=tk.VERTICAL, command=license_view.yview)
    vertical_bar.pack(side=tk.RIGHT, fill=tk.Y)

    # Keep the scrollbar position in sync with the text area
    license_view.config(yscrollcommand=vertical_bar.set)

    close_button = ttk.Button(terms_window, text="Close", command=terms_window.destroy)
    close_button.pack(pady=(10, 0))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_app_support_path_mac(self):
    """Return ``~/Library/Application Support/scorchsoft-text-to-mic``, creating it if needed."""
    support_dir = Path.home().joinpath('Library', 'Application Support', 'scorchsoft-text-to-mic')
    support_dir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists
    return support_dir
|
|
|
|
def save_api_key_mac(self, api_key):
    """Write the API key to ``.env`` in the macOS application-support directory.

    The file is written directly in the support directory, which is the
    location ``load_api_key_mac`` reads. It was previously written to a
    ``config`` subdirectory that is never created and never read back.
    """
    env_path = self.get_app_support_path_mac() / '.env'
    with open(env_path, 'w') as f:
        f.write(f"OPENAI_API_KEY={api_key}\n")
|
|
|
|
def save_api_key(self, api_key):
    """Save the API key to the config/.env file and reload it into the environment.

    Failures are reported to the user in an error dialog rather than raised.
    """
    try:
        config_dir = Path("config")
        config_dir.mkdir(parents=True, exist_ok=True)  # Ensure directory exists

        env_path = config_dir / ".env"
        with open(env_path, 'w') as f:
            f.write(f"OPENAI_API_KEY={api_key}\n")

        # override=True so a key already present in the environment is
        # replaced by the new one; by default load_dotenv leaves existing
        # variables untouched.
        load_dotenv(dotenv_path=env_path, override=True)

    except Exception as e:
        messagebox.showerror("Error", f"Failed to save API key: {str(e)}")
|
|
|
|
def load_api_key_mac(self):
    """Read ``OPENAI_API_KEY`` from ``.env`` in the macOS application-support directory.

    Returns:
        The key as a string, or ``None`` when the file or the variable is
        missing.
    """
    env_path = self.get_app_support_path_mac() / '.env'
    if not env_path.exists():
        return None

    with open(env_path, 'r') as f:
        for line in f:
            # Split on the first '=' only, so values containing '=' are
            # kept whole and lines without '=' are skipped instead of
            # raising IndexError.
            name, separator, value = line.strip().partition('=')
            if separator and name.strip() == 'OPENAI_API_KEY':
                return value.strip()
    return None
|
|
|
|
|
|
def get_api_key(self):
    """Return the OpenAI API key, prompting the user when none is stored.

    Lookup order: the ``OPENAI_API_KEY`` environment variable, then (on
    macOS only) the application-support ``.env`` file. If neither gives a
    key, the "How to Use" window is shown and the user is asked for one; an
    entered key is saved with the platform-specific saver.

    Returns:
        The key, or whatever the prompt returned (empty or ``None``) when
        the user supplied nothing.
    """
    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key and platform.system() == 'Darwin':  # Darwin is macOS
        api_key = self.load_api_key_mac()

    if api_key:
        return api_key

    # Nothing stored: show the instructions, then ask for a key.
    self.show_instructions()
    api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:", parent=self)
    if not api_key:
        return api_key

    try:
        if platform.system() == 'Darwin':
            self.save_api_key_mac(api_key)
        else:
            self.save_api_key(api_key)
        messagebox.showinfo("API Key Set", "The OpenAI API Key has been updated successfully.")
    except Exception as e:
        messagebox.showerror("Error", f"Failed to save API key: {str(e)}")

    return api_key
|
|
|
|
|
|
|
|
|
|
|
|
def get_audio_devices(self):
    """Return a ``{device name: device index}`` dict of output-capable audio devices."""
    audio = pyaudio.PyAudio()
    devices = {}
    for index in range(audio.get_device_count()):
        device = audio.get_device_info_by_index(index)
        if device['maxOutputChannels'] <= 0:
            continue  # not an output device
        devices[device['name']] = index
    audio.terminate()
    return devices
|
|
|
|
def get_input_devices(self):
    """Return a ``{device name: device index}`` dict of input-capable audio devices."""
    audio = pyaudio.PyAudio()
    devices = {}
    for index in range(audio.get_device_count()):
        device = audio.get_device_info_by_index(index)
        if device['maxInputChannels'] <= 0:
            continue  # not an input device
        devices[device['name']] = index
    audio.terminate()
    return devices
|
|
|
|
|
|
def get_audio_file_path(self, filename):
    """Return where an audio file named ``filename`` is stored.

    On macOS this is a string path inside the application-support
    directory; on other systems it is a ``Path`` relative to the working
    directory.
    """
    if platform.system() != 'Darwin':
        return Path(filename)  # current directory on non-macOS systems

    mac_path = self.get_app_support_path_mac()
    return f"{mac_path}/{filename}"
|
|
|
|
|
|
def submit_text(self, play_text = None):
    """Play text, or finish an active recording with auto-play.

    While recording, the recording is stopped with auto-play enabled;
    otherwise ``play_text`` is passed on to ``submit_text_helper``.
    """
    print(f"submit text self recording: {self.recording}")

    if not self.recording:
        print("Submitting text")
        self.submit_text_helper(play_text = play_text)
        return

    print("Stopping recording")
    self.stop_recording(auto_play = True)
|
|
|
|
def submit_text_helper(self, play_text = None):
    """Synthesize speech for the text and play it on the selected device(s).

    Args:
        play_text: Text to speak. When ``None`` the text is read from the
            GUI text area.

    The generated audio is written to ``last_output.wav`` (see
    ``get_audio_file_path``) and remembered in ``self.last_audio_file``.
    Errors from the API call or playback are shown in a dialog.
    """
    if play_text is None:
        # Load from GUI if play text not set
        text = self.text_input.get("1.0", tk.END).strip()
    else:
        text = play_text

    if not text:
        messagebox.showinfo("Error", "Please enter some text to synthesize.")
        return

    selected_voice = self.voice_var.get()

    # Convert device names to indices
    primary_index = self.available_devices.get(self.device_index.get(), None)
    secondary_name = self.device_index_2.get()
    secondary_index = self.available_devices.get(secondary_name, None) if secondary_name != "None" else None

    if primary_index is None:
        messagebox.showerror("Error", "Primary device not selected or unavailable.")
        return

    print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")

    try:
        response = self.client.audio.speech.create(
            model="tts-1",
            voice=selected_voice,
            input=text,
            response_format='wav'
        )

        self.last_audio_file = self.get_audio_file_path("last_output.wav")
        response.stream_to_file(str(self.last_audio_file))

        # Play to either two streams or one. Device index 0 is valid, so
        # the secondary device is chosen by "is not None", never by the
        # truthiness of an index.
        device_indices = [primary_index]
        if secondary_index is not None:
            device_indices.append(secondary_index)
        self.play_audio_multiplexed([self.last_audio_file] * len(device_indices), device_indices)

    except Exception as e:
        messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
|
|
|
|
|
|
def resample_audio(self, file_path, target_sample_rate):
    """Write a copy of an audio file resampled to ``target_sample_rate``.

    The copy is saved next to the original as ``resampled_<original name>``
    in WAV format.

    Returns:
        The path of the resampled file, as a string.
    """
    sound = AudioSegment.from_file(file_path)
    resampled_sound = sound.set_frame_rate(target_sample_rate)

    # Prefix the file *name* only. Prefixing the whole path string broke
    # any path with a directory component, e.g. "resampled_/Users/...".
    source_path = Path(file_path)
    resampled_file_path = str(source_path.with_name("resampled_" + source_path.name))

    resampled_sound.export(resampled_file_path, format="wav")
    return resampled_file_path
|
|
|
|
def play_audio_multiplexed(self, file_paths, device_indices):
    """Play audio files on several output devices by interleaving chunk writes.

    Args:
        file_paths: WAV files to play, one per device.
        device_indices: PyAudio output device indices, paired with
            ``file_paths`` by position.

    A file whose sample rate differs from the device's default rate is
    resampled first (see ``resample_audio``). Files or devices that cannot
    be opened are reported in a dialog and skipped. All streams, wave
    readers and the PyAudio instance are released when playback ends or
    fails.
    """
    p = pyaudio.PyAudio()
    streams = []

    try:
        # Open all files and start all streams
        for file_path, device_index in zip(file_paths, device_indices):
            try:
                # Ensure the file_path is a string when opening the file
                wf = wave.open(str(file_path), 'rb')
            except FileNotFoundError:
                messagebox.showerror("File Not Found", f"Could not find audio file: {file_path}")
                continue  # Skip this file and proceed with the others
            except wave.Error as e:
                messagebox.showerror("Wave Error", f"Error reading audio file: {file_path}. Error: {str(e)}")
                continue

            try:
                # The output sample rate must match the selected device's rate
                device_info = self.get_device_info(device_index)
                sample_rate = int(device_info['defaultSampleRate'])
                wf_frame_rate = wf.getframerate()

                print(f"Sample Rate: {sample_rate}")
                print(f"WF Frame Rate: {wf_frame_rate}")

                # On a mismatch, play a resampled copy instead (prevents
                # playback speed issues or crashes).
                if sample_rate != wf_frame_rate:
                    resampled_file_path = self.resample_audio(str(file_path), sample_rate)
                    wf.close()  # release the original before switching files
                    wf = wave.open(str(resampled_file_path), 'rb')

                # Create a stream for this file
                stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                                channels=wf.getnchannels(),
                                rate=sample_rate,
                                output=True,
                                output_device_index=int(device_index))

            except Exception as e:
                messagebox.showerror("Stream Creation Error", f"Failed to create audio stream for device index {device_index}: {str(e)}")
                wf.close()
                continue

            streams.append((stream, wf))

        # Play interleaved: one chunk per stream per round. The list of
        # pending streams is rebuilt each round rather than mutated while
        # being iterated.
        pending = list(streams)
        while pending:
            still_playing = []
            for stream, wf in pending:
                data = wf.readframes(1024)
                if data:
                    stream.write(data)
                    still_playing.append((stream, wf))
            pending = still_playing

    except Exception as e:
        messagebox.showerror("Playback Error", f"Error during multiplexed playback: {e}")
    finally:
        # Release every opened stream and file, including after an error.
        for stream, wf in streams:
            try:
                stream.stop_stream()
                stream.close()
            except Exception as e:
                print(f"Error closing audio stream: {e}")
            wf.close()
        p.terminate()
|
|
|
|
|
|
def play_last_audio(self):
    """Replay the most recently generated audio on the selected device(s).

    Shows an info dialog when no audio has been generated yet, and an
    error dialog when no valid primary device is selected.
    """
    if not hasattr(self, 'last_audio_file'):
        messagebox.showinfo("No Audio", "No audio has been generated yet.")
        return

    primary_index = self.available_devices.get(self.device_index.get(), None)
    secondary_name = self.device_index_2.get()
    secondary_index = self.available_devices.get(secondary_name, None) if secondary_name != "None" else None

    if primary_index is None:
        messagebox.showerror("Error", "Primary device not selected or unavailable.")
        return

    # Device index 0 is valid, so test against None rather than truthiness.
    device_indices = [primary_index]
    if secondary_index is not None:
        device_indices.append(secondary_index)

    self.play_audio_multiplexed([self.last_audio_file] * len(device_indices), device_indices)
|
|
|
|
def play_saved_audio(self, file_path, device_name):
    """Play a WAV file on the named output device, blocking until it finishes.

    Args:
        file_path: Path of the WAV file.
        device_name: A key of ``self.available_devices``.

    Shows an error dialog and returns when the device is unknown.
    Exceptions from opening the file or the stream propagate after
    cleanup.
    """
    device_index = self.available_devices.get(device_name, None)
    if device_index is None:
        messagebox.showerror("Error", "Selected audio device is not available.")
        return

    wf = wave.open(file_path, 'rb')
    p = pyaudio.PyAudio()
    stream = None  # stays None if p.open() fails, so cleanup can skip it
    try:
        stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                        channels=wf.getnchannels(),
                        rate=wf.getframerate(),
                        output=True,
                        output_device_index=device_index)
        data = wf.readframes(1024)
        while data:
            stream.write(data)
            data = wf.readframes(1024)
    finally:
        # Only touch the stream if it was created; otherwise the original
        # error would be masked by an UnboundLocalError.
        if stream is not None:
            stream.stop_stream()
            stream.close()
        wf.close()
        p.terminate()
|
|
|
|
|
|
def change_api_key(self):
    """Ask for a new API key; when one is entered, save it and rebuild the OpenAI client."""
    new_key = simpledialog.askstring("API Key", "Enter new OpenAI API Key:", parent=self)
    if not new_key:
        return  # dialog cancelled or left empty

    self.save_api_key(new_key)
    self.api_key = new_key
    self.client = OpenAI(api_key=self.api_key)
    messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
|
|
|
|
|
|
def get_device_info(self, device_index):
    """Return PyAudio's device-info mapping for ``device_index``.

    A temporary PyAudio instance is created for the lookup and always
    terminated afterwards.
    """
    audio = pyaudio.PyAudio()
    try:
        return audio.get_device_info_by_index(device_index)
    finally:
        audio.terminate()
|
|
|
|
def toggle_recording(self, auto_play=False):
    """Start recording when idle, otherwise stop the current recording.

    Args:
        auto_play: Forwarded to ``stop_recording`` as ``auto_play``.
    """
    if not self.recording:
        self.start_recording()
    else:
        # Pass by keyword: the first positional parameter of
        # stop_recording is cancel_save, not auto_play.
        self.stop_recording(auto_play=auto_play)
|
|
|
|
def stop_recording_btn_change(self, btn_text):
    """Set the label of the record button to ``btn_text``."""
    button = self.record_button
    button.config(text=btn_text)
|
|
|
|
|
|
def start_recording(self, play_confirm_sound=False):
    """Start recording from the selected input device on a background thread.

    Args:
        play_confirm_sound: When True (the hotkey path), feedback is given
            with sounds rather than dialogs: a notice sound if no input
            device is selected, and a confirmation sound once the stream
            is open.

    Recorded chunks are appended to ``self.frames`` until
    ``self.recording`` becomes False. If the stream cannot be opened, an
    error dialog is shown and the recording state is rolled back without
    saving.
    """
    selected_device_name = self.input_device_index.get()
    input_device_id = self.available_input_devices.get(selected_device_name)

    if input_device_id is None:
        if play_confirm_sound:
            self.play_sound('assets/please-select-input.wav')
        else:
            messagebox.showerror("Error", "Selected audio device is not available.")
        return

    device_info = self.get_device_info(input_device_id)
    sample_rate = int(device_info['defaultSampleRate'])

    print(f"Device info: {device_info}")

    # Reset recording state up front so the failure path below, which
    # calls stop_recording, always finds these attributes.
    self.record_thread = None
    self.stream = None
    self.p = None
    self.frames = []

    try:
        self.recording = True
        self.record_button.config(text="Stop and Insert", style='Recording.TButton')
        self.submit_button.config(text="Stop and Play", style='Recording.TButton')

        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=pyaudio.paInt16, channels=1, rate=sample_rate, input=True, frames_per_buffer=1024, input_device_index=input_device_id)

        if play_confirm_sound:
            self.play_sound('assets/pop.wav')

        def record():
            # Runs on the worker thread until stop_recording clears the flag.
            while self.recording:
                data = self.stream.read(1024, exception_on_overflow=False)
                self.frames.append(data)

        self.record_thread = threading.Thread(target=record)
        self.record_thread.start()

    except Exception as e:
        messagebox.showerror("Recording Error", f"Failed to record audio: {str(e)}")
        self.stop_recording(True)  # cancel_save=True: nothing usable was recorded
|
|
|
|
def stop_recording(self, cancel_save=False, auto_play=False):
    """Stop the current recording, release audio resources and reset the buttons.

    Args:
        cancel_save: When True, the recording is discarded instead of
            being saved (and transcribed).
        auto_play: Forwarded to ``save_recording``.

    Safe to call when no recording was ever started: missing thread,
    stream or PyAudio attributes are treated as absent.
    """
    self.recording = False  # tells the worker thread to leave its loop

    record_thread = getattr(self, 'record_thread', None)
    if record_thread:
        record_thread.join()
        self.record_thread = None

    stream = getattr(self, 'stream', None)
    if stream:
        stream.stop_stream()
        stream.close()
        self.stream = None  # do not stop/close the same stream twice

    audio = getattr(self, 'p', None)
    if audio:
        # self.p is terminated but kept, because save_recording reads it.
        audio.terminate()

    if not cancel_save:
        self.save_recording(auto_play=auto_play)

    # Revert both buttons to the labels and styles they had at start-up.
    self.record_button.config(text="Record Mic", style='TButton')
    self.submit_button.config(text="Play Audio", style='Green.TButton')
|
|
|
|
|
|
def save_recording(self, auto_play = False):
    """Write the recorded frames to ``output.wav`` and schedule transcription.

    Args:
        auto_play: Forwarded to ``transcribe_audio``.

    The WAV header uses the default sample rate of the currently selected
    input device, which is the rate ``start_recording`` opens the stream
    with. 44100 Hz is used if that rate cannot be determined.
    """
    file_path = "output.wav"

    # NOTE(review): assumes the input device selection has not changed
    # since the recording was started - confirm.
    sample_rate = 44100
    try:
        device_id = self.available_input_devices.get(self.input_device_index.get())
        if device_id is not None:
            sample_rate = int(self.get_device_info(device_id)['defaultSampleRate'])
    except (OSError, KeyError, TypeError, ValueError) as e:
        print(f"Could not determine recording sample rate, using {sample_rate}: {e}")

    wf = wave.open(file_path, 'wb')
    try:
        wf.setnchannels(1)
        wf.setsampwidth(self.p.get_sample_size(pyaudio.paInt16))
        wf.setframerate(sample_rate)
        wf.writeframes(b''.join(self.frames))
    finally:
        wf.close()
    print("Recording saved.")

    # Run the transcription from the Tk event loop.
    self.after(0, self.transcribe_audio, file_path, auto_play)
|
|
|
|
|
|
|
|
|
|
def transcribe_audio(self, file_path, auto_play = False):
    """Transcribe an audio file and optionally speak the result.

    Args:
        file_path: Path of the audio file to transcribe.
        auto_play: When True, the resulting text is passed to
            ``submit_text_helper`` for playback.

    If both "chat_gpt_completion" and "auto_apply_ai_to_recording" are
    enabled in the settings, the transcription goes through ``apply_ai``;
    otherwise it replaces the contents of the text area. Any error is
    printed, not raised.
    """
    try:
        with open(str(file_path), "rb") as audio_file:
            transcription = self.client.audio.transcriptions.create(
                file=audio_file,
                model="whisper-1",
                response_format="verbose_json"
            )

        settings = self.load_settings()

        # AI post-processing needs the feature and the auto-apply flag both on.
        ai_ready = settings["chat_gpt_completion"] and settings["auto_apply_ai_to_recording"]
        auto_apply_ai = settings["auto_apply_ai_to_recording"] if ai_ready else False

        print(f"auto_apply_ai: {auto_apply_ai}")

        if not auto_apply_ai:
            print("outputting without ai")
            # Replace the text area content with the transcription.
            self.text_input.delete("1.0", tk.END)
            self.text_input.insert("1.0", transcription.text)
            play_text = transcription.text
        else:
            print("applying ai")
            play_text = self.apply_ai(transcription.text)

        if auto_play:
            print(f"Triggering auto play with: {play_text} ")
            self.submit_text_helper(play_text = play_text)

        print("Transcription Complete: The audio has been transcribed and the text has been placed in the input area.")

    except Exception as e:
        print(f"Transcription error: An error occurred during transcription: {str(e)}")
|
|
|
|
|
|
def load_settings(self):
    """Load ``settings.json`` and return its contents as a dict.

    Returns:
        The parsed settings. If the file does not exist, the built-in
        defaults are written to disk and returned. If the file exists but
        is empty, malformed or does not contain a JSON object, the defaults
        are returned and the file on disk is left untouched.
    """
    settings_file = self.get_settings_file_path("settings.json")

    defaults = {
        "chat_gpt_completion": False,
        "model": "gpt-4o-mini",
        "prompt": "",
        "auto_apply_ai_to_recording": False,
        "hotkeys": {
            "record_start_stop": ["ctrl", "shift", "0"],
            "stop_recording": ["ctrl", "shift", "9"],
            "play_last_audio": ["ctrl", "shift", "8"],
        },
    }

    try:
        with open(settings_file, "r") as f:
            loaded = json.load(f)
    except FileNotFoundError:
        # First run: persist the defaults so later loads find a file.
        self.save_settings_to_JSON(defaults)
        return defaults
    except ValueError as e:
        # json.JSONDecodeError (and UnicodeDecodeError) are ValueErrors.
        # Do not overwrite the file: the user may want to repair it by hand.
        print(f"Settings file {settings_file} could not be parsed, using defaults: {e}")
        return defaults

    if not isinstance(loaded, dict):
        # Callers index the result by key, so anything but an object is
        # unusable.
        print(f"Settings file {settings_file} does not contain a JSON object, using defaults.")
        return defaults

    return loaded
|
|
|
|
def save_settings_to_JSON(self, settings):
    """Serialise ``settings`` to ``settings.json``, replacing its contents.

    Args:
        settings: JSON-serialisable settings object.

    Raises:
        TypeError: If ``settings`` contains a value ``json`` cannot encode.
            The existing file is left untouched in that case.
    """
    settings_file = self.get_settings_file_path("settings.json")

    # Encode before opening: opening with "w" truncates immediately, so a
    # serialisation failure would otherwise wipe the previous settings.
    payload = json.dumps(settings)

    with open(settings_file, "w") as f:
        f.write(payload)
|
|
|
|
def get_settings_file_path(self, filename):
    """Return the path at which the settings file ``filename`` is stored.

    On macOS the name is placed under the directory returned by
    ``get_app_support_path_mac``; on every other platform ``filename`` is
    returned unchanged, i.e. relative to the current directory.
    """
    if platform.system() != 'Darwin':
        return filename

    support_dir = self.get_app_support_path_mac()
    return f"{support_dir}/{filename}"
|
|
|
|
def chat_gpt_settings(self):
    """Open a modal window for editing the ChatGPT manipulation settings.

    The widgets are pre-filled from ``load_settings``; pressing "Save" passes
    the edited values to ``save_chat_gpt_settings``.
    """
    settings = self.load_settings()
    settings_window = tk.Toplevel(self)
    settings_window.title("ChatGPT Manipulation Settings")
    settings_window.grab_set()  # Grab the focus on this toplevel window

    main_frame = ttk.Frame(settings_window, padding="10")
    main_frame.grid(column=0, row=0, sticky=(tk.W, tk.E, tk.N, tk.S))

    # Use the ttk style for uniformity.
    # NOTE(review): theme_use() switches the theme globally, not just for
    # this window -- confirm this is intended on platforms that start with
    # a different theme.
    style = ttk.Style()
    style.theme_use('clam')

    enable_completion = tk.BooleanVar(value=settings.get("chat_gpt_completion", False))
    ttk.Checkbutton(main_frame, text="Enable ChatGPT Completion", variable=enable_completion).grid(row=0, column=1, sticky=tk.W, pady=2)

    # Model selection.
    # ttk.OptionMenu's third positional argument is the *default*, which it
    # writes into the variable; only the arguments after it become menu
    # entries. Passing the current value as the default keeps the saved
    # choice selected, and listing every model afterwards keeps them all
    # selectable.
    model_choices = ("gpt-4o-mini", "gpt-4o", "gpt-4-turbo")
    model_var = tk.StringVar(value=settings.get("model") or model_choices[0])
    ttk.Label(main_frame, text="Model:").grid(row=1, column=0, sticky=tk.W, pady=2)
    ttk.OptionMenu(main_frame, model_var, model_var.get(), *model_choices).grid(row=1, column=1, sticky=tk.W, pady=2)

    # Max Tokens selection (same default-vs-values handling as above).
    max_tokens_choices = (100, 250, 500, 750, 1000, 1250, 1500)
    max_tokens_var = tk.IntVar(value=settings.get("max_tokens") or 750)
    ttk.Label(main_frame, text="Max Tokens:").grid(row=2, column=0, sticky=tk.W, pady=2)
    max_tokens_menu = ttk.OptionMenu(main_frame, max_tokens_var, max_tokens_var.get(), *max_tokens_choices)
    max_tokens_menu.grid(row=2, column=1, sticky=tk.W, pady=2)

    # Prompt entry as a Text area
    ttk.Label(main_frame, text="Prompt:").grid(row=3, column=0, sticky=tk.NW, pady=2)
    prompt_entry = tk.Text(main_frame, height=4, width=40)
    prompt_entry.insert('1.0', settings.get("prompt", ""))
    prompt_entry.grid(row=3, column=1, sticky=tk.W, pady=2)

    # Auto-apply checkbox
    auto_apply = tk.BooleanVar(value=settings.get("auto_apply_ai_to_recording", False))
    ttk.Checkbutton(main_frame, text="Auto Apply to Recorded Transcript", variable=auto_apply).grid(row=4, column=1, sticky=tk.W, pady=2)

    # Save button: widget values are read when the button is pressed.
    save_btn = ttk.Button(main_frame, text="Save", command=lambda: self.save_chat_gpt_settings({
        "chat_gpt_completion": enable_completion.get(),
        "model": model_var.get(),
        "prompt": prompt_entry.get("1.0", tk.END).strip(),
        "auto_apply_ai_to_recording": auto_apply.get(),
        "max_tokens": max_tokens_var.get()
    }))
    save_btn.grid(row=5, column=1, sticky=tk.W + tk.E, pady=10)
|
|
|
|
def save_chat_gpt_settings(self, settings):
    """Persist the ChatGPT settings and confirm to the user.

    Args:
        settings: Dict of ChatGPT-related settings to store. Keys already in
            the settings file but absent from ``settings`` (for example
            ``hotkeys``) are kept.
    """
    # save_settings_to_JSON rewrites the whole file, so merge onto what is
    # already stored instead of dropping unrelated keys.
    merged = {**self.load_settings(), **settings}
    self.save_settings_to_JSON(merged)
    messagebox.showinfo("Settings Updated", "Your settings have been saved successfully.")
|
|
|
|
def apply_ai(self, input_text=None):
    """Run text through the configured chat model and return the result.

    Args:
        input_text: Text to process. When ``None`` the current contents of
            the text input box are used.

    Returns:
        The model's reply when the ``chat_gpt_completion`` setting is
        enabled (the reply also replaces the contents of the text input
        box); otherwise the input text unchanged.
    """
    if input_text is None:
        print("Will apply AI to UI input box")
        text = self.text_input.get("1.0", tk.END).strip()
    else:
        print("Will apply AI to input_text")
        text = input_text

    settings = self.load_settings()

    # .get() so settings files lacking "max_tokens" fall back to 750 instead
    # of raising KeyError.
    var_max_tokens = settings.get("max_tokens") or 750

    print(f"GPT Settings: {settings}")
    print(f"Max Tokens: {var_max_tokens}")

    if not settings.get("chat_gpt_completion"):
        return text

    response = self.client.chat.completions.create(
        model=settings.get("model", "gpt-4o-mini"),
        messages=[
            {"role": "system", "content": settings.get("prompt", "")},
            {"role": "user", "content": "\n\n# Apply to the following (Do not output system prompt or hyphens markup or anything before this line):\n\n-----\n\n" + text + "\n\n-----"}],
        # Honour the configured limit rather than a hard-coded value.
        max_tokens=var_max_tokens
    )
    return_text = response.choices[0].message.content

    self.text_input.delete("1.0", tk.END)
    self.text_input.insert("1.0", return_text)

    return return_text
|
|
|
|
|