Compare commits

...

10 Commits

Author SHA1 Message Date
Andrew Ward
20358adafb update gitignore 2025-03-25 10:43:06 +00:00
Andrew Ward
13de96fdad update version number 2025-03-25 10:09:13 +00:00
Andrew Ward
a529835037 address bugs around api key and non api key app modes, also refresh app on key input. Grey out buttons and add warnings for disabled features 2025-03-25 10:06:39 +00:00
Andrew Ward
aa092a3c15 update to add system voices and enable basic app use without an API key 2025-03-25 09:44:32 +00:00
Andrew Ward
2ed693edd8 readme new line 2025-03-22 12:53:58 +00:00
Andrew Ward
dac0e8ce0e add screenshots and update readme 2025-03-22 12:53:22 +00:00
Andrew Ward
86de047170 version 1.4.0 2025-03-22 12:43:31 +00:00
Andrew Ward
e69e45343e fix main ui select box layout and area sizing 2025-03-22 12:30:06 +00:00
Andrew Ward
a2a7c6f9a5 lots of ux improvements and small bug fixes 2025-03-22 12:18:40 +00:00
Andrew Ward
0e3cb8925a add check for updates banner 2025-03-22 11:47:57 +00:00
24 changed files with 777 additions and 293 deletions

7
.gitignore vendored
View File

@@ -4,9 +4,14 @@
build/
dist/
settings.json
resampled_last_output.wav
output.wav
temp_speech_output.wav
resampled_temp_speech_output.wav
last_output.wav
output.wav
**output.wav
.vs
vs/
.zip

View File

@@ -4,13 +4,15 @@
**Text to Mic** is a powerful, user-friendly application that seamlessly converts written text into natural-sounding speech, playing it over a virtual microphone. This tool is perfect for situations where you want to automate voice output — whether it's for presentations, online meetings, voiceovers, or accessibility purposes.
![Example Image 1](images/app-screenshot-v1-3-5.png)
![Main UI](images/main-ui-v1-4-0.png)
More info about Text to Mic including a changelog can be found here:
https://www.scorchsoft.com/blog/text-to-mic-for-meetings/
Developed by Scorchsoft, a leading app development agency, Text to Mic leverages the advanced capabilities of OpenAI's speech synthesis models to produce high-quality audio output. By using a virtual microphone (created with VB-Cable), it allows you to stream generated speech to any application that accepts microphone input, such as Zoom, Google Meet, Microsoft Teams, or any other conferencing tool.
### Key Features:
- **Seamless Text-to-Speech Conversion**: Converts any text into clear, human-like speech with just a few clicks or commands.
- **Virtual Microphone Integration**: Outputs speech directly to a virtual microphone, allowing you to use it in any communication or recording software.
@@ -25,6 +27,21 @@ This script uses OpenAI to convert text to speech, and then speak that speech ov
This project is licensed under the terms of the [GNU Lesser General Public License (LGPL) v3.0](LICENSE.md) as well as additional terms of use. By using this software, you agree to the terms and conditions outlined in the [LICENSE.md](LICENSE.md) file.
## Screenshots
![Main UI with Presets Expanded](images/main-ui-plus-presets-v1-4-0.png)
Manage different speaking tones available:
![Tone Manager](images/tone-manager-v1-4-0.png)
Customise keyboard shortcuts:
![Keyboard shortcut management](images/keyboard-shortcuts-v1-4-0.png)
Enable AI Copyediting:
![AI Copyediting](images/ai-copyediting-v1-4-0.png)
## Running the GUI version

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 KiB

BIN
images/api-key-v1-4-0.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 12 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 66 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 231 KiB

BIN
images/main-ui-v1-4-0.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 95 KiB

BIN
images/presets-v1-4-0.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 156 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 175 KiB

Binary file not shown.

BIN
temp_speech_output.wav Normal file

Binary file not shown.

View File

@@ -19,7 +19,18 @@ class AIEditorManager:
self.default_model = "gpt-4o-mini"
def show_settings(self):
"""Display the AI copy editing settings dialog"""
"""Show dialog for AI Editor settings."""
# Check if API key is available and show info banner if not
if not self.app.has_api_key:
messagebox.showinfo(
"API Key Required",
"AI copyediting requires an OpenAI API key.\n\n"
"Please add your API key in Settings first."
)
# Optionally, proceed to show settings anyway or return here
# If you want to stop and not show settings, add: return
# Proceed with showing settings dialog
settings = self.app.load_settings()
settings_window = tk.Toplevel(self.app)
settings_window.title("AI Copy Editing Settings")
@@ -62,7 +73,7 @@ class AIEditorManager:
# Prompt entry renamed to "Copy Editing Rules" with a Text area
ttk.Label(main_frame, text="Copy Editing Rules:").grid(row=4, column=0, sticky=tk.NW, pady=2)
prompt_entry = tk.Text(main_frame, height=8, width=50)
prompt_entry = tk.Text(main_frame, height=8, width=50, background="white", font=("Arial", 10))
# Default prompt example if none exists
default_prompt = "Edit the text provided to ensure it has a clear, professional tone. Fix any grammatical errors, improve sentence structure, and maintain consistent formatting. Make the language concise and impactful while preserving the original meaning. Make sure to edit text only and do not reply to it."
@@ -112,17 +123,31 @@ class AIEditorManager:
if hasattr(self.app, 'editing_status'):
self.app.editing_status.config(text=status_text)
def apply_ai(self, input_text=None, update_ui=None):
"""
Apply AI editing to text
def apply_ai(self, input_text=None, update_ui=False):
"""Apply AI to the given text or the current text in the input box."""
# Check if API key is available
if not self.app.has_api_key:
messagebox.showinfo(
"API Key Required",
"AI copyediting requires an OpenAI API key.\n\n"
"Please add your API key in Settings to use this feature."
)
return None
# Get settings to check if AI copy editing is enabled
settings = self.app.load_settings()
if not settings.get("chat_gpt_completion", False):
# If called with update_ui=True, we should show a message
if update_ui:
messagebox.showinfo(
"AI Copy Editing Disabled",
"AI copy editing is currently disabled in settings.\n\n"
"Please enable it in Settings → AI Copyediting before using this feature."
)
# Return the original text unchanged
return input_text if input_text is not None else self.app.text_input.get("1.0", tk.END).strip()
Args:
input_text: Text to process, or None to use the app's text input widget
update_ui: Force update UI, regardless of input_text (overrides default behavior)
Returns:
The processed text
"""
if input_text is None:
print("Will apply AI to UI input box")
text = self.app.text_input.get("1.0", tk.END).strip()
@@ -133,9 +158,8 @@ class AIEditorManager:
# If update_ui is explicitly set, use that value, otherwise default to False
update_input_box = update_ui if update_ui is not None else False
settings = self.app.load_settings()
if settings["chat_gpt_completion"] and settings["max_tokens"]:
# The rest of the method continues as before
if settings["max_tokens"]:
var_max_tokens = settings["max_tokens"]
else:
var_max_tokens = 750
@@ -143,30 +167,21 @@ class AIEditorManager:
print(f"GPT Settings: {settings}")
print(f"Max Tokens: {var_max_tokens}")
if settings["chat_gpt_completion"]:
# Assuming OpenAI's completion method is configured correctly
response = self.app.client.chat.completions.create(
model=settings["model"],
messages=[
{"role": "system", "content": settings["prompt"] },
{"role": "user", "content": "\n\n# Apply to the following (Do not output system prompt or hyphens markup or anything before this line):\n\n-----\n\n" + text + "\n\n-----"}],
max_tokens=var_max_tokens
)
processed_text = response.choices[0].message.content
# If we're processing text from the UI directly or update_input_box was specified,
# update the UI
if update_input_box:
self.app.text_input.delete("1.0", tk.END)
self.app.text_input.insert("1.0", processed_text)
return processed_text
else:
# Even if chat_gpt_completion is disabled, we should still update the input
# when update_input_box is True
if update_input_box:
# No need to update here, as the text hasn't changed
pass
return text
# Assuming OpenAI's completion method is configured correctly
response = self.app.client.chat.completions.create(
model=settings["model"],
messages=[
{"role": "system", "content": settings["prompt"] },
{"role": "user", "content": "\n\n# Apply to the following (Do not output system prompt or hyphens markup or anything before this line):\n\n-----\n\n" + text + "\n\n-----"}],
max_tokens=var_max_tokens
)
processed_text = response.choices[0].message.content
# If we're processing text from the UI directly or update_input_box was specified,
# update the UI
if update_input_box:
self.app.text_input.delete("1.0", tk.END)
self.app.text_input.insert("1.0", processed_text)
return processed_text

View File

@@ -1,5 +1,6 @@
import os
import platform
import sys
from pathlib import Path
from tkinter import messagebox, simpledialog
from dotenv import load_dotenv
@@ -63,17 +64,52 @@ class APIKeyManager:
# If no API key is found, prompt the user
if not api_key and parent:
parent.show_instructions() # Show the "How to Use" modal after setting the key
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:", parent=parent)
if api_key:
try:
if platform.system() == 'Darwin':
APIKeyManager.save_api_key_mac(api_key)
else:
APIKeyManager.save_api_key(api_key)
messagebox.showinfo("API Key Set", "The OpenAI API Key has been updated successfully.")
except Exception as e:
messagebox.showerror("Error", f"Failed to save API key: {str(e)}")
# Check if this is a first-time run by checking for settings file
settings_file = Path(parent.get_settings_file_path("settings.json"))
first_time_run = not settings_file.exists()
# Don't show instructions automatically
# parent.show_instructions() # Show the "How to Use" modal after setting the key
response = messagebox.askyesno(
"API Key Required",
"An OpenAI API Key is required for full functionality, such as speech to text and OpenAI voices.\n\n"
"Without an API key, you can still use basic system voices with text to speech.\n\n"
"Would you like to enter an API key now?",
parent=parent
)
if response:
# Show instructions only when user wants to add an API key
if first_time_run:
parent.show_instructions()
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:", parent=parent)
if api_key:
try:
if platform.system() == 'Darwin':
APIKeyManager.save_api_key_mac(api_key)
else:
APIKeyManager.save_api_key(api_key)
# Check if this is the first time setting the key
if first_time_run:
messagebox.showinfo(
"API Key Set - Restarting",
"The OpenAI API Key has been saved. The application will now restart to apply changes."
)
# Schedule a restart after the message dialog is closed
parent.after(200, lambda: APIKeyManager.restart_application(parent))
else:
messagebox.showinfo("API Key Set", "The OpenAI API Key has been updated successfully.")
except Exception as e:
messagebox.showerror("Error", f"Failed to save API key: {str(e)}")
else:
messagebox.showinfo(
"Limited Functionality",
"You are using the basic version with system voices only.\n\n"
"To access OpenAI voices and other features, you can add an API key later in Settings."
)
return api_key
@@ -84,6 +120,28 @@ class APIKeyManager:
if new_key:
success = APIKeyManager.save_api_key(new_key)
if success:
messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
# Check if the first time adding a key (no existing key)
is_first_key = not parent.has_api_key
if is_first_key:
messagebox.showinfo(
"API Key Set - Restarting",
"The OpenAI API Key has been saved. The application will now restart to apply changes."
)
# Schedule a restart after the message dialog is closed
parent.after(200, lambda: APIKeyManager.restart_application(parent))
else:
messagebox.showinfo("API Key Updated", "The OpenAI API Key has been updated successfully.")
return new_key
return None
return None
@staticmethod
def restart_application(root):
    """Restart the application by replacing the current process.

    Args:
        root: The Tk root window; it is destroyed before the new process
            image is loaded.

    On success this call does not return: the running process is replaced
    by a fresh copy launched with the same command-line arguments.
    """
    # Tear down the current UI before the process image is replaced.
    root.destroy()

    executable = sys.executable
    if getattr(sys, "frozen", False):
        # Frozen bundles (sys.frozen is set, e.g. by PyInstaller) report the
        # application binary as both sys.executable and sys.argv[0]. Forwarding
        # all of sys.argv would hand the new process its own path as a
        # spurious extra argument, so only the real arguments are kept.
        new_argv = [executable, *sys.argv[1:]]
    else:
        # Running from source: interpreter, then the script and its arguments.
        new_argv = [executable, *sys.argv]

    os.execv(executable, new_argv)

View File

@@ -876,4 +876,28 @@ You can also access tone presets from Settings → Manage Tone Presets to modify
refresh_btn.pack(pady=(5, 10))
# Add a close button
ttk.Button(main_frame, text="Close", command=instruction_window.destroy).pack(pady=5)
ttk.Button(main_frame, text="Close", command=instruction_window.destroy).pack(pady=5)
def handle_ai_edit_hotkey(self):
    """Run AI copyediting on the input text when its hotkey is pressed.

    An informational dialog is shown, and nothing else happens, when the
    parent app has no API key or when AI copy editing is switched off in
    the saved settings.
    """
    app = self.parent

    if app.has_api_key:
        ai_enabled = app.load_settings().get("chat_gpt_completion", False)
        if ai_enabled:
            # Both preconditions hold: delegate to the app's AI edit action.
            app.apply_ai_to_input()
        else:
            messagebox.showinfo(
                "AI Copy Editing Disabled",
                "AI copy editing is currently disabled in settings.\n\n"
                "Please enable it in Settings → AI Copyediting before using this feature."
            )
    else:
        messagebox.showinfo(
            "API Key Required",
            "AI copyediting requires an OpenAI API key.\n\n"
            "Please add your API key in Settings to use this feature."
        )

View File

@@ -44,6 +44,8 @@ class PresetsManager:
# Bind to window resize for responsive layout
self.parent.bind("<Configure>", self.on_window_resize)
# This resize timer helps batch updates during resizing
self.resize_timer = None
def create_presets_section(self):
"""Create the presets section UI with accordion behavior."""
@@ -146,6 +148,9 @@ class PresetsManager:
# Configure the scroll region to update when the frame changes
self.presets_scrollable_frame.bind("<Configure>",
lambda e: self.presets_canvas.configure(scrollregion=self.presets_canvas.bbox("all")))
# Add a binding for the canvas size changes
self.presets_canvas.bind("<Configure>", self.on_canvas_resize)
# Populate tabs and presets
self.populate_tabs() # Refresh tabs to show selection
@@ -159,10 +164,9 @@ class PresetsManager:
"""Handler for window resize events to adjust the presets layout."""
# Only proceed if event is from the main window and presets are visible
if event and event.widget == self.parent and not self.presets_collapsed:
# Schedule a refresh after a short delay to prevent excessive updates during resize
if hasattr(self, 'resize_timer') and self.resize_timer:
self.parent.after_cancel(self.resize_timer)
self.resize_timer = self.parent.after(100, self.refresh_presets_display)
# We don't need to immediately refresh here, as on_canvas_resize will handle it
# This is because the window resize will trigger canvas resize events
pass
def _adjust_row_weights(self):
"""Adjust row weights to prioritize presets area expansion."""
@@ -313,14 +317,17 @@ class PresetsManager:
canvas_width = self.presets_canvas.winfo_width()
# Ensure we have a minimum width to calculate with
if canvas_width < 50: # If the canvas is too narrow or not yet realized
canvas_width = self.parent.winfo_width() - 30 # Estimate canvas width
canvas_width = self.parent.winfo_width() - 40 # Estimate canvas width with more margin for scrollbar
# Calculate number of columns (minimum 1, maximum 20)
min_card_width = 140 # Minimum width for each card
num_columns = max(1, min(20, canvas_width // min_card_width))
# Log for debugging - can be removed in production
print(f"Canvas width: {canvas_width}, Columns: {num_columns}")
# Dynamically adjust card width based on available space
preset_width = max(min_card_width, canvas_width // num_columns - 8)
preset_width = max(min_card_width, (canvas_width // num_columns) - 10) # Slightly more padding
preset_height = 100
# Configure columns to fill available space
@@ -715,4 +722,15 @@ class PresetsManager:
# Save and refresh
self.debounced_save()
self.refresh_presets_display()
self.refresh_presets_display()
def on_canvas_resize(self, event=None):
    """Debounce presets refreshes while the presets canvas changes size.

    Args:
        event: The event that triggered the call; it is not inspected.
    """
    if self.presets_collapsed:
        # Presets are hidden, so there is nothing to lay out again.
        return

    pending = getattr(self, 'resize_timer', None)
    if pending:
        # Drop the refresh queued by the previous resize event so that only
        # the most recent one ends up running.
        self.parent.after_cancel(pending)

    # Re-arm the timer; the refresh runs once resizing pauses for 150 ms.
    self.resize_timer = self.parent.after(150, self.refresh_presets_display)

View File

@@ -87,7 +87,7 @@ class SettingsManager:
settings_file = cls.get_settings_file_path()
with open(settings_file, "w") as f:
json.dump(settings, f)
json.dump(settings, f, indent=4)
@classmethod
def update_settings(cls, partial_settings):

View File

@@ -9,6 +9,8 @@ import json
import sys
import time
import requests
import pyttsx3
import tempfile
from pystray import Icon as icon, MenuItem as item, Menu as menu
from PIL import Image, ImageDraw, ImageTk
@@ -40,10 +42,9 @@ class TextToMic(tk.Tk):
def __init__(self):
super().__init__()
self.version = "1.3.5"
self.version = "1.4.1"
self.title(f"Text to Mic by Scorchsoft.com - v{self.version}")
# Add these lines to set up the window icon
icon_path = self.resource_path("assets/logo-circle-32.png")
self.iconphoto(False, tk.PhotoImage(file=icon_path))
@@ -58,13 +59,24 @@ class TextToMic(tk.Tk):
# Fixed window dimensions for all states - DEFINED ONCE as class constants
# These are the ONLY values that should be used throughout the application
self.BASE_WIDTH = 590
self.BASE_HEIGHT_WITH_BANNER = 860
self.BASE_HEIGHT_NO_BANNER = 700
self.COLLAPSED_HEIGHT_WITH_BANNER = 630
self.BASE_HEIGHT_WITH_BANNER = 890
self.BASE_HEIGHT_NO_BANNER = 730
self.COLLAPSED_HEIGHT_WITH_BANNER = 620
self.COLLAPSED_HEIGHT_NO_BANNER = 512
# Initial window geometry
# Initial window geometry - start with base size
self.geometry(f"{self.BASE_WIDTH}x{self.BASE_HEIGHT_WITH_BANNER}")
# Center the window immediately before any popups appear
self.center_window()
# Withdraw window temporarily to prevent flashing before everything is ready
self.withdraw()
# Initialize system TTS engine
self.engine = pyttsx3.init()
self.engine.setProperty('rate', 150)
self.system_voices = self.engine.getProperty('voices')
self.available_models = ["gpt-4o-mini", "gpt-4o", "gpt-4-turbo"]
self.default_model = "gpt-4o-mini"
@@ -108,14 +120,11 @@ class TextToMic(tk.Tk):
# Get API key using APIKeyManager
self.api_key = APIKeyManager.get_api_key(self)
if not self.api_key:
messagebox.showinfo("API Key Needed", "Please provide your OpenAI API Key.")
self.destroy()
return
self.has_api_key = bool(self.api_key)
if self.has_api_key:
self.client = OpenAI(api_key=self.api_key)
self.client = OpenAI(api_key=self.api_key)
# Initializing device index variables before they are used
self.device_index = tk.StringVar(self)
self.device_index_2 = tk.StringVar(self)
@@ -161,12 +170,41 @@ class TextToMic(tk.Tk):
if self.banner_var.get():
self.toggle_banner()
# Center the window on the screen
self.center_window()
# Schedule version check after app is fully loaded
# Only check automatically if the setting is enabled
if self.auto_check_version.get():
# Delay the check to ensure UI is fully loaded
self.after(2000, self.version_checker.check_version, False)
# At the end of __init__, after all initialization:
# Make the window visible again, now properly centered and with all elements loaded
self.deiconify()
# Set initial window height based on banner and presets state
self.update_window_size()
def center_window(self):
    """Move the window so that it sits in the middle of the screen."""
    # Flush pending geometry work so the reported window size is current.
    self.update_idletasks()

    screen_w, screen_h = self.winfo_screenwidth(), self.winfo_screenheight()
    win_w, win_h = self.winfo_width(), self.winfo_height()

    # Half of the leftover space on each axis gives the top-left corner.
    offset_x = (screen_w - win_w) // 2
    offset_y = (screen_h - win_h) // 2

    # Only the position is passed to geometry(); no size is specified.
    self.geometry(f"+{offset_x}+{offset_y}")
def ensure_config_directory(self):
"""Ensure the config directory exists."""
config_dir = Path("config")
@@ -188,47 +226,62 @@ class TextToMic(tk.Tk):
self.menubar = Menu(self)
self.config(menu=self.menubar)
# Get current hotkey settings
settings = self.load_settings()
hotkey_manager = self.hotkey_manager if hasattr(self, 'hotkey_manager') else None
# Format hotkeys for display in menus
if hotkey_manager:
replay_shortcut = hotkey_manager.format_shortcut(settings["hotkeys"]["play_last_audio"])
record_shortcut = hotkey_manager.format_shortcut(settings["hotkeys"]["record_start_stop"])
stop_shortcut = hotkey_manager.format_shortcut(settings["hotkeys"]["stop_recording"])
cancel_shortcut = hotkey_manager.format_shortcut(settings["hotkeys"]["cancel_operation"])
else:
# Default values if hotkey_manager isn't available
replay_shortcut = "Ctrl+Shift+8"
record_shortcut = "Ctrl+Shift+0"
stop_shortcut = "Ctrl+Shift+9"
cancel_shortcut = "Ctrl+Shift+1"
# File or settings menu
settings_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Settings", menu=settings_menu)
settings_menu.add_command(label="Change API Key", command=self.change_api_key)
settings_menu.add_command(label="AI Copy Editing", command=self.show_ai_editor_settings)
settings_menu.add_command(label="Hotkey Settings", command=self.show_hotkey_settings)
settings_menu.add_command(label="Manage Tone Presets", command=self.show_tone_presets_manager)
settings_menu.add_command(label="API Key", command=self.change_api_key)
settings_menu.add_command(label="AI Copyediting", command=self.show_ai_editor_settings)
settings_menu.add_command(label="Keyboard Shortcuts", command=self.show_hotkey_settings)
settings_menu.add_command(label="Manage Tones", command=self.show_tone_presets_manager)
settings_menu.add_separator()
# Add presets toggle with checkbox
self.presets_visible_var = tk.BooleanVar(value=not self.presets_collapsed)
settings_menu.add_checkbutton(label="Show Presets", variable=self.presets_visible_var, command=self.toggle_presets_from_menu)
settings_menu.add_checkbutton(label="Auto Check for Updates", variable=self.auto_check_version, command=self.toggle_auto_version_check)
settings_menu.add_checkbutton(label="Hide Scorchsoft Banner", variable=self.banner_var, command=self.toggle_banner)
# Playback menu
playback_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Playback", menu=playback_menu)
playback_menu.add_command(label="Play Last Audio", command=self.play_last_audio)
#apply_ai
input_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Input", menu=input_menu)
input_menu.add_command(label="Apply AI Manipulation to Input Text", command=self.apply_ai_to_input)
self.menubar.add_cascade(label="Actions", menu=playback_menu)
# Add keyboard shortcuts to menu items
playback_menu.add_command(label=f"Replay [{replay_shortcut}]", command=self.play_last_audio)
playback_menu.add_command(label="Apply AI Copyedit", command=self.apply_ai_to_input)
playback_menu.add_separator()
playback_menu.add_command(label=f"Start/Stop Recording [{record_shortcut}]", command=self.handle_record_button_click)
playback_menu.add_command(label=f"Stop Recording [{stop_shortcut}]", command=lambda: self.stop_recording(auto_play=False))
playback_menu.add_command(label=f"Cancel Operation [{cancel_shortcut}]", command=self.stop_playback)
# Help menu
help_menu = Menu(self.menubar, tearoff=0)
self.menubar.add_cascade(label="Help", menu=help_menu)
help_menu.add_command(label="Check Version", command=self.check_version)
help_menu.add_command(label="How to Use", command=self.show_instructions)
help_menu.add_command(label="Terms of Use and Licence", command=self.show_terms_of_use)
help_menu.add_command(label="Check Version", command=self.check_version)
help_menu.add_command(label="Hotkey Instructions", command=self.show_hotkey_instructions)
# Add toggle for automatic version checking
help_menu.add_checkbutton(label="Auto Check for Updates", variable=self.auto_check_version, command=self.toggle_auto_version_check)
# Add toggle for banner visibility - use the existing banner_var from __init__
help_menu.add_checkbutton(label="Hide Banner", variable=self.banner_var, command=self.toggle_banner)
def show_hotkey_settings(self):
    """Open the keyboard-shortcut settings dialog for this window.

    Delegates to ``HotkeyManager.hotkey_settings_dialog``, passing this
    object as its argument.
    """
    HotkeyManager.hotkey_settings_dialog(self)
def show_hotkey_instructions(self):
    """Display the hotkey instructions.

    Delegates to ``HotkeyManager.show_hotkey_instructions``, passing this
    object as its argument.
    """
    HotkeyManager.show_hotkey_instructions(self)
def change_api_key(self):
"""Change the API key using APIKeyManager."""
new_key = APIKeyManager.change_api_key(self)
@@ -290,6 +343,10 @@ class TextToMic(tk.Tk):
self.columnconfigure(0, weight=1)
self.rowconfigure(0, weight=1)
# Configure columns in main_frame to expand properly
main_frame.columnconfigure(0, weight=1)
main_frame.columnconfigure(1, weight=1)
# Store reference to main_frame for version notification
self.main_frame = main_frame
@@ -323,11 +380,13 @@ class TextToMic(tk.Tk):
# Create frames for better organization
voice_frame = ttk.Frame(main_frame)
voice_frame.grid(column=0, row=0, columnspan=2, sticky="ew")
voice_frame.columnconfigure(1, weight=1) # Make the second column expandable
voice_frame.columnconfigure(0, weight=1) # Make the first column expandable
voice_frame.columnconfigure(1, weight=3) # Make the second column expand more
device_frame = ttk.Frame(main_frame)
device_frame.grid(column=0, row=1, columnspan=2, sticky="ew", pady=(10, 0))
device_frame.columnconfigure(1, weight=1) # Make the second column expandable
device_frame.columnconfigure(0, weight=1) # Make the first column expandable
device_frame.columnconfigure(1, weight=3) # Make the second column expand more
# Set fixed width for dropdown menus
dropdown_width = 30
@@ -338,19 +397,47 @@ class TextToMic(tk.Tk):
# Voice and Tone Settings
ttk.Label(voice_frame, text="Voice Settings", font=("Arial", 10, "bold")).grid(column=0, row=0, sticky=tk.W, pady=(0, 10), columnspan=2)
self.voice_var = tk.StringVar(value="fable")
voices = ['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer']
ttk.Label(voice_frame, text="Voice:").grid(column=0, row=1, sticky=tk.W, pady=(0, 5))
voice_menu = ttk.OptionMenu(voice_frame, self.voice_var, self.voice_var.get(), *voices)
voice_menu.grid(column=1, row=1, sticky=tk.E, pady=(0, 5))
# Make sure voice_frame columns expand properly
voice_frame.columnconfigure(1, weight=1)
# Set fixed width for all labels
label_width = 35 # Adjust this value as needed for your UI
# Initialize voice selection
self.available_voices = self.get_available_voices()
# Determine default voice based on whether API key is available
default_voice = "fable" if self.has_api_key else self.available_voices[0] if self.available_voices else "[System] Default"
self.voice_var = tk.StringVar(value=default_voice)
voice_label = ttk.Label(voice_frame, text="Voice:", width=label_width)
voice_label.grid(column=0, row=1, sticky=tk.W, pady=(0, 5))
voice_menu = ttk.OptionMenu(voice_frame, self.voice_var, self.voice_var.get(), *self.available_voices, command=self.on_voice_change)
voice_menu.grid(column=1, row=1, sticky="ew", pady=(0, 5))
voice_menu.config(width=dropdown_width, style='Compact.TMenubutton')
# Tone selection with warning for basic version
self.tone_var = tk.StringVar(value=self.current_tone_name)
tone_options = ["None"] + list(self.tone_presets.keys())
ttk.Label(voice_frame, text="Tone Preset:").grid(column=0, row=2, sticky=tk.W, pady=(0, 5))
tone_label = ttk.Label(voice_frame, text="Tone Preset:", width=label_width)
tone_label.grid(column=0, row=2, sticky=tk.W, pady=(0, 5))
self.tone_menu = ttk.OptionMenu(voice_frame, self.tone_var, self.tone_var.get(), *tone_options, command=self.on_tone_change)
self.tone_menu.grid(column=1, row=2, sticky=tk.E, pady=(0, 5))
self.tone_menu.grid(column=1, row=2, sticky="ew", pady=(0, 5))
self.tone_menu.config(width=dropdown_width, style='Compact.TMenubutton')
# Check if we should disable tone menu based on voice type
if self.voice_var.get().startswith("[System]"):
self.tone_menu.state(['disabled'])
self.tone_var.set("None")
# Add warning label for basic version
if not self.has_api_key:
warning_label = ttk.Label(voice_frame,
text="⚠️ Basic Version - Add API Key in Settings for full features",
foreground="orange",
font=("Arial", 8, "italic"))
warning_label.grid(column=0, row=3, columnspan=2, sticky=tk.W, pady=(5, 0))
# Separator between Voice Settings and Device Settings
separator = ttk.Separator(main_frame, orient='horizontal')
@@ -359,33 +446,42 @@ class TextToMic(tk.Tk):
# Device Settings
ttk.Label(device_frame, text="Device Settings", font=("Arial", 10, "bold")).grid(column=0, row=0, sticky=tk.W, pady=(0, 10), columnspan=2)
ttk.Label(device_frame, text="Input Device (optional):").grid(column=0, row=1, sticky=tk.W, pady=(0, 5))
input_label = ttk.Label(device_frame, text="Input Device (optional):", width=label_width)
input_label.grid(column=0, row=1, sticky=tk.W, pady=(0, 5))
input_device_menu = ttk.OptionMenu(device_frame, self.input_device_index, self.input_device_index.get(),
*self.available_input_devices.keys(),
command=self.on_input_device_change)
input_device_menu.grid(column=1, row=1, sticky=tk.E, pady=(0, 5))
input_device_menu.grid(column=1, row=1, sticky="ew", pady=(0, 5))
input_device_menu.config(width=dropdown_width, style='Compact.TMenubutton')
ttk.Label(device_frame, text="Primary Playback Device:").grid(column=0, row=2, sticky=tk.W, pady=(0, 5))
primary_label = ttk.Label(device_frame, text="Primary Playback Device:", width=label_width)
primary_label.grid(column=0, row=2, sticky=tk.W, pady=(0, 5))
primary_device_menu = ttk.OptionMenu(device_frame, self.device_index, self.device_index.get(),
*self.available_devices.keys(),
command=self.on_primary_device_change)
primary_device_menu.grid(column=1, row=2, sticky=tk.E, pady=(0, 5))
primary_device_menu.grid(column=1, row=2, sticky="ew", pady=(0, 5))
primary_device_menu.config(width=dropdown_width, style='Compact.TMenubutton')
ttk.Label(device_frame, text="Secondary Playback Device (optional):").grid(column=0, row=3, sticky=tk.W, pady=(0, 5))
secondary_label = ttk.Label(device_frame, text="Secondary Playback Device (optional):", width=label_width)
secondary_label.grid(column=0, row=3, sticky=tk.W, pady=(0, 5))
secondary_device_menu = ttk.OptionMenu(device_frame, self.device_index_2, self.device_index_2.get(),
"None", *self.available_devices.keys(),
command=self.on_secondary_device_change)
secondary_device_menu.grid(column=1, row=3, sticky=tk.E, pady=(0, 5))
secondary_device_menu.grid(column=1, row=3, sticky="ew", pady=(0, 5))
secondary_device_menu.config(width=dropdown_width, style='Compact.TMenubutton')
# Make sure device_frame columns expand properly
device_frame.columnconfigure(1, weight=1)
# Text to Read section with proper layout
text_read_frame = ttk.Frame(main_frame)
text_read_frame.grid(column=0, row=4, columnspan=2, sticky="ew", pady=(10, 0))
text_read_frame.columnconfigure(0, weight=1) # Left side expands
text_read_frame.columnconfigure(1, weight=0) # Right side fixed width
# Make sure text_read_frame columns expand properly
text_read_frame.columnconfigure(0, weight=1)
# Text to Read label - Updated to match other section titles
ttk.Label(text_read_frame, text="Text to Read", font=("Arial", 10, "bold")).grid(column=0, row=0, sticky=tk.W, pady=(0, 10))
@@ -402,12 +498,12 @@ class TextToMic(tk.Tk):
self.text_input = tk.Text(main_frame, height=5, width=68)
# Use white background for text input instead of the system background color
text_color = self.style.lookup('TLabel', 'foreground')
self.text_input.configure(bg="white", fg=text_color, insertbackground=text_color, wrap=tk.WORD)
self.text_input.configure(bg="white", fg=text_color, insertbackground=text_color, wrap=tk.WORD, font=("Arial", 10))
self.text_input.grid(column=0, row=5, columnspan=2, pady=(0, 20), sticky="nsew") # Proper spacing
# Add a status frame at the bottom of the text input with white background
status_frame = ttk.Frame(main_frame, style='White.TFrame')
status_frame.grid(column=0, row=5, columnspan=2, sticky=(tk.S, tk.E), pady=(0, 25))
status_frame.grid(column=0, row=5, columnspan=2, sticky=(tk.S, tk.E), pady=(0, 25), padx=(0, 5)) # Add right padding to shift the frame inward
# Create a custom style for the white frame
self.style.configure('White.TFrame', background='white')
@@ -449,7 +545,7 @@ class TextToMic(tk.Tk):
corner_radius=20,
height=button_height,
width=button_width,
fg_color="#058705",
fg_color="#777777" if not self.has_api_key else "#058705", # Grey if no API key, green if API key exists
font=("Arial", 13, "bold"),
command=self.handle_record_button_click
)
@@ -471,7 +567,7 @@ class TextToMic(tk.Tk):
#Credits
# Banner image that links to Scorchsoft
self.banner_frame = ttk.Frame(main_frame)
self.banner_frame.grid(column=0, row=7, columnspan=2, pady=(10, 10))
self.banner_frame.grid(column=0, row=7, columnspan=2, pady=(10, 0))
banner_path = self.resource_path("assets/ss-banner-550.png")
try:
@@ -605,17 +701,43 @@ class TextToMic(tk.Tk):
# If no API key is found, prompt the user
if not api_key:
self.show_instructions() # Show the "How to Use" modal after setting the key
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:", parent=self)
if api_key:
try:
if platform.system() == 'Darwin':
self.save_api_key_mac(api_key)
else:
self.save_api_key(api_key)
messagebox.showinfo("API Key Set", "The OpenAI API Key has been updated successfully.")
except Exception as e:
messagebox.showerror("Error", f"Failed to save API key: {str(e)}")
# Check if this is a first-time run by checking for settings file
settings_file = Path(SettingsManager.get_settings_file_path())
first_time_run = not settings_file.exists()
# No longer show instructions automatically
# if first_time_run:
# self.show_instructions() # Show the "How to Use" modal for first-time users
response = messagebox.askyesno(
"API Key Required",
"An OpenAI API Key is required for full functionality, such as speech to text and OpenAI voices.\n\n"
"Without an API key, you can still use basic system voices with text to speech.\n\n"
"Would you like to enter an API key now?",
parent=self
)
if response:
# Show instructions only when user wants to add an API key
if first_time_run:
self.show_instructions()
api_key = simpledialog.askstring("API Key", "Enter your OpenAI API Key:", parent=self)
if api_key:
try:
if platform.system() == 'Darwin':
self.save_api_key_mac(api_key)
else:
self.save_api_key(api_key)
messagebox.showinfo("API Key Set", "The OpenAI API Key has been updated successfully.")
except Exception as e:
messagebox.showerror("Error", f"Failed to save API key: {str(e)}")
else:
messagebox.showinfo(
"Limited Functionality",
"You are using the basic version with system voices only.\n\n"
"To access OpenAI voices and other features, you can add an API key later in Settings."
)
return api_key
@@ -662,7 +784,6 @@ class TextToMic(tk.Tk):
self.submit_text_helper(play_text = play_text)
def submit_text_helper(self, play_text = None):
if play_text is None:
#Load from GUI if play text not set
text = self.text_input.get("1.0", tk.END).strip()
@@ -674,52 +795,96 @@ class TextToMic(tk.Tk):
return
selected_voice = self.voice_var.get()
is_system_voice = selected_voice.startswith("[System]")
# Check if a tone preset is selected and add it to the text
selected_tone_name = self.tone_var.get()
# Get the actual tone instructions from the tone_presets dictionary
tone_instructions = None
if selected_tone_name != "None" and selected_tone_name in self.tone_presets:
tone_instructions = self.tone_presets[selected_tone_name]
if is_system_voice:
# Use system TTS
system_voice_name = selected_voice.replace("[System] ", "")
for voice in self.system_voices:
if voice.name == system_voice_name:
self.engine.setProperty('voice', voice.id)
break
# Convert device names to indices
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
if primary_index is None:
messagebox.showerror("Error", "Primary device not selected or unavailable.")
return
try:
# Create a proper temporary file with a simple name in current directory
temp_filename = "temp_speech_output.wav"
# Generate audio using system TTS
self.engine.save_to_file(text, temp_filename)
self.engine.runAndWait()
# Store as last audio file for replay
self.last_audio_file = temp_filename
# Play the generated audio
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([temp_filename, temp_filename],
[primary_index, secondary_index])
else:
self.play_audio_multiplexed([temp_filename],
[primary_index])
# We'll leave the file for potential replay rather than deleting it immediately
except Exception as e:
messagebox.showerror("TTS Error", f"Failed to generate or play system voice: {str(e)}")
else:
tone_instructions = "" # Empty string if "None" or not found
# Convert device names to indices
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
if primary_index is None:
messagebox.showerror("Error", "Primary device not selected or unavailable.")
return
print(f"Primary Index: {primary_index}, Secondary Index: {secondary_index}")
print(f"Selected Tone: {selected_tone_name}")
print(f"Tone Instructions: {tone_instructions}")
try:
response = self.client.audio.speech.create(
model="gpt-4o-mini-tts",
voice=selected_voice,
input=text,
instructions=tone_instructions,
response_format='wav'
)
self.last_audio_file = self.get_audio_file_path("last_output.wav")
response.stream_to_file(str(self.last_audio_file))
#Play to either two or a single stream
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
# Use OpenAI TTS
if not self.has_api_key:
messagebox.showerror("API Key Required",
"An OpenAI API Key is required for speech to text or to use OpenAI voices.\n\n"
"Please add your API key in Settings.\n\n"
"Note: You can still use text to speech with the system voices only.")
return
# Check if a tone preset is selected and add it to the text
selected_tone_name = self.tone_var.get()
# Get the actual tone instructions from the tone_presets dictionary
tone_instructions = None
if selected_tone_name != "None" and selected_tone_name in self.tone_presets:
tone_instructions = self.tone_presets[selected_tone_name]
else:
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
tone_instructions = "" # Empty string if "None" or not found
# Convert device names to indices
primary_index = self.available_devices.get(self.device_index.get(), None)
secondary_index = self.available_devices.get(self.device_index_2.get(), None) if self.device_index_2.get() != "None" else None
if primary_index is None:
messagebox.showerror("Error", "Primary device not selected or unavailable.")
return
try:
response = self.client.audio.speech.create(
model="gpt-4o-mini-tts",
voice=selected_voice,
input=text,
instructions=tone_instructions,
response_format='wav'
)
except Exception as e:
messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
self.last_audio_file = self.get_audio_file_path("last_output.wav")
response.stream_to_file(str(self.last_audio_file))
#Play to either two or a single stream
if primary_index and secondary_index != "None" and secondary_index is not None:
self.play_audio_multiplexed([self.last_audio_file, self.last_audio_file],
[primary_index, secondary_index])
else:
self.play_audio_multiplexed([self.last_audio_file],
[primary_index])
except Exception as e:
messagebox.showerror("API Error", f"Failed to generate audio: {str(e)}")
def resample_audio(self, file_path, target_sample_rate):
@@ -760,47 +925,45 @@ class TextToMic(tk.Tk):
try:
# Ensure the file_path is a string when opening the file
wf = wave.open(str(file_path), 'rb')
file_path_str = str(file_path)
print(f"Opening audio file: {file_path_str}")
# Make sure file exists
if not os.path.exists(file_path_str):
messagebox.showerror("File Not Found", f"Could not find audio file: {file_path_str}")
continue
wf = wave.open(file_path_str, 'rb')
except FileNotFoundError:
messagebox.showerror("File Not Found", f"Could not find audio file: {file_path}")
messagebox.showerror("File Not Found", f"Could not find audio file: {file_path_str}")
continue # Skip this iteration and proceed with other files if any
except wave.Error as e:
messagebox.showerror("Wave Error", f"Error reading audio file: {file_path}. Error: {str(e)}")
messagebox.showerror("Wave Error", f"Error reading audio file: {file_path_str}. Error: {str(e)}")
continue
except Exception as e:
messagebox.showerror("File Error", f"Unexpected error with audio file: {str(e)}")
continue
try:
# Ensure output audio sample rate matches that of the selected device
# Get device info including default sample rate
device_info = self.get_device_info(device_index)
sample_rate = int(device_info['defaultSampleRate']) # Fetch default sample rate from device info
sample_rate = int(device_info['defaultSampleRate']) if device_info else 44100
wf_frame_rate = wf.getframerate()
print(f"Sample Rate: {sample_rate}")
print(f"WF Sample Width: {wf_frame_rate}")
print(f"Device Sample Rate: {sample_rate}")
print(f"Audio Sample Rate: {wf_frame_rate}")
if sample_rate is None:
sample_rate = wf_frame_rate
# Make the audio file sample rate match the device output sample rate
# if there is a mismatch (prevents playback speed issues or crashes)
if sample_rate != wf_frame_rate:
# If mismatch, make a new resampled version that matches the output device
resampled_file_path = self.resample_audio(str(file_path), sample_rate)
# Update the playback file to the new resampled file
file_path = resampled_file_path
# Re-open the new file for processing
wf.close() # Close the original file first
wf = wave.open(str(file_path), 'rb')
# Create a stream from our file
# Create a stream from our file with current frame rate (we'll handle resampling for mismatch later)
stream = self.current_playback_p.open(
format=self.current_playback_p.get_format_from_width(wf.getsampwidth()),
channels=wf.getnchannels(),
rate=sample_rate,
rate=wf_frame_rate, # Use audio file's rate for now
output=True,
output_device_index=int(device_index)
)
except Exception as e:
print(f"Stream creation error: {e}")
messagebox.showerror("Stream Creation Error", f"Failed to create audio stream for device index {device_index}: {str(e)}")
wf.close()
continue
@@ -968,6 +1131,26 @@ class TextToMic(tk.Tk):
def apply_ai_to_input(self):
    """Run the AI editor on the current input text, if the feature is usable.

    Two preconditions are checked in order: an API key must be present
    (``self.has_api_key``) and the ``chat_gpt_completion`` setting must be
    truthy. If either fails, an informational dialog is shown and nothing is
    edited. Settings are only loaded once the API-key check has passed.
    """
    if self.has_api_key:
        ai_enabled = self.load_settings().get("chat_gpt_completion", False)
        if ai_enabled:
            # Both preconditions hold: hand over to the AI editor.
            self.ai_editor.apply_ai()
            return
        dialog_title = "AI Copy Editing Disabled"
        dialog_body = (
            "AI copy editing is currently disabled in settings.\n\n"
            "Please enable it in Settings → AI Copyediting before using this feature."
        )
    else:
        dialog_title = "API Key Required"
        dialog_body = (
            "AI copyediting requires an OpenAI API key.\n\n"
            "Please add your API key in Settings to use this feature."
        )

    # Exactly one of the two explanatory dialogs is shown when we get here.
    messagebox.showinfo(dialog_title, dialog_body)
def chat_gpt_settings(self):
@@ -1000,7 +1183,14 @@ class TextToMic(tk.Tk):
self.record_button.config(text=btn_text)
def start_recording(self, play_confirm_sound=False):
input_device_index = self.input_device_index.get() # Assuming input_device_index is a StringVar
if not self.has_api_key:
messagebox.showerror("API Key Required",
"An OpenAI API Key is required for speech to text or to use OpenAI voices.\n\n"
"Please add your API key in Settings.\n\n"
"Note: You can still use text to speech with the system voices only.")
return
input_device_index = self.input_device_index.get()
input_device_id = self.available_input_devices.get(input_device_index)
if input_device_id is None:
@@ -1030,6 +1220,7 @@ class TextToMic(tk.Tk):
record_shortcut = "+".join(filter(None, settings["hotkeys"]["record_start_stop"]))
play_shortcut = "+".join(filter(None, settings["hotkeys"]["play_last_audio"]))
stop_shortcut = "+".join(filter(None, settings["hotkeys"]["stop_recording"]))
cancel_shortcut = "+".join(filter(None, settings["hotkeys"]["cancel_operation"]))
# Update CTkButton for recording state, keeping shortcuts visible
self.record_button.configure(text=f"Stop and Insert", fg_color="#d32f2f")
@@ -1093,7 +1284,7 @@ class TextToMic(tk.Tk):
# This ensures proper button state updates regardless of how playback is triggered
self.after(0, self.transcribe_audio, file_path, auto_play)
def transcribe_audio(self, file_path, auto_play = False):
def transcribe_audio(self, file_path, auto_play=False):
try:
with open(str(file_path), "rb") as audio_file:
transcription = self.client.audio.transcriptions.create(
@@ -1108,8 +1299,9 @@ class TextToMic(tk.Tk):
self.text_input.delete("1.0", tk.END)
self.text_input.insert("1.0", transcription.text)
if settings["chat_gpt_completion"] and settings["auto_apply_ai_to_recording"]:
auto_apply_ai = settings["auto_apply_ai_to_recording"]
# Check if AI processing is enabled AND we have an API key
if settings["chat_gpt_completion"] and settings["auto_apply_ai_to_recording"] and self.has_api_key:
auto_apply_ai = True
else:
auto_apply_ai = False
@@ -1127,7 +1319,7 @@ class TextToMic(tk.Tk):
if auto_play:
print(f"Triggering auto play with: {play_text} ")
# Use a slight delay to allow UI to update before playback starts
self.after(100, lambda: self.submit_text_helper(play_text = play_text))
self.after(100, lambda: self.submit_text_helper(play_text=play_text))
print("Transcription Complete: The audio has been transcribed and the text has been placed in the input area.")
@@ -1192,60 +1384,55 @@ class TextToMic(tk.Tk):
# Add a new method to toggle banner visibility
def toggle_banner(self):
"""Toggle the visibility of the banner image."""
# Store notification state before changes
had_notification = False
if hasattr(self, 'version_checker') and self.version_checker.notification_visible:
had_notification = True
# Toggle banner state
settings = self.load_settings()
hide_banner = self.banner_var.get()
# Get current presets state
presets_visible = hasattr(self, 'presets_manager') and not self.presets_manager.presets_collapsed
# Calculate width that preserves current width if manually resized
current_width = self.winfo_width()
width_to_use = max(current_width, self.BASE_WIDTH)
if hide_banner:
# Hide the banner
self.banner_frame.grid_remove()
# Set window geometry based on presets state
if presets_visible:
# Presets visible, banner hidden
self.geometry(f"{width_to_use}x{self.BASE_HEIGHT_NO_BANNER}")
else:
# Presets collapsed, banner hidden
self.geometry(f"{width_to_use}x{self.COLLAPSED_HEIGHT_NO_BANNER}")
else:
# Show the banner
self.banner_frame.grid()
# Set window geometry based on presets state
if presets_visible:
# Presets visible, banner visible
self.geometry(f"{width_to_use}x{self.BASE_HEIGHT_WITH_BANNER}")
else:
# Presets collapsed, banner visible
self.geometry(f"{width_to_use}x{self.COLLAPSED_HEIGHT_WITH_BANNER}")
# Update the settings
settings["hide_banner"] = hide_banner
self.save_settings_to_JSON(settings)
# Update window size based on new banner state
self.update_window_size()
# Ensure input elements maintain consistent width
self._maintain_consistent_width()
# Make sure presets are laid out correctly if visible
if presets_visible and hasattr(self, 'presets_manager'):
if not self.presets_collapsed and hasattr(self, 'presets_manager'):
self.presets_manager.refresh_presets_display()
# Ensure the presets button is correctly positioned using grid
if hasattr(self, 'presets_manager') and hasattr(self.presets_manager, 'presets_button'):
if self.presets_manager.presets_button.winfo_exists():
# Use grid (not pack) to ensure proper positioning
self.presets_manager.presets_button.grid_configure(column=0, row=0, sticky=tk.W, padx=0, pady=2)
# If we have a version notification visible, ensure it remains at the top
if hasattr(self, 'version_checker') and self.version_checker.notification_visible:
self.version_checker.notification_frame.grid(row=0, column=0, sticky="ew")
self.main_frame.grid(row=1, column=0, sticky="nsew")
# If we had a notification, make sure it's correctly positioned after layout changes
if had_notification:
# Need to schedule this after all geometry changes are complete
self.after(100, self._reposition_version_notification)
def toggle_presets(self):
"""Toggle the visibility of the presets panel."""
# Store notification state before changes
had_notification = False
if hasattr(self, 'version_checker') and self.version_checker.notification_visible:
had_notification = True
if hasattr(self, 'presets_manager'):
# Toggle presets via presets manager
self.presets_manager.toggle_presets()
@@ -1253,39 +1440,24 @@ class TextToMic(tk.Tk):
# Update our local tracking of presets state
self.presets_collapsed = self.presets_manager.presets_collapsed
# Get banner visibility state
banner_hidden = self.banner_var.get()
# Update the menu checkbox state to match
if hasattr(self, 'presets_visible_var'):
self.presets_visible_var.set(not self.presets_collapsed)
# Calculate a width that preserves the current width if it's larger than default
current_width = self.winfo_width()
width_to_use = max(current_width, self.BASE_WIDTH)
# Set window geometry based on both states
if self.presets_collapsed:
# Presets collapsed - ensure minimum height with current width
if banner_hidden:
# Banner hidden, presets collapsed
self.geometry(f"{width_to_use}x{self.COLLAPSED_HEIGHT_NO_BANNER}")
else:
# Banner visible, presets collapsed
self.geometry(f"{width_to_use}x{self.COLLAPSED_HEIGHT_WITH_BANNER}")
else:
# Presets expanded - use full height with current width
if banner_hidden:
# Banner hidden, presets expanded
self.geometry(f"{width_to_use}x{self.BASE_HEIGHT_NO_BANNER}")
else:
# Banner visible, presets expanded
self.geometry(f"{width_to_use}x{self.BASE_HEIGHT_WITH_BANNER}")
# Update window size based on new presets state
self.update_window_size()
# Refresh presets display if they're visible
if not self.presets_collapsed:
self.presets_manager.refresh_presets_display()
# If we have a version notification visible, ensure it remains at the top
if hasattr(self, 'version_checker') and self.version_checker.notification_visible:
self.version_checker.notification_frame.grid(row=0, column=0, sticky="ew")
self.main_frame.grid(row=1, column=0, sticky="nsew")
# If we had a notification, make sure it's correctly positioned after layout changes
if had_notification:
# Need to schedule this after all geometry changes are complete
self.after(100, self._reposition_version_notification)
# Ensure input elements maintain consistent width
self._maintain_consistent_width()
def update_buttons_for_playback(self, is_playing):
"""Update button text based on playback state."""
@@ -1302,7 +1474,9 @@ class TextToMic(tk.Tk):
self.submit_button.configure(text=f"Stop Audio ({cancel_shortcut})", fg_color="#d32f2f")
else:
# Reset buttons to normal state
self.record_button.configure(text=f"Record Mic ({record_shortcut})", fg_color="#058705")
# Use grey color for record button if no API key
record_color = "#777777" if not self.has_api_key else "#058705"
self.record_button.configure(text=f"Record Mic ({record_shortcut})", fg_color=record_color)
self.submit_button.configure(text=f"Play Audio ({play_shortcut})", fg_color="#058705")
except Exception as e:
print(f"Error updating buttons: {e}")
@@ -1362,4 +1536,110 @@ class TextToMic(tk.Tk):
settings["auto_check_version"] = self.auto_check_version.get()
self.save_settings_to_JSON(settings)
def _reposition_version_notification(self):
    """Re-show and re-place the update notification after a layout change.

    Does nothing unless a version checker exists, reports its notification as
    visible, and actually holds a notification window.
    """
    if not hasattr(self, 'version_checker'):
        return
    checker = self.version_checker
    if not checker.notification_visible:
        return
    if not hasattr(checker, 'notification_window'):
        return
    window = checker.notification_window
    if not window:
        return

    # Make the overlay visible first, then let the checker place it.
    window.deiconify()
    checker._reposition_notification()
def _maintain_consistent_width(self):
    """Let the main frame's columns and the text input stretch with the window.

    Every grid column of ``self.main_frame`` is given ``weight=1``, and the
    text input (when it exists) has its ``width`` option set to 0 so that the
    grid, rather than a fixed width, determines its size. Pending idle tasks
    are flushed before and after the changes.
    """
    # Process pending idle tasks before querying the grid.
    self.update_idletasks()

    frame = self.main_frame
    if hasattr(frame, 'columnconfigure'):
        # grid_size() returns (columns, rows); only the column count matters here.
        column_count = frame.grid_size()[0]
        for column in range(column_count):
            frame.columnconfigure(column, weight=1)

    if hasattr(self, 'text_input'):
        # Let it be sized by the grid
        self.text_input.config(width=0)

    # Apply the new layout.
    self.update_idletasks()
def get_available_voices(self):
    """Return the list of voice names to offer for selection.

    OpenAI voices come first and are included only when ``self.has_api_key``
    is truthy. Each entry of ``self.system_voices`` (if that attribute exists
    and is non-empty) follows, formatted as ``"[System] <name>"``. System
    voices are added whether or not an API key is present.

    Returns:
        list[str]: never empty; ``["[System] Default"]`` is returned when no
        other voice could be collected.
    """
    voices = []

    if self.has_api_key:
        # Add OpenAI voices
        voices.extend(['alloy', 'ash', 'ballad', 'coral', 'echo', 'fable', 'onyx', 'nova', 'sage', 'shimmer'])

    # Add system voices with [System] prefix. Reading them is best-effort: a
    # failure is logged and whatever was collected so far is kept.
    try:
        if hasattr(self, 'system_voices') and self.system_voices:
            for voice in self.system_voices:
                voices.append(f"[System] {voice.name}")
    except Exception as e:
        print(f"Error loading system voices: {e}")

    # Single fallback (covers both the error path and the "nothing found" path):
    # ensure we have at least one voice option.
    if not voices:
        voices.append("[System] Default")

    return voices
def on_voice_change(self, *args):
    """React to a new voice selection by enabling or disabling the tone menu.

    A voice whose name starts with ``"[System]"`` disables the tone menu and
    resets the tone selection to ``"None"``; any other voice re-enables the
    menu. Extra positional arguments are accepted and ignored.
    """
    uses_system_voice = self.voice_var.get().startswith("[System]")

    if not uses_system_voice:
        self.tone_menu.state(['!disabled'])
        return

    self.tone_menu.state(['disabled'])
    self.tone_var.set("None")
def update_window_size(self):
    """Resize the window for the current banner/presets state and re-center it.

    The width is the larger of the current window width and ``BASE_WIDTH``.
    The height is one of four class constants, chosen by whether the presets
    panel is collapsed and whether the banner is hidden.
    """
    # Never shrink below the default width; keep a manually widened window.
    target_width = max(self.winfo_width(), self.BASE_WIDTH)

    banner_hidden = self.banner_var.get()
    if self.presets_collapsed:
        target_height = (
            self.COLLAPSED_HEIGHT_NO_BANNER if banner_hidden
            else self.COLLAPSED_HEIGHT_WITH_BANNER
        )
    else:
        target_height = (
            self.BASE_HEIGHT_NO_BANNER if banner_hidden
            else self.BASE_HEIGHT_WITH_BANNER
        )

    # Update geometry and re-center
    self.geometry(f"{target_width}x{target_height}")
    self.center_window()
def toggle_presets_from_menu(self):
    """Menu handler: toggle the presets panel and resync the menu checkbox."""
    self.toggle_presets()

    # Derive the checkbox value from the actual panel state rather than
    # assuming the toggle succeeded.
    presets_shown = not self.presets_collapsed
    self.presets_visible_var.set(presets_shown)

View File

@@ -103,7 +103,7 @@ class TonePresetsManager:
# Listbox for tones with scrollbar
self.tone_list = tk.Listbox(select_frame, height=8, selectmode=tk.BROWSE,
bg="#f0f0f0", fg="#333333",
bg="#ffffff", fg="#333333",
selectbackground="#0078d7", selectforeground="#ffffff",
font=("Arial", 10))
tone_scrollbar = ttk.Scrollbar(select_frame, orient=tk.VERTICAL, command=self.tone_list.yview)
@@ -168,7 +168,7 @@ class TonePresetsManager:
# Create the text widget with word wrap and vertical scrollbar
self.content_text = tk.Text(text_frame, wrap=tk.WORD,
yscrollcommand=v_scrollbar.set,
bg="#f0f0f0", fg="#333333",
bg="#ffffff", fg="#333333",
font=("Arial", 10),
relief="solid", borderwidth=1)
self.content_text.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

View File

@@ -13,7 +13,7 @@ class VersionChecker:
self.current_version = version
self.version_url = "https://www.scorchsoft.com/public/blog/text-to-mic/leatest-version.json"
self.notification_visible = False
self.notification_frame = None
self.notification_window = None
def check_version(self, show_result=True):
"""
@@ -68,36 +68,59 @@ class VersionChecker:
f"Could not check for updates. Server returned status code: {response.status_code}"
))
except requests.RequestException as e:
error_message = f"Could not connect to update server: {str(e)}"
print(f"Version check error: {error_message}")
if show_result:
self.app.after(0, lambda: messagebox.showwarning(
self.app.after(0, lambda msg=error_message: messagebox.showwarning(
"Version Check Failed",
f"Could not connect to update server: {str(e)}"
msg
))
except json.JSONDecodeError:
except json.JSONDecodeError as e:
error_message = "Invalid update information received."
print(f"Version check error: {error_message}")
if show_result:
self.app.after(0, lambda: messagebox.showwarning(
self.app.after(0, lambda msg=error_message: messagebox.showwarning(
"Version Check Failed",
"Invalid update information received."
msg
))
except Exception as e:
error_message = f"Could not check for updates: {str(e)}"
print(f"Version check error: {error_message}")
if show_result:
self.app.after(0, lambda: messagebox.showwarning(
self.app.after(0, lambda msg=error_message: messagebox.showwarning(
"Version Check Failed",
f"Could not check for updates: {str(e)}"
msg
))
def show_update_notification(self, latest_version, download_url, message):
"""Display an update notification banner in the app"""
"""Display an update notification banner as an overlay"""
if self.notification_visible:
return # Already showing notification
# Create notification frame
self.notification_frame = ttk.Frame(self.app, style='Notification.TFrame')
# Configure notification style (light yellow background)
self.app.style.configure('Notification.TFrame', background='#fff3cd')
self.app.style.configure('Notification.TLabel', background='#fff3cd', foreground='#856404')
self.app.style.configure('Notification.TButton', background='#fff3cd')
# Create a new toplevel window for the notification
self.notification_window = tk.Toplevel(self.app)
self.notification_window.overrideredirect(True) # Remove window decorations
self.notification_window.attributes('-topmost', True) # Keep on top
# Calculate position (aligned with top of main window)
app_x = self.app.winfo_rootx()
app_y = self.app.winfo_rooty()
app_width = self.app.winfo_width()
# Configure the notification window
bg_color = '#fff3cd' # Light yellow background
fg_color = '#856404' # Darker yellow/brown text
# Create the main frame in the notification window
self.notification_frame = ttk.Frame(self.notification_window, style='Notification.TFrame')
self.notification_frame.pack(fill='both', expand=True)
# Configure styles
self.app.style.configure('Notification.TFrame', background=bg_color)
self.app.style.configure('Notification.TLabel', background=bg_color, foreground=fg_color)
self.app.style.map('Notification.TButton',
background=[('active', bg_color), ('!active', bg_color)],
foreground=[('active', fg_color), ('!active', fg_color)])
# Create notification content
notification_text = message or f"A new version ({latest_version}) is available. You're currently using version {self.current_version}."
@@ -106,42 +129,86 @@ class VersionChecker:
self.notification_frame,
text=notification_text,
style='Notification.TLabel',
wraplength=400
wraplength=app_width - 150 # Allow for button width
)
label.grid(row=0, column=0, padx=(10, 5), pady=10, sticky="w")
# Create buttons
button_frame = ttk.Frame(self.notification_frame, style='Notification.TFrame')
button_frame.grid(row=0, column=1, padx=5, pady=5)
download_button = ttk.Button(
self.notification_frame,
button_frame,
text="Download",
style='Notification.TButton',
command=lambda: self.open_download_page(download_url)
)
download_button.grid(row=0, column=1, padx=5, pady=10)
download_button.pack(side='left', padx=5)
close_button = ttk.Button(
self.notification_frame,
button_frame,
text="×",
width=2,
style='Notification.TButton',
command=self.dismiss_notification
)
close_button.grid(row=0, column=2, padx=(0, 5), pady=10)
close_button.pack(side='left')
# Insert at the top of the application, below menu
self.notification_frame.grid(row=0, column=0, sticky="ew", padx=0, pady=0)
# Move other content down
self.app.main_frame.grid(row=1, column=0, sticky="nsew")
# Position the window and set its size
self.notification_window.update_idletasks() # Update to get correct dimensions
notification_height = self.notification_window.winfo_reqheight()
# Mark notification as visible first so _reposition_notification will work
self.notification_visible = True
# Setup event binding to follow main window if it's moved
self.app.bind("<Configure>", self._reposition_notification)
# Add bindings for window minimize/restore events
self.app.bind("<Unmap>", self._handle_window_unmap)
self.app.bind("<Map>", self._handle_window_map)
# Use the reposition function to set initial position and size
# This ensures consistent sizing between initial load and repositioning
self._reposition_notification()
def _handle_window_unmap(self, event=None):
    """Withdraw the notification window when the main window is unmapped.

    Bound to the app's ``<Unmap>`` event; ``event`` is unused.
    """
    if not self.notification_visible or not self.notification_window:
        return
    self.notification_window.withdraw()
def _handle_window_map(self, event=None):
    """Bring the notification window back when the main window is mapped again.

    Bound to the app's ``<Map>`` event; ``event`` is unused. After the window
    is shown again it is re-placed via ``_reposition_notification``.
    """
    if not self.notification_visible or not self.notification_window:
        return
    self.notification_window.deiconify()
    # Reposition after showing
    self._reposition_notification()
def _reposition_notification(self, event=None):
    """Keep the notification window aligned with the top of the main window.

    Usable directly or as an event callback (``event`` is unused). Does
    nothing unless the notification is marked visible and its window exists.
    The window keeps its current height, is placed at the app's root x/y
    position, and is made 5 pixels narrower than the app window.
    """
    if not self.notification_visible or not self.notification_window:
        return

    left = self.app.winfo_rootx()
    top = self.app.winfo_rooty()
    # Slightly narrower than the app so the overlay doesn't extend beyond the window.
    width = self.app.winfo_width() - 5
    height = self.notification_window.winfo_height()

    self.notification_window.geometry(f"{width}x{height}+{left}+{top}")
def dismiss_notification(self):
"""Remove the notification banner"""
if self.notification_frame:
self.notification_frame.grid_forget()
self.notification_frame = None
if self.notification_window:
# Unbind all the events first
self.app.unbind("<Configure>")
self.app.unbind("<Unmap>")
self.app.unbind("<Map>")
# Move main frame back to top position
self.app.main_frame.grid(row=0, column=0, sticky="nsew")
# Destroy the window
self.notification_window.destroy()
self.notification_window = None
self.notification_visible = False