Add model field parameter for generic OpenAI TTS (#4171)

resolves #4165
This commit is contained in:
Timothy Carambat 2025-07-17 10:28:51 -07:00 committed by GitHub
parent ff34c8cefc
commit 043cb1d085
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 40 additions and 4 deletions

View File

@ -255,6 +255,7 @@ GID='1000'
# TTS_PROVIDER="generic-openai" # TTS_PROVIDER="generic-openai"
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example # TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova # TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1" # TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

View File

@ -5,7 +5,7 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
<div className="w-full flex flex-col gap-y-7"> <div className="w-full flex flex-col gap-y-7">
<div className="flex gap-x-4"> <div className="flex gap-x-4">
<div className="flex flex-col w-60"> <div className="flex flex-col w-60">
<div className="flex justify-between items-center mb-2"> <div className="flex justify-between items-start mb-2">
<label className="text-white text-sm font-semibold">Base URL</label> <label className="text-white text-sm font-semibold">Base URL</label>
</div> </div>
<input <input
@ -23,9 +23,8 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
will generate TTS responses from. will generate TTS responses from.
</p> </p>
</div> </div>
<div className="flex flex-col w-60"> <div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3"> <label className="text-white text-sm font-semibold block mb-2">
API Key API Key
</label> </label>
<input <input
@ -44,6 +43,28 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
this is optional if your service does not require one. this is optional if your service does not require one.
</p> </p>
</div> </div>
</div>
<div className="flex gap-x-4">
<div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3">
TTS Model
</label>
<input
type="text"
name="TTSOpenAICompatibleModel"
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
placeholder="Your TTS model identifier"
defaultValue={settings?.TTSOpenAICompatibleModel}
required={true}
autoComplete="off"
spellCheck={false}
/>
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
Most TTS services will have several models available. This is the{" "}
<code>model</code> parameter you will use to select the model you
want to use. Note: This is not the same as the voice model.
</p>
</div>
<div className="flex flex-col w-60"> <div className="flex flex-col w-60">
<label className="text-white text-sm font-semibold block mb-3"> <label className="text-white text-sm font-semibold block mb-3">
Voice Model Voice Model

View File

@ -257,6 +257,7 @@ TTS_PROVIDER="native"
# TTS_PROVIDER="generic-openai" # TTS_PROVIDER="generic-openai"
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example # TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova # TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1" # TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"

View File

@ -256,6 +256,7 @@ const SystemSettings = {
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium", process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
// OpenAI Generic TTS // OpenAI Generic TTS
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY, TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
TTSOpenAICompatibleModel: process.env.TTS_OPEN_AI_COMPATIBLE_MODEL,
TTSOpenAICompatibleVoiceModel: TTSOpenAICompatibleVoiceModel:
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL, process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT, TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,

View File

@ -4,6 +4,10 @@ class GenericOpenAiTTS {
this.#log( this.#log(
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service." "No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
); );
if (!process.env.TTS_OPEN_AI_COMPATIBLE_MODEL)
this.#log(
"No OpenAI compatible TTS model was set. We will use the default TTS model 'tts-1'. This may not exist or be valid for your selected endpoint."
);
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL) if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
this.#log( this.#log(
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint." "No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
@ -18,7 +22,11 @@ class GenericOpenAiTTS {
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null, apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT, baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
}); });
this.model = process.env.TTS_OPEN_AI_COMPATIBLE_MODEL ?? "tts-1";
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy"; this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
this.#log(
`Service (${process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT}) with model: ${this.model} and voice: ${this.voice}`
);
} }
#log(text, ...args) { #log(text, ...args) {
@ -33,7 +41,7 @@ class GenericOpenAiTTS {
async ttsBuffer(textInput) { async ttsBuffer(textInput) {
try { try {
const result = await this.openai.audio.speech.create({ const result = await this.openai.audio.speech.create({
model: "tts-1", model: this.model,
voice: this.voice, voice: this.voice,
input: textInput, input: textInput,
}); });

View File

@ -601,6 +601,10 @@ const KEY_MAPPING = {
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY", envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
checks: [], checks: [],
}, },
TTSOpenAICompatibleModel: {
envKey: "TTS_OPEN_AI_COMPATIBLE_MODEL",
checks: [],
},
TTSOpenAICompatibleVoiceModel: { TTSOpenAICompatibleVoiceModel: {
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL", envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
checks: [isNotEmpty], checks: [isNotEmpty],