Add model field parameter for generic OpenAI TTS (#4171)
resolves #4165
This commit is contained in:
parent
ff34c8cefc
commit
043cb1d085
@ -255,6 +255,7 @@ GID='1000'
|
|||||||
|
|
||||||
# TTS_PROVIDER="generic-openai"
|
# TTS_PROVIDER="generic-openai"
|
||||||
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
||||||
|
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
|
||||||
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
||||||
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
||||||
|
|
||||||
|
|||||||
@ -5,7 +5,7 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
|
|||||||
<div className="w-full flex flex-col gap-y-7">
|
<div className="w-full flex flex-col gap-y-7">
|
||||||
<div className="flex gap-x-4">
|
<div className="flex gap-x-4">
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<div className="flex justify-between items-center mb-2">
|
<div className="flex justify-between items-start mb-2">
|
||||||
<label className="text-white text-sm font-semibold">Base URL</label>
|
<label className="text-white text-sm font-semibold">Base URL</label>
|
||||||
</div>
|
</div>
|
||||||
<input
|
<input
|
||||||
@ -23,9 +23,8 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
|
|||||||
will generate TTS responses from.
|
will generate TTS responses from.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-3">
|
<label className="text-white text-sm font-semibold block mb-2">
|
||||||
API Key
|
API Key
|
||||||
</label>
|
</label>
|
||||||
<input
|
<input
|
||||||
@ -44,6 +43,28 @@ export default function OpenAiGenericTextToSpeechOptions({ settings }) {
|
|||||||
this is optional if your service does not require one.
|
this is optional if your service does not require one.
|
||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
|
</div>
|
||||||
|
<div className="flex gap-x-4">
|
||||||
|
<div className="flex flex-col w-60">
|
||||||
|
<label className="text-white text-sm font-semibold block mb-3">
|
||||||
|
TTS Model
|
||||||
|
</label>
|
||||||
|
<input
|
||||||
|
type="text"
|
||||||
|
name="TTSOpenAICompatibleModel"
|
||||||
|
className="border-none bg-theme-settings-input-bg text-white placeholder:text-theme-settings-input-placeholder text-sm rounded-lg focus:outline-primary-button active:outline-primary-button outline-none block w-full p-2.5"
|
||||||
|
placeholder="Your TTS model identifier"
|
||||||
|
defaultValue={settings?.TTSOpenAICompatibleModel}
|
||||||
|
required={true}
|
||||||
|
autoComplete="off"
|
||||||
|
spellCheck={false}
|
||||||
|
/>
|
||||||
|
<p className="text-xs leading-[18px] font-base text-white text-opacity-60 mt-2">
|
||||||
|
Most TTS services will have several models available. This is the{" "}
|
||||||
|
<code>model</code> parameter you will use to select the model you
|
||||||
|
want to use. Note: This is not the same as the voice model.
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
<div className="flex flex-col w-60">
|
<div className="flex flex-col w-60">
|
||||||
<label className="text-white text-sm font-semibold block mb-3">
|
<label className="text-white text-sm font-semibold block mb-3">
|
||||||
Voice Model
|
Voice Model
|
||||||
|
|||||||
@ -257,6 +257,7 @@ TTS_PROVIDER="native"
|
|||||||
|
|
||||||
# TTS_PROVIDER="generic-openai"
|
# TTS_PROVIDER="generic-openai"
|
||||||
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
# TTS_OPEN_AI_COMPATIBLE_KEY=sk-example
|
||||||
|
# TTS_OPEN_AI_COMPATIBLE_MODEL=tts-1
|
||||||
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
# TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL=nova
|
||||||
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
# TTS_OPEN_AI_COMPATIBLE_ENDPOINT="https://api.openai.com/v1"
|
||||||
|
|
||||||
|
|||||||
@ -256,6 +256,7 @@ const SystemSettings = {
|
|||||||
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
|
process.env.TTS_PIPER_VOICE_MODEL ?? "en_US-hfc_female-medium",
|
||||||
// OpenAI Generic TTS
|
// OpenAI Generic TTS
|
||||||
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
|
TTSOpenAICompatibleKey: !!process.env.TTS_OPEN_AI_COMPATIBLE_KEY,
|
||||||
|
TTSOpenAICompatibleModel: process.env.TTS_OPEN_AI_COMPATIBLE_MODEL,
|
||||||
TTSOpenAICompatibleVoiceModel:
|
TTSOpenAICompatibleVoiceModel:
|
||||||
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
|
process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL,
|
||||||
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
TTSOpenAICompatibleEndpoint: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
||||||
|
|||||||
@ -4,6 +4,10 @@ class GenericOpenAiTTS {
|
|||||||
this.#log(
|
this.#log(
|
||||||
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
|
"No OpenAI compatible API key was set. You might need to set this to use your OpenAI compatible TTS service."
|
||||||
);
|
);
|
||||||
|
if (!process.env.TTS_OPEN_AI_COMPATIBLE_MODEL)
|
||||||
|
this.#log(
|
||||||
|
"No OpenAI compatible TTS model was set. We will use the default TTS model 'tts-1'. This may not exist or be valid for your selected endpoint."
|
||||||
|
);
|
||||||
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
|
if (!process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL)
|
||||||
this.#log(
|
this.#log(
|
||||||
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
|
"No OpenAI compatible voice model was set. We will use the default voice model 'alloy'. This may not exist for your selected endpoint."
|
||||||
@ -18,7 +22,11 @@ class GenericOpenAiTTS {
|
|||||||
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
|
apiKey: process.env.TTS_OPEN_AI_COMPATIBLE_KEY || null,
|
||||||
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
baseURL: process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT,
|
||||||
});
|
});
|
||||||
|
this.model = process.env.TTS_OPEN_AI_COMPATIBLE_MODEL ?? "tts-1";
|
||||||
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
|
this.voice = process.env.TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL ?? "alloy";
|
||||||
|
this.#log(
|
||||||
|
`Service (${process.env.TTS_OPEN_AI_COMPATIBLE_ENDPOINT}) with model: ${this.model} and voice: ${this.voice}`
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#log(text, ...args) {
|
#log(text, ...args) {
|
||||||
@ -33,7 +41,7 @@ class GenericOpenAiTTS {
|
|||||||
async ttsBuffer(textInput) {
|
async ttsBuffer(textInput) {
|
||||||
try {
|
try {
|
||||||
const result = await this.openai.audio.speech.create({
|
const result = await this.openai.audio.speech.create({
|
||||||
model: "tts-1",
|
model: this.model,
|
||||||
voice: this.voice,
|
voice: this.voice,
|
||||||
input: textInput,
|
input: textInput,
|
||||||
});
|
});
|
||||||
|
|||||||
@ -601,6 +601,10 @@ const KEY_MAPPING = {
|
|||||||
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
|
envKey: "TTS_OPEN_AI_COMPATIBLE_KEY",
|
||||||
checks: [],
|
checks: [],
|
||||||
},
|
},
|
||||||
|
TTSOpenAICompatibleModel: {
|
||||||
|
envKey: "TTS_OPEN_AI_COMPATIBLE_MODEL",
|
||||||
|
checks: [],
|
||||||
|
},
|
||||||
TTSOpenAICompatibleVoiceModel: {
|
TTSOpenAICompatibleVoiceModel: {
|
||||||
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
|
envKey: "TTS_OPEN_AI_COMPATIBLE_VOICE_MODEL",
|
||||||
checks: [isNotEmpty],
|
checks: [isNotEmpty],
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user