merlyn/extras/translator/index.mjs
Timothy Carambat cd5530de39
[Chore] Autotranslation tool using DMR (#4907)
* update translations + DMR loading

* updates to misspellings
2026-01-27 09:29:37 -08:00

186 lines
6.9 KiB
JavaScript

import fs from 'fs';
import {resources} from '../../frontend/src/locales/resources.js';
import "../../server/node_modules/dotenv/lib/main.js";
import DMRModule from '../../server/utils/AiProviders/dockerModelRunner/index.js';
const { DockerModelRunnerLLM, getDockerModels } = DMRModule;
/**
 * Read a value out of a nested object using a dot-separated path.
 * @param {object|null|undefined} obj - Root object to read from.
 * @param {string} path - Dot-separated key path, e.g. `"a.b.c"`.
 * @returns {*} The value found at the path, or `undefined` as soon as a
 * segment has to be read from a `null`/`undefined` parent.
 */
function getNestedValue(obj, path) {
  return path
    .split('.')
    .reduce((node, segment) => (node == null ? undefined : node[segment]), obj);
}
/**
 * Write a value into a nested object using a dot-separated path, creating
 * an empty object for every intermediate segment that is `null`/`undefined`.
 * The object is mutated in place.
 * @param {object} obj - Root object to write into.
 * @param {string} path - Dot-separated key path, e.g. `"a.b.c"`.
 * @param {*} value - Value to store at the final segment.
 * @returns {void}
 */
function setNestedValue(obj, path, value) {
  const segments = path.split('.');
  const leafKey = segments.pop();
  const parent = segments.reduce((node, segment) => {
    if (node[segment] == null) node[segment] = {};
    return node[segment];
  }, obj);
  parent[leafKey] = value;
}
/**
 * Translates UI strings with a locally hosted LLM and writes the resulting
 * locale files. Only the Docker Model Runner ("dmr") provider is supported.
 */
class Translator {
  /** Model tag requested from the provider and checked by `verifyReady`. */
  static modelTag = 'mintplexlabs/translategemma4b:Q4_K_M';

  /**
   * @param {string} [provider="dmr"] - Provider identifier.
   * @throws {Error} If the provider is not supported.
   */
  constructor(provider = "dmr") {
    switch (provider) {
      case "dmr":
        this.provider = new DockerModelRunnerLLM(null, Translator.modelTag);
        break;
      default:
        throw new Error(`Unsupported provider: ${provider}`);
    }
    this.localeObj = new Intl.DisplayNames(Object.keys(resources), { type: 'language' });
    // Set once verifyReady() has confirmed the model is available.
    this.modelVerified = false;
  }

  /**
   * Resolve a locale code to a human-readable language name.
   * @param {string} localeCode - Locale code such as `"en"` or `"zh-tw"`.
   * @returns {string|null|undefined} The display name, or `null` when the
   * lookup throws (the error is logged).
   */
  getLanguageName(localeCode) {
    try {
      return this.localeObj.of(localeCode);
    } catch (error) {
      console.error("Error getting language name:", error);
      return null;
    }
  }

  #log(text, ...args) {
    console.log(`\x1b[32m[Translator]\x1b[0m ${text}`, ...args);
  }

  /** Static variant of the prefixed logger, usable without an instance. */
  static slog(text, ...args) {
    console.log(`\x1b[32m[Translator]\x1b[0m ${text}`, ...args);
  }

  /**
   * Build the translation prompt sent to the model.
   * @param {string} text - Text to translate.
   * @param {string} sourceLangCode - Locale code of `text`.
   * @param {string} targetLangCode - Locale code to translate into.
   * @returns {string} The full prompt.
   */
  buildPrompt(text, sourceLangCode, targetLangCode) {
    // Fall back to the raw code so a failed name lookup never puts the
    // literal text "null" into the prompt.
    const sourceLanguage = this.getLanguageName(sourceLangCode) ?? sourceLangCode;
    const targetLanguage = this.getLanguageName(targetLangCode) ?? targetLangCode;
    // The code printed after the target language name must be the target's own code.
    return `You are a professional ${sourceLanguage} (${sourceLangCode.toLowerCase()}) to ${targetLanguage} (${targetLangCode.toLowerCase()}) translator. Your goal is to accurately convey the meaning and nuances of the original ${sourceLanguage} text while adhering to ${targetLanguage} grammar, vocabulary, and cultural sensitivities.
Produce only the ${targetLanguage} translation, without any additional explanations or commentary. Please translate the following ${sourceLanguage} text into ${targetLanguage}:
${text}`;
  }

  /**
   * Check that the translation model is listed by the Docker Model Runner.
   * A successful check is remembered, so the model list is fetched once
   * instead of once per translated string.
   * @returns {Promise<true>}
   * @throws {Error} If `Translator.modelTag` is not among the listed models.
   */
  async verifyReady() {
    if (this.modelVerified) return true;
    const models = await getDockerModels(process.env.DOCKER_MODEL_RUNNER_BASE_PATH);
    const wantedTag = Translator.modelTag.toLowerCase();
    if (!models.some((m) => m.id.toLowerCase() === wantedTag)) throw new Error(`Model ${Translator.modelTag} not found. Pull with docker model pull ${Translator.modelTag}`);
    this.modelVerified = true;
    return true;
  }

  /**
   * Clean the raw model output.
   * The model may append chat-template markers, e.g. `...translated text...<|im_end|>`;
   * every `<|im_end|>` and `<|im_start|>` marker is removed and the result trimmed.
   * @param {*} text - Raw model output.
   * @returns {string} The cleaned text, or `''` when `text` is not a string.
   */
  cleanOutputText(text) {
    if (typeof text !== 'string') return '';
    return text.replace(/<\|im_end\|>|<\|im_start\|>/g, '').trim();
  }

  /**
   * Translate a single string.
   * @param {string} text - Text to translate.
   * @param {string} sourceLangCode - Locale code of `text`.
   * @param {string} targetLangCode - Locale code to translate into.
   * @returns {Promise<string>} The cleaned translation; `''` when the provider
   * returned no usable text.
   * @throws {Error} If the model is not available (see `verifyReady`).
   */
  async translate(text, sourceLangCode, targetLangCode) {
    await this.verifyReady();
    const prompt = this.buildPrompt(text, sourceLangCode, targetLangCode);
    const response = await this.provider.getChatCompletion([{ role: 'user', content: prompt }], { temperature: 0.1 });
    // A missing response or missing text becomes '' rather than a TypeError.
    return this.cleanOutputText(response?.textResponse);
  }

  /**
   * Overwrite the `common.js` locale file for a language.
   * The path is relative to the current working directory.
   * @param {string} langCode - Locale code as used in `resources`.
   * @param {object} translations - Full translation object to serialize.
   * @returns {void}
   */
  writeTranslations(langCode, translations) {
    let langFilename = langCode.toLowerCase();
    // Special cases: directory names that differ from the lowercased code.
    if (langCode === 'pt') langFilename = 'pt_BR';
    if (langCode === 'zh-tw') langFilename = 'zh_TW';
    fs.writeFileSync(
      `../../frontend/src/locales/${langFilename}/common.js`,
      `// Anything with "null" requires a translation. Contribute to translation via a PR!
const TRANSLATIONS = ${JSON.stringify(translations, null, 2)}
export default TRANSLATIONS;`
    );
    console.log(`Updated ${langCode} translations file`);
  }
}
// Module-level state shared by the functions below.
// The single Translator instance, using the Docker Model Runner ("dmr") provider.
const translator = new Translator("dmr");
// The English strings are the source text for every translation.
const englishTranslations = resources.en.common;
// Filled by traverseTranslations() with the dot-separated path of every leaf key
// found by deep-traversing the English translations.
const allKeys = [];
/**
 * Deep-traverse a translations object and append the dot-separated path of
 * every leaf (non-object) value to the module-level `allKeys` array.
 * @param {object} translations - Object (or sub-object) to walk.
 * @param {string} [parentKey=''] - Path of `translations` within the root;
 * empty for the root itself.
 * @returns {void}
 */
function traverseTranslations(translations, parentKey = '') {
  for (const [key, value] of Object.entries(translations)) {
    // Build the path the same way for branches and leaves, so a leaf at the
    // root is recorded as "key" rather than ".key" (which cannot be resolved).
    const path = parentKey ? `${parentKey}.${key}` : key;
    // typeof null is 'object'; treat null as a leaf instead of recursing into it.
    if (value !== null && typeof value === 'object') {
      traverseTranslations(value, path);
    } else {
      allKeys.push(path);
    }
  }
}
// Collect the key path of every English string into allKeys.
traverseTranslations(englishTranslations);
// Remove English from resources so `en` is no longer among the languages that
// can be iterated or selected as a translation target; the English strings
// remain reachable through englishTranslations.
delete resources.en;
/**
 * Translate the missing strings of every language present in `resources`,
 * one language at a time, in the key order of `resources`.
 * @returns {Promise<void>}
 */
async function translateAllLanguages() {
  for (const langCode of Object.keys(resources)) {
    // Each iteration translates the English source strings into one target language.
    console.log(`Translating English to ${translator.getLanguageName(langCode)}(${langCode})`);
    await translateSingleLanguage(langCode);
  }
}
/**
 * Fill in every missing translation for one language, then write the
 * language's full translation object to disk via the translator.
 * Keys whose English text is empty are skipped, and keys that already have
 * a truthy translation are left untouched.
 * @param {string} langCode - Key of the target language in `resources`.
 * @returns {Promise<void>}
 */
async function translateSingleLanguage(langCode) {
  // These source strings have no direct translation and are copied through
  // unchanged; anything beyond that must be handled manually.
  const verbatimTokens = ['@agent', '/reset'];
  const divider = `--------------------------------`;
  let translatedCount = 0;

  for (const keyPath of allKeys) {
    const englishText = getNestedValue(englishTranslations, keyPath);
    if (!englishText) continue;

    if (verbatimTokens.includes(englishText)) {
      setNestedValue(resources[langCode].common, keyPath, englishText);
      continue;
    }

    // An existing (truthy) translation is never overwritten.
    const existing = getNestedValue(resources[langCode].common, keyPath);
    if (existing) continue;

    console.log(`Translation not found for ${translator.getLanguageName(langCode)}(${langCode})`, {
      key: keyPath,
      sourceText: englishText,
    });

    const translated = await translator.translate(englishText, 'en', langCode);
    if (translated) {
      console.log(`Output text: ${translated}`);
      setNestedValue(resources[langCode].common, keyPath, translated);
      console.log(divider);
      translatedCount += 1;
    } else {
      console.log('No output text - skipping');
    }
  }

  console.log(divider);
  console.log(`Translated ${translatedCount} translations for ${langCode}`);
  translator.writeTranslations(langCode, resources[langCode].common);
  console.log(divider);
}
// CLI entry point: the first argument is either `--all` (any letter case) or
// a language code that must be a key of `resources`.
const langArg = process.argv[2];
if (!langArg) {
  throw new Error('Please provide a language code as an argument or --all to translate all languages');
}

if (langArg.toLowerCase() === '--all') {
  await translateAllLanguages();
} else if (!Object.keys(resources).includes(langArg)) {
  throw new Error(`Language ${langArg} not found in resources`);
} else {
  await translateSingleLanguage(langArg);
}