From cb0aea711867ca7cf83dc6c33689c095728389e0 Mon Sep 17 00:00:00 2001 From: Serhiy Mytrovtsiy Date: Sat, 7 Mar 2026 18:50:27 +0100 Subject: [PATCH] lang: generated missing translations for all languages using translategemma:4b model --- Kit/scripts/i18n.py | 283 ++++++++- .../xcshareddata/xcschemes/SMC.xcscheme | 2 +- .../xcshareddata/xcschemes/Stats.xcscheme | 2 +- .../xcschemes/WidgetsExtension.xcscheme | 2 +- .../ar.lproj/Localizable.strings | 122 ++-- .../bg.lproj/Localizable.strings | 292 ++++----- .../ca.lproj/Localizable.strings | 100 ++-- .../cs.lproj/Localizable.strings | 248 ++++---- .../da.lproj/Localizable.strings | 206 +++---- .../de.lproj/Localizable.strings | 16 +- .../el.lproj/Localizable.strings | 326 +++++----- .../es.lproj/Localizable.strings | 78 +-- .../et.lproj/Localizable.strings | 208 +++---- .../fa.lproj/Localizable.strings | 252 ++++---- .../fi.lproj/Localizable.strings | 96 +-- .../fr.lproj/Localizable.strings | 102 ++-- .../he.lproj/Localizable.strings | 390 ++++++------ .../hi.lproj/Localizable.strings | 184 +++--- .../hr.lproj/Localizable.strings | 320 +++++----- .../hu.lproj/Localizable.strings | 154 ++--- .../id.lproj/Localizable.strings | 408 ++++++------- .../it.lproj/Localizable.strings | 96 +-- .../ja.lproj/Localizable.strings | 340 +++++------ .../ko.lproj/Localizable.strings | 174 +++--- .../nb.lproj/Localizable.strings | 188 +++--- .../nl.lproj/Localizable.strings | 430 +++++++------- .../pl.lproj/Localizable.strings | 6 +- .../pt-BR.lproj/Localizable.strings | 120 ++-- .../pt-PT.lproj/Localizable.strings | 276 ++++----- .../ro.lproj/Localizable.strings | 558 +++++++++--------- .../ru.lproj/Localizable.strings | 6 +- .../sk.lproj/Localizable.strings | 250 ++++---- .../sl.lproj/Localizable.strings | 74 +-- .../sv.lproj/Localizable.strings | 82 +-- .../th.lproj/Localizable.strings | 254 ++++---- .../tr.lproj/Localizable.strings | 10 +- .../uk.lproj/Localizable.strings | 4 +- .../vi.lproj/Localizable.strings | 18 +- .../zh-Hans.lproj/Localizable.strings | 6 +- .../zh-Hant.lproj/Localizable.strings | 2 +- 40 files changed, 3475 insertions(+), 3210 deletions(-) diff --git a/Kit/scripts/i18n.py b/Kit/scripts/i18n.py index 3e031f1d..1d44cbd1 100644 --- a/Kit/scripts/i18n.py +++ b/Kit/scripts/i18n.py @@ -1,5 +1,14 @@ import os import sys +import json +import urllib.request +import subprocess +import unicodedata + +try: + import langcodes +except Exception: + langcodes = None def dictionary(lines): @@ -25,7 +34,8 @@ class i18n: self.languages = list(filter(lambda x: x.endswith(".lproj"), os.listdir(self.path))) def en_file(self): - en_file = open(f"{self.path}/en.lproj/Localizable.strings", "r").readlines() + with open(f"{self.path}/en.lproj/Localizable.strings", "r") as f: + en_file = f.readlines() if en_file is None: sys.exit("English language not found.") return en_file @@ -35,7 +45,8 @@ class i18n: en_dict = dictionary(en_file) for lang in self.languages: - file = open(f"{self.path}/{lang}/Localizable.strings", "r").readlines() + with open(f"{self.path}/{lang}/Localizable.strings", "r") as f: + file = f.readlines() name = lang.replace(".lproj", "") lang_dict = dictionary(file) @@ -43,9 +54,9 @@ class i18n: en_key = en_dict[v].get("key") if v not in lang_dict: sys.exit(f"missing key `{en_key}` in `{name}` on line `{v}`") - lang_ley = lang_dict[v].get("key") - if lang_ley != en_key: - sys.exit(f"missing or wrong key `{lang_ley}` in `{name}` on line `{v}`, must be `{en_key}`") + lang_key = lang_dict[v].get("key") + if lang_key != en_key: + sys.exit(f"missing or wrong key `{lang_key}` in `{name}` on line `{v}`, must be `{en_key}`") print(f"All fine, found {len(en_file)} lines in {len(self.languages)} languages.") @@ -59,24 +70,278 @@ class i18n: for lang in self.languages: lang_path = f"{self.path}/{lang}/Localizable.strings" - file = open(lang_path, "r").readlines() + with open(lang_path, "r") as f: + file = f.readlines() lang_dict = dictionary(file) if v not in lang_dict or en_key != lang_dict[v].get("key"): file.insert(v, f"\"{en_key}\" = \"{en_value}\";\n") with open(lang_path, "w") as f: - file = "".join(file) - f.write(file) - f.close() + f.write("".join(file)) self.check() + def _normalize_lang_code(self, code): + code = (code or "").strip() + if code.endswith(".lproj"): + code = code[:-6] + return code.replace("-", "_") + + def _extract_translation(self, raw, fallback): + raw = (raw or "").strip() + if not raw: + return fallback + + def _clean(s): + return (s or "").strip().strip("*").strip('"').strip("'").strip() + + def _from_dict(obj): + if not isinstance(obj, dict): + return None + + role = (obj.get("role") or "").strip().lower() + obj_type = (obj.get("type") or "").strip().lower() + + text = obj.get("text") + if isinstance(text, str) and text.strip(): + if role in ("assistant", "translation") or obj_type == "translation": + return _clean(text) + + content = obj.get("content") + if isinstance(content, list): + for item in content: + if not isinstance(item, dict): + continue + item_role = (item.get("role") or role).strip().lower() + item_type = (item.get("type") or "").strip().lower() + t = item.get("text") + if isinstance(t, str) and t.strip(): + if item_role in ("assistant", "translation") or item_type in ("translation", "text"): + return _clean(t) + return None + + try: + parsed = json.loads(raw) + if isinstance(parsed, dict): + hit = _from_dict(parsed) + if hit: + return hit + elif isinstance(parsed, list): + for item in parsed: + hit = _from_dict(item) + if hit: + return hit + except json.JSONDecodeError: + pass + + if "\n" not in raw and len(raw) <= 200: + candidate = _clean(raw) + if candidate and not candidate.startswith("{") and not candidate.startswith("["): + return candidate + + for line in raw.splitlines(): + line = _clean(line) + if line and not line.startswith("{") and not line.startswith("["): + return line + + return fallback + + def _lang_name_from_code(self, code): + c = self._normalize_lang_code(code).replace("_", "-").strip() + if not c: + return "Unknown" + + if langcodes: + try: + name = langcodes.get(c).display_name("en") + if name: + return name + except Exception: + pass + + return c + + def _script_hint(self, lang_code): + lang = self._normalize_lang_code(lang_code).lower() + hints = { + "el": "Greek script only (Α-Ω, α-ω) except numbers/punctuation/brand names.", + "ru": "Cyrillic script only except numbers/punctuation/brand names.", + "uk": "Cyrillic script only except numbers/punctuation/brand names.", + "bg": "Cyrillic script only except numbers/punctuation/brand names.", + "ja": "Japanese writing system (Hiragana/Katakana/Kanji), no romaji unless required.", + "zh_cn": "Simplified Chinese characters.", + "zh_hans": "Simplified Chinese characters.", + "zh_tw": "Traditional Chinese characters.", + "zh_hant": "Traditional Chinese characters.", + "ko": "Korean Hangul preferred.", + "et": "Use Estonian only. Do not use Russian.", + } + return hints.get(lang, "") + + def _ollama_translate(self, text, target_lang, model="translategemma:4b", retries=2): + url = "http://ai:11434/api/generate" + tgt = self._normalize_lang_code(target_lang) + lang = self._lang_name_from_code(tgt) + script_hint = self._script_hint(tgt) + + prompt = ( + f"You are a professional English (en) to {lang} ({tgt}) translator. Your goal is to accurately convey the meaning and nuances of the original English text while adhering to {lang} grammar, vocabulary, and cultural sensitivities. Produce only the {lang} translation, without any additional explanations or commentary. Output only the final translated text. Do not add explanations, notes, JSON, markdown, or quotes. Preserve placeholders/tokens exactly \\(e\\.g\\. `%@`, `%d`, `{0}`, `MB/s`\\). Preserve punctuation, casing intent, and technical abbreviations. {script_hint} Please translate the following English text into {lang}:\\n\\n" + f"{text}" + ) + + payload = { + "model": model, + "prompt": prompt, + "stream": False, + } + + req = urllib.request.Request( + url, + data=json.dumps(payload).encode("utf-8"), + headers={"Content-Type": "application/json"}, + method="POST" + ) + + with urllib.request.urlopen(req, timeout=240) as resp: + data = json.loads(resp.read().decode("utf-8")) + raw = data.get("response", "").strip() + + return self._extract_translation(raw, fallback=text) + + def _line_authors(self, file_path): + cmd = ["git", "blame", "--line-porcelain", file_path] + out = subprocess.check_output(cmd, text=True, cwd=os.getcwd(), stderr=subprocess.DEVNULL) + authors = [] + for line in out.splitlines(): + if line.startswith("author "): + authors.append(line[len("author "):].strip()) + return authors + + def _my_git_author(self): + try: + return subprocess.check_output( + ["git", "config", "user.name"], + text=True, + cwd=os.getcwd() + ).strip() + except Exception: + return "" + + def _strings_escape(self, value): + s = "" if value is None else str(value) + s = s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") + return s + + def translate(self, model="translategemma:4b", accept=False): + en_lines = self.en_file() + en_dict = dictionary(en_lines) + my_author = self._my_git_author() + omit_keys = ["Swap"] + ai_tag = f"// {model}" + + target_languages = [ + l for l in self.languages + if not self._normalize_lang_code(l).lower().startswith("en") +# if self._normalize_lang_code(l).lower() in ("sk") + ] + total_langs = len(target_languages) + + for lang_idx, lang in enumerate(target_languages, start=1): + lang_code = lang.replace(".lproj", "") + lang_name = self._lang_name_from_code(lang_code) + lang_path = f"{self.path}/{lang}/Localizable.strings" + + with open(lang_path, "r") as f: + old_lines = f.readlines() + + new_lines = old_lines[:] + lang_dict = dictionary(old_lines) + changed = False + + try: + authors = self._line_authors(lang_path) + except Exception: + authors = [""] * len(old_lines) + + candidates = [] + for i, en_item in en_dict.items(): + en_key = en_item.get("key") + en_value = en_item.get("value") + + translate_item = lang_dict.get(i) + translate_key = translate_item.get("key") if translate_item else None + translate_value = translate_item.get("value") if translate_item else None + + if translate_item is None or translate_key != en_key: + line = f"\"{en_key}\" = \"{en_value}\";\n" + if i < len(new_lines): + new_lines.insert(i, line) + else: + new_lines.append(line) + if i <= len(authors): + authors.insert(i, my_author) + changed = True + translate_value = en_value + + if translate_key != en_key: + continue + if en_key in omit_keys: + continue + if i < len(authors) and my_author and authors[i] != my_author and en_value != translate_value: + continue + + if translate_value is None or translate_value == en_value: + candidates.append((i, en_key, en_value)) + + print("Candidates for translation in {} ({}): {}".format(lang_name, lang_code, len(candidates))) + + for idx, (i, en_key, en_value) in enumerate(candidates, start=1): + translated = self._ollama_translate(en_value, lang_code, model=model) + safe_translated = self._strings_escape(translated) + print(f"[{lang_name} {lang_idx}/{total_langs}] {idx}/{len(candidates)} {en_key} -> {safe_translated}") + + translated_line = f"\"{en_key}\" = \"{safe_translated}\";\n" + update_line = f"\"{en_key}\" = \"{safe_translated}\"; {ai_tag}\n" + if i < len(new_lines): + if new_lines[i] != translated_line: + new_lines[i] = update_line + changed = True + else: + new_lines.append(update_line) + changed = True + + if not changed: + print(f"No changes for {lang_code} ({lang_code}).") + continue + + if accept: + with open(lang_path, "w") as f: + f.write("".join(new_lines)) + print(f"Saved: {lang_path}") + else: + answer = input(f"Save changes to {lang_path}? [Y/n]: ").strip().lower() + if answer in ("", "y", "yes"): + with open(lang_path, "w") as f: + f.write("".join(new_lines)) + print(f"Saved: {lang_path}") + else: + print(f"Skipped: {lang_path}") + + print("Translation completed.") + if __name__ == "__main__": i18n = i18n() + args = sys.argv[1:] + accept = "--accept" in args + args = [a for a in args if a != "--accept"] + if len(sys.argv) >= 2 and sys.argv[1] == "fix": print("running fix command...") i18n.fix() + elif len(sys.argv) >= 2 and sys.argv[1] == "translate": + print("running translate command...") + i18n.translate(accept=accept) else: print("running check command...") i18n.check() diff --git a/Stats.xcodeproj/xcshareddata/xcschemes/SMC.xcscheme b/Stats.xcodeproj/xcshareddata/xcschemes/SMC.xcscheme index 9bf53d07..763eb7e1 100644 --- a/Stats.xcodeproj/xcshareddata/xcschemes/SMC.xcscheme +++ b/Stats.xcodeproj/xcshareddata/xcschemes/SMC.xcscheme @@ -1,6 +1,6 @@