mirror of
https://github.com/morgan9e/macos-stats
synced 2026-04-14 00:04:15 +09:00
350 lines
13 KiB
Python
350 lines
13 KiB
Python
import os
|
||
import sys
|
||
import json
|
||
import urllib.request
|
||
import subprocess
|
||
import unicodedata
|
||
|
||
try:
|
||
import langcodes
|
||
except Exception:
|
||
langcodes = None
|
||
|
||
|
||
def dictionary(lines):
    """Parse the lines of a ``Localizable.strings`` file.

    Args:
        lines: Iterable of raw lines, e.g. the result of ``readlines()``.

    Returns:
        A dict mapping the zero-based index of each ``"key" = "value";``
        line to ``{"key": ..., "value": ...}``. Double quotes are removed
        from both parts and semicolons are removed from the value. Blank
        lines, lines starting with ``//`` and lines that do not contain the
        `` = `` separator are skipped, so they never appear in the result.
    """
    parsed_lines = {}
    for index, line in enumerate(lines):
        text = line.rstrip("\r\n")
        if not text.strip() or text.startswith("//"):
            continue

        # Split on the FIRST separator only, so a value that itself contains
        # " = " is kept whole instead of being truncated.
        key, separator, value = text.partition(" = ")
        if not separator:
            # Not a key/value entry (e.g. a block comment); nothing to parse.
            continue

        parsed_lines[index] = {
            "key": key.replace('"', ""),
            "value": value.replace('"', "").replace(";", ""),
        }
    return parsed_lines
|
||
|
||
|
||
class i18n:
    """Check, repair and machine-translate ``Localizable.strings`` files.

    Every ``*.lproj`` directory below ``path`` is treated as one language.
    ``en.lproj`` is the reference: other languages are expected to carry the
    same keys on the same line numbers.
    """

    # Directory holding the ``*.lproj`` folders, relative to the working
    # directory the script is started from.
    path = os.getcwd() + "/Stats/Supporting Files/"

    def __init__(self):
        """Locate the localization directory and list its languages."""
        if "Kit/scripts" in os.getcwd():
            self.path = os.getcwd() + "/../../Stats/Supporting Files/"
        # Sorted so that processing order and progress output are stable
        # across runs (os.listdir order is arbitrary).
        self.languages = sorted(
            name for name in os.listdir(self.path) if name.endswith(".lproj")
        )

    def _strings_path(self, lang):
        """Return the path of the ``Localizable.strings`` file of ``lang``.

        Args:
            lang: Directory name of the language, e.g. ``"de.lproj"``.
        """
        return os.path.join(self.path, lang, "Localizable.strings")

    @staticmethod
    def _insert_line(lines, index, line):
        """Insert ``line`` into ``lines`` at ``index`` (append when past the end).

        When the insertion lands after a final line that lacks a trailing
        newline, the newline is added first so the two lines do not get
        merged once the list is joined and written back.
        """
        if index >= len(lines) and lines and not lines[-1].endswith("\n"):
            lines[-1] += "\n"
        lines.insert(index, line)

    def en_file(self):
        """Return the lines of the English reference file.

        Exits the process with ``"English language not found."`` when the
        file does not exist.
        """
        try:
            with open(self._strings_path("en.lproj"), "r", encoding="utf-8") as f:
                return f.readlines()
        except FileNotFoundError:
            sys.exit("English language not found.")

    def check(self):
        """Verify that every language has every English key on the same line.

        Exits the process with a descriptive message on the first mismatch;
        prints a summary when everything matches.
        """
        en_file = self.en_file()
        en_dict = dictionary(en_file)

        for lang in self.languages:
            with open(self._strings_path(lang), "r", encoding="utf-8") as f:
                lang_dict = dictionary(f.readlines())
            name = lang.replace(".lproj", "")

            for v, en_item in en_dict.items():
                en_key = en_item.get("key")
                if v not in lang_dict:
                    sys.exit(f"missing key `{en_key}` in `{name}` on line `{v}`")
                lang_key = lang_dict[v].get("key")
                if lang_key != en_key:
                    sys.exit(f"missing or wrong key `{lang_key}` in `{name}` on line `{v}`, must be `{en_key}`")

        print(f"All fine, found {len(en_file)} lines in {len(self.languages)} languages.")

    def fix(self):
        """Insert missing keys (with their English value) into every language.

        Each language file is read once, repaired in memory and written back
        only when something was inserted. ``check()`` runs afterwards.
        """
        en_dict = dictionary(self.en_file())

        for lang in self.languages:
            lang_path = self._strings_path(lang)
            with open(lang_path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            lang_dict = dictionary(lines)
            changed = False
            for v, en_item in en_dict.items():
                en_key = en_item.get("key")
                en_value = en_item.get("value")
                if v not in lang_dict or en_key != lang_dict[v].get("key"):
                    self._insert_line(lines, v, f"\"{en_key}\" = \"{en_value}\";\n")
                    # The insertion shifts every following line, so the
                    # parsed view has to be rebuilt before the next lookup.
                    lang_dict = dictionary(lines)
                    changed = True

            if changed:
                with open(lang_path, "w", encoding="utf-8") as f:
                    f.write("".join(lines))

        self.check()

    def _normalize_lang_code(self, code):
        """Return ``code`` without a ``.lproj`` suffix and with ``-`` as ``_``."""
        code = (code or "").strip()
        if code.endswith(".lproj"):
            code = code[:-6]
        return code.replace("-", "_")

    def _extract_translation(self, raw, fallback):
        """Pull the translated text out of a raw model response.

        The response is tried as JSON first (a dict or a list of dicts with
        ``role``/``type``/``text``/``content`` fields), then as a short
        single-line answer, then as the first usable line of a multi-line
        answer.

        Args:
            raw: Raw response text; may be ``None`` or empty.
            fallback: Value returned when nothing usable is found.

        Returns:
            The cleaned translation, or ``fallback``.
        """
        raw = (raw or "").strip()
        if not raw:
            return fallback

        def _clean(s):
            # Drop surrounding whitespace, markdown emphasis and quotes.
            return (s or "").strip().strip("*").strip('"').strip("'").strip()

        def _from_dict(obj):
            if not isinstance(obj, dict):
                return None

            role = (obj.get("role") or "").strip().lower()
            obj_type = (obj.get("type") or "").strip().lower()

            text = obj.get("text")
            if isinstance(text, str) and text.strip():
                if role in ("assistant", "translation") or obj_type == "translation":
                    return _clean(text)

            content = obj.get("content")
            if isinstance(content, list):
                for item in content:
                    if not isinstance(item, dict):
                        continue
                    # A content item without its own role inherits the parent's.
                    item_role = (item.get("role") or role).strip().lower()
                    item_type = (item.get("type") or "").strip().lower()
                    t = item.get("text")
                    if isinstance(t, str) and t.strip():
                        if item_role in ("assistant", "translation") or item_type in ("translation", "text"):
                            return _clean(t)
            return None

        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError:
            parsed = None

        if isinstance(parsed, dict):
            hit = _from_dict(parsed)
            if hit:
                return hit
        elif isinstance(parsed, list):
            for item in parsed:
                hit = _from_dict(item)
                if hit:
                    return hit

        if "\n" not in raw and len(raw) <= 200:
            candidate = _clean(raw)
            if candidate and not candidate.startswith("{") and not candidate.startswith("["):
                return candidate

        for line in raw.splitlines():
            line = _clean(line)
            if line and not line.startswith("{") and not line.startswith("["):
                return line

        return fallback

    def _lang_name_from_code(self, code):
        """Return an English display name for ``code``.

        Uses the optional ``langcodes`` package when it was importable;
        otherwise (or when the lookup fails) the normalized code itself is
        returned. An empty code yields ``"Unknown"``.
        """
        c = self._normalize_lang_code(code).replace("_", "-").strip()
        if not c:
            return "Unknown"

        if langcodes:
            try:
                name = langcodes.get(c).display_name("en")
                if name:
                    return name
            except Exception:
                # Best effort only: any failure inside the optional
                # third-party lookup falls back to the bare code.
                pass

        return c

    def _script_hint(self, lang_code):
        """Return an extra prompt sentence about the target script, or ``""``."""
        lang = self._normalize_lang_code(lang_code).lower()
        hints = {
            "el": "Greek script only (Α-Ω, α-ω) except numbers/punctuation/brand names.",
            "ru": "Cyrillic script only except numbers/punctuation/brand names.",
            "uk": "Cyrillic script only except numbers/punctuation/brand names.",
            "bg": "Cyrillic script only except numbers/punctuation/brand names.",
            "ja": "Japanese writing system (Hiragana/Katakana/Kanji), no romaji unless required.",
            "zh_cn": "Simplified Chinese characters.",
            "zh_hans": "Simplified Chinese characters.",
            "zh_tw": "Traditional Chinese characters.",
            "zh_hant": "Traditional Chinese characters.",
            "ko": "Korean Hangul preferred.",
            "et": "Use Estonian only. Do not use Russian.",
        }
        return hints.get(lang, "")

    def _build_prompt(self, text, lang_name, lang_code, script_hint):
        """Build the translation prompt sent to the model.

        Args:
            text: English source text.
            lang_name: Display name of the target language.
            lang_code: Normalized target language code.
            script_hint: Optional extra sentence from ``_script_hint``.
        """
        return (
            f"You are a professional English (en) to {lang_name} ({lang_code}) translator. "
            "Your goal is to accurately convey the meaning and nuances of the original English text "
            f"while adhering to {lang_name} grammar, vocabulary, and cultural sensitivities. "
            f"Produce only the {lang_name} translation, without any additional explanations or commentary. "
            "Output only the final translated text. "
            "Do not add explanations, notes, JSON, markdown, or quotes. "
            # Plain (non-f) string on purpose: inside an f-string `{0}` would
            # be evaluated and rendered as `0`.
            "Preserve placeholders/tokens exactly (e.g. `%@`, `%d`, `{0}`, `MB/s`). "
            "Preserve punctuation, casing intent, and technical abbreviations. "
            f"{script_hint} "
            f"Please translate the following English text into {lang_name}:\n\n"
            f"{text}"
        )

    def _ollama_translate(self, text, target_lang, model="translategemma:4b", retries=2):
        """Translate ``text`` into ``target_lang`` through the generate endpoint.

        Args:
            text: English source text.
            target_lang: Target language code (``.lproj`` suffix allowed).
            model: Model name sent in the request payload.
            retries: Number of additional attempts after a failed request.

        Returns:
            The extracted translation, or ``text`` itself when the response
            holds nothing usable.

        Raises:
            OSError: The request still fails after all attempts
                (``urllib.error.URLError`` and timeouts are subclasses).
            ValueError: The response body is not valid UTF-8 JSON after all
                attempts.
        """
        url = "http://ai:11434/api/generate"
        tgt = self._normalize_lang_code(target_lang)
        prompt = self._build_prompt(
            text,
            self._lang_name_from_code(tgt),
            tgt,
            self._script_hint(tgt),
        )

        payload = {
            "model": model,
            "prompt": prompt,
            "stream": False,
        }
        req = urllib.request.Request(
            url,
            data=json.dumps(payload).encode("utf-8"),
            headers={"Content-Type": "application/json"},
            method="POST",
        )

        attempts = max(int(retries), 0) + 1
        data = None
        for attempt in range(1, attempts + 1):
            try:
                with urllib.request.urlopen(req, timeout=240) as resp:
                    data = json.loads(resp.read().decode("utf-8"))
                break
            except (OSError, ValueError):
                # Network errors and malformed bodies are retried; the last
                # failure is propagated unchanged.
                if attempt == attempts:
                    raise

        raw = data.get("response") if isinstance(data, dict) else None
        return self._extract_translation(raw, fallback=text)

    def _line_authors(self, file_path):
        """Return the ``git blame`` author name of every line of ``file_path``.

        Raises:
            subprocess.CalledProcessError: ``git blame`` exits non-zero.
            OSError: ``git`` cannot be executed.
        """
        cmd = ["git", "blame", "--line-porcelain", file_path]
        out = subprocess.check_output(cmd, text=True, cwd=os.getcwd(), stderr=subprocess.DEVNULL)
        prefix = "author "
        return [
            line[len(prefix):].strip()
            for line in out.splitlines()
            if line.startswith(prefix)
        ]

    def _my_git_author(self):
        """Return the configured ``git config user.name``, or ``""`` if unavailable."""
        try:
            return subprocess.check_output(
                ["git", "config", "user.name"],
                text=True,
                cwd=os.getcwd(),
                stderr=subprocess.DEVNULL,
            ).strip()
        except (OSError, subprocess.CalledProcessError):
            return ""

    def _strings_escape(self, value):
        """Escape backslashes, double quotes and newlines for a ``.strings`` value."""
        s = "" if value is None else str(value)
        return s.replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")

    def translate(self, model="translategemma:4b", accept=False):
        """Machine-translate values that are still identical to English.

        For every non-English language: missing keys are inserted with their
        English value, then each entry whose value still equals the English
        one is translated and rewritten with a trailing ``// <model>`` tag.

        Args:
            model: Model name passed to ``_ollama_translate``.
            accept: When true, save without asking; otherwise prompt per file.
        """
        en_dict = dictionary(self.en_file())
        my_author = self._my_git_author()
        omit_keys = {"Swap"}
        ai_tag = f"// {model}"

        target_languages = [
            lang for lang in self.languages
            if not self._normalize_lang_code(lang).lower().startswith("en")
        ]
        total_langs = len(target_languages)

        for lang_idx, lang in enumerate(target_languages, start=1):
            lang_code = lang.replace(".lproj", "")
            lang_name = self._lang_name_from_code(lang_code)
            lang_path = self._strings_path(lang)

            with open(lang_path, "r", encoding="utf-8") as f:
                old_lines = f.readlines()

            new_lines = old_lines[:]
            lang_dict = dictionary(old_lines)
            changed = False

            try:
                authors = self._line_authors(lang_path)
            except (OSError, subprocess.CalledProcessError):
                # Blame is only used to protect other people's translations;
                # without it every line is treated as having no known author.
                authors = [""] * len(old_lines)

            candidates = []
            for i, en_item in en_dict.items():
                en_key = en_item.get("key")
                en_value = en_item.get("value")

                translate_item = lang_dict.get(i)
                if translate_item is None or translate_item.get("key") != en_key:
                    self._insert_line(new_lines, i, f"\"{en_key}\" = \"{en_value}\";\n")
                    # Keep the per-line author list aligned with new_lines.
                    authors.insert(i, my_author)
                    # Re-parse: the insertion shifted every following line, and
                    # a stale view would flag all of them as mismatches.
                    lang_dict = dictionary(new_lines)
                    changed = True
                    # NOTE(review): a freshly inserted line is not translated
                    # in the same pass (same as before); confirm this is intended.
                    continue

                translate_value = translate_item.get("value")
                if en_key in omit_keys:
                    continue
                # Leave values alone that someone else wrote and that already
                # differ from the English text.
                if i < len(authors) and my_author and authors[i] != my_author and en_value != translate_value:
                    continue

                if translate_value is None or translate_value == en_value:
                    candidates.append((i, en_key, en_value))

            print("Candidates for translation in {} ({}): {}".format(lang_name, lang_code, len(candidates)))

            for idx, (i, en_key, en_value) in enumerate(candidates, start=1):
                translated = self._ollama_translate(en_value, lang_code, model=model)
                safe_translated = self._strings_escape(translated)
                print(f"[{lang_name} {lang_idx}/{total_langs}] {idx}/{len(candidates)} {en_key} -> {safe_translated}")

                translated_line = f"\"{en_key}\" = \"{safe_translated}\";\n"
                update_line = f"\"{en_key}\" = \"{safe_translated}\"; {ai_tag}\n"
                if i < len(new_lines):
                    # Untouched when the result equals what is already there.
                    if new_lines[i] != translated_line:
                        new_lines[i] = update_line
                        changed = True
                else:
                    new_lines.append(update_line)
                    changed = True

            if not changed:
                print(f"No changes for {lang_name} ({lang_code}).")
                continue

            if not accept:
                answer = input(f"Save changes to {lang_path}? [Y/n]: ").strip().lower()
                if answer not in ("", "y", "yes"):
                    print(f"Skipped: {lang_path}")
                    continue

            with open(lang_path, "w", encoding="utf-8") as f:
                f.write("".join(new_lines))
            print(f"Saved: {lang_path}")

        print("Translation completed.")
|
||
|
||
|
||
if __name__ == "__main__":
    # Usage: [fix|translate] [--accept]; anything else runs the check.
    cli_args = sys.argv[1:]
    accept = "--accept" in cli_args
    # Pick the command from the arguments with the flag removed, so that
    # `--accept translate` and `translate --accept` behave the same.
    positional = [arg for arg in cli_args if arg != "--accept"]
    command = positional[0] if positional else "check"

    # Named `tool` so the instance does not shadow the `i18n` class.
    tool = i18n()

    if command == "fix":
        print("running fix command...")
        tool.fix()
    elif command == "translate":
        print("running translate command...")
        tool.translate(accept=accept)
    else:
        print("running check command...")
        tool.check()

    print("done")
|