Add localization support and translation script

- Introduced a new extension for localization in Flutter with `LocalizationExtension` in `l10n.dart`.
- Added a Python script `translate.py` for translating ARB/JSON localization files using a local Ollama model, preserving keys and placeholders, and handling ICU format rules.
This commit is contained in:
zjs81 2026-01-11 17:13:50 -07:00
parent 2495cd840f
commit b2ce82fe7e
64 changed files with 54716 additions and 1254 deletions

723
tools/translate.py Normal file
View file

@ -0,0 +1,723 @@
#!/usr/bin/env python3
"""
translate_arb_with_ollama.py
Translates ARB/JSON localization values using a local Ollama model, while:
- preserving keys
- skipping "@@locale" and all "@key" metadata blocks
- preserving placeholders like {deviceName}, {count, plural, ...}
- writing a new file with updated @@locale
- printing progress as it runs
Usage:
python translate_arb_with_ollama.py \
--in /home/zjs81/Desktop/meshcore-open/lib/l10n/app_en.arb \
--out /home/zjs81/Desktop/meshcore-open/lib/l10n/app_es.arb \
--to-locale es \
--model ministral-3:latest \
--temperature 0 \
--concurrency 4
"""
from __future__ import annotations
import argparse
import json
import re
import sys
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from dataclasses import dataclass
from typing import Any, Dict, List, Tuple, Optional
from urllib import request
# Simple placeholder like {name}, {count}, {deviceName}
SIMPLE_PLACEHOLDER_RE = re.compile(r"\{(\w+)\}")
# ICU plural/select variable name extraction: {count, plural, ...} or {gender, select, ...}
ICU_VAR_RE = re.compile(r"\{(\w+)\s*,\s*(?:plural|select|selectordinal)\s*,", re.IGNORECASE)
@dataclass
class OllamaConfig:
host: str
model: str
timeout_s: float
temperature: float
num_ctx: int
num_predict: int
top_p: float
def http_post_json(url: str, payload: Dict[str, Any], timeout_s: float) -> Dict[str, Any]:
data = json.dumps(payload).encode("utf-8")
req = request.Request(
url,
data=data,
headers={"Content-Type": "application/json"},
method="POST",
)
with request.urlopen(req, timeout=timeout_s) as resp:
body = resp.read().decode("utf-8")
return json.loads(body)
def strip_markdown(s: str) -> str:
"""Remove common markdown formatting from output."""
# Remove bold/italic markers
s = re.sub(r'\*\*(.+?)\*\*', r'\1', s)
s = re.sub(r'\*(.+?)\*', r'\1', s)
s = re.sub(r'__(.+?)__', r'\1', s)
s = re.sub(r'_(.+?)_', r'\1', s)
# Remove stray asterisks
s = re.sub(r'\*+', '', s)
return s.strip()
def ollama_generate(cfg: OllamaConfig, prompt: str) -> str:
url = cfg.host.rstrip("/") + "/api/generate"
payload = {
"model": cfg.model,
"prompt": prompt,
"stream": False,
"options": {
"temperature": cfg.temperature,
"num_ctx": cfg.num_ctx,
"num_predict": cfg.num_predict,
"top_p": cfg.top_p,
},
}
resp = http_post_json(url, payload, cfg.timeout_s)
out = resp.get("response", "")
# Clean up common LLM artifacts
out = strip_markdown(out)
return out.strip()
def extract_placeholder_names(s: str) -> List[str]:
"""Extract placeholder variable names (not the full braced expression).
For '{name}' returns ['name']
For '{count} {count, plural, =1{hop} other{hops}}' returns ['count']
"""
names = set()
# Get ICU variable names first
for m in ICU_VAR_RE.finditer(s):
names.add(m.group(1))
# Get simple placeholders, but skip if they're inside ICU blocks (text forms like {hop})
# We do this by checking if the name is also an ICU variable - if not, it's a simple placeholder
# unless it looks like a word (ICU text forms are usually short words)
for m in SIMPLE_PLACEHOLDER_RE.finditer(s):
name = m.group(1)
# Check if this appears as a simple {name} placeholder (not inside ICU)
# by looking at what comes after it
full_match = m.group(0)
pos = m.start()
# Look for pattern like {name, plural/select - if found, skip (handled by ICU_VAR_RE)
rest = s[pos:]
if re.match(r"\{\w+\s*,\s*(?:plural|select|selectordinal)", rest, re.IGNORECASE):
continue
# Check if this is likely a text form inside ICU (preceded by =X{ or other{)
before = s[:pos]
if re.search(r"(?:=\d+|zero|one|two|few|many|other)\s*$", before, re.IGNORECASE):
continue # This is a text form like "=1{hop}", skip it
names.add(name)
return sorted(names)
def build_prompt(text: str, target_lang: str, placeholder_names: List[str], has_icu: bool, ask_confidence: bool = False) -> str:
preserve_list = "\n".join(f"- {{{t}}}" for t in placeholder_names) if placeholder_names else "- (none)"
icu_note = ""
if has_icu:
icu_note = (
"ICU FORMAT RULES:\n"
f"- This text uses ICU plural/select format: {{var, plural, =1{{singular}} other{{plural}}}}\n"
"- Keep structure keywords EXACTLY: plural, select, =0, =1, =2, zero, one, two, few, many, other\n"
f"- TRANSLATE the words inside each form to {target_lang}\n"
"- Example: =1{item} other{items} -> translate 'item'/'items' but keep =1{{ }} other{{ }} structure\n\n"
)
if ask_confidence:
return (
f"Translate this UI string to {target_lang}.\n\n"
"RULES:\n"
"- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n"
"- Use infinitive verb forms for buttons (Save, Delete, etc.).\n"
f"- Use natural {target_lang} word order.\n"
"- Keep brand names and technical terms unchanged.\n\n"
f"{icu_note}"
f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n"
f"English: {text}\n\n"
"Respond with EXACTLY two lines:\n"
"1. The translation (no quotes)\n"
"2. Confidence score 1-5 (5=certain, 1=unsure)\n\n"
"Example response:\n"
"Guardar archivo\n"
"5"
)
else:
return (
f"Translate this UI string to {target_lang}. Return ONLY the translation.\n\n"
"RULES:\n"
"- Output the translated text ONLY. No markdown, no quotes, no explanations.\n"
"- Placeholders like {name}, {count} must appear EXACTLY unchanged.\n"
"- Use infinitive verb forms for buttons (Save, Delete, etc.).\n"
f"- Use natural {target_lang} word order.\n"
"- Keep brand names and technical terms unchanged.\n"
"- Translation length should be similar to the original.\n\n"
f"{icu_note}"
f"Placeholders: {', '.join(f'{{{t}}}' for t in placeholder_names) if placeholder_names else 'none'}\n\n"
f"English: {text}\n"
f"{target_lang}:"
)
def parse_confidence_response(response: str) -> Tuple[str, int]:
"""Parse response with translation and confidence score.
Returns (translation, confidence) where confidence is 1-5, or 0 if unparseable.
"""
lines = response.strip().split('\n')
if len(lines) >= 2:
translation = '\n'.join(lines[:-1]).strip() # All but last line
try:
# Try to extract number from last line
last_line = lines[-1].strip()
# Handle formats like "5", "5/5", "Confidence: 5"
match = re.search(r'\b([1-5])\b', last_line)
if match:
confidence = int(match.group(1))
return translation, confidence
except ValueError:
pass
# Fallback: treat whole response as translation with unknown confidence
return strip_markdown(response), 0
def looks_like_translation_failed(src: str, out: str) -> bool:
if not out:
return True
if src.strip() == out.strip() and len(src.strip()) > 8:
return True
# Detect hallucination: output much longer than input (3x+ for short strings, 2x for longer)
src_len = len(src.strip())
out_len = len(out.strip())
if src_len < 50 and out_len > src_len * 3:
return True
if src_len >= 50 and out_len > src_len * 2:
return True
return False
def has_icu_block(s: str) -> bool:
"""Check if string contains ICU plural/select block."""
return bool(ICU_VAR_RE.search(s))
def validate_preserved_tokens(src: str, out: str) -> Tuple[bool, Optional[str]]:
"""Validate that placeholder names are preserved in translation."""
src_names = extract_placeholder_names(src)
# Check each placeholder name appears in output
for name in src_names:
# Look for {name} or {name, plural/select...}
pattern = r"\{" + re.escape(name) + r"(?:\}|\s*,)"
if not re.search(pattern, out):
return False, f"Missing placeholder: {{{name}}}"
# If source has ICU block, output should too
if has_icu_block(src) and not has_icu_block(out):
return False, "ICU plural/select block missing in output"
return True, None
def compute_confidence(src: str, out: str) -> Tuple[float, List[str]]:
"""
Compute confidence score (0.0 to 1.0) for a translation.
Returns (score, list of issues).
"""
issues = []
score = 1.0
src_len = len(src.strip())
out_len = len(out.strip())
# Length ratio check
if src_len > 0:
ratio = out_len / src_len
if ratio < 0.3: # Way too short
score -= 0.4
issues.append("too_short")
elif ratio < 0.5:
score -= 0.2
issues.append("short")
elif ratio > 2.5: # Way too long
score -= 0.4
issues.append("too_long")
elif ratio > 1.8:
score -= 0.2
issues.append("long")
# Contains question mark when source doesn't (model asking questions)
if '?' in out and '?' not in src:
score -= 0.3
issues.append("added_question")
# Contains common LLM artifacts
artifacts = ['```', '**', 'translation:', 'here is', 'certainly', 'i can', 'i\'ll']
out_lower = out.lower()
for artifact in artifacts:
if artifact in out_lower:
score -= 0.3
issues.append(f"artifact:{artifact}")
break
# Output looks like it's in English still (common words)
english_indicators = ['the ', ' is ', ' are ', ' was ', ' were ', ' have ', ' has ', 'you ', ' your ']
english_count = sum(1 for ind in english_indicators if ind in out_lower)
if english_count >= 3 and src_len > 20:
score -= 0.3
issues.append("likely_english")
# Contains newlines when source doesn't
if '\n' in out and '\n' not in src:
score -= 0.2
issues.append("added_newlines")
# ICU/placeholder validation
ok, _ = validate_preserved_tokens(src, out)
if not ok:
score -= 0.3
issues.append("placeholder_error")
return max(0.0, score), issues
# Keys to skip translation (brand names)
SKIP_KEYS = {
"appTitle",
}
# Manual translations for problematic strings (key -> {locale: translation})
MANUAL_TRANSLATIONS: Dict[str, Dict[str, str]] = {
"repeater_daysHoursMinsSecs": {
"es": "{days} días {hours}h {minutes}m {seconds}s",
"fr": "{days} jours {hours}h {minutes}m {seconds}s",
"de": "{days} Tage {hours}h {minutes}m {seconds}s",
"it": "{days} giorni {hours}h {minutes}m {seconds}s",
"pt": "{days} dias {hours}h {minutes}m {seconds}s",
"pl": "{days} dni {hours}h {minutes}m {seconds}s",
"sk": "{days} dní {hours}h {minutes}m {seconds}s",
"sl": "{days} dni {hours}h {minutes}m {seconds}s",
"cs": "{days} dní {hours}h {minutes}m {seconds}s",
"ja": "{days}{hours}時間 {minutes}{seconds}",
"ko": "{days}{hours}시간 {minutes}{seconds}",
"zh": "{days}{hours}小时 {minutes}{seconds}",
"ru": "{days} дней {hours}ч {minutes}м {seconds}с",
"bg": "{days} дни {hours}ч {minutes}м {seconds}с",
"nl": "{days} dagen {hours}u {minutes}m {seconds}s",
"sv": "{days} dagar {hours}t {minutes}m {seconds}s",
},
}
def is_translatable_entry(key: str, value: Any) -> bool:
if key == "@@locale":
return False
if key in SKIP_KEYS:
return False
if key.startswith("@"):
return False
if not isinstance(value, str):
return False
if value.strip() == "":
return False
return True
def translate_one(
key: str,
text: str,
target_lang: str,
cfg: OllamaConfig,
retries: int,
backoff_s: float,
fallback_cfg: Optional[OllamaConfig] = None,
confidence_threshold: float = 0.7,
model_confidence_threshold: int = 4,
ask_model_confidence: bool = True,
) -> Tuple[str, str, Optional[str], bool]:
"""
Translate a single string.
Returns (key, translated_text, error_or_none, used_fallback_model).
"""
placeholder_names = extract_placeholder_names(text)
text_has_icu = has_icu_block(text)
# Ask for confidence if we have a fallback model
should_ask_confidence = ask_model_confidence and fallback_cfg and fallback_cfg.model != cfg.model
prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=should_ask_confidence)
used_fallback = False
last_err: Optional[str] = None
for attempt in range(retries + 1):
try:
raw_out = ollama_generate(cfg, prompt)
# Parse confidence if we asked for it
if should_ask_confidence:
out, model_confidence = parse_confidence_response(raw_out)
else:
out = raw_out
model_confidence = 5 # Assume high confidence if not asked
ok, why = validate_preserved_tokens(text, out)
if not ok:
last_err = f"Validation failed: {why}"
# Retry without confidence format for simpler response
prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False)
prompt = (
prompt
+ "\n\nIMPORTANT: You MUST keep every {...} segment exactly unchanged. "
"If you cannot, return the original text unchanged."
)
raise ValueError(last_err)
if looks_like_translation_failed(text, out) and attempt < retries:
last_err = "Output identical/suspicious; retrying"
time.sleep(backoff_s * (attempt + 1))
continue
# Check if model reported low confidence - use fallback
if model_confidence > 0 and model_confidence < model_confidence_threshold and fallback_cfg:
fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu, ask_confidence=False)
fallback_out = ollama_generate(fallback_cfg, fallback_prompt)
fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
if fallback_ok and not looks_like_translation_failed(text, fallback_out):
return key, fallback_out, None, True
# Also check computed confidence and use fallback model if needed
confidence, issues = compute_confidence(text, out)
if confidence < confidence_threshold and fallback_cfg and fallback_cfg.model != cfg.model:
# Low confidence - try with bigger model
fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu)
fallback_out = ollama_generate(fallback_cfg, fallback_prompt)
fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
fallback_conf, _ = compute_confidence(text, fallback_out)
if fallback_ok and fallback_conf > confidence:
# Fallback is better
return key, fallback_out, None, True
elif fallback_ok and not ok:
# Original failed validation but fallback passed
return key, fallback_out, None, True
return key, out, None, used_fallback
except Exception as e:
last_err = str(e)
if attempt < retries:
time.sleep(backoff_s * (attempt + 1))
continue
# Last resort: try fallback model
if fallback_cfg and fallback_cfg.model != cfg.model:
try:
fallback_prompt = build_prompt(text, target_lang, placeholder_names, text_has_icu)
fallback_out = ollama_generate(fallback_cfg, fallback_prompt)
fallback_ok, _ = validate_preserved_tokens(text, fallback_out)
if fallback_ok and not looks_like_translation_failed(text, fallback_out):
return key, fallback_out, None, True
except Exception:
pass
return key, text, last_err, False # fallback to original on failure
def fmt_duration(seconds: float) -> str:
if seconds < 60:
return f"{seconds:.1f}s"
m = int(seconds // 60)
s = seconds - 60 * m
if m < 60:
return f"{m}m {s:.0f}s"
h = m // 60
m2 = m % 60
return f"{h}h {m2}m"
def main() -> int:
ap = argparse.ArgumentParser()
ap.add_argument("--in", dest="in_path", required=True, help="Input .arb/.json file path")
ap.add_argument("--out", dest="out_path", required=True, help="Output .arb/.json file path")
ap.add_argument("--to-locale", required=True, help="Target locale code, e.g. es, fr, de")
ap.add_argument("--target-lang", default=None, help="Target language name for the model, e.g. Spanish (defaults from locale)")
ap.add_argument("--model", default="gemma3:4b", help="Ollama model name")
ap.add_argument("--fallback-model", default=None, help="Larger model to use for low-confidence translations")
ap.add_argument("--confidence-threshold", type=float, default=0.7, help="Computed confidence threshold to trigger fallback (0.0-1.0)")
ap.add_argument("--model-confidence-threshold", type=int, default=4, help="Model self-reported confidence threshold (1-5, use fallback if below)")
ap.add_argument("--retry-model", default="ministral-3:latest", help="Model to use for end-of-run retries")
ap.add_argument("--host", default="http://localhost:11434", help="Ollama host")
ap.add_argument("--timeout", type=float, default=120.0, help="HTTP timeout seconds")
ap.add_argument("--temperature", type=float, default=0.2, help="Model temperature")
ap.add_argument("--num-ctx", type=int, default=4096, help="Context size")
ap.add_argument("--num-predict", type=int, default=256, help="Max tokens to generate")
ap.add_argument("--top-p", type=float, default=0.9, help="Top-p")
ap.add_argument("--concurrency", type=int, default=4, help="Parallel requests")
ap.add_argument("--retries", type=int, default=2, help="Retries per string")
ap.add_argument("--backoff", type=float, default=0.6, help="Backoff seconds base")
ap.add_argument("--dry-run", action="store_true", help="Do not write file; just print summary")
ap.add_argument("--progress-every", type=int, default=1, help="Print progress every N completed strings (default: 1)")
args = ap.parse_args()
locale_map = {
"es": "Spanish",
"fr": "French",
"de": "German",
"it": "Italian",
"pt": "Portuguese",
"pt-BR": "Brazilian Portuguese",
"ja": "Japanese",
"ko": "Korean",
"zh": "Chinese (Simplified)",
"zh-Hant": "Chinese (Traditional)",
"ru": "Russian",
"uk": "Ukrainian",
"ar": "Arabic",
"hi": "Hindi",
"tr": "Turkish",
"nl": "Dutch",
"sv": "Swedish",
"no": "Norwegian",
"da": "Danish",
"fi": "Finnish",
"pl": "Polish",
"cs": "Czech",
"sk": "Slovak",
"sl": "Slovenian",
"bg": "Bulgarian",
"el": "Greek",
"he": "Hebrew",
"th": "Thai",
"vi": "Vietnamese",
"id": "Indonesian",
}
target_lang = args.target_lang or locale_map.get(args.to_locale, args.to_locale)
try:
with open(args.in_path, "r", encoding="utf-8") as f:
data = json.load(f)
except Exception as e:
print(f"Failed to read input: {e}", file=sys.stderr)
return 2
if not isinstance(data, dict):
print("Input JSON must be an object at top-level.", file=sys.stderr)
return 2
cfg = OllamaConfig(
host=args.host,
model=args.model,
timeout_s=args.timeout,
temperature=args.temperature,
num_ctx=args.num_ctx,
num_predict=args.num_predict,
top_p=args.top_p,
)
# Fallback model for low-confidence translations
fallback_cfg = None
if args.fallback_model:
fallback_cfg = OllamaConfig(
host=args.host,
model=args.fallback_model,
timeout_s=args.timeout,
temperature=args.temperature,
num_ctx=args.num_ctx,
num_predict=args.num_predict,
top_p=args.top_p,
)
out_data: Dict[str, Any] = dict(data)
out_data["@@locale"] = args.to_locale
items: List[Tuple[str, str]] = [(k, v) for k, v in data.items() if is_translatable_entry(k, v)]
# Apply manual translations first
manual_count = 0
items_to_translate: List[Tuple[str, str]] = []
for k, v in items:
if k in MANUAL_TRANSLATIONS and args.to_locale in MANUAL_TRANSLATIONS[k]:
out_data[k] = MANUAL_TRANSLATIONS[k][args.to_locale]
manual_count += 1
else:
items_to_translate.append((k, v))
if manual_count > 0:
print(f"Applied {manual_count} manual translation(s)")
total = len(items_to_translate)
if total == 0 and manual_count == 0:
print("No translatable string entries found (excluding @@locale and @metadata).", file=sys.stderr)
return 1
if total == 0:
print("All strings handled by manual translations.")
else:
fallback_info = f" (fallback: {args.fallback_model})" if args.fallback_model else ""
print(f"Translating {total} strings -> {target_lang} using {cfg.model}{fallback_info} (concurrency={args.concurrency})")
start = time.time()
failures: List[Tuple[str, str]] = []
translated_ok = manual_count # Count manual translations as OK
fallback_used = 0
completed = 0
# Build a lookup for original text by key
items_dict: Dict[str, str] = dict(items_to_translate)
# Submit all tasks up front
if total > 0:
with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
future_to_key = {
ex.submit(
translate_one,
key=k,
text=v,
target_lang=target_lang,
cfg=cfg,
retries=args.retries,
backoff_s=args.backoff,
fallback_cfg=fallback_cfg,
confidence_threshold=args.confidence_threshold,
model_confidence_threshold=args.model_confidence_threshold,
ask_model_confidence=bool(args.fallback_model),
): k
for (k, v) in items_to_translate
}
for fut in as_completed(future_to_key):
k, translated, err, used_fallback = fut.result()
out_data[k] = translated
completed += 1
if err:
failures.append((k, err))
status = "FAIL"
else:
translated_ok += 1
if used_fallback:
fallback_used += 1
status = "OK*" # asterisk indicates fallback model was used
else:
status = "OK"
if args.progress_every > 0 and (completed % args.progress_every == 0 or completed == total):
elapsed = time.time() - start
rate = completed / elapsed if elapsed > 0 else 0.0
remaining = (total - completed) / rate if rate > 0 else 0.0
# Keep it single-line friendly but readable.
print(
f"[{completed:>4}/{total}] {status:<4} {k} | "
f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}"
)
elapsed = time.time() - start
fallback_msg = f", used_fallback_model={fallback_used}" if fallback_used > 0 else ""
print(f"Done in {fmt_duration(elapsed)}. OK={translated_ok}{fallback_msg}, errors={len(failures)}")
# Retry failed translations at the end with increasing temperature
retry_round = 1
max_end_retries = 3
retry_model = args.retry_model
while failures and retry_round <= max_end_retries:
# Increase temperature for each retry round
retry_temp = min(cfg.temperature + (0.2 * retry_round), 1.0)
print(f"\n--- Retry round {retry_round}/{max_end_retries} for {len(failures)} failed key(s) (model={retry_model}, temp={retry_temp:.1f}) ---")
retry_items = [(k, items_dict[k]) for k, _ in failures]
failures = []
retry_completed = 0
retry_total = len(retry_items)
retry_start = time.time()
# Create config with higher temperature (and optionally different model) for retries
retry_cfg = OllamaConfig(
host=cfg.host,
model=retry_model,
timeout_s=cfg.timeout_s,
temperature=retry_temp,
num_ctx=cfg.num_ctx,
num_predict=cfg.num_predict,
top_p=cfg.top_p,
)
with ThreadPoolExecutor(max_workers=max(1, args.concurrency)) as ex:
future_to_key = {
ex.submit(
translate_one,
key=k,
text=v,
target_lang=target_lang,
cfg=retry_cfg,
retries=args.retries,
backoff_s=args.backoff,
): k
for (k, v) in retry_items
}
for fut in as_completed(future_to_key):
k, translated, err, used_fb = fut.result()
out_data[k] = translated
retry_completed += 1
if err:
failures.append((k, err))
status = "FAIL"
else:
translated_ok += 1
status = "OK"
if args.progress_every > 0 and (retry_completed % args.progress_every == 0 or retry_completed == retry_total):
elapsed = time.time() - retry_start
rate = retry_completed / elapsed if elapsed > 0 else 0.0
remaining = (retry_total - retry_completed) / rate if rate > 0 else 0.0
print(
f"[{retry_completed:>4}/{retry_total}] {status:<4} {k} | "
f"elapsed {fmt_duration(elapsed)} | ETA {fmt_duration(remaining)}"
)
retry_elapsed = time.time() - retry_start
print(f"Retry round {retry_round} done in {fmt_duration(retry_elapsed)}. Remaining failures: {len(failures)}")
retry_round += 1
total_elapsed = time.time() - start
print(f"\nTotal time: {fmt_duration(total_elapsed)}. OK={translated_ok}, final fallback={len(failures)}")
if failures:
print("Fallback keys (kept original English due to errors):")
for k, err in failures[:60]:
print(f" - {k}: {err}")
if len(failures) > 60:
print(f" ... and {len(failures) - 60} more")
if args.dry_run:
print("Dry run: not writing output file.")
return 0
try:
with open(args.out_path, "w", encoding="utf-8") as f:
json.dump(out_data, f, ensure_ascii=False, indent=2)
f.write("\n")
except Exception as e:
print(f"Failed to write output: {e}", file=sys.stderr)
return 2
print(f"Wrote: {args.out_path}")
return 0
if __name__ == "__main__":
raise SystemExit(main())