207 lines
9.2 KiB
Python
207 lines
9.2 KiB
Python
"""Emit src/i18n/products/de.js + property-code helpers from scraped JSON.
|
||
|
||
Runs after scrape_kaiser_natron.py. Property labels (German UI text)
|
||
get converted into stable slug-style codes that messages.js translates
|
||
back to display strings — so DE and EN stay parallel without inline
|
||
translation in the data fixture.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
import re
|
||
import unicodedata
|
||
from pathlib import Path
|
||
|
||
# Directory two levels above this file; every other path is anchored here.
ROOT = Path(__file__).resolve().parent.parent

# Input: the scraped product content that emit() reads.
SCRAPED = ROOT.joinpath("scripts", "output", "products-content.json")

# Output: the generated German product-copy module.
OUT_DE = ROOT.joinpath("src", "i18n", "products", "de.js")

# Output: Markdown notes with snippets to merge by hand into other files.
OUT_HELP = ROOT.joinpath("scripts", "output", "manual-merge.md")
|
||
|
||
# Maps the German property phrase from the source site to a stable
# code we'll use in src/api/products.js + messages.js. New entries
# get added here when the source surfaces a new label.
#
# emit() NFC-normalises these keys before looking up scraped labels, and
# iterates this dict in insertion order when writing the DE block of the
# manual-merge helper — so the order below is the order of that output.
PROP_CODES: dict[str, str] = {
    "Enthärtet": "enthaertet",
    "Frei von Mikroplastik": "frei-von-mikroplastik",
    "Frei von tierischen Substanzen": "frei-von-tierischen-substanzen",
    "Für Böden und Oberflächen": "fuer-boeden-und-oberflaechen",
    "Für Küche und Haushalt": "fuer-kueche-und-haushalt",
    "Glutenfrei": "glutenfrei",
    "Hautverträglich – dermatologisch bestätigt": "hautvertraeglich-dermatologisch-bestaetigt",
    "Hygienische Sauberkeit": "hygienische-sauberkeit",
    "Laktosefrei": "laktosefrei",
    "Mild-alkalisch": "mild-alkalisch",
    "Mit Pampelmusenduft": "mit-pampelmusenduft",
    "Mit dem Duft der Latschenkiefer": "mit-dem-duft-der-latschenkiefer",
    "Mit entspannendem Apfelsinenduft": "mit-entspannendem-apfelsinenduft",
    "Mit natürlicher Seife": "mit-natuerlicher-seife",
    "Mit pflegendem Lanolin": "mit-pflegendem-lanolin",
    "Nachhaltig durch Tenside nachwachsenden Ursprungs": "nachhaltig-durch-tenside-nachwachsenden-ursprungs",
    "Neutralisiert Säuren": "neutralisiert-saeuren",
    "Ohne Konservierungsstoffe": "ohne-konservierungsstoffe",
    "Ohne Mineralöl": "ohne-mineraloel",
    "Vegan": "vegan",
    "Wohltuend und erfrischend": "wohltuend-und-erfrischend",
    # Carry-overs from messages.js v1 (not yet seen in source).
    "In Österreich abgefüllt": "made-in-austria",
    "Bio": "bio",
}
|
||
|
||
# English labels — translations land here (a single source of truth so
# we don't drift between locales).
#
# Keys are the property codes that appear as values in PROP_CODES. emit()
# writes these entries, in insertion order, into the EN block of the
# manual-merge helper.
EN_LABELS: dict[str, str] = {
    "enthaertet": "Softens water",
    "frei-von-mikroplastik": "Microplastic-free",
    "frei-von-tierischen-substanzen": "No animal substances",
    "fuer-boeden-und-oberflaechen": "For floors & surfaces",
    "fuer-kueche-und-haushalt": "For kitchen & home",
    "glutenfrei": "Gluten-free",
    "hautvertraeglich-dermatologisch-bestaetigt": "Skin-friendly — dermatologically tested",
    "hygienische-sauberkeit": "Hygienic clean",
    "laktosefrei": "Lactose-free",
    "mild-alkalisch": "Mildly alkaline",
    "mit-pampelmusenduft": "Pomelo scent",
    "mit-dem-duft-der-latschenkiefer": "Mountain pine scent",
    "mit-entspannendem-apfelsinenduft": "Sweet orange scent",
    "mit-natuerlicher-seife": "With natural soap",
    "mit-pflegendem-lanolin": "With nourishing lanolin",
    "nachhaltig-durch-tenside-nachwachsenden-ursprungs": "Sustainable plant-based surfactants",
    "neutralisiert-saeuren": "Neutralises acids",
    "ohne-konservierungsstoffe": "Preservative-free",
    "ohne-mineraloel": "Mineral-oil-free",
    "vegan": "Vegan",
    "wohltuend-und-erfrischend": "Soothing & refreshing",
    "made-in-austria": "Bottled in Austria",
    "bio": "Organic",
}
|
||
|
||
|
||
def js_str(s: str | None) -> str:
    """Render *s* as a single-quoted, single-line JavaScript string literal.

    Args:
        s: Text to embed, or ``None``.

    Returns:
        The bare token ``null`` when *s* is ``None``; otherwise *s* wrapped
        in single quotes with backslashes and single quotes backslash-escaped
        and every line break (CRLF, CR or LF) replaced by one space.

    Backticks and ``$`` are emitted unchanged: they have no special meaning
    inside a single-quoted literal (only inside template literals).
    """
    if s is None:
        return "null"
    # Escape the backslash first so the backslashes added for quotes are
    # not themselves doubled.
    escaped = s.replace("\\", "\\\\").replace("'", "\\'")
    # A raw carriage return inside a quoted JS literal is a syntax error
    # just like a raw newline, so fold both. CRLF goes first so a Windows
    # line ending collapses to a single space rather than two.
    for line_break in ("\r\n", "\r", "\n"):
        escaped = escaped.replace(line_break, " ")
    return f"'{escaped}'"
|
||
|
||
|
||
def normalise(text: str | None) -> str | None:
    """Tidy a scraped text fragment, mapping empty results to ``None``.

    Rewrites the ``1/2 Tl`` / ``1/2 TL`` spellings to ``½ TL``, collapses
    every whitespace run to a single space and trims both ends. Returns
    ``None`` for falsy input and for text that is empty after tidying.
    """
    if text:
        # Both capitalisations of the half-teaspoon abbreviation end up as
        # the same typographic form.
        for spelling in ("1/2 Tl", "1/2 TL"):
            text = text.replace(spelling, "½ TL")
        collapsed = re.sub(r"\s+", " ", text).strip()
        if collapsed:
            return collapsed
    return None
|
||
|
||
|
||
def _collect_copy(data: dict[str, dict]) -> tuple[dict[str, dict], list[str]]:
    """Normalise each product's narrative fields; split into (populated, skipped slugs)."""
    populated: dict[str, dict] = {}
    skipped: list[str] = []
    for slug, p in data.items():
        tagline = normalise(p.get("tagline"))
        lead = normalise(p.get("lead"))
        long_desc = normalise(p.get("descriptionLong"))
        apps = []
        # `or []` tolerates both a missing key and an explicit null.
        for a in p.get("applications") or []:
            t = normalise(a.get("title"))
            b = normalise(a.get("body"))
            if t or b:
                apps.append({"title": t, "body": b})

        if not any([tagline, lead, long_desc, apps]):
            skipped.append(slug)
            continue
        populated[slug] = {
            "tagline": tagline,
            "lead": lead,
            "descriptionLong": long_desc,
            "applications": apps,
        }
    return populated, skipped


def _render_de_module(populated: dict[str, dict]) -> list[str]:
    """Build the lines of the de.js module for the products that have copy."""
    lines = [
        "// German product long-form copy. Source of truth for all product pages.",
        "// Keyed by product id (matches src/api/products.js). UI chrome strings",
        "// live in src/i18n/messages.js — only product-specific narrative belongs here.",
        "//",
        "// Generated from scripts/output/products-content.json by",
        "// scripts/emit_de_locale.py — re-run after scraping to refresh.",
        "",
        "export default {",
    ]
    for slug, c in populated.items():
        lines.append(f" {js_str(slug)}: {{")
        # Scalar fields are emitted only when present, in this fixed order.
        for field in ("tagline", "lead", "descriptionLong"):
            if c[field]:
                lines.append(f" {field}: {js_str(c[field])},")
        if c["applications"]:
            lines.append(" applications: [")
            for a in c["applications"]:
                lines.append(" {")
                if a["title"]:
                    lines.append(f" title: {js_str(a['title'])},")
                if a["body"]:
                    lines.append(f" body: {js_str(a['body'])},")
                lines.append(" },")
            lines.append(" ],")
        lines.append(" },")
    lines.append("}")
    # Trailing empty element so the joined file ends with a newline.
    lines.append("")
    return lines


def _render_merge_helper(data: dict[str, dict]) -> list[str]:
    """Build the lines of the manual-merge Markdown (property codes + i18n keys)."""
    helper_lines: list[str] = ["# Manual merge — copy these into the right files\n"]
    unknown_props: set[str] = set()

    # 1. Property arrays per slug
    # The source page sometimes serves NFD-normalised umlauts and the
    # PROP_CODES dict is hand-typed in NFC — normalise both before lookup
    # so Python's `==` doesn't trip over decomposed combining marks.
    nfc_codes = {unicodedata.normalize("NFC", k): v for k, v in PROP_CODES.items()}

    helper_lines.append("## src/api/products.js — properties\n")
    for slug, p in data.items():
        codes = []
        # `or []` (not a .get default) so an explicit null in the JSON is
        # treated like a missing key instead of raising TypeError.
        for label in p.get("properties") or []:
            code = nfc_codes.get(unicodedata.normalize("NFC", label))
            if code is None:
                unknown_props.add(label)
                continue
            codes.append(code)
        if codes:
            helper_lines.append(f"- `{slug}` → `properties: {json.dumps(codes)}`")
    helper_lines.append("")

    # 2. messages.js DE + EN keys
    # Values go through js_str so a label containing an apostrophe or a
    # backslash still yields a valid JS string literal.
    helper_lines.append("## src/i18n/messages.js — DE block\n```js")
    for label, code in PROP_CODES.items():
        helper_lines.append(f" 'product.prop.{code}': {js_str(label)},")
    helper_lines.append("```\n")
    helper_lines.append("## src/i18n/messages.js — EN block\n```js")
    for code, en in EN_LABELS.items():
        helper_lines.append(f" 'product.prop.{code}': {js_str(en)},")
    helper_lines.append("```\n")

    if unknown_props:
        helper_lines.append("## Unknown labels (add to PROP_CODES in emit_de_locale.py)\n")
        for unknown in sorted(unknown_props):
            helper_lines.append(f"- {unknown!r}")
    return helper_lines


def emit() -> None:
    """Generate the German product-copy module and the manual-merge notes.

    Reads the scraped JSON at ``SCRAPED`` (an object keyed by product slug),
    writes the products that have any narrative copy to ``OUT_DE`` as a JS
    module, and writes ``OUT_HELP`` with per-product property-code arrays,
    the DE/EN ``product.prop.*`` message entries, and any scraped property
    labels that are missing from ``PROP_CODES``. Progress and the list of
    skipped slugs are printed to stdout.

    Raises:
        FileNotFoundError: If the scraped JSON file does not exist.
        json.JSONDecodeError: If the scraped file is not valid JSON.
    """
    data = json.loads(SCRAPED.read_text(encoding="utf-8"))
    populated, skipped = _collect_copy(data)

    # The target directory may not exist yet on a fresh checkout.
    OUT_DE.parent.mkdir(parents=True, exist_ok=True)
    OUT_DE.write_text("\n".join(_render_de_module(populated)), encoding="utf-8")
    print(f"wrote {OUT_DE.relative_to(ROOT)} ({len(populated)} entries)")
    if skipped:
        print(f"skipped (no source content): {len(skipped)}")
        for s in skipped:
            print(f" - {s}")

    # Manual-merge helper — code arrays per product + i18n keys to splice into
    # src/api/products.js and src/i18n/messages.js.
    OUT_HELP.write_text("\n".join(_render_merge_helper(data)), encoding="utf-8")
    print(f"wrote {OUT_HELP.relative_to(ROOT)}")
|
||
|
||
|
||
# Run the generator only when this file is executed as a script, so that
# importing the module does not read or write any files.
if __name__ == "__main__":
    emit()
|