#!/usr/bin/env python3
"""Download all IndeeHub film images from CloudFront CDN.

Reads the film list from ``assets/films/indeedhub-films.json``, keeps the
first record seen for every film id that has a ``posterSrc``, and stores each
poster under ``public/images/films/posters``.  The same image is also stored
under ``public/images/films/backdrops`` (the data has no separate backdrop
yet).  A failure on one image is reported and does not stop the batch.
"""
import json
import os
import shutil
import time
import urllib.parse
import urllib.request
from pathlib import Path

FILMS_JSON = 'assets/films/indeedhub-films.json'
POSTER_DIR = 'public/images/films/posters'
BACKDROP_DIR = 'public/images/films/backdrops'

# Extensions kept as-is; anything else is stored as ``jpg``.
KNOWN_EXTENSIONS = ('png', 'webp', 'jpeg')

# Seconds to wait on a stalled connection before giving up on one image.
DOWNLOAD_TIMEOUT = 30
# Pause between films so the CDN is not hammered.
REQUEST_DELAY = 0.5


def unique_films(films_raw) -> dict:
    """Return ``{film id: record}`` keeping the first record per id.

    Records without a truthy ``posterSrc`` are dropped.  Records without an
    ``id`` are dropped as well instead of raising ``KeyError``, because they
    cannot be given a file name.
    """
    films = {}
    for item in films_raw:
        film_id = item.get('id')
        if film_id is None or not item.get('posterSrc'):
            continue
        # setdefault keeps the first occurrence, so later duplicates are ignored.
        films.setdefault(film_id, item)
    return films


def unwrap_image_url(url: str) -> str:
    """Return the real image URL hidden inside a Next.js image-optimizer URL.

    A wrapped URL looks like ``/_next/image?url=<encoded>&w=640&q=75``.  Only
    a query parameter named exactly ``url`` is unwrapped; a parameter that
    merely ends in ``url`` (``sourceurl=...``) leaves the input untouched.
    URLs without such a parameter are returned unchanged.
    """
    query = urllib.parse.urlsplit(url).query
    for param in query.split('&'):
        key, _, value = param.partition('=')
        if key == 'url' and value:
            return urllib.parse.unquote(value)
    return url


def image_extension(url: str) -> str:
    """Return the file extension (without the dot) to use for *url*.

    Only the suffix of the URL *path* is inspected, so a host name or query
    string that happens to contain ``.png`` cannot influence the result.
    Unknown or missing suffixes fall back to ``jpg``.
    """
    path = urllib.parse.urlsplit(url).path
    suffix = os.path.splitext(path)[1].lower().lstrip('.')
    return suffix if suffix in KNOWN_EXTENSIONS else 'jpg'


def download_file(url: str, dest_path: str, timeout: float = DOWNLOAD_TIMEOUT) -> None:
    """Fetch *url* into *dest_path*.

    The data is written to ``<dest_path>.part`` and renamed only after the
    transfer finished, so an interrupted download never leaves a truncated
    image at the final location.

    Raises:
        Whatever ``urllib.request.urlopen`` or the file operations raise
        (for example ``urllib.error.URLError``, ``ValueError`` for a URL
        without a scheme, ``OSError``).
    """
    part_path = f'{dest_path}.part'
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response, \
                open(part_path, 'wb') as out:
            shutil.copyfileobj(response, out)
        os.replace(part_path, dest_path)
    finally:
        # Still present only when the transfer or the rename failed.
        if os.path.exists(part_path):
            os.remove(part_path)


def download_film_images(film_id, film: dict) -> None:
    """Save the poster of *film* and a backdrop copy of it, reporting progress.

    Errors are printed, never raised: one broken image must not abort the
    whole batch.
    """
    poster_url = film.get('posterSrc', '')
    if not poster_url:
        return
    alt_text = film.get('alt', film_id)

    # Extract the actual CloudFront URL (remove Next.js image optimization wrapper)
    poster_url = unwrap_image_url(poster_url)

    print(f"\nDownloading: {alt_text}")
    print(f" URL: {poster_url}")

    ext = image_extension(poster_url)
    poster_path = f'{POSTER_DIR}/{film_id}.{ext}'
    backdrop_path = f'{BACKDROP_DIR}/{film_id}.{ext}'

    poster_saved = False
    try:
        download_file(poster_url, poster_path)
    except Exception as e:  # deliberate best-effort boundary, see docstring
        print(f" ✗ Failed to download poster: {e}")
    else:
        poster_saved = True
        print(f" ✓ Saved poster: {poster_path}")

    # Also save as backdrop (same image for now).  Reuse the bytes already on
    # disk instead of requesting them a second time; go back to the network
    # only when the poster download did not succeed.
    try:
        if poster_saved:
            shutil.copyfile(poster_path, backdrop_path)
        else:
            download_file(poster_url, backdrop_path)
    except Exception as e:  # deliberate best-effort boundary, see docstring
        print(f" ✗ Failed to download backdrop: {e}")
    else:
        print(f" ✓ Saved backdrop: {backdrop_path}")


def main() -> None:
    """Run the whole download: load the film list, then fetch every image."""
    # Read the film data
    with open(FILMS_JSON, 'r', encoding='utf-8') as f:
        films_raw = json.load(f)

    # Create directories
    Path(POSTER_DIR).mkdir(parents=True, exist_ok=True)
    Path(BACKDROP_DIR).mkdir(parents=True, exist_ok=True)

    # Extract unique films (remove duplicates)
    films = unique_films(films_raw)
    print(f"Found {len(films)} unique films to download")

    # Download each film's poster and backdrop
    for film_id, film in films.items():
        download_film_images(film_id, film)
        # Be nice to the server
        time.sleep(REQUEST_DELAY)

    print("\n✅ Download complete!")
    print("Images saved to public/images/films/")


if __name__ == '__main__':
    main()