diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs index e0b54a2c..4a33c757 100644 --- a/core/archipelago/src/container/prod_orchestrator.rs +++ b/core/archipelago/src/container/prod_orchestrator.rs @@ -382,6 +382,14 @@ impl ProdContainerOrchestrator { if let Some(action) = self.migrate_to_quadlet_if_needed(lm, &name).await? { return Ok(action); } + // Sync drift: keep an existing Quadlet unit's bytes in step + // with what the renderer produces today, even when nothing + // else triggers an install. Without this, every renderer change + // (new directive, fixed bug) requires a fresh package.install + // RPC per app to take effect — observed live on .228 2026-05-02 + // where the TimeoutStartSec=600 fix shipped in code but no + // existing units picked it up. + self.sync_quadlet_unit(lm, &name).await?; } match self.runtime.get_container_status(&name).await { @@ -596,6 +604,54 @@ impl ProdContainerOrchestrator { Ok(Some(ReconcileAction::Installed)) } + /// Drift-sync an existing Quadlet unit file's bytes against what the + /// current renderer produces. No-op when the flag is off, when the + /// app is a companion (companion.rs owns those units), or when no + /// unit file exists yet (install_via_quadlet handles first-write). + /// + /// We DON'T restart the .service when content changes — running + /// containers keep their current config until an operator-initiated + /// restart picks up the new file. That's the right tradeoff: file + /// updates are cheap and non-destructive; service restarts are + /// destructive (the SIGKILL cascade we're trying to eliminate). + /// systemctl --user daemon-reload runs only when content actually + /// changed, so steady-state reconcile ticks pay just one fs read. + async fn sync_quadlet_unit(&self, lm: &LoadedManifest, name: &str) -> Result<()> { + // Companions: same reasoning as migrate_to_quadlet_if_needed — + // companion.rs renders these units with a different shape, syncing + // here would clobber them. + let app_id = lm.manifest.app.id.as_str(); + if UI_APP_IDS.contains(&app_id) { + return Ok(()); + } + let unit_dir = quadlet::unit_dir() + .await + .context("locate user quadlet unit dir for drift sync")?; + let unit_path = unit_dir.join(format!("{name}.container")); + // Only sync when an existing file is on disk — otherwise this is + // a fresh install and install_via_quadlet will write it anyway. + if !tokio::fs::try_exists(&unit_path).await.unwrap_or(false) { + return Ok(()); + } + let mut resolved = lm.manifest.clone(); + self.resolve_dynamic_env(&mut resolved)?; + let unit = quadlet::QuadletUnit::from_manifest(&resolved, name); + let changed = quadlet::write_if_changed(&unit, &unit_dir) + .await + .with_context(|| format!("drift-sync quadlet unit for {name}"))?; + if changed { + quadlet::daemon_reload_user() + .await + .context("systemctl --user daemon-reload after drift-syncing quadlet unit")?; + tracing::info!( + app_id = %lm.manifest.app.id, + container = %name, + "Quadlet unit drift-synced — file rewritten, .service NOT restarted (operator restart picks up new config)" + ); + } + Ok(()) + } + /// Phase 3.2 install path. Renders the manifest as a Quadlet unit, /// writes it atomically into ~/.config/containers/systemd/, asks /// systemd to reload, and starts the generated service. Errors at