diff --git a/core/archipelago/src/container/prod_orchestrator.rs b/core/archipelago/src/container/prod_orchestrator.rs
index 8a2d1d95..4f722a61 100644
--- a/core/archipelago/src/container/prod_orchestrator.rs
+++ b/core/archipelago/src/container/prod_orchestrator.rs
@@ -384,6 +384,14 @@ impl ProdContainerOrchestrator {
         let _guard = lock.lock().await;
 
         let name = compute_container_name(&lm.manifest);
+
+        // Phase 3.3: migrate pre-Phase-3 containers in place.
+        if self.use_quadlet_backends {
+            if let Some(action) = self.migrate_to_quadlet_if_needed(lm, &name).await? {
+                return Ok(action);
+            }
+        }
+
         match self.runtime.get_container_status(&name).await {
             Ok(status) => match status.state {
                 ContainerState::Running => {
@@ -505,6 +513,89 @@ impl ProdContainerOrchestrator {
         Ok(())
     }
 
+    /// Phase 3.3 in-place migration. When `use_quadlet_backends` flips
+    /// from off → on, existing nodes have backend containers parented
+    /// under archipelago.service's cgroup (the bad shape). They need to
+    /// be replaced with Quadlet-managed equivalents — same image, same
+    /// volumes, parented under user.slice.
+    ///
+    /// Returns `Ok(Some(ReconcileAction::Installed))` if a migration was
+    /// performed (and `ensure_running` should return that to its caller
+    /// without re-running the state machine), `Ok(None)` otherwise so the
+    /// caller proceeds with normal reconcile logic.
+    ///
+    /// Idempotent. The "did we already migrate this app?" oracle is the
+    /// presence of `<name>.container` on disk in the user quadlet dir.
+    /// Once migrated, every subsequent reconcile tick takes the early
+    /// return at the unit-exists check.
+    ///
+    /// **Volume safety:** we call `runtime.remove_container(name)` which
+    /// invokes `podman rm --force` (stops + removes the container record
+    /// only). Bind-mounted host paths — e.g. /var/lib/archipelago/bitcoin
+    /// — survive because podman doesn't touch them on container rm. This
+    /// has been audited for every app in `uses_orchestrator_install_flow`:
+    /// all of them mount their data dirs as host bind mounts.
+    ///
+    /// # Errors
+    ///
+    /// Before the old container is touched: the quadlet unit dir cannot be
+    /// located, the unit file's presence cannot be determined, or dynamic
+    /// env resolution fails. Afterwards: removing the old container or
+    /// installing the Quadlet unit fails.
+    async fn migrate_to_quadlet_if_needed(
+        &self,
+        lm: &LoadedManifest,
+        name: &str,
+    ) -> Result<Option<ReconcileAction>> {
+        let unit_dir = quadlet::unit_dir()
+            .await
+            .context("locate user quadlet unit dir for migration check")?;
+        let unit_path = unit_dir.join(format!("{name}.container"));
+        // Propagate a failed existence check rather than reading it as "no
+        // unit": the fall-through path removes a live container, which must
+        // not happen while the on-disk state is unknown.
+        let unit_exists = tokio::fs::try_exists(&unit_path)
+            .await
+            .with_context(|| format!("Phase 3.3: check for Quadlet unit {name}.container"))?;
+        if unit_exists {
+            // Already on the Quadlet path — nothing to migrate.
+            return Ok(None);
+        }
+
+        // No unit on disk. If a pre-Phase-3 container exists for this
+        // app, replace it with a Quadlet-managed one.
+        let container_exists = self.runtime.get_container_status(name).await.is_ok();
+        if !container_exists {
+            // Container doesn't exist either — let install_fresh handle it.
+            return Ok(None);
+        }
+
+        // Re-render the manifest with dynamic env baked in before touching
+        // the old container, so a resolution failure leaves it in place
+        // instead of removed with nothing installed to replace it.
+        let mut resolved = lm.manifest.clone();
+        self.resolve_dynamic_env(&mut resolved)?;
+
+        tracing::info!(
+            app_id = %lm.manifest.app.id,
+            container = %name,
+            "Phase 3.3 migration: replacing pre-Quadlet container with systemd-managed unit"
+        );
+        // Stop+remove the old container record. Volumes survive (host
+        // bind mounts are not touched by podman rm).
+        self.runtime
+            .remove_container(name)
+            .await
+            .with_context(|| format!("Phase 3.3: rm pre-Quadlet container {name}"))?;
+
+        // Go through the same install path a fresh install would.
+        self.install_via_quadlet(&resolved, name)
+            .await
+            .with_context(|| format!("Phase 3.3: re-install {name} via Quadlet"))?;
+
+        Ok(Some(ReconcileAction::Installed))
+    }
+
     /// Phase 3.2 install path. Renders the manifest as a Quadlet unit,
     /// writes it atomically into ~/.config/containers/systemd/, asks
     /// systemd to reload, and starts the generated service.
Errors at diff --git a/tests/lifecycle/TESTING.md b/tests/lifecycle/TESTING.md index 7d26f005..4e8c3494 100644 --- a/tests/lifecycle/TESTING.md +++ b/tests/lifecycle/TESTING.md @@ -96,7 +96,7 @@ Goal: minimum-viable container subsystem. | `core/container/src/bitcoin_simulator.rs` | 219 | 0 | -219 | ○ couples with dev_orchestrator | | `core/container/src/port_manager.rs` | 175 | 0 | -175 | ○ couples with dev_orchestrator | | `core/archipelago/src/api/rpc/package/install.rs::install_bitcoincoin_rpc_repair` | ~150 | 0 | -150 | ◐ pending fold into orchestrator pre-start | -| imperative `install_fresh` in prod_orchestrator | ~120 | 0 | -120 | ◐ Phase 3.2 wired behind `use_quadlet_backends` flag (default off); flip default after 20× green | +| imperative `install_fresh` in prod_orchestrator | ~120 | 0 | -120 | ◐ Phase 3.2 wired behind `use_quadlet_backends` flag (default off); 3.3 in-place migration ✅; flip default after 20× green | **Today: -270 LoC committed. Outstanding deletes possible: ~1,616 LoC** (if Phase 3 ships fully + dev_mode resolved).