test(bootstrap): regression gate for the heal_podman_state socket bug
Extracted the heal_podman_state cleanup list as a module-level
HEAL_RUNTIME_SUBDIRS const so a unit test can structurally enforce
the invariant: the list must contain "containers" + "libpod" but
must NOT contain "podman" (that directory holds systemd's podman.sock
listener; removing it was the bug fixed in commit bb421803).
If anyone re-adds "podman" — accidentally, by reverting, or by
copy-pasting from old plan memory — this test fails before we ship,
instead of the bug resurfacing on the next deploy and nuking the
orchestrator's HTTP path.
Total tests: 614 → 615.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -122,15 +122,23 @@ enum PodmanHealOutcome {
|
||||
/// - Networking corruption (netavark cache). Currently `podman info`
|
||||
/// doesn't diagnose that; if cleanup doesn't fix it, the operator
|
||||
/// will see the warning in the journal.
|
||||
/// Subdirectories of `$XDG_RUNTIME_DIR` that hold podman's transient
|
||||
/// state and are safe to remove when `podman info` is wedged. The
|
||||
/// `podman/` subdir is **deliberately absent** — that's where systemd's
|
||||
/// socket-activated `podman.sock` listener lives. Removing it would
|
||||
/// silently break every libpod HTTP call from the orchestrator until
|
||||
/// `systemctl --user restart podman.socket`. See
|
||||
/// `heal_podman_state` docstring for the full rationale and the
|
||||
/// `heal_podman_state_does_not_clean_socket_dir` regression test.
|
||||
// NOTE(review): the `///` run directly above this const's own docs looks like
// the tail of the `heal_podman_state` docstring. rustdoc attaches a contiguous
// `///` run to the next item, so that text would now document this const and
// leave the fn undocumented — consider moving this const above that docstring.
// TODO confirm against the full file.
const HEAL_RUNTIME_SUBDIRS: &[&str] = &["containers", "libpod"];
|
||||
|
||||
async fn heal_podman_state() -> Result<PodmanHealOutcome> {
|
||||
if probe_podman_ok().await {
|
||||
return Ok(PodmanHealOutcome::Healthy);
|
||||
}
|
||||
// Wedged. Clean runtime state and try again. Note: `podman/` is
|
||||
// intentionally absent from `HEAL_RUNTIME_SUBDIRS` — see fn docstring.
|
||||
let xdg = std::env::var("XDG_RUNTIME_DIR")
|
||||
.context("XDG_RUNTIME_DIR not set; can't locate podman runtime state to clean")?;
|
||||
for sub in &["containers", "libpod"] {
|
||||
for sub in HEAL_RUNTIME_SUBDIRS {
|
||||
let path = PathBuf::from(&xdg).join(sub);
|
||||
match fs::remove_dir_all(&path).await {
|
||||
Ok(()) => debug!(path = %path.display(), "removed podman runtime state dir"),
|
||||
@@ -602,3 +610,27 @@ async fn run_nginx() -> Result<bool> {
|
||||
let _ = host_sudo(&["rm", "-f", &backup]).await;
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::{Component, Path};

    /// Returns `true` when `sub` consists of exactly one normal path
    /// component and that component is `podman`.
    ///
    /// Comparing path components instead of raw strings means spellings
    /// such as `"podman/"` or `"./podman"` are caught as well; joined onto
    /// a base directory they resolve to the same `podman/` directory as the
    /// plain `"podman"` entry.
    fn names_socket_dir(sub: &str) -> bool {
        let mut names = Path::new(sub).components().filter_map(|part| match part {
            Component::Normal(name) => Some(name),
            _ => None,
        });
        names.next().map_or(false, |name| name == "podman") && names.next().is_none()
    }

    /// Pins the helper itself so the regression gate below cannot silently
    /// degrade into an always-false check.
    #[test]
    fn names_socket_dir_matches_equivalent_spellings() {
        assert!(names_socket_dir("podman"));
        assert!(names_socket_dir("podman/"));
        assert!(names_socket_dir("./podman"));
        assert!(!names_socket_dir("containers"));
        assert!(!names_socket_dir("libpod"));
        assert!(!names_socket_dir(""));
    }

    /// Regression gate for the 2026-05-01 bootstrap bug: heal_podman_state
    /// was removing $XDG_RUNTIME_DIR/podman/ alongside containers/ and
    /// libpod/, which silently broke the systemd-bound podman.sock and
    /// every libpod HTTP call from the orchestrator. If anyone re-adds
    /// "podman" (in any path-equivalent spelling) to HEAL_RUNTIME_SUBDIRS
    /// this test fails before we ship.
    #[test]
    fn heal_podman_state_does_not_clean_socket_dir() {
        assert!(
            !HEAL_RUNTIME_SUBDIRS.iter().copied().any(names_socket_dir),
            "HEAL_RUNTIME_SUBDIRS must not include 'podman' — that dir holds \
             systemd's podman.sock listener; removing it breaks every libpod \
             HTTP call from the orchestrator. See bootstrap.rs commit bb421803."
        );

        // Sanity: the actually-runtime-state dirs are still in the list so
        // we don't accidentally turn the heal into a no-op.
        assert!(
            HEAL_RUNTIME_SUBDIRS.contains(&"containers"),
            "HEAL_RUNTIME_SUBDIRS must still include 'containers'"
        );
        assert!(
            HEAL_RUNTIME_SUBDIRS.contains(&"libpod"),
            "HEAL_RUNTIME_SUBDIRS must still include 'libpod'"
        );
    }
}
|
||||
|
||||
Reference in New Issue
Block a user