diff --git a/core/archipelago/src/container/quadlet.rs b/core/archipelago/src/container/quadlet.rs
index ecd5b07a..1ec54d56 100644
--- a/core/archipelago/src/container/quadlet.rs
+++ b/core/archipelago/src/container/quadlet.rs
@@ -257,6 +257,20 @@ impl QuadletUnit {
         // OnFailure (clean stops stay stopped).
         let _ = writeln!(s, "Restart={}", self.restart_policy.as_systemd());
         let _ = writeln!(s, "RestartSec=10");
+        if self.health.is_some() {
+            // Notify=healthy makes systemd hold the unit in "starting"
+            // until the first health probe passes. systemd's default
+            // TimeoutStartSec is 90s — but `HealthInterval=30s` ×
+            // `HealthRetries=3` is itself 90s, so the timeout fires at
+            // the very moment the third probe MIGHT succeed. On .228 every
+            // backend (lnd, electrumx, fedimint, btcpay-server, mempool-api,
+            // bitcoin-knots) timed out at 90s and systemd terminated the
+            // container while it was still warming up. Raise the limit to
+            // 600s: long enough for slow-starting backends (electrumx
+            // replays its index, lnd unlocks its wallet), yet short enough
+            // that a truly stuck unit still fails instead of hanging forever.
+            let _ = writeln!(s, "TimeoutStartSec=600");
+        }
         let _ = writeln!(s);
         let _ = writeln!(s, "[Install]");
         let _ = writeln!(s, "WantedBy=default.target");
@@ -876,17 +890,22 @@ app:
         assert!(s.contains("HealthTimeout=5s"));
         assert!(s.contains("HealthRetries=3"));
         assert!(s.contains("Notify=healthy"));
+        // Notify=healthy needs a long-enough TimeoutStartSec, otherwise
+        // systemd kills the unit before the first probe can pass — observed
+        // live on .228 (2026-05-02) across all six backends.
+        assert!(s.contains("TimeoutStartSec=600"), "got: {s}");
     }
 
     #[test]
     fn render_skips_health_directives_when_absent() {
-        // No health spec → no Notify=healthy and no HealthCmd, so companion
-        // units (which never set health) keep their existing behavior: the
-        // unit is "started" the moment the process spawns.
+        // No health spec → no Notify=healthy, no HealthCmd, and no
+        // TimeoutStartSec override (systemd's default 90s applies). Companion
+        // units never set health, so their rendered bytes stay unchanged.
         let s = sample_unit().render();
         assert!(!s.contains("HealthCmd="));
         assert!(!s.contains("Notify=healthy"));
         assert!(!s.contains("HealthRetries="));
+        assert!(!s.contains("TimeoutStartSec="));
     }
 
     #[test]