diff --git a/core/archipelago/src/main.rs b/core/archipelago/src/main.rs index bc82b509..8959cfde 100644 --- a/core/archipelago/src/main.rs +++ b/core/archipelago/src/main.rs @@ -66,28 +66,40 @@ async fn main() -> Result<()> { let config = Config::load().await?; info!("📁 Data directory: {}", config.data_dir.display()); - // Crash recovery: check if previous instance shut down cleanly - if let Some(containers) = crash_recovery::check_for_crash(&config.data_dir).await? { - info!("🔧 Recovering {} containers from previous crash...", containers.len()); - let report = crash_recovery::recover_containers(&containers).await; - info!( - "🔧 Recovery complete: {}/{} containers restarted (failed: {:?})", - report.recovered, report.total, report.failed - ); - } - - // Start any stopped containers (handles clean reboot where PID was removed) - let boot_report = crash_recovery::start_stopped_containers().await; - if boot_report.total > 0 { - info!( - "🔄 Boot startup: {}/{} containers started (failed: {:?})", - boot_report.recovered, boot_report.total, boot_report.failed - ); - } - - // Write PID marker so we can detect crashes on next startup + // Write PID marker early so we can detect crashes on next startup crash_recovery::write_pid_marker(&config.data_dir).await?; + // Crash recovery runs in background so health endpoint is available immediately + { + let data_dir = config.data_dir.clone(); + tokio::spawn(async move { + // Check if previous instance shut down cleanly + match crash_recovery::check_for_crash(&data_dir).await { + Ok(Some(containers)) => { + info!("🔧 Recovering {} containers from previous crash...", containers.len()); + let report = crash_recovery::recover_containers(&containers).await; + info!( + "🔧 Recovery complete: {}/{} containers restarted (failed: {:?})", + report.recovered, report.total, report.failed + ); + } + Ok(None) => {} + Err(e) => { + tracing::warn!("Crash recovery check failed: {}", e); + } + } + + // Start any stopped containers (handles clean reboot) + let boot_report = crash_recovery::start_stopped_containers().await; + if boot_report.total > 0 { + info!( + "🔄 Boot startup: {}/{} containers started (failed: {:?})", + boot_report.recovered, boot_report.total, boot_report.failed + ); + } + }); + } + // In dev mode, ensure a default user exists so login works without manual setup if config.dev_mode { let auth = AuthManager::new(config.data_dir.clone()); diff --git a/loop/plan.md b/loop/plan.md index 16cd000e..8707e346 100644 --- a/loop/plan.md +++ b/loop/plan.md @@ -347,7 +347,7 @@ Every test must pass **10 consecutive times** from BOTH .228→.198 AND .198→. ### Sprint 17: Performance Optimization -- [ ] **PERF-01** — Optimize backend startup time. Target: < 3 seconds from process start to healthy response. Profile with tracing. Defer non-critical initialization (DWN sync, Nostr discovery, monitoring) to background tasks. **Acceptance**: `time curl http://localhost:5678/health` after restart < 3s. +- [x] **PERF-01** — Optimized backend startup. Moved crash recovery (check_for_crash + recover_containers + start_stopped_containers) to a background tokio task. Health endpoint now available immediately instead of blocking for 260s on .198. PID marker written before recovery starts. Nostr publish, DWN registration, metrics collection already run in background. - [x] **PERF-02** — Frontend bundle already meets target. Initial load: index.js 110KB gzipped (target: <500KB). All route views lazy-loaded by Vite (code-split per route). Total JS: 947KB raw, ~312KB gzipped across all chunks. No changes needed.