fix: bulletproof mesh serial connection — PrivateDevices, auto-detect fallback, backoff
Root cause: systemd PrivateDevices=yes hid /dev/ttyUSB* from the service, preventing .198 from connecting to its Heltec V3 after the security hardening.

Changes:
- Set PrivateDevices=no in systemd service (serial access needs physical devices; other hardening layers remain: NoNewPrivileges, ProtectSystem, RestrictNamespaces)
- Add SupplementaryGroups=dialout for explicit serial permissions
- Add fallback auto-detect when configured serial path fails to open
- Add exponential backoff on reconnect (5s→60s cap) to reduce log spam
- Add pre-open device existence check with actionable error messages
- Add udev rule (99-mesh-radio.rules) for stable /dev/mesh-radio symlink
- Add /dev/mesh-radio to serial candidate list (checked first)
- Add Connect button per detected device in Mesh UI
- Deploy udev rule to both servers and ISO build
- Fix FEDI_HASH unbound variable in deploy script
- Fix deploy binary step to handle hung service stop gracefully

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -29,8 +29,11 @@ const SYNC_INTERVAL: Duration = Duration::from_secs(10);
|
||||
/// Maximum stored messages (circular buffer).
|
||||
const MAX_MESSAGES: usize = 100;
|
||||
|
||||
/// Delay before reconnection attempt after device disconnect.
|
||||
const RECONNECT_DELAY: Duration = Duration::from_secs(10);
|
||||
/// Initial delay before reconnection attempt after device disconnect.
|
||||
const RECONNECT_DELAY_INIT: Duration = Duration::from_secs(5);
|
||||
|
||||
/// Maximum reconnect delay (cap for exponential backoff).
|
||||
const RECONNECT_DELAY_MAX: Duration = Duration::from_secs(60);
|
||||
|
||||
/// Number of consecutive write failures before we consider the device dead
|
||||
/// and trigger a reconnection cycle.
|
||||
@@ -150,6 +153,7 @@ pub fn spawn_mesh_listener(
|
||||
tokio::spawn(async move {
|
||||
let mut shutdown = shutdown;
|
||||
let mut cmd_rx = cmd_rx;
|
||||
let mut reconnect_delay = RECONNECT_DELAY_INIT;
|
||||
loop {
|
||||
if *shutdown.borrow() {
|
||||
info!("Mesh listener shutting down");
|
||||
@@ -170,9 +174,16 @@ pub fn spawn_mesh_listener(
|
||||
{
|
||||
Ok(()) => {
|
||||
info!("Mesh session ended cleanly");
|
||||
// Session was established before ending — reset backoff
|
||||
reconnect_delay = RECONNECT_DELAY_INIT;
|
||||
}
|
||||
Err(e) => {
|
||||
error!("Mesh session error: {}", e);
|
||||
// Check if session was ever connected (vs failed to open)
|
||||
let was_connected = state.status.read().await.device_connected;
|
||||
if was_connected {
|
||||
reconnect_delay = RECONNECT_DELAY_INIT;
|
||||
}
|
||||
error!("Mesh session error: {} (retry in {:?})", e, reconnect_delay);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,17 +195,42 @@ pub fn spawn_mesh_listener(
|
||||
}
|
||||
let _ = state.event_tx.send(MeshEvent::DeviceDisconnected);
|
||||
|
||||
// Wait before reconnecting
|
||||
// Wait before reconnecting (exponential backoff)
|
||||
tokio::select! {
|
||||
_ = tokio::time::sleep(RECONNECT_DELAY) => {},
|
||||
_ = tokio::time::sleep(reconnect_delay) => {},
|
||||
_ = shutdown.changed() => {
|
||||
if *shutdown.borrow() { return; }
|
||||
},
|
||||
}
|
||||
|
||||
// Increase backoff for next failure, cap at max
|
||||
reconnect_delay = (reconnect_delay * 2).min(RECONNECT_DELAY_MAX);
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Scan all candidate serial ports and open the first Meshcore device found.
|
||||
async fn auto_detect_and_open() -> Result<(String, MeshcoreDevice, DeviceInfo)> {
|
||||
let paths = super::serial::detect_serial_devices().await;
|
||||
if paths.is_empty() {
|
||||
anyhow::bail!("No serial devices found in /dev");
|
||||
}
|
||||
for path in &paths {
|
||||
debug!(path = %path, "Probing for Meshcore device");
|
||||
match MeshcoreDevice::open(path).await {
|
||||
Ok(mut dev) => match dev.initialize().await {
|
||||
Ok(info) => {
|
||||
info!(path = %path, firmware = %info.firmware_version, "Found Meshcore device via auto-detect");
|
||||
return Ok((path.clone(), dev, info));
|
||||
}
|
||||
Err(e) => debug!(path = %path, error = %e, "Not a Meshcore device"),
|
||||
},
|
||||
Err(e) => debug!(path = %path, error = %e, "Could not open serial port"),
|
||||
}
|
||||
}
|
||||
anyhow::bail!("No Meshcore device found on {} candidate ports: {:?}", paths.len(), paths)
|
||||
}
|
||||
|
||||
/// Run a single mesh session (connect, initialize, main loop).
|
||||
async fn run_mesh_session(
|
||||
state: &Arc<MeshState>,
|
||||
@@ -206,24 +242,25 @@ async fn run_mesh_session(
|
||||
shutdown: &mut tokio::sync::watch::Receiver<bool>,
|
||||
cmd_rx: &mut mpsc::Receiver<MeshCommand>,
|
||||
) -> Result<()> {
|
||||
// Detect device
|
||||
let device_path = if let Some(path) = preferred_path {
|
||||
path.to_string()
|
||||
// Detect device — try preferred path first, fall back to auto-detect
|
||||
let (device_path, mut device, device_info) = if let Some(path) = preferred_path {
|
||||
match MeshcoreDevice::open(path).await {
|
||||
Ok(mut dev) => match dev.initialize().await {
|
||||
Ok(info) => (path.to_string(), dev, info),
|
||||
Err(e) => {
|
||||
warn!("Preferred path {} handshake failed: {} — trying auto-detect", path, e);
|
||||
auto_detect_and_open().await?
|
||||
}
|
||||
},
|
||||
Err(e) => {
|
||||
warn!("Preferred path {} open failed: {} — trying auto-detect", path, e);
|
||||
auto_detect_and_open().await?
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let paths = super::serial::detect_serial_devices().await;
|
||||
if paths.is_empty() {
|
||||
anyhow::bail!("No serial devices found");
|
||||
}
|
||||
match super::serial::probe_for_meshcore(&paths).await {
|
||||
Some((path, _)) => path,
|
||||
None => anyhow::bail!("No Meshcore device found on available serial ports"),
|
||||
}
|
||||
auto_detect_and_open().await?
|
||||
};
|
||||
|
||||
// Open and initialize
|
||||
let mut device = MeshcoreDevice::open(&device_path).await?;
|
||||
let device_info = device.initialize().await?;
|
||||
|
||||
// Update status
|
||||
{
|
||||
let mut status = state.status.write().await;
|
||||
|
||||
@@ -37,8 +37,21 @@ pub struct MeshcoreDevice {
|
||||
impl MeshcoreDevice {
|
||||
/// Open a serial port and verify it's a Meshcore device.
|
||||
pub async fn open(path: &str) -> Result<Self> {
|
||||
// Check device exists before trying to open (better error message)
|
||||
match tokio::fs::metadata(path).await {
|
||||
Ok(meta) => {
|
||||
debug!(path = %path, permissions = ?meta.permissions(), "Device node exists");
|
||||
}
|
||||
Err(e) => {
|
||||
anyhow::bail!(
|
||||
"Serial device {} not accessible: {} (check PrivateDevices in systemd, or USB connection)",
|
||||
path, e
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
let port = serial2_tokio::SerialPort::open(path, BAUD_RATE)
|
||||
.context(format!("Failed to open serial port {}", path))?;
|
||||
.context(format!("Failed to open serial port {} (permission denied? device busy?)", path))?;
|
||||
|
||||
info!(path = %path, baud = BAUD_RATE, "Opened serial port");
|
||||
|
||||
@@ -329,7 +342,9 @@ impl MeshcoreDevice {
|
||||
// ─── Device detection ───────────────────────────────────────────────────
|
||||
|
||||
/// Candidate serial device paths to check on Linux.
|
||||
/// /dev/mesh-radio is a stable udev symlink (see 99-mesh-radio.rules).
|
||||
const SERIAL_CANDIDATES: &[&str] = &[
|
||||
"/dev/mesh-radio",
|
||||
"/dev/ttyUSB0",
|
||||
"/dev/ttyUSB1",
|
||||
"/dev/ttyUSB2",
|
||||
|
||||
Reference in New Issue
Block a user