From 99bbd83c34473c5e8735d5952d9cb38cddee70c9 Mon Sep 17 00:00:00 2001 From: Dorian Date: Sat, 9 May 2026 16:36:15 +0100 Subject: [PATCH] Harden Datum container discovery --- apps/api/src/datum/poller.ts | 88 ++++++++++++++++++++++++++++++++---- 1 file changed, 80 insertions(+), 8 deletions(-) diff --git a/apps/api/src/datum/poller.ts b/apps/api/src/datum/poller.ts index d16dc6a..9d0b11e 100644 --- a/apps/api/src/datum/poller.ts +++ b/apps/api/src/datum/poller.ts @@ -106,7 +106,15 @@ type DockerInspect = { }; }; -let lastResolvedDatumUrl = config.datum.url; +type DockerContainerSummary = { + Id: string; + Names?: string[]; + Image?: string; + Labels?: Record; + State?: string; +}; + +let lastResolvedDatumUrl: string | null = null; function dockerGetJson(path: string): Promise { return new Promise((resolve, reject) => { @@ -141,11 +149,8 @@ async function resolveDatumUrl(): Promise { if (!container || !network) return config.datum.url; try { - const inspected = await dockerGetJson(`/containers/${encodeURIComponent(container)}/json`); - const address = inspected.NetworkSettings?.Networks?.[network]?.IPAddress; - if (!address) { - throw new Error(`container ${container} has no IP on network ${network}`); - } + const inspected = await inspectDatumContainer(container); + const address = selectNetworkAddress(inspected, network); const port = new URL(config.datum.url).port || "21000"; lastResolvedDatumUrl = `http://${address}:${port}`; } catch (err) { @@ -153,9 +158,64 @@ async function resolveDatumUrl(): Promise { { reason: formatErr(err), container, network, fallbackUrl: lastResolvedDatumUrl }, "datum_docker_discovery_failed", ); + if (!lastResolvedDatumUrl) throw err; } - return lastResolvedDatumUrl; + return lastResolvedDatumUrl ?? config.datum.url; +} + +async function inspectDatumContainer(container: string): Promise { + try { + return await dockerGetJson(`/containers/${encodeURIComponent(container)}/json`); + } catch (err) { + logger.warn({ reason: formatErr(err), container }, "datum_named_container_inspect_failed"); + } + + const containers = await dockerGetJson("/containers/json?all=1"); + const candidates = containers + .filter(isLikelyDatumContainer) + .sort((a, b) => Number(b.State === "running") - Number(a.State === "running")); + + for (const candidate of candidates) { + try { + return await dockerGetJson(`/containers/${candidate.Id}/json`); + } catch (err) { + logger.warn({ reason: formatErr(err), id: candidate.Id, names: candidate.Names }, "datum_candidate_inspect_failed"); + } + } + + throw new Error( + `Could not find a Datum container through Docker; checked ${containers.length} containers and expected ${container}`, + ); +} + +function isLikelyDatumContainer(container: DockerContainerSummary): boolean { + const names = (container.Names ?? []).join(" ").toLowerCase(); + const image = (container.Image ?? "").toLowerCase(); + const labels = Object.values(container.Labels ?? {}).join(" ").toLowerCase(); + const haystack = `${names} ${image} ${labels}`; + + if (haystack.includes("gashboard")) return false; + return ( + image.includes("retropex/datum") || + image.includes("datum_gateway") || + names.includes("datum") || + labels.includes("datum") + ); +} + +function selectNetworkAddress(inspected: DockerInspect, preferredNetwork: string): string { + const networks = inspected.NetworkSettings?.Networks ?? {}; + const preferred = networks[preferredNetwork]?.IPAddress; + if (preferred) return preferred; + + const entries = Object.entries(networks).filter(([, value]) => value.IPAddress); + const umbrel = entries.find(([name]) => name.toLowerCase().includes("umbrel")); + const first = umbrel ?? entries[0]; + if (first?.[1].IPAddress) return first[1].IPAddress; + + const networkNames = Object.keys(networks).join(", ") || "none"; + throw new Error(`Datum container has no Docker network IP; networks: ${networkNames}`); } async function fetchNetworkStats(): Promise { @@ -186,7 +246,19 @@ async function fetchNetworkStats(): Promise { async function pollOnce(): Promise { const fetchedAt = Date.now(); - const datumUrl = await resolveDatumUrl(); + let datumUrl: string; + try { + datumUrl = await resolveDatumUrl(); + } catch (err) { + const reason = formatErr(err); + logger.warn({ reason }, "datum_discovery_failed"); + return { + ...lastSnapshot, + ok: false, + fetchedAt, + error: { code: "DATUM_DISCOVERY_FAILED", message: reason }, + }; + } // /clients (admin Digest-gated) and /threads (public) in parallel. const clientsTimeout = abortableSignal(DEFAULT_TIMEOUT_MS);