Harden Datum container discovery

This commit is contained in:
Dorian
2026-05-09 16:36:15 +01:00
parent 87e114a2aa
commit 99bbd83c34

View File

@@ -106,7 +106,15 @@ type DockerInspect = {
};
};
let lastResolvedDatumUrl = config.datum.url;
/**
 * One entry of the array this file requests from the Docker
 * `/containers/json?all=1` endpoint, limited to the fields read here.
 * Everything except `Id` is optional, and readers fall back to empty values.
 */
type DockerContainerSummary = {
// Container identifier; interpolated into `/containers/<Id>/json` to inspect a candidate.
Id: string;
// Container names; joined and searched case-insensitively when looking for Datum.
Names?: string[];
// Image reference; searched case-insensitively for known Datum image names.
Image?: string;
// Container labels; only the values (not the keys) are searched.
Labels?: Record<string, string>;
// Container state; entries whose state is exactly "running" are tried first.
State?: string;
};
// Datum URL built by the most recent successful Docker lookup in resolveDatumUrl.
// Starts as null; while it is still null, a failed lookup is rethrown instead of
// falling back to a previously resolved address.
let lastResolvedDatumUrl: string | null = null;
function dockerGetJson<T>(path: string): Promise<T> {
return new Promise((resolve, reject) => {
@@ -141,11 +149,8 @@ async function resolveDatumUrl(): Promise<string> {
if (!container || !network) return config.datum.url;
try {
const inspected = await dockerGetJson<DockerInspect>(`/containers/${encodeURIComponent(container)}/json`);
const address = inspected.NetworkSettings?.Networks?.[network]?.IPAddress;
if (!address) {
throw new Error(`container ${container} has no IP on network ${network}`);
}
const inspected = await inspectDatumContainer(container);
const address = selectNetworkAddress(inspected, network);
const port = new URL(config.datum.url).port || "21000";
lastResolvedDatumUrl = `http://${address}:${port}`;
} catch (err) {
@@ -153,9 +158,64 @@ async function resolveDatumUrl(): Promise<string> {
{ reason: formatErr(err), container, network, fallbackUrl: lastResolvedDatumUrl },
"datum_docker_discovery_failed",
);
if (!lastResolvedDatumUrl) throw err;
}
return lastResolvedDatumUrl;
return lastResolvedDatumUrl ?? config.datum.url;
}
/**
 * Fetches Docker inspect data for the Datum container.
 *
 * The configured container name is tried first. If that request fails, the
 * failure is logged and every container known to Docker (stopped ones
 * included) is listed; entries that look like Datum are then inspected one by
 * one, running containers before the rest, and the first successful inspect
 * wins.
 *
 * @param container Configured name (or id) of the Datum container.
 * @returns Inspect payload of the named container or of the first usable candidate.
 * @throws Error when neither the named container nor any candidate can be
 *   inspected. A failure of the listing request itself propagates unchanged.
 */
async function inspectDatumContainer(container: string): Promise<DockerInspect> {
  const inspectPath = (ref: string): string => `/containers/${ref}/json`;

  try {
    return await dockerGetJson<DockerInspect>(inspectPath(encodeURIComponent(container)));
  } catch (err) {
    // Best effort: a missing or renamed container must not stop discovery.
    logger.warn({ reason: formatErr(err), container }, "datum_named_container_inspect_failed");
  }

  const listed = await dockerGetJson<DockerContainerSummary[]>("/containers/json?all=1");

  // Rank 0 for running containers, 1 for everything else, so running ones come first.
  const runningRank = (summary: DockerContainerSummary): number => (summary.State === "running" ? 0 : 1);
  const ranked = listed
    .filter((summary) => isLikelyDatumContainer(summary))
    .sort((left, right) => runningRank(left) - runningRank(right));

  for (const summary of ranked) {
    try {
      return await dockerGetJson<DockerInspect>(inspectPath(summary.Id));
    } catch (err) {
      // Keep going: a later candidate may still be inspectable.
      logger.warn({ reason: formatErr(err), id: summary.Id, names: summary.Names }, "datum_candidate_inspect_failed");
    }
  }

  throw new Error(
    `Could not find a Datum container through Docker; checked ${listed.length} containers and expected ${container}`,
  );
}
/**
 * Heuristic check for whether a listed container looks like the Datum gateway.
 *
 * All comparisons are case-insensitive substring matches over the container's
 * names, image and label values (label keys are not examined). A container
 * that mentions "gashboard" in any of those is always rejected.
 *
 * @param container Summary entry from the Docker container listing.
 * @returns true when the image or the names/label values point at Datum.
 */
function isLikelyDatumContainer(container: DockerContainerSummary): boolean {
  const nameText = (container.Names ?? []).join(" ").toLowerCase();
  const imageText = (container.Image ?? "").toLowerCase();
  const labelText = Object.values(container.Labels ?? {}).join(" ").toLowerCase();

  // The exclusion is checked before any positive match.
  const mentionsGashboard = [nameText, imageText, labelText].some((text) => text.includes("gashboard"));
  if (mentionsGashboard) {
    return false;
  }

  if (imageText.includes("retropex/datum") || imageText.includes("datum_gateway")) {
    return true;
  }
  return nameText.includes("datum") || labelText.includes("datum");
}
/**
 * Picks the IP address to use for a container from its Docker inspect data.
 *
 * Order of preference:
 *   1. the address on `preferredNetwork`, when it is non-empty;
 *   2. the first network with an address whose name contains "umbrel"
 *      (case-insensitive);
 *   3. the first network that has any address.
 *
 * @param inspected Inspect payload of the container.
 * @param preferredNetwork Name of the network whose address should win.
 * @returns The selected IP address.
 * @throws Error listing the attached network names (or "none") when no
 *   network carries an address.
 */
function selectNetworkAddress(inspected: DockerInspect, preferredNetwork: string): string {
  const attached = inspected.NetworkSettings?.Networks ?? {};

  const preferredIp = attached[preferredNetwork]?.IPAddress;
  if (preferredIp) {
    return preferredIp;
  }

  // Collect every (network name, address) pair that has a usable address.
  const addressed: Array<[string, string]> = [];
  for (const [networkName, details] of Object.entries(attached)) {
    if (details.IPAddress) {
      addressed.push([networkName, details.IPAddress]);
    }
  }

  const chosen = addressed.find(([networkName]) => networkName.toLowerCase().includes("umbrel")) ?? addressed[0];
  if (chosen) {
    return chosen[1];
  }

  const networkNames = Object.keys(attached).join(", ") || "none";
  throw new Error(`Datum container has no Docker network IP; networks: ${networkNames}`);
}
async function fetchNetworkStats(): Promise<NetworkStat> {
@@ -186,7 +246,19 @@ async function fetchNetworkStats(): Promise<NetworkStat> {
async function pollOnce(): Promise<DatumSnapshot> {
const fetchedAt = Date.now();
const datumUrl = await resolveDatumUrl();
let datumUrl: string;
try {
datumUrl = await resolveDatumUrl();
} catch (err) {
const reason = formatErr(err);
logger.warn({ reason }, "datum_discovery_failed");
return {
...lastSnapshot,
ok: false,
fetchedAt,
error: { code: "DATUM_DISCOVERY_FAILED", message: reason },
};
}
// /clients (admin Digest-gated) and /threads (public) in parallel.
const clientsTimeout = abortableSignal(DEFAULT_TIMEOUT_MS);