Files
archy/scripts/node-profile.sh
Dorian ae4791d438
Some checks failed
Build Archipelago ISO (dev) / build-iso (push) Has been cancelled
Build Archipelago ISO / build-iso (push) Has been cancelled
fix: companion indicator shows relay state, add node-profile script
CompanionIndicator: show muted icon when relay connected but idle,
orange when companion actively sending input. Removes Transition
wrapper for always-visible relay status.

Add scripts/node-profile.sh utility.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-02 11:14:34 +01:00

253 lines
7.5 KiB
Bash
Executable File

#!/bin/bash
# node-profile.sh — CPU/memory/container profiling across all Archipelago nodes
#
# Usage:
# ./scripts/node-profile.sh # All reachable nodes
# ./scripts/node-profile.sh 192.168.1.228 # Single node
# ./scripts/node-profile.sh --watch # Repeat every 30s
# ./scripts/node-profile.sh --watch --interval 10 # Repeat every 10s
#
# Requires: SSH key at ~/.ssh/archipelago-deploy (or ARCHIPELAGO_SSH_KEY)
# Strict mode: abort on command failure, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -e -u -o pipefail

# Directory containing this script; the helper files below are resolved
# relative to it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"

# Shared helpers and default deployment settings.
# NOTE(review): ssh_cmd and the DEFAULT_*/TAILSCALE_* variables used further
# down are not defined in this file — presumably they come from these two
# files; confirm.
. "$SCRIPT_DIR/lib/common.sh"
. "$SCRIPT_DIR/deploy-config-defaults.sh"

# Optional local overrides, loaded only when the file exists.
if [ -f "$SCRIPT_DIR/deploy-config.sh" ]; then
  . "$SCRIPT_DIR/deploy-config.sh"
fi
# Node addresses profiled when no address is given on the command line.
# The referenced variables are defined outside this file.
ALL_NODES=("$DEFAULT_PRIMARY" "$DEFAULT_SECONDARY")
ALL_NODES+=("$TAILSCALE_ARCH1" "$TAILSCALE_ARCH2" "$TAILSCALE_ARCH3")

# Display labels; entry N describes ALL_NODES[N].
NODE_LABELS=("primary (.228)" "secondary (.198)")
NODE_LABELS+=("tailscale-1" "tailscale-2" "tailscale-3")

# Run-time options, overridden by the argument parser below.
WATCH_MODE=false   # true → keep re-profiling until interrupted
WATCH_INTERVAL=30  # delay between refreshes in watch mode (passed to sleep)
TARGET_NODES=()    # addresses given on the command line, if any
# ── Parse args ─────────────────────────────────────────────────────────

# Parse the command line into WATCH_MODE, WATCH_INTERVAL and TARGET_NODES.
#
# Arguments: the script's own arguments ("$@")
#   --watch         enable watch mode
#   --interval N    delay between refreshes (number, optional s/m/h/d suffix)
#   -h, --help      print usage and exit 0
#   anything else   a node address to profile
# Globals:   WATCH_MODE, WATCH_INTERVAL, TARGET_NODES (written); ALL_NODES (read)
# Exits:     2 on a missing/invalid --interval value or an unknown option
parse_profile_args() {
  # Accept what sleep(1) commonly accepts: integer or decimal, optional unit.
  local interval_re='^[0-9]+([.][0-9]+)?[smhd]?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --watch)
        WATCH_MODE=true
        shift
        ;;
      --interval)
        # Check explicitly: under `set -u` a bare "$2" would abort with an
        # unhelpful "unbound variable" message.
        if [[ $# -lt 2 ]]; then
          echo "Error: --interval requires a value (seconds)" >&2
          exit 2
        fi
        # Reject bad values now rather than when sleep fails after a full run.
        if [[ ! "$2" =~ $interval_re ]]; then
          echo "Error: invalid --interval value: $2" >&2
          exit 2
        fi
        WATCH_INTERVAL="$2"
        shift 2
        ;;
      -h|--help)
        printf 'Usage: %s [--watch] [--interval SECONDS] [NODE...]\n' "${0##*/}"
        exit 0
        ;;
      -*)
        # Never forward something that looks like an option as a host name.
        echo "Error: unknown option: $1" >&2
        exit 2
        ;;
      *)
        TARGET_NODES+=("$1")
        shift
        ;;
    esac
  done

  # If specific nodes given, use those; otherwise use all
  if [ ${#TARGET_NODES[@]} -eq 0 ]; then
    TARGET_NODES=("${ALL_NODES[@]}")
  fi
}

parse_profile_args "$@"
# ── Remote profiling command ───────────────────────────────────────────
# Shell snippet handed to ssh_cmd for every node (see profile_all). The whole
# snippet is single-quoted, so nothing in it is expanded by this script; the
# awk programs inside use double quotes with \$N so the shell that runs the
# snippet leaves the awk field references alone.
#
# Output protocol, parsed by print_node_report:
#   HEADER|<hostname>|<uptime>|<N> cores|load <first 3 fields of /proc/loadavg>|<temp>
#   MEM|<used> / <total> (<free> free)
#   SWAP|<used> / <total>            or  SWAP|none
#   DISK|<used> / <total> (<use%>)   for the / filesystem
#   PROCS_START … PROCS_END          up to 10 lines: PID, %CPU, %MEM, command,
#                                    sorted by CPU (ps header line skipped)
#   CONTAINERS_START … CONTAINERS_END
#                                    <name>|<status>[|<size>] per container, or
#                                    one of the sentinels "podman error",
#                                    "docker error", "no container runtime"
# <temp> is thermal_zone0's reading divided by 1000, or "n/a" when the sysfs
# file is missing. podman is preferred over docker; the podman listing is
# retried without {{.Size}} if the first form fails.
PROFILE_CMD='
hostname_val=$(hostname 2>/dev/null || echo "unknown")
uptime_val=$(uptime -p 2>/dev/null || uptime | sed "s/.*up/up/;s/,.*//")
# CPU info
cpu_cores=$(nproc 2>/dev/null || echo "?")
load_avg=$(cat /proc/loadavg 2>/dev/null | awk "{print \$1, \$2, \$3}")
# Memory
mem_info=$(free -h 2>/dev/null | awk "/^Mem:/{printf \"%s / %s (%s free)\", \$3, \$2, \$4}")
swap_info=$(free -h 2>/dev/null | awk "/^Swap:/{if(\$2 != \"0B\" && \$2 != \"0\") printf \"%s / %s\", \$3, \$2; else print \"none\"}")
# Disk
disk_info=$(df -h / 2>/dev/null | awk "NR==2{printf \"%s / %s (%s)\", \$3, \$2, \$5}")
# CPU temperature (if available)
temp="n/a"
if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
raw=$(cat /sys/class/thermal/thermal_zone0/temp)
temp="$((raw / 1000))°C"
fi
echo "HEADER|${hostname_val}|${uptime_val}|${cpu_cores} cores|load ${load_avg}|${temp}"
echo "MEM|${mem_info}"
echo "SWAP|${swap_info}"
echo "DISK|${disk_info}"
# Top 10 processes by CPU
echo "PROCS_START"
ps aux --sort=-%cpu 2>/dev/null | head -11 | awk "NR>1{printf \"%-6s %-5s %-5s %s\n\", \$2, \$3, \$4, \$11}" 2>/dev/null
echo "PROCS_END"
# Container status
echo "CONTAINERS_START"
if command -v podman >/dev/null 2>&1; then
podman ps -a --format "{{.Names}}|{{.Status}}|{{.Size}}" 2>/dev/null || \
podman ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || \
echo "podman error"
elif command -v docker >/dev/null 2>&1; then
docker ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || echo "docker error"
else
echo "no container runtime"
fi
echo "CONTAINERS_END"
'
# ── Formatting ─────────────────────────────────────────────────────────
# ANSI escape sequences, stored as literal backslash text; they only become
# control codes when printed through `echo -e`.

# Text attributes
BOLD='\033[1m'
DIM='\033[2m'
RESET='\033[0m'

# Foreground colours
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
RED='\033[0;31m'
CYAN='\033[0;36m'

# Horizontal rule drawn above and below each node heading.
SEP='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'
# Render one node's profile as a coloured, human-readable report on stdout.
#
# Arguments:
#   $1 - node address (printed next to the label)
#   $2 - display label for the node
#   $3 - raw PROFILE_CMD output: HEADER|, MEM|, SWAP|, DISK| lines plus the
#        PROCS_START..PROCS_END and CONTAINERS_START..CONTAINERS_END sections
# Globals: BOLD DIM GREEN YELLOW RED CYAN RESET SEP (read)
#
# Colour variables are printed with %b (escape sequences interpreted); data
# received from the node is printed with %s so backslashes in it stay literal.
# Sections are extracted with awk rather than grep because this script runs
# under `set -e -o pipefail`: a grep that matches nothing would abort the
# whole run instead of reaching the "no data" branches below.
print_node_report() {
  local ip="$1"
  local label="$2"
  local output="$3"

  printf '\n%b%b%s%b\n' "$BOLD" "$CYAN" "$SEP" "$RESET"
  printf '%b%b %s %b(%s)%b\n' "$BOLD" "$CYAN" "$label" "$DIM" "$ip" "$RESET"
  printf '%b%b%s%b\n' "$BOLD" "$CYAN" "$SEP" "$RESET"

  # Parse HEADER line (first one only)
  local header
  header=$(awk '/^HEADER\|/ { print; exit }' <<< "$output")
  if [[ -n "$header" ]]; then
    # Keep the parsed fields local so they do not leak into the caller's scope.
    local node_host node_uptime cores load temp
    IFS='|' read -r _ node_host node_uptime cores load temp <<< "$header"
    printf ' %bHost:%b %s %b%s%b\n' "$BOLD" "$RESET" "$node_host" "$DIM" "$node_uptime" "$RESET"
    printf ' %bCPU:%b %s %s %s\n' "$BOLD" "$RESET" "$cores" "$load" "$temp"
  fi

  # Memory / swap / disk: second |-separated field of the matching line
  local mem swap disk
  mem=$(awk -F'|' '/^MEM\|/ { print $2 }' <<< "$output")
  swap=$(awk -F'|' '/^SWAP\|/ { print $2 }' <<< "$output")
  disk=$(awk -F'|' '/^DISK\|/ { print $2 }' <<< "$output")
  if [[ -n "$mem" ]]; then
    printf ' %bMem:%b %s\n' "$BOLD" "$RESET" "$mem"
  fi
  if [[ -n "$swap" ]]; then
    printf ' %bSwap:%b %s\n' "$BOLD" "$RESET" "$swap"
  fi
  if [[ -n "$disk" ]]; then
    printf ' %bDisk:%b %s\n' "$BOLD" "$RESET" "$disk"
  fi

  # Top processes
  printf '\n'
  printf ' %bTop processes by CPU:%b\n' "$BOLD" "$RESET"
  printf ' %bPID CPU%% MEM%% Command%b\n' "$DIM" "$RESET"
  local procs
  procs=$(awk '
    /^PROCS_START$/ { inside = 1 }
    inside && !/^PROCS_/ { print }
    /^PROCS_END$/ { inside = 0 }
  ' <<< "$output")
  if [[ -n "$procs" ]]; then
    local line cpu_field cpu_tenths
    # Integer part, optional first decimal digit. ps prints e.g. "12.5" but
    # may print a bare integer such as "105" for large values, so the value
    # is normalised to tenths of a percent instead of just deleting the dot.
    local cpu_re='^([0-9]+)([.]([0-9])[0-9]*)?$'
    while IFS= read -r line; do
      cpu_field=""
      # Second whitespace-separated column is %CPU.
      IFS=$' \t' read -r _ cpu_field _ <<< "$line"
      if [[ "$cpu_field" =~ $cpu_re ]]; then
        # 10# forces base 10 so a leading zero is not read as octal.
        cpu_tenths=$(( 10#${BASH_REMATCH[1]} * 10 + ${BASH_REMATCH[3]:-0} ))
      else
        cpu_tenths=0
      fi
      if (( cpu_tenths > 500 )); then      # above 50.0 %
        printf ' %b%s%b\n' "$RED" "$line" "$RESET"
      elif (( cpu_tenths > 100 )); then    # above 10.0 %
        printf ' %b%s%b\n' "$YELLOW" "$line" "$RESET"
      else
        printf ' %s\n' "$line"
      fi
    done <<< "$procs"
  else
    printf ' %b(no process data)%b\n' "$DIM" "$RESET"
  fi

  # Containers
  printf '\n'
  printf ' %bContainers:%b\n' "$BOLD" "$RESET"
  local containers
  containers=$(awk '
    /^CONTAINERS_START$/ { inside = 1 }
    inside && !/^CONTAINERS_/ { print }
    /^CONTAINERS_END$/ { inside = 0 }
  ' <<< "$output")
  case "$containers" in
    "" | "no container runtime" | "podman error" | "docker error")
      # Empty section or a sentinel from the remote snippet: show it as a
      # dim note instead of rendering it as a container.
      printf ' %b%s%b\n' "$DIM" "${containers:-none}" "$RESET"
      ;;
    *)
      local name status icon
      # A third field (size) may be present; it is not displayed.
      while IFS='|' read -r name status _; do
        # NOTE(review): no glyph appears between the colour codes below; if
        # a status symbol was intended it is not visible here — confirm.
        if grep -qi "up" <<< "$status"; then
          icon="${GREEN}${RESET}"
        else
          icon="${RED}${RESET}"
        fi
        printf ' %b %b%s%b %b%s%b\n' "$icon" "$BOLD" "$name" "$RESET" "$DIM" "$status" "$RESET"
      done <<< "$containers"
      ;;
  esac
}
# ── Main profiling loop ───────────────────────────────────────────────

# Print the display label for a target node.
#
# Arguments: $1 - index of the node in TARGET_NODES, $2 - node address
# Globals:   ALL_NODES, NODE_LABELS (read)
# Outputs:   the label on stdout; falls back to the address itself
#
# Labels belong to addresses, not to positions in TARGET_NODES: when the user
# passes explicit nodes, position 0 is not necessarily the primary node.
_node_label_for() {
  local idx="$1" ip="$2" j

  # Default run (TARGET_NODES is a copy of ALL_NODES): positions line up.
  if [[ "${ALL_NODES[$idx]:-}" == "$ip" && -n "${NODE_LABELS[$idx]:-}" ]]; then
    printf '%s\n' "${NODE_LABELS[$idx]}"
    return 0
  fi

  # Explicit targets: look the address up among the known nodes.
  for j in "${!ALL_NODES[@]}"; do
    if [[ "${ALL_NODES[$j]}" == "$ip" && -n "${NODE_LABELS[$j]:-}" ]]; then
      printf '%s\n' "${NODE_LABELS[$j]}"
      return 0
    fi
  done

  printf '%s\n' "$ip"
}

# Profile every node in TARGET_NODES in parallel and print one report each,
# followed by a reachable/unreachable summary.
#
# Globals: TARGET_NODES, ALL_NODES, NODE_LABELS, PROFILE_CMD and the colour
#          variables (read)
# Calls:   ssh_cmd (defined outside this file), print_node_report
profile_all() {
  echo -e "\n${BOLD}Archipelago Node Profile${RESET} ${DIM}$(date '+%Y-%m-%d %H:%M:%S')${RESET}"

  # Each probe writes its result to <tmpdir>/<index>.out.
  local tmpdir
  tmpdir=$(mktemp -d)

  # Probe all nodes in parallel
  local -a pids=()
  local i ip result
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    (
      # A failed ssh_cmd marks the node with a sentinel instead of output.
      if result=$(ssh_cmd "$ip" "$PROFILE_CMD" 2>/dev/null); then
        printf '%s\n' "$result" > "$tmpdir/$i.out"
      else
        echo "UNREACHABLE" > "$tmpdir/$i.out"
      fi
    ) &
    pids+=("$!")
  done

  # Wait for all probes; a probe's exit status is irrelevant here because the
  # outcome is recorded in its output file.
  local pid
  for pid in "${pids[@]}"; do
    wait "$pid" 2>/dev/null || true
  done

  # Print reports
  local reachable=0 unreachable=0
  local label outfile report
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    label=$(_node_label_for "$i" "$ip")
    outfile="$tmpdir/$i.out"
    if [[ -f "$outfile" ]] && report=$(<"$outfile") && [[ "$report" != "UNREACHABLE" ]]; then
      print_node_report "$ip" "$label" "$report"
      reachable=$((reachable + 1))
    else
      echo -e "\n${DIM}${SEP}${RESET}"
      echo -e "${RED} ${label} (${ip}) — unreachable${RESET}"
      echo -e "${DIM}${SEP}${RESET}"
      unreachable=$((unreachable + 1))
    fi
  done

  echo -e "\n${DIM}${reachable} reachable, ${unreachable} unreachable${RESET}\n"
  rm -rf -- "$tmpdir"
}
# Entry point. WATCH_MODE holds the word "true" or "false" and is executed as
# a command, so its exit status selects the branch.
if ! $WATCH_MODE; then
  # One-shot: profile once and finish.
  profile_all
else
  # Watch mode: redraw the full report until the user interrupts the script.
  while :; do
    clear
    profile_all
    echo -e "${DIM}Refreshing every ${WATCH_INTERVAL}s — Ctrl+C to stop${RESET}"
    sleep "$WATCH_INTERVAL"
  done
fi