CompanionIndicator: show muted icon when relay connected but idle, orange when companion actively sending input. Removes Transition wrapper for always-visible relay status. Add scripts/node-profile.sh utility. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
253 lines
7.5 KiB
Bash
Executable File
253 lines
7.5 KiB
Bash
Executable File
#!/bin/bash
#
# node-profile.sh
#
# Collects a CPU / memory / disk / container snapshot from each Archipelago
# node over SSH and prints one report per node.
#
# Usage:
#   ./scripts/node-profile.sh                        # All reachable nodes
#   ./scripts/node-profile.sh 192.168.1.228          # Single node
#   ./scripts/node-profile.sh --watch                # Repeat every 30s
#   ./scripts/node-profile.sh --watch --interval 10  # Repeat every 10s
#
# Requires: SSH key at ~/.ssh/archipelago-deploy (or ARCHIPELAGO_SSH_KEY)

set -e -u -o pipefail

# Directory containing this script; the helper library and the deploy
# configuration files are loaded relative to it.
SCRIPT_DIR=$(cd "$(dirname "$0")" && pwd)

. "$SCRIPT_DIR/lib/common.sh"
. "$SCRIPT_DIR/deploy-config-defaults.sh"

# deploy-config.sh is loaded only when it exists next to the script.
if [ -f "$SCRIPT_DIR/deploy-config.sh" ]; then
  . "$SCRIPT_DIR/deploy-config.sh"
fi

# Known nodes and their display labels. The two arrays are index-aligned:
# NODE_LABELS[n] describes ALL_NODES[n]. The address variables are not set in
# this file (presumably they come from the sourced deploy-config files).
ALL_NODES=()
NODE_LABELS=()

ALL_NODES+=("$DEFAULT_PRIMARY")
NODE_LABELS+=("primary (.228)")

ALL_NODES+=("$DEFAULT_SECONDARY")
NODE_LABELS+=("secondary (.198)")

ALL_NODES+=("$TAILSCALE_ARCH1")
NODE_LABELS+=("tailscale-1")

ALL_NODES+=("$TAILSCALE_ARCH2")
NODE_LABELS+=("tailscale-2")

ALL_NODES+=("$TAILSCALE_ARCH3")
NODE_LABELS+=("tailscale-3")

# Command-line state; the argument parser below overwrites these defaults.
TARGET_NODES=()    # nodes to profile (empty means "all")
WATCH_INTERVAL=30  # seconds between refreshes in watch mode
WATCH_MODE=false   # true when --watch was given

# ── Parse args ─────────────────────────────────────────────────────────

#######################################
# Report a command-line mistake and stop.
# Arguments: $1 - description of the problem
# Outputs:   message and usage line on stderr
# Returns:   never; exits with status 2
#######################################
_profile_usage_error() {
  printf 'node-profile.sh: %s\n' "$1" >&2
  printf 'Usage: node-profile.sh [--watch] [--interval SECONDS] [NODE...]\n' >&2
  exit 2
}

#######################################
# Parse the command line.
# Globals:   WATCH_MODE, WATCH_INTERVAL, TARGET_NODES (written)
# Arguments: the script's positional parameters
# Returns:   0 on success; exits 2 on a malformed command line
#######################################
parse_profile_args() {
  # A number, optionally fractional, optionally followed by a sleep(1) unit.
  local -r interval_re='^[0-9]+([.][0-9]+)?[smhd]?$'

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --watch)
        WATCH_MODE=true
        shift
        ;;
      --interval)
        # Check explicitly: under `set -u` a missing value would otherwise
        # surface as an opaque "unbound variable" error.
        [[ $# -ge 2 ]] || _profile_usage_error "--interval requires a value"
        # Reject bad values now instead of failing at the first sleep.
        [[ "$2" =~ $interval_re ]] \
          || _profile_usage_error "invalid --interval value: $2"
        WATCH_INTERVAL="$2"
        shift 2
        ;;
      -*)
        # A mistyped flag must not be handed to ssh as a host name.
        _profile_usage_error "unknown option: $1"
        ;;
      *)
        TARGET_NODES+=("$1")
        shift
        ;;
    esac
  done
}

parse_profile_args "$@"

# If specific nodes given, use those; otherwise use all
if [[ ${#TARGET_NODES[@]} -eq 0 ]]; then
  TARGET_NODES=("${ALL_NODES[@]}")
fi

# ── Remote profiling command ───────────────────────────────────────────

# Shell snippet that profile_all hands to ssh_cmd for every node. It prints a
# small line-oriented report that print_node_report parses:
#
#   HEADER|<hostname>|<uptime>|<n> cores|load <1m> <5m> <15m>|<temp>
#   MEM|<used> / <total> (<free> free)
#   SWAP|<used> / <total>            (or "none")
#   DISK|<used> / <size> (<use%>)    (root filesystem)
#   PROCS_START ... PROCS_END              top 10 processes by CPU
#   CONTAINERS_START ... CONTAINERS_END    name|status[|size] per container
#
# The snippet is single-quoted: it must not contain a single quote, and the
# awk programs escape $ and " so they survive the remote double quotes.
PROFILE_CMD='
hostname_val=$(hostname 2>/dev/null || echo "unknown")
uptime_val=$(uptime -p 2>/dev/null || uptime | sed "s/.*up/up/;s/,.*//")

# CPU info
cpu_cores=$(nproc 2>/dev/null || echo "?")
load_avg=$(cat /proc/loadavg 2>/dev/null | awk "{print \$1, \$2, \$3}")

# Memory
mem_info=$(free -h 2>/dev/null | awk "/^Mem:/{printf \"%s / %s (%s free)\", \$3, \$2, \$4}")
swap_info=$(free -h 2>/dev/null | awk "/^Swap:/{if(\$2 != \"0B\" && \$2 != \"0\") printf \"%s / %s\", \$3, \$2; else print \"none\"}")

# Disk
disk_info=$(df -h / 2>/dev/null | awk "NR==2{printf \"%s / %s (%s)\", \$3, \$2, \$5}")

# CPU temperature (if available)
temp="n/a"
if [ -f /sys/class/thermal/thermal_zone0/temp ]; then
  raw=$(cat /sys/class/thermal/thermal_zone0/temp 2>/dev/null)
  # The file can exist and still fail to read; only do arithmetic on an
  # integer (optional leading minus) so a bad value cannot abort the snippet.
  case "${raw#-}" in
    ""|*[!0-9]*) ;;
    *) temp="$((raw / 1000))°C" ;;
  esac
fi

echo "HEADER|${hostname_val}|${uptime_val}|${cpu_cores} cores|load ${load_avg}|${temp}"
echo "MEM|${mem_info}"
echo "SWAP|${swap_info}"
echo "DISK|${disk_info}"

# Top 10 processes by CPU
echo "PROCS_START"
ps aux --sort=-%cpu 2>/dev/null | head -11 | awk "NR>1{printf \"%-6s %-5s %-5s %s\n\", \$2, \$3, \$4, \$11}" 2>/dev/null
echo "PROCS_END"

# Container status
echo "CONTAINERS_START"
if command -v podman >/dev/null 2>&1; then
  podman ps -a --format "{{.Names}}|{{.Status}}|{{.Size}}" 2>/dev/null || \
  podman ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || \
  echo "podman error"
elif command -v docker >/dev/null 2>&1; then
  docker ps -a --format "{{.Names}}|{{.Status}}" 2>/dev/null || echo "docker error"
else
  echo "no container runtime"
fi
echo "CONTAINERS_END"
'

# ── Formatting ─────────────────────────────────────────────────────────

# ANSI SGR sequences kept as literal backslash text; the `echo -e` calls in
# this script turn them into real escape codes when printing.
RESET='\033[0m'
BOLD='\033[1m'
DIM='\033[2m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
CYAN='\033[0;36m'

# Horizontal rule printed above and below each node heading.
SEP='━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━'

#######################################
# Print the second "|"-separated field of every line whose first field is
# the given tag. Prints nothing, and still succeeds, when no line matches.
# Arguments: $1 - raw profile output, $2 - tag (e.g. MEM)
#######################################
_profile_tagged_value() {
  awk -F'[|]' -v tag="$2" '$1 == tag { print $2 }' <<< "$1"
}

#######################################
# Print the lines found between <NAME>_START and <NAME>_END, markers
# excluded. Prints nothing, and still succeeds, when the section is empty.
# Arguments: $1 - raw profile output, $2 - section name (e.g. PROCS)
#######################################
_profile_section() {
  awk -v first="${2}_START" -v last="${2}_END" '
    $0 == first { inside = 1; next }
    $0 == last  { inside = 0; next }
    inside
  ' <<< "$1"
}

#######################################
# Render the report for one node from the raw output of PROFILE_CMD.
# Sections missing from the output are skipped or shown as empty.
# Globals:   BOLD DIM GREEN YELLOW RED CYAN RESET SEP (read)
# Arguments: $1 - node address
#            $2 - display label
#            $3 - raw profile output
# Outputs:   formatted report on stdout
#######################################
print_node_report() {
  local ip="$1"
  local label="$2"
  local output="$3"

  echo -e "\n${BOLD}${CYAN}${SEP}${RESET}"
  echo -e "${BOLD}${CYAN} ${label} ${DIM}(${ip})${RESET}"
  echo -e "${BOLD}${CYAN}${SEP}${RESET}"

  # All lookups below use awk rather than grep: grep exits 1 when nothing
  # matches, and under `set -euo pipefail` that would abort the whole script
  # as soon as one node omitted a line.

  # Parse HEADER line
  local header hostname uptime cores load temp
  header=$(awk '/^HEADER[|]/ { print; exit }' <<< "$output")
  if [[ -n "$header" ]]; then
    IFS='|' read -r _ hostname uptime cores load temp <<< "$header"
    echo -e " ${BOLD}Host:${RESET} ${hostname} ${DIM}${uptime}${RESET}"
    echo -e " ${BOLD}CPU:${RESET} ${cores} ${load} ${temp}"
  fi

  # Memory
  local mem
  mem=$(_profile_tagged_value "$output" "MEM")
  if [[ -n "$mem" ]]; then
    echo -e " ${BOLD}Mem:${RESET} ${mem}"
  fi

  local swap
  swap=$(_profile_tagged_value "$output" "SWAP")
  if [[ -n "$swap" ]]; then
    echo -e " ${BOLD}Swap:${RESET} ${swap}"
  fi

  local disk
  disk=$(_profile_tagged_value "$output" "DISK")
  if [[ -n "$disk" ]]; then
    echo -e " ${BOLD}Disk:${RESET} ${disk}"
  fi

  # Top processes
  echo ""
  echo -e " ${BOLD}Top processes by CPU:${RESET}"
  echo -e " ${DIM}PID CPU% MEM% Command${RESET}"
  local procs
  procs=$(_profile_section "$output" "PROCS")
  if [[ -n "$procs" ]]; then
    # CPU% is compared in tenths of a percent so that "12.5" and a value
    # printed without a decimal part ("150") are on the same scale.
    local -r cpu_re='^([0-9]+)([.]([0-9])[0-9]*)?$'
    local line cpu tenths
    while IFS= read -r line; do
      read -r _ cpu _ <<< "$line"
      tenths=0
      if [[ "$cpu" =~ $cpu_re ]]; then
        # 10# forces base 10 so a leading zero is not read as octal.
        tenths=$(( 10#${BASH_REMATCH[1]} * 10 + ${BASH_REMATCH[3]:-0} ))
      fi
      if (( tenths > 500 )); then    # above 50% CPU
        echo -e " ${RED}${line}${RESET}"
      elif (( tenths > 100 )); then  # above 10% CPU
        echo -e " ${YELLOW}${line}${RESET}"
      else
        echo -e " ${line}"
      fi
    done <<< "$procs"
  else
    echo -e " ${DIM}(no process data)${RESET}"
  fi

  # Containers
  echo ""
  echo -e " ${BOLD}Containers:${RESET}"
  local containers name status icon
  containers=$(_profile_section "$output" "CONTAINERS")
  case "$containers" in
    "" | "no container runtime" | "podman error" | "docker error")
      # Not a container list: show the message (or "none") without an icon.
      echo -e " ${DIM}${containers:-none}${RESET}"
      ;;
    *)
      # Lines are name|status[|size]; the size column is not displayed.
      while IFS='|' read -r name status _; do
        # Any status containing "up" (case-insensitive) counts as running.
        if [[ "$status" == *[Uu][Pp]* ]]; then
          icon="${GREEN}●${RESET}"
        else
          icon="${RED}○${RESET}"
        fi
        echo -e " ${icon} ${BOLD}${name}${RESET} ${DIM}${status}${RESET}"
      done <<< "$containers"
      ;;
  esac
}

# ── Main profiling loop ───────────────────────────────────────────────

#######################################
# Print the display label for a node address.
# The label is the NODE_LABELS entry at the index where the address appears
# in ALL_NODES; an address that is not in ALL_NODES is labelled with itself.
# Globals:   ALL_NODES, NODE_LABELS (read)
# Arguments: $1 - node address
#            $2 - optional index to try first (keeps positional labels when
#                 the target list is ALL_NODES itself)
# Outputs:   the label on stdout
#######################################
_profile_label_for() {
  local addr="$1"
  local hint="${2:-}"
  local idx

  if [[ -n "$hint" && "${ALL_NODES[$hint]:-}" == "$addr" ]]; then
    printf '%s\n' "${NODE_LABELS[$hint]:-$addr}"
    return 0
  fi
  for idx in "${!ALL_NODES[@]}"; do
    if [[ "${ALL_NODES[$idx]}" == "$addr" ]]; then
      printf '%s\n' "${NODE_LABELS[$idx]:-$addr}"
      return 0
    fi
  done
  printf '%s\n' "$addr"
}

#######################################
# Profile every node in TARGET_NODES in parallel and print one report per
# node, followed by a reachable/unreachable summary.
# Globals:   TARGET_NODES, ALL_NODES, NODE_LABELS, PROFILE_CMD,
#            BOLD DIM RED RESET SEP (read)
# Outputs:   reports on stdout
#######################################
profile_all() {
  echo -e "\n${BOLD}Archipelago Node Profile${RESET} ${DIM}$(date '+%Y-%m-%d %H:%M:%S')${RESET}"

  local tmpdir
  tmpdir=$(mktemp -d)

  local i ip label pid outfile content result

  # Probe all nodes in parallel; each probe writes <index>.out in tmpdir.
  local pids=()
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    (
      if result=$(ssh_cmd "$ip" "$PROFILE_CMD" 2>/dev/null); then
        echo "$result" > "$tmpdir/$i.out"
      else
        echo "UNREACHABLE" > "$tmpdir/$i.out"
      fi
    ) &
    pids+=("$!")
  done

  # Wait for all probes; a failed probe has already recorded UNREACHABLE.
  for pid in "${pids[@]}"; do
    wait "$pid" 2>/dev/null || true
  done

  # Print reports
  local reachable=0 unreachable=0
  for i in "${!TARGET_NODES[@]}"; do
    ip="${TARGET_NODES[$i]}"
    # Resolve the label by address, not by position in TARGET_NODES:
    # positions only line up with NODE_LABELS when no nodes were given on
    # the command line.
    label=$(_profile_label_for "$ip" "$i")
    outfile="$tmpdir/$i.out"

    if [[ -f "$outfile" ]] && content=$(<"$outfile") \
      && [[ "$content" != "UNREACHABLE" ]]; then
      print_node_report "$ip" "$label" "$content"
      reachable=$((reachable + 1))
    else
      echo -e "\n${DIM}${SEP}${RESET}"
      echo -e "${RED} ${label} (${ip}) — unreachable${RESET}"
      echo -e "${DIM}${SEP}${RESET}"
      unreachable=$((unreachable + 1))
    fi
  done

  echo -e "\n${DIM}${reachable} reachable, ${unreachable} unreachable${RESET}\n"
  rm -rf -- "$tmpdir"
}

# Entry point: print a single report, or with --watch clear the screen and
# print a fresh report every WATCH_INTERVAL until interrupted.
if ! $WATCH_MODE; then
  profile_all
else
  while :; do
    clear
    profile_all
    echo -e "${DIM}Refreshing every ${WATCH_INTERVAL}s — Ctrl+C to stop${RESET}"
    sleep "$WATCH_INTERVAL"
  done
fi