#!/usr/bin/env bash
# Post-deploy smoke check. Runs against the live host and exits non-zero on
# anything obviously broken. Intentionally noisy so failures are easy to spot.
#
# Usage: bin/smoke.sh
#
# Optional environment overrides (defaults in parentheses):
#   SMOKE_HOST           public hostname to probe        (pictureframe.edholm.me)
#   SMOKE_SSH_HOST       ssh alias of the docker host    (pictureframe)
#   SMOKE_TESTBOT_EMAIL  login of the test account       (testbot@example.com)
#   SMOKE_TESTBOT_PASS   password of the test account    (testpass123)
#   SMOKE_CURL_MAX_TIME  per-request timeout in seconds  (15)
#
# What it covers:
#   1. Frontend reachable (homepage HTTP code).
#   2. Mercure hub reachable (HEAD on /.well-known/mercure).
#   3. Container statuses on the host (none should be Restarting/Exited).
#   4. Recent CRITICAL / Fatal / 5xx entries in php + worker + nginx logs.
#   5. Authenticated /api/devices round-trip via the testbot account.
#   6. Mercure publish→subscribe round-trip via a PATCH that sets the first
#      listed device's name to "smoke-check".
#
# Designed to fail fast — first red check exits 1.

# No `set -e` on purpose: several checks rely on grep returning 1 for
# "no match", and every step that matters is checked explicitly.
set -uo pipefail

readonly HOST="${SMOKE_HOST:-pictureframe.edholm.me}"
readonly SSH_HOST="${SMOKE_SSH_HOST:-pictureframe}"
readonly TESTBOT_EMAIL="${SMOKE_TESTBOT_EMAIL:-testbot@example.com}"
readonly TESTBOT_PASS="${SMOKE_TESTBOT_PASS:-testpass123}"
readonly CURL_MAX_TIME="${SMOKE_CURL_MAX_TIME:-15}"
readonly RED=$'\033[31m' GREEN=$'\033[32m' YELLOW=$'\033[33m' RESET=$'\033[0m'

# State shared between checks 5 and 6, and released by cleanup().
JAR=""          # curl cookie jar holding the testbot session
SUB_OUT=""      # file capturing the Mercure subscriber's output
SUB_PID=""      # PID of the background subscriber while it is running
DEVICES_JSON="" # raw body of GET /api/devices

# Status reporters. The message is printed with %s so backslashes inside it
# (e.g. in a JSON body) are shown verbatim instead of being interpreted.
ok() { printf ' %s✓%s %s\n' "$GREEN" "$RESET" "$1"; }
fail() {
  printf ' %s✗%s %s\n' "$RED" "$RESET" "$1"
  exit 1
}
warn() { printf ' %s!%s %s\n' "$YELLOW" "$RESET" "$1"; }

# EXIT handler: stop a still-running subscriber and delete the temp files.
cleanup() {
  if [[ -n "$SUB_PID" ]]; then
    kill "$SUB_PID" 2>/dev/null || true
  fi
  [[ -z "$JAR" ]] || rm -f -- "$JAR"
  [[ -z "$SUB_OUT" ]] || rm -f -- "$SUB_OUT"
}
trap cleanup EXIT

# Prints the HTTP status code of a HEAD request to URL $1.
# curl reports "000" when no response was received (including on timeout).
head_code() {
  curl -sI --max-time "$CURL_MAX_TIME" -o /dev/null -w '%{http_code}' "$1"
}

# 1/6: the homepage must answer 200 or 302.
check_frontend() {
  echo "── 1/6 Frontend reachable ─────────────────────────────"
  local code
  code=$(head_code "https://$HOST/")
  [[ "$code" == "200" || "$code" == "302" ]] \
    || fail "homepage returned $code (expected 200, or 302 redirect to login)"
  ok "homepage → $code"
}

# 2/6: the Mercure hub endpoint must answer 200 to a HEAD request.
check_mercure_hub() {
  echo "── 2/6 Mercure hub reachable ──────────────────────────"
  local code
  code=$(head_code "https://$HOST/.well-known/mercure?topic=ping")
  [[ "$code" == "200" ]] || fail "mercure returned $code"
  ok "mercure /.well-known/mercure → $code"
}

# 3/6: every pictureframe container must have a status starting with "Up"
# and must not be reported as unhealthy.
check_containers() {
  echo "── 3/6 Container statuses ─────────────────────────────"
  local listing bad
  # An ssh/docker failure is a failed check, not an empty (= green) listing.
  listing=$(ssh "$SSH_HOST" \
    'docker ps -a --filter name=pictureframe --format "{{.Names}}\t{{.Status}}"') \
    || fail "could not list containers on $SSH_HOST over ssh"
  [[ -n "$listing" ]] || fail "no pictureframe containers found on $SSH_HOST"
  # Test only the status column, so a container *name* containing "up" cannot
  # mask a bad state and "(unhealthy)" is not mistaken for "healthy".
  bad=$(awk -F'\t' '$2 !~ /^Up/ || $2 ~ /unhealthy/' <<<"$listing")
  if [[ -n "$bad" ]]; then
    printf '%s\n' "$bad"
    fail "one or more containers are not Up/healthy"
  fi
  ok "all pictureframe-* containers Up"
}

# 4/6: no error-level lines in the last five minutes of container logs.
check_logs() {
  echo "── 4/6 No recent CRITICAL/502 in logs ─────────────────"
  local hits
  # The remote loop ends in sed, so its status is 0 even when grep finds
  # nothing; a non-zero status here therefore means ssh itself failed.
  hits=$(ssh "$SSH_HOST" '
    for c in pictureframe-php-1 pictureframe-worker-1 pictureframe-nginx-1; do
      docker logs --since 5m "$c" 2>&1 | grep -iE "CRITICAL|Fatal|\" 5[0-9][0-9] |Error thrown" | sed "s|^|$c: |"
    done
  ') || fail "could not read container logs on $SSH_HOST over ssh"
  if [[ -n "$hits" ]]; then
    printf '%s\n' "$hits" | head -10
    fail "found error-level log entries in the last 5 minutes"
  fi
  ok "no CRITICAL/5xx in last 5 min"
}

# 5/6: log in as testbot and fetch /api/devices.
# Leaves the session cookies in $JAR and the response in $DEVICES_JSON.
check_api() {
  echo "── 5/6 /api/devices round-trip (testbot) ──────────────"
  local csrf login_code
  JAR=$(mktemp) || fail "mktemp failed"
  # sed instead of `grep -oP`: PCRE support is GNU-only.
  csrf=$(curl -s --max-time "$CURL_MAX_TIME" -c "$JAR" "https://$HOST/login" \
    | sed -n 's/.*name="_csrf_token"[^>]*value="\([^"]*\)".*/\1/p' \
    | head -1)
  [[ -n "$csrf" ]] || fail "could not extract CSRF token from /login"
  # --data-urlencode already makes this a POST. An explicit `-X POST` would
  # also force POST onto the redirect that -L follows.
  login_code=$(curl -s --max-time "$CURL_MAX_TIME" -b "$JAR" -c "$JAR" -L \
    --data-urlencode "_username=$TESTBOT_EMAIL" \
    --data-urlencode "_password=$TESTBOT_PASS" \
    --data-urlencode "_csrf_token=$csrf" \
    -o /dev/null -w '%{http_code}' "https://$HOST/login")
  [[ "$login_code" == "200" ]] || fail "login returned $login_code"
  DEVICES_JSON=$(curl -s --max-time "$CURL_MAX_TIME" -b "$JAR" \
    "https://$HOST/api/devices")
  # Here-string rather than `echo | grep -q`: under pipefail the pipeline can
  # report failure when grep -q exits before echo has finished writing.
  grep -q '"id":' <<<"$DEVICES_JSON" \
    || fail "/api/devices did not return a JSON array with id fields: $DEVICES_JSON"
  ok "testbot login + /api/devices → device list with ids"
}

# 6/6: subscribe to the first device's topic, PATCH the device, and expect
# the update to show up on the subscription.
# NOTE(review): the PATCH sets the device name to "smoke-check" and does not
# restore it — presumably acceptable because the device belongs to the
# testbot account; confirm.
check_mercure_roundtrip() {
  echo "── 6/6 Mercure publish→subscribe round-trip ───────────"
  local device_id patch_code
  device_id=$(grep -oE '"id":[[:space:]]*[0-9]+' <<<"$DEVICES_JSON" \
    | head -1 | grep -oE '[0-9]+')
  [[ -n "$device_id" ]] || fail "no device id available for round-trip test"
  SUB_OUT=$(mktemp) || fail "mktemp failed"
  # curl enforces the 8 s window itself (no dependency on coreutils `timeout`)
  # and percent-encodes the topic via -G/--data-urlencode.
  curl -sN --max-time 8 -G \
    --data-urlencode "topic=https://$HOST/devices/$device_id" \
    "https://$HOST/.well-known/mercure" >"$SUB_OUT" 2>&1 &
  SUB_PID=$!
  sleep 2 # give the subscriber time to connect before publishing
  patch_code=$(curl -s --max-time "$CURL_MAX_TIME" -b "$JAR" -X PATCH \
    "https://$HOST/api/devices/$device_id" \
    -H "Content-Type: application/json" -d '{"name":"smoke-check"}' \
    -o /dev/null -w '%{http_code}')
  [[ "$patch_code" == "200" ]] || fail "PATCH returned $patch_code"
  # The subscriber is expected to end non-zero when --max-time expires.
  wait "$SUB_PID" 2>/dev/null || true
  SUB_PID=""
  grep -q '"smoke-check"' "$SUB_OUT" \
    || fail "publish did not arrive on the subscriber within 8s: $(cat "$SUB_OUT")"
  ok "publish → subscribe round-trip OK"
}

# Runs the six checks in order; any failing check exits the script via fail().
main() {
  check_frontend
  check_mercure_hub
  check_containers
  check_logs
  check_api
  check_mercure_roundtrip

  echo
  printf '%sall smoke checks passed%s\n' "$GREEN" "$RESET"
}

main "$@"