f45cfcc967
CI / test (push) Has been cancelled
Runs after every deploy. Six checks against pictureframe.edholm.me: 1. Frontend reachable (302/200 expected). 2. Mercure hub reachable (200 expected). 3. All pictureframe-* containers Up/healthy. 4. No CRITICAL / Fatal / 5xx in last 5 min of php/worker/nginx logs. 5. Authenticated /api/devices round-trip via testbot. 6. Mercure publish→subscribe round-trip via no-op PATCH. Catches the class of bug that just bit us today: nginx caching a stale PHP container IP after `docker compose up -d`, and a silently-dropped composer dep crash-looping the worker. Neither shows up in unit tests because they're infra-level. Per the new "post-deploy smoke check" rule: if a unit test doesn't cover a change, run this script (or an equivalent cURL) before declaring the deploy done. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
94 lines
4.3 KiB
Bash
Executable File
94 lines
4.3 KiB
Bash
Executable File
#!/usr/bin/env bash
|
|
# Post-deploy smoke check. Runs against the live host and exits non-zero on
|
|
# anything obviously broken. Intentionally noisy so failures are easy to spot.
|
|
#
|
|
# Usage: bin/smoke.sh
|
|
#
|
|
# What it covers:
|
|
# 1. Frontend reachable (homepage HTTP code).
|
|
# 2. Mercure hub reachable (HEAD on /.well-known/mercure).
|
|
# 3. Container statuses on the host (none should be Restarting/Exited).
|
|
# 4. Recent CRITICAL / Fatal / "Error thrown" / HTTP 5xx entries in the last 5 min of php + worker + nginx logs.
|
|
# 5. Authenticated /api/devices round-trip via the testbot account.
|
|
# 6. Mercure publish→subscribe round-trip via a PATCH that sets the first device's name to "smoke-check".
|
|
#
|
|
# Designed to fail fast — first red check exits 1.
|
|
|
|
# -u: unset variables are errors; pipefail: a pipeline fails if any stage fails.
# -e is deliberately not set: every check below tests its own result and calls
# fail explicitly, and several pipelines end in a grep that may match nothing.
set -uo pipefail

# Targets and credentials. Each value can be overridden from the environment
# via the SMOKE_* variable named on the right-hand side; the defaults are the
# values this script has always used. SMOKE_-prefixed names are used so that
# an unrelated exported HOST in the caller's environment is never picked up.
HOST="${SMOKE_HOST:-pictureframe.edholm.me}"
SSH_HOST="${SMOKE_SSH_HOST:-pictureframe}"
TESTBOT_EMAIL="${SMOKE_TESTBOT_EMAIL:-testbot@example.com}"
TESTBOT_PASS="${SMOKE_TESTBOT_PASS:-testpass123}"

# ANSI colour escapes, stored as backslash sequences and expanded at print time.
RED="\033[31m"; GREEN="\033[32m"; YELLOW="\033[33m"; RESET="\033[0m"
|
|
|
|
# ok MESSAGE — print a green check mark followed by MESSAGE on stdout.
# Only the colour variables are escape-expanded (%b); MESSAGE is printed
# verbatim (%s) so backslashes in it are not interpreted.
ok() { printf ' %b✓%b %s\n' "$GREEN" "$RESET" "$1"; }
|
|
# fail MESSAGE — print a red cross followed by MESSAGE on stdout, then exit 1.
# MESSAGE is printed verbatim (%s): callers pass raw response bodies such as
# JSON, whose backslash sequences must not be expanded.
fail() { printf ' %b✗%b %s\n' "$RED" "$RESET" "$1"; exit 1; }
|
|
# warn MESSAGE — print a yellow exclamation mark followed by MESSAGE on stdout.
# Does not exit. MESSAGE is printed verbatim (%s), without escape expansion.
warn() { printf ' %b!%b %s\n' "$YELLOW" "$RESET" "$1"; }
|
|
|
|
echo "── 1/6 Frontend reachable ─────────────────────────────"
# HEAD request against the site root; only the HTTP status code is captured.
homepage_status=$(curl -sI -o /dev/null -w "%{http_code}" "https://$HOST/")
# 302 and 200 are both accepted; anything else aborts the run.
case "$homepage_status" in
  302|200)
    ;;
  *)
    fail "homepage returned $homepage_status (expected 302 redirect to login)"
    ;;
esac
ok "homepage → $homepage_status"
|
|
|
|
echo "── 2/6 Mercure hub reachable ──────────────────────────"
# HEAD request against the hub endpoint with a throwaway topic; only the
# HTTP status code is captured.
hub_status=$(curl -sI -o /dev/null -w "%{http_code}" "https://$HOST/.well-known/mercure?topic=ping")
if [ "$hub_status" != "200" ]; then
  fail "mercure returned $hub_status"
fi
ok "mercure /.well-known/mercure → $hub_status"
|
|
|
|
echo "── 3/6 Container statuses ─────────────────────────────"
# List every container whose name matches "pictureframe" as "<name>TAB<status>".
# A failed ssh/docker call must fail the check: an empty listing would
# otherwise look identical to "nothing is wrong".
statuses=$(ssh "$SSH_HOST" 'docker ps -a --filter name=pictureframe --format "{{.Names}}\t{{.Status}}"') \
  || fail "could not list containers on $SSH_HOST (ssh or docker failed)"
[ -n "$statuses" ] || fail "no pictureframe containers found on $SSH_HOST"

# Inspect only the status column. A container is bad when its status does not
# start with "Up" (Exited, Restarting, Created, ...) or when it reports
# "unhealthy". Matching the whole line for "Up|healthy" would accept
# "Up 5 minutes (unhealthy)" and any container whose name contains "up".
bad=$(printf '%s\n' "$statuses" | awk -F'\t' '$2 !~ /^Up/ || $2 ~ /unhealthy/')
if [ -n "$bad" ]; then
  echo "$bad"
  fail "one or more containers are not Up/healthy"
fi
ok "all pictureframe-* containers Up"
|
|
|
|
echo "── 4/6 No recent CRITICAL/502 in logs ─────────────────"
# On the remote host, scan the last 5 minutes of each container's log
# (stdout and stderr) for error markers and prefix every hit with the
# container name. "No such container" is included so that a container that
# is missing altogether is reported instead of silently producing no output.
hits=$(ssh "$SSH_HOST" '
for c in pictureframe-php-1 pictureframe-worker-1 pictureframe-nginx-1; do
  docker logs --since 5m "$c" 2>&1 | grep -iE "CRITICAL|Fatal|\" 5[0-9][0-9] |Error thrown|No such container" | sed "s|^|$c: |";
done
')
ssh_rc=$?
# ssh itself exits 255 when the connection fails. Treat that as a failed
# check: with no connection there is no log output, which would otherwise be
# indistinguishable from clean logs.
[ "$ssh_rc" -ne 255 ] || fail "ssh to $SSH_HOST failed; could not inspect container logs"
if [ -n "$hits" ]; then
  # Show at most the first 10 matching lines to keep the output readable.
  echo "$hits" | head -10
  fail "found error-level log entries in the last 5 minutes"
fi
ok "no CRITICAL/5xx in last 5 min"
|
|
|
|
echo "── 5/6 /api/devices round-trip (testbot) ──────────────"
# Cookie jar shared by the login and API requests (and reused by check 6);
# removed on exit.
JAR=$(mktemp) || fail "mktemp failed"
trap 'rm -f "$JAR"' EXIT

# Fetch the login form and pull the value of the first _csrf_token input.
# sed is used instead of grep -P so this also works without GNU PCRE grep.
csrf=$(curl -s -c "$JAR" "https://$HOST/login" \
  | sed -n 's/.*name="_csrf_token"[^>]*value="\([^"]*\)".*/\1/p' \
  | head -1)
[ -n "$csrf" ] || fail "could not extract CSRF token from /login"

# Submit the login form and follow redirects. --data-urlencode already makes
# the first request a POST; an explicit -X POST would force POST onto the
# redirected request as well, so it is intentionally omitted.
login_code=$(curl -s -b "$JAR" -c "$JAR" -L "https://$HOST/login" \
  --data-urlencode "_username=$TESTBOT_EMAIL" \
  --data-urlencode "_password=$TESTBOT_PASS" \
  --data-urlencode "_csrf_token=$csrf" \
  -o /dev/null -w "%{http_code}")
[ "$login_code" = "200" ] || fail "login returned $login_code"

# The authenticated API call is the real proof that the session works: the
# response must contain at least one "id" field.
devices_json=$(curl -s -b "$JAR" "https://$HOST/api/devices")
grep -q '"id":' <<<"$devices_json" || fail "/api/devices did not return a JSON array with id fields: $devices_json"
ok "testbot login + /api/devices → device list with ids"
|
|
|
|
echo "── 6/6 Mercure publish→subscribe round-trip ───────────"
# Use the first numeric id from the /api/devices response fetched in check 5.
device_id=$(echo "$devices_json" | grep -oE '"id":[0-9]+' | head -1 | grep -oE '[0-9]+')
[ -n "$device_id" ] || fail "no device id available for round-trip test"

# Subscriber output goes to a temp file. The EXIT handler replaces the one
# installed in check 5: it also stops a still-running subscriber and removes
# the output file, on both the success and the failure path.
out=$(mktemp) || fail "mktemp failed"
sub_pid=""
cleanup() {
  if [ -n "$sub_pid" ]; then
    kill "$sub_pid" 2>/dev/null
  fi
  rm -f "$JAR" "$out"
}
trap cleanup EXIT

# Topic is https://$HOST/devices/<id>, with ':' and '/' percent-encoded for
# use as a query parameter.
topic_url="https://$HOST/.well-known/mercure?topic=$(printf 'https://%s/devices/%s' "$HOST" "$device_id" | sed 's|:|%3A|g; s|/|%2F|g')"

# Background subscriber: -N disables buffering so events reach the file as
# they arrive; --max-time bounds the stream to 8 seconds without needing the
# external `timeout` command.
curl -sN --max-time 8 "$topic_url" > "$out" 2>&1 &
sub_pid=$!
# Give the subscription time to be established before publishing.
sleep 2

# NOTE(review): this request sets the device name to "smoke-check", so it is
# presumably only a no-op if the device already has that name — confirm this
# is acceptable for the first device of the testbot account.
patch_code=$(curl -s -b "$JAR" -X PATCH "https://$HOST/api/devices/$device_id" \
  -H "Content-Type: application/json" -d '{"name":"smoke-check"}' \
  -o /dev/null -w "%{http_code}")
[ "$patch_code" = "200" ] || fail "PATCH returned $patch_code"

# Poll once per second instead of always blocking until the subscriber times
# out: stop as soon as the event shows up or the subscriber has exited.
for (( waited = 0; waited < 8; waited++ )); do
  grep -q '"smoke-check"' "$out" && break
  kill -0 "$sub_pid" 2>/dev/null || break
  sleep 1
done
kill "$sub_pid" 2>/dev/null
wait "$sub_pid" 2>/dev/null || true

grep -q '"smoke-check"' "$out" || fail "publish did not arrive on the subscriber within 8s: $(cat "$out")"
ok "publish → subscribe round-trip OK"
|
|
|
|
# Reached only when no check called fail: blank line, then a green summary.
printf '\n'
printf '%b\n' "${GREEN}all smoke checks passed${RESET}"
|