#!/bin/bash
# dailies-status — at-a-glance health for the dailies deploy.
#
# Answers the questions you actually want when something feels off:
#   - Is the service up and responsive?
#   - What version is live and when did it deploy?
#   - Are the timers running?
#   - Is the database reachable?
#   - Has anything erred in the last day?
#
# Business-logic monitoring (ingestion counts, notification stats) belongs
# in the app UI; cert / email pipeline health belongs in their own checks.
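# Run as: dailies-status [--brief|-b]
# Exit status is the number of failed (✗) checks; warnings do not affect it.
# NOTE: --brief/-b is accepted but does not currently change the output.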

# Treat any reference to an unset variable as an error.
set -u

# Fixed install locations.
APP_DIR="/opt/vis-daily-tracker"
ENV_FILE="$APP_DIR/.env"

# Probe target; both parts can be overridden from the environment.
: "${SERVICE_URL:=http://localhost:3001}"
: "${HEALTH_PATH:=/}"

# First positional argument selects brief mode.
case "${1:-}" in
    --brief | -b) BRIEF=true ;;
    *)            BRIEF=false ;;
esac

# ANSI colour escapes, stored unexpanded (printf expands the backslashes).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[0;33m'
BLUE='\033[0;34m'
DIM='\033[90m'
BOLD='\033[1m'
NC='\033[0m'

# Running totals: warn() bumps WARN, fail() bumps ERR.
ERR=0
WARN=0

# Output helpers. Each takes the message as $1 and prints one line to stdout.
# Colour variables are passed through %b so their backslash escapes expand;
# the message itself always goes through %s and is printed verbatim.

# Green tick: a check that passed.
ok() {
    printf '  %b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# Yellow bang: a non-fatal finding; counted in WARN.
warn() {
    printf '  %b!%b %s\n' "$YELLOW" "$NC" "$1"
    (( WARN += 1 ))
}

# Red cross: a failed check; counted in ERR.
fail() {
    printf '  %b✗%b %s\n' "$RED" "$NC" "$1"
    (( ERR += 1 ))
}

# Dimmed detail line; does not touch either counter.
info() {
    printf '  %b%s%b\n' "$DIM" "$1" "$NC"
}

# Blue heading preceded by a blank line.
section() {
    printf '\n%b%s%b\n' "$BLUE" "$1" "$NC"
}

# Service + HTTP.
# Reports whether the dailies systemd unit is active and, if so, whether the
# HTTP endpoint at ${SERVICE_URL}${HEALTH_PATH} answers successfully within 3s.
section "Service"
if systemctl is-active --quiet dailies; then
    # Fields 2-3 of ActiveEnterTimestamp (presumably date and time, dropping
    # the weekday and timezone).
    UPTIME=$(systemctl show dailies -p ActiveEnterTimestamp --value | cut -d' ' -f2-3)
    # numfmt rejects non-numeric values; fall back to "?" in that case.
    MEM=$(systemctl show dailies -p MemoryCurrent --value | numfmt --to=iec 2>/dev/null || echo "?")
    ok "dailies.service running (${MEM:-?}, since ${UPTIME})"
    # curl emits the --write-out value even when the transfer fails
    # (connection refused, timeout, or an HTTP >= 400 under -f), so a non-empty
    # time_total proves nothing. Branch on curl's exit status instead.
    if TIME=$(curl -sf --max-time 3 -o /dev/null -w "%{time_total}" "${SERVICE_URL}${HEALTH_PATH}" 2>/dev/null); then
        # time_total is in seconds with a fractional part; show whole milliseconds.
        MS=$(awk -v t="$TIME" 'BEGIN{printf "%d", t*1000}')
        ok "HTTP responding (${MS}ms at ${HEALTH_PATH})"
    else
        fail "HTTP not responding at ${SERVICE_URL}${HEALTH_PATH}"
    fi
else
    fail "dailies.service not running"
fi

# Deploy state.
# Shows where the `current` symlink points, the recorded deployed version,
# whether the deploy path unit is active, and when the last deploy finished.
section "Deploy"
DEPLOY_LOG="/var/log/server-admin/dailies-deploy.log"

# Falls back to "unknown" when the version file cannot be read.
DEPLOYED="$(cat "$APP_DIR/.deployed-version" 2>/dev/null || echo unknown)"

if [[ ! -L "$APP_DIR/current" ]]; then
    warn "No $APP_DIR/current symlink (pre-artifact layout?)"
else
    # Only the final path component of the resolved link target is shown.
    RESOLVED=$(readlink -f "$APP_DIR/current")
    TARGET=$(basename "$RESOLVED")
    ok "current -> $TARGET (deployed-version: $DEPLOYED)"
fi

if ! systemctl is-active --quiet dailies-deploy.path; then
    fail "Webhook watcher inactive (dailies-deploy.path)"
else
    ok "Webhook watcher active"
fi

# Take the last "[END] Dailies" log line and keep its leading [timestamp],
# with the brackets removed. A missing log or no match leaves LAST empty.
LAST=$(grep -E '\[END\] Dailies' "$DEPLOY_LOG" 2>/dev/null \
    | tail -1 \
    | grep -oE '^\[[0-9 :-]+\]' \
    | tr -d '[]' || true)
if [[ -n "$LAST" ]]; then
    info "Last deploy end: $LAST"
fi

# Timers.
# Every listed systemd timer must be active; an inactive one is a failure.
section "Timers"
TIMER_UNITS=(
    dailies-notifications-tick
    dailies-uploads-sweep
    dailies-submissions-auto-lock
    backup-dailies
    backup-dailies-midday
    maintain-dailies
)
for t in "${TIMER_UNITS[@]}"; do
    if ! systemctl is-active --quiet "${t}.timer"; then
        fail "${t}.timer inactive"
        continue
    fi
    # Row 2 of list-timers is the first data row; print its 2nd and 3rd
    # fields unless the first field is "-". NEXT stays empty otherwise and
    # the "(next: ...)" suffix is then omitted.
    NEXT=$(systemctl list-timers --all --no-pager "${t}.timer" 2>/dev/null \
        | awk 'NR==2 && $1 != "-" {printf "%s %s", $2, $3}')
    ok "${t}.timer${NEXT:+ (next: $NEXT)}"
done

# Database.
# Reads DATABASE_URL from the app's env file and checks that Postgres answers
# a trivial query, then reports the database size.
section "Database"
if [[ -f "$ENV_FILE" ]]; then
    # Source the env file in a subshell so none of its variables leak into
    # this script. ${DATABASE_URL:-} keeps `set -u` from aborting the subshell
    # (with an "unbound variable" message) when the file does not define it.
    # shellcheck disable=SC1090
    PG_URL=$(set -a; source "$ENV_FILE"; set +a; printf '%s\n' "${DATABASE_URL:-}")
    # Drop everything from the first "?" onward (presumably app-specific
    # query parameters that psql would not accept).
    PG_URL=${PG_URL%%\?*}
    # Bound the connection attempt so an unreachable host cannot hang the
    # status command; an existing PGCONNECT_TIMEOUT takes precedence.
    PG_TIMEOUT="${PGCONNECT_TIMEOUT:-5}"
    if [[ -z "$PG_URL" ]]; then
        fail "DATABASE_URL not set in $ENV_FILE"
    elif PGCONNECT_TIMEOUT="$PG_TIMEOUT" psql "$PG_URL" -tAXc 'SELECT 1' >/dev/null 2>&1; then
        SIZE=$(PGCONNECT_TIMEOUT="$PG_TIMEOUT" psql "$PG_URL" -tAXc "SELECT pg_size_pretty(pg_database_size(current_database()));" 2>/dev/null)
        ok "Postgres reachable (${SIZE:-size?})"
    else
        fail "Postgres unreachable"
    fi
else
    fail ".env missing at $ENV_FILE"
fi

# Disk.
# Informational only: usage of the filesystems holding the app and the
# backups. Neither counter is changed here.
section "Disk"
BDIR="/mnt/storage/backups/dailies"

# Print "<use%> used, <avail> free" for the filesystem containing path $1,
# taken from the second line of `df -h` output.
disk_usage() {
    df -h "$1" | awk 'NR==2 {print $5" used, "$4" free"}'
}

info "App:    $(disk_usage "$APP_DIR")"
if [[ -d "$BDIR" ]]; then
    info "Backup: $(disk_usage "$BDIR")"
fi

# Backups.
# Finds the newest dailies_*.tar.gz in $BDIR, warns when it is more than 24h
# old, and fails when the directory holds no backup at all.
section "Backups"
if [[ -d "$BDIR" ]]; then
    # One pass over the glob picks the newest archive and counts them, rather
    # than parsing `ls -t` output. The same regular-file filter is used for
    # both, so the "latest" file is always one of the counted files.
    LATEST=""
    COUNT=0
    for f in "$BDIR"/dailies_*.tar.gz; do
        # Skips symlinks/directories, and the literal pattern that the glob
        # leaves behind when nothing matches.
        [[ -f "$f" && ! -L "$f" ]] || continue
        COUNT=$((COUNT + 1))
        if [[ -z "$LATEST" || "$f" -nt "$LATEST" ]]; then
            LATEST=$f
        fi
    done
    if [[ -n "$LATEST" ]]; then
        # Age in whole hours, from the file's modification time.
        AGE_H=$(( ( $(date +%s) - $(stat -c %Y "$LATEST") ) / 3600 ))
        SZ=$(du -h "$LATEST" | cut -f1)
        if (( AGE_H > 24 )); then
            warn "Latest backup ${AGE_H}h old (${SZ}, ${COUNT} stored)"
        else
            ok "Latest backup ${AGE_H}h ago (${SZ}, ${COUNT} stored)"
        fi
    else
        fail "No backups in $BDIR"
    fi
else
    warn "Backup dir missing: $BDIR"
fi

# Recent errors.
# Counts journal output lines at priority err or worse from the last 24 hours
# for the service and its tick/sweep/auto-lock units.
section "Logs (24h)"
LOG_UNITS=(dailies dailies-notifications-tick dailies-uploads-sweep dailies-submissions-auto-lock)
# Build the "-u <unit>" arguments once; the same list feeds both the query
# and the copy-pasteable hint below.
UNIT_ARGS=()
for u in "${LOG_UNITS[@]}"; do
    UNIT_ARGS+=(-u "$u")
done
# --quiet suppresses journalctl's informational lines (e.g. "-- No entries --",
# journal-range headers, reboot markers), which would otherwise be counted by
# wc -l and reported as errors on a clean journal.
COUNT=$(journalctl "${UNIT_ARGS[@]}" --since "24 hours ago" --priority=err --quiet --no-pager 2>/dev/null | wc -l)
if (( COUNT == 0 )); then
    ok "No errors"
else
    warn "$COUNT error lines — journalctl ${UNIT_ARGS[*]} --priority=err --since '24h ago'"
fi

# Summary.
# One coloured verdict line framed by blank lines, then exit with the number
# of failed checks (ERR). Warnings never affect the exit status.
printf '\n'
if (( ERR > 0 )); then
    printf '%b%b%d error(s), %d warning(s)%b\n' "$RED" "$BOLD" "$ERR" "$WARN" "$NC"
elif (( WARN > 0 )); then
    printf '%b%b%d warning(s)%b\n' "$YELLOW" "$BOLD" "$WARN" "$NC"
else
    printf '%b%bAll checks passed%b\n' "$GREEN" "$BOLD" "$NC"
fi
printf '\n'

# BRIEF makes no difference here: both paths exit with ERR.
if $BRIEF; then
    exit "$ERR"
fi
exit "$ERR"
