#!/bin/bash
# dailies-maintain — Postgres maintenance for the dailies app.
#
# Runs VACUUM (ANALYZE) across the whole database, surfaces before/after
# size deltas, flags bloated tables over a threshold, and confirms the
# database is reachable. Weekly cadence via maintain-dailies.timer.
#
# Emits Healthchecks.io heartbeats (/start, success, /fail) when
# HEALTHCHECK_MAINTAIN_URL is set in /etc/sysconfig/dailies-healthchecks.

set -u
source /usr/lib/server-admin/admin-common.sh
[[ -f /etc/sysconfig/dailies-healthchecks ]] && source /etc/sysconfig/dailies-healthchecks

APP_DIR="/opt/vis-daily-tracker"
ENV_FILE="$APP_DIR/.env"
HC_URL="${HEALTHCHECK_MAINTAIN_URL:-}"
BLOAT_THRESHOLD_MB="${BLOAT_THRESHOLD_MB:-100}"

require_root
require_command psql curl
lock_or_exit "dailies-maintain"
enable_error_trap
log_start "dailies-maintain"
healthcheck_ping "$HC_URL" /start

report_failure() {
    local exit_code="${1:-1}"
    healthcheck_ping "$HC_URL" "/fail"
    exit "$exit_code"
}
trap 'report_failure $?' ERR

# Source the app's env for DATABASE_URL — single source of truth.
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a
PG_URL="${DATABASE_URL%%\?*}"
[[ -n "$PG_URL" ]] || { log_error "DATABASE_URL missing from $ENV_FILE"; report_failure 2; }

# Thin wrapper so the URL appears once.
psql_q() {
    psql "$PG_URL" -tAX "$@"
}

# 1. Reachability.
psql_q -c 'SELECT 1;' > /dev/null || { log_error "Postgres unreachable"; report_failure 3; }

# 2. Size before.
SIZE_BEFORE=$(psql_q -c "SELECT pg_size_pretty(pg_database_size(current_database()));")
log_info "Database size before: $SIZE_BEFORE"

# 3. VACUUM ANALYZE. Swallow the "permission denied to vacuum <shared catalog>"
# noise — harmless, emitted for catalog tables the dailies role can't touch.
log_info "Running VACUUM (VERBOSE, ANALYZE)..."
psql "$PG_URL" -c "VACUUM (VERBOSE, ANALYZE)" 2>&1 \
    | grep -v "permission denied to vacuum\|skipping.*only superuser" \
    >> "$ADMIN_LOG" 2>&1 || true
log_ok "VACUUM ANALYZE complete"

# 4. Size after + row stats.
SIZE_AFTER=$(psql_q -c "SELECT pg_size_pretty(pg_database_size(current_database()));")
log_info "Database size after: $SIZE_AFTER"

log_info "Table row counts (top 10 by live rows):"
psql_q -c "
    SELECT relname || ': ' || n_live_tup || ' rows'
      FROM pg_stat_user_tables
     ORDER BY n_live_tup DESC
     LIMIT 10;
" | while IFS= read -r line; do
    [[ -n "$line" ]] && log_info "  $line"
done

# 5. Bloat surfacing: tables over threshold where dead_tup > live_tup.
log_info "Bloat check (dead > live, total > ${BLOAT_THRESHOLD_MB}MB):"
BLOATED=$(psql_q -c "
    SELECT relname
             || ': ' || n_live_tup || ' live / ' || n_dead_tup || ' dead'
             || ' (' || pg_size_pretty(pg_total_relation_size(relid)) || ')'
      FROM pg_stat_user_tables
     WHERE n_dead_tup > n_live_tup
       AND pg_total_relation_size(relid) > ${BLOAT_THRESHOLD_MB} * 1024 * 1024
     ORDER BY n_dead_tup DESC;
")
if [[ -z "$BLOATED" ]]; then
    log_info "  (none)"
else
    while IFS= read -r line; do
        [[ -n "$line" ]] && log_warn "  $line"
    done <<< "$BLOATED"
    log_warn "Consider VACUUM FULL (locks table) or pg_repack on the above."
fi

trap - ERR
healthcheck_ping "$HC_URL"
log_end "dailies-maintain"
