#!/bin/bash
# backup-dailies — durable, self-verifying backup.
#
# Dumps the Postgres database (custom format) + tars the STORAGE_PATH
# tree alongside a MANIFEST describing the archive contents, then
# verifies both the dump and the tarball before acknowledging success.
# Retention is hybrid: drop backups older than RETENTION_DAYS, but
# floor at MIN_KEEP most-recent regardless of age so we never end up
# with zero backups after a long outage.
#
# Emits Healthchecks.io heartbeats (/start, success, /fail) when
# HEALTHCHECK_BACKUP_URL is set in /etc/sysconfig/dailies-healthchecks.

# -u: unset variables are errors. Command failures are routed through the
# ERR trap installed below (report_failure) and whatever enable_error_trap
# from admin-common.sh installs.
set -u
# Shared admin helpers: require_root, require_command, lock_or_exit,
# enable_error_trap, log_*, healthcheck_ping, require_disk_space, unlock.
source /usr/lib/server-admin/admin-common.sh
# Optional config; expected to define HEALTHCHECK_BACKUP_URL (see header).
[[ -f /etc/sysconfig/dailies-healthchecks ]] && source /etc/sysconfig/dailies-healthchecks

APP_DIR="/opt/vis-daily-tracker"
ENV_FILE="$APP_DIR/.env"                   # app env: DATABASE_URL, STORAGE_PATH
BACKUP_DIR="/mnt/storage/backups/dailies"
RETENTION_DAYS="${RETENTION_DAYS:-14}"     # prune backups older than this (days)
MIN_KEEP="${MIN_KEEP:-7}"                  # ...but never below this many newest
DATE=$(date +%Y%m%d_%H%M%S)
BACKUP_FILE="${BACKUP_DIR}/dailies_${DATE}.tar.gz"
HC_URL="${HEALTHCHECK_BACKUP_URL:-}"       # empty when heartbeats not configured

require_root
require_command pg_dump pg_restore tar curl
# Single-instance guard; released via unlock in the EXIT cleanup trap below.
lock_or_exit "backup-dailies"
enable_error_trap
log_start "backup-dailies"
healthcheck_ping "$HC_URL" /start

# Failure path: send the Healthchecks /fail heartbeat, then exit.
# $1 - exit code to propagate (defaults to 1). Wired to ERR below so any
# failing command (including the enable_error_trap path) reports before dying.
report_failure() {
    local rc
    rc="${1:-1}"
    healthcheck_ping "$HC_URL" "/fail"
    exit "$rc"
}
trap 'report_failure $?' ERR

# Read DB URL + STORAGE_PATH from the app env (single source of truth).
# Strip Prisma-style query params (?schema=...) from the URL for pg_dump.
set -a
# shellcheck disable=SC1090
source "$ENV_FILE"
set +a
# ${DATABASE_URL:-} matters: with `set -u`, expanding an unset DATABASE_URL
# directly would kill the shell before the explicit error path below ran,
# skipping both the log message and the /fail heartbeat in report_failure.
PG_URL="${DATABASE_URL:-}"
PG_URL="${PG_URL%%\?*}"
[[ -n "$PG_URL" ]] || { log_error "DATABASE_URL missing from $ENV_FILE"; report_failure 2; }
[[ -n "${STORAGE_PATH:-}" && -d "$STORAGE_PATH" ]] || { log_error "STORAGE_PATH invalid: ${STORAGE_PATH:-<unset>}"; report_failure 3; }
# Redact credentials (user:pass@) before logging the connection URL.
log_info "DB: $(echo "$PG_URL" | sed 's|://[^@]*@|://<creds>@|')"
log_info "Storage: $STORAGE_PATH"
log_info "Retention: ${RETENTION_DAYS}d (floor ${MIN_KEEP})"

# Scratch space for the dump + MANIFEST; removed on every exit path.
TEMP_DIR=$(mktemp -d -t backup-dailies-XXXXXX)
# ${TEMP_DIR:?} aborts rather than letting rm -rf expand to an empty path
# if TEMP_DIR is somehow unset/empty when the trap fires; -- stops option
# parsing for defense in depth.
cleanup() { rm -rf -- "${TEMP_DIR:?}"; unlock; }
trap cleanup EXIT

mkdir -p "$BACKUP_DIR"
# Refuse to start without ~2 GiB free at the destination.
require_disk_space "$BACKUP_DIR" 2048

# 1. Database dump (custom format, per the header; enables selective restore).
log_info "pg_dump..."
pg_dump -Fc "$PG_URL" -f "$TEMP_DIR/dailies.dump"
# Cheap validity check: listing the dump's table of contents fails on a
# truncated or corrupt file, tripping the ERR trap before we archive it.
pg_restore -l "$TEMP_DIR/dailies.dump" > /dev/null
DUMP_BYTES=$(stat -c %s "$TEMP_DIR/dailies.dump")
log_ok "pg_dump: $(numfmt --to=iec "$DUMP_BYTES")"

# 2. Storage inventory for the manifest.
# -printf '\n' emits exactly one line per file, so the count stays correct
# even for filenames that themselves contain newlines (plain `find | wc -l`
# over-counts those).
FILE_COUNT=$(find "$STORAGE_PATH" -type f -printf '\n' | wc -l)
STORAGE_BYTES=$(du -sb "$STORAGE_PATH" 2>/dev/null | awk '{print $1}')
# du errors are deliberately suppressed above; keep the manifest field numeric
# rather than blank when it fails.
STORAGE_BYTES="${STORAGE_BYTES:-0}"
DEPLOYED_TAG="$(cat "$APP_DIR/.deployed-version" 2>/dev/null || echo unknown)"

# 3. MANIFEST so any backup is self-describing for disaster recovery.
cat > "$TEMP_DIR/MANIFEST" <<MANIFEST
backup:       dailies
created_at:   $(date -Iseconds)
hostname:     $(hostname)
deployed_tag: $DEPLOYED_TAG
pg_dump:      $(pg_dump --version)
db_url:       $(echo "$PG_URL" | sed 's|://[^@]*@|://<creds>@|')
dump_bytes:   $DUMP_BYTES
storage_path: $STORAGE_PATH
file_count:   $FILE_COUNT
storage_size: $STORAGE_BYTES
retention:    ${RETENTION_DAYS}d (floor ${MIN_KEEP})
MANIFEST

# 4. Tarball: MANIFEST + dump + storage tree.
# NOTE(review): assumes STORAGE_PATH does not contain BACKUP_DIR, otherwise
# the archive would recursively include prior backups — confirm in the app env.
log_info "Creating tarball..."
tar -czf "$BACKUP_FILE" \
    -C "$TEMP_DIR" MANIFEST dailies.dump \
    -C "$STORAGE_PATH" .

# Root-owned, group-readable only. Group is overridable via BACKUP_GROUP so
# the script ports to hosts without a 'tyler' account; default is unchanged.
chmod 640 "$BACKUP_FILE"
chown root:"${BACKUP_GROUP:-tyler}" "$BACKUP_FILE"

# 5. Verify: tar integrity (full decompress walk) + MANIFEST roundtrip.
log_info "Verifying tarball..."
tar -tzf "$BACKUP_FILE" > /dev/null
tar -xzOf "$BACKUP_FILE" MANIFEST | grep -q "^backup: *dailies$" \
    || { log_error "MANIFEST roundtrip failed"; report_failure 4; }
BACKUP_SIZE=$(du -h "$BACKUP_FILE" | cut -f1)
log_ok "Tarball verified: $BACKUP_FILE ($BACKUP_SIZE)"

# 6. Retention: drop old, but floor at MIN_KEEP most-recent.
log_info "Pruning backups older than ${RETENTION_DAYS}d (keeping ≥${MIN_KEEP})..."
# Newest-first list. Strip only the leading "mtime " field so a path that
# contains spaces survives intact (awk '{print $2}' would truncate it).
mapfile -t ALL < <(find "$BACKUP_DIR" -maxdepth 1 -name 'dailies_*.tar.gz' -type f -printf '%T@ %p\n' | sort -nr | sed 's/^[^ ]* //')
# One timestamp for the whole pass: consistent age math, no per-file fork.
NOW=$(date +%s)
DELETED=0
for ((i=0; i<${#ALL[@]}; i++)); do
    f="${ALL[$i]}"
    # Keep the MIN_KEEP newest regardless of age.
    if (( i < MIN_KEEP )); then continue; fi
    # Age in days (floor). Delete if older than retention window.
    age_days=$(( (NOW - $(stat -c %Y "$f")) / 86400 ))
    if (( age_days > RETENTION_DAYS )); then
        rm -f -- "$f"
        DELETED=$((DELETED + 1))
    fi
done

TOTAL_BACKUPS=$(find "$BACKUP_DIR" -maxdepth 1 -name 'dailies_*.tar.gz' -type f -printf '\n' | wc -l)
TOTAL_SIZE=$(du -sh "$BACKUP_DIR" | cut -f1)
log_info "Pruned $DELETED old; $TOTAL_BACKUPS stored (${TOTAL_SIZE} total)"

# Success: clear the ERR trap first so a failure in the remaining two calls
# cannot fire /fail after the work itself already succeeded, then send the
# bare-URL success heartbeat.
trap - ERR
healthcheck_ping "$HC_URL"
log_end "backup-dailies"
