#!/bin/bash
# website-deploy — deploy script for tylervigario.com.
#
# Implements the prod-side half of the website's deploy contract
# (https://github.com/TylerVigario/website/blob/main/docs/deployment.md).
# The contract says: build-on-prod from a tagged commit, run
# `npm ci && npm run build`, satisfy `src/lib/required-env.json`
# (currently just `SQLITE_PATH`), probe liveness at `GET /api/health`,
# honour SIGTERM with a 30-second drain. Everything else here (trigger
# file, release dir layout, SQLite snapshot mechanics, pre-swap smoke,
# atomic swap, rollback policy) is prod-side fill-in.
#
# Triggered by /run/website/deploy-trigger (written by the GitHub release
# webhook on release.published). Curate a specific tag by passing it
# as $1. Roll back with --rollback <tag>.
#
# Usage:
#   sudo website-deploy                       # consume trigger file
#   sudo website-deploy v1.4.0                # curate a specific tag
#   sudo website-deploy --skip-health         # bypass the post-restart health check
#   sudo website-deploy --rollback v1.3.0     # roll back to an on-disk release

source /usr/lib/server-admin/admin-common.sh

# ----------------------------------------------------------------------------
# Config
# ----------------------------------------------------------------------------

# --- Filesystem layout: everything hangs off a single application root. ------
APP_DIR="/opt/website"
DATA_DIR="${APP_DIR}/data"                    # pre-deploy SQLite snapshots land here
ENV_FILE="${APP_DIR}/.env"                    # sourced to resolve SQLITE_PATH / PORT
RELEASES_DIR="${APP_DIR}/releases"            # one sub-directory per deployed tag
CURRENT_LINK="${APP_DIR}/current"             # symlink to the live release dir
VERSION_FILE="${APP_DIR}/.deployed-version"   # tag of the release last swapped in
TRIGGER_FILE="/run/website/deploy-trigger"    # tag to deploy when no argument is given

# --- systemd identity --------------------------------------------------------
SERVICE_NAME="website.service"
SERVICE_USER="website"
DROPIN_DIR="/etc/systemd/system/${SERVICE_NAME}.d"
DROPIN_FILE="${DROPIN_DIR}/exec.conf"

# --- Source repository -------------------------------------------------------
REPO_URL="git@github.com:TylerVigario/website.git"
DEPLOY_KEY="/etc/website/deploy_key"

# --- Housekeeping limits -----------------------------------------------------
BUILD_DISK_MB=2000    # handed to require_disk_space before cloning/building
KEEP_RELEASES=5       # handed to gc_releases after a successful deploy

# --- Health check ------------------------------------------------------------
# The path is fixed by the deploy contract: GET /api/health answers 200 when
# SQLite is openable and the schema parses, 503 otherwise.
HEALTH_PATH="/api/health"
HEALTH_TIMEOUT=5      # curl --max-time per probe, in seconds
HEALTH_RETRIES=6      # first argument to retry around the curl probe

# --- Pre-swap smoke ----------------------------------------------------------
# The new bundle is booted against the real env on an ephemeral port. getDb()
# creates its schema with CREATE TABLE IF NOT EXISTS, so no migration step can
# invalidate this check: the smoke only confirms that the new bundle binds and
# exits cleanly against the live SQLite file.
SMOKE_BUDGET=30       # budget handed to smoke_test_release (presumably seconds — confirm)

# Flipped to "true" by --skip-health during argument parsing.
SKIP_HEALTH=false

# ----------------------------------------------------------------------------
# Argument parsing
# ----------------------------------------------------------------------------
TARGET_TAG=""
ROLLBACK_TAG=""

# parse_args ARG...
#
# Fills the globals SKIP_HEALTH, TARGET_TAG and ROLLBACK_TAG from the command
# line. Tags given without the leading "v" are normalised to "vX.Y.Z"; strict
# vX.Y.Z validation happens later, once logging and locking are set up.
# Arguments that match no known form are ignored.
#
# Exits 1 (message on stderr) when --rollback is given without a tag.
parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            --skip-health)
                SKIP_HEALTH=true
                shift
                ;;
            --rollback)
                # `shift 2` with only one argument left fails WITHOUT shifting,
                # which would spin this loop forever — check arity first. An
                # empty value is rejected too: it would otherwise fall through
                # to a forward deploy the operator never asked for.
                if (( $# < 2 )) || [[ -z "$2" ]]; then
                    printf 'website-deploy: --rollback requires a tag (vX.Y.Z)\n' >&2
                    exit 1
                fi
                ROLLBACK_TAG="$2"
                shift 2
                ;;
            --rollback=*)
                ROLLBACK_TAG="${1#--rollback=}"
                if [[ -z "$ROLLBACK_TAG" ]]; then
                    printf 'website-deploy: --rollback requires a tag (vX.Y.Z)\n' >&2
                    exit 1
                fi
                shift
                ;;
            v[0-9]*.[0-9]*.[0-9]*)
                TARGET_TAG="$1"
                shift
                ;;
            [0-9]*.[0-9]*.[0-9]*)
                TARGET_TAG="v$1"
                shift
                ;;
            *)
                shift
                ;;
        esac
    done

    # Accept "--rollback 1.3.0" as shorthand for "--rollback v1.3.0".
    if [[ "$ROLLBACK_TAG" =~ ^[0-9] ]]; then
        ROLLBACK_TAG="v$ROLLBACK_TAG"
    fi
    return 0
}

parse_args "$@"

require_root
# sqlite3 is the only DB tool needed on the prod side: it takes the pre-deploy
# snapshot. There is no prisma/psql here because the website's contract rules
# out a deploy-time migration step (getDb() creates the schema lazily with
# CREATE IF NOT EXISTS).
require_command git jq curl sqlite3 systemctl npm node
lock_or_exit "website-deploy"
enable_error_trap

# Tag recorded by the last swap, or "none" when nothing has been deployed yet.
if [[ -f "$VERSION_FILE" ]]; then
    CURRENT_VERSION=$(cat "$VERSION_FILE")
else
    CURRENT_VERSION="none"
fi

if [[ -n "$ROLLBACK_TAG" ]]; then
    # Rollback mode: only the tag shape is checked here.
    if [[ ! "$ROLLBACK_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        log_error "Invalid rollback tag (expected vX.Y.Z): $ROLLBACK_TAG"
        exit 1
    fi
else
    # Forward mode: an explicit tag argument wins; otherwise consume the
    # trigger file, and quietly stop when neither exists.
    if [[ -z "$TARGET_TAG" ]]; then
        if [[ ! -s "$TRIGGER_FILE" ]]; then
            log_info "No tag argument and no trigger file — nothing to deploy"
            exit 0
        fi
        TARGET_TAG=$(cat "$TRIGGER_FILE")
        rm -f "$TRIGGER_FILE"
    fi

    if [[ ! "$TARGET_TAG" =~ ^v[0-9]+\.[0-9]+\.[0-9]+$ ]]; then
        log_error "Invalid tag (expected vX.Y.Z): $TARGET_TAG"
        exit 1
    fi
    log_start "Website deploy $CURRENT_VERSION -> $TARGET_TAG"
fi

# ----------------------------------------------------------------------------
# App-specific helpers
# ----------------------------------------------------------------------------

# Pre-deploy SQLite snapshot.
#
# Uses `sqlite3 .backup` (SQLite's online backup) rather than a plain file
# copy, because copying the file mid-write yields a corrupt snapshot. The
# source is read-locked only while pages are actually being read, not for the
# whole backup.
#
# Globals read: SERVICE_USER, ENV_FILE, DATA_DIR, ADMIN_LOG
# Returns:
#   0  snapshot written, or the database file does not exist yet (first deploy)
#   1  SQLITE_PATH is not set in $ENV_FILE, or sqlite3 failed
#
# A failed backup removes its partial output, so a broken file can never push
# a good snapshot out of the retention window. The newest 5 snapshots are kept.
backup_database() {
    local -r keep=5
    local sqlite_path
    sqlite_path=$(sudo -u "$SERVICE_USER" bash -c "set -a; source '$ENV_FILE'; set +a; printf '%s' \"\$SQLITE_PATH\"")
    if [[ -z "$sqlite_path" ]]; then
        log_error "SQLITE_PATH not set in $ENV_FILE — cannot snapshot"
        return 1
    fi
    if [[ ! -f "$sqlite_path" ]]; then
        log_info "SQLite file does not exist yet at $sqlite_path — first deploy, skipping snapshot"
        return 0
    fi

    local stamp backup_path
    stamp=$(date +%Y%m%d-%H%M%S)
    backup_path="$DATA_DIR/website.pre-deploy-${stamp}.db"

    log_info "sqlite3 .backup → $backup_path"
    if ! sudo -u "$SERVICE_USER" sqlite3 "$sqlite_path" ".backup '$backup_path'" \
            >> "$ADMIN_LOG" 2>&1; then
        rm -f -- "$backup_path"
        log_error "sqlite3 .backup failed"
        return 1
    fi

    # Prune old snapshots. The timestamp in the name sorts chronologically, so
    # the glob's sorted expansion lists them oldest-first; no `ls` parsing.
    local -a snapshots=("$DATA_DIR/website.pre-deploy-"*.db)
    if [[ -e "${snapshots[0]}" ]] && (( ${#snapshots[@]} > keep )); then
        rm -f -- "${snapshots[@]:0:${#snapshots[@]}-keep}"
    fi
    log_ok "Database snapshot written"
}

# Roll back to a build-on-prod release dir that is already on disk.
#
# Arguments: $1 - release tag; the release is expected at $RELEASES_DIR/<tag>
# Globals read: RELEASES_DIR, CURRENT_LINK, CURRENT_VERSION, VERSION_FILE,
#               DROPIN_FILE, DATA_DIR, SERVICE_NAME, SERVICE_USER, ENV_FILE,
#               SKIP_HEALTH, HEALTH_RETRIES, HEALTH_TIMEOUT, HEALTH_PATH
# Returns: 0 when the service is running on <tag> (and healthy, unless
#          --skip-health); 1 on any failure.
#
# Every critical step is checked explicitly. dispatch_rollback calls this
# function on the left of `||`, which switches errexit off for the whole
# function body, so an unchecked failing `ln`/`mv` would otherwise be ignored
# and the function would report a rollback that never happened.
rollback_to() {
    local tag="$1"
    local release_dir="$RELEASES_DIR/$tag"

    [[ -d "$release_dir" ]] \
        || { log_error "Rollback target not on disk: $release_dir"; return 1; }
    [[ -f "$release_dir/package.json" ]] \
        || { log_error "Rollback target is not a build-on-prod release (no package.json): $tag"; return 1; }
    if [[ -f "$release_dir/.failed-deploy" ]]; then
        log_error "Rollback target is marked failed: $tag"
        return 1
    fi

    log_start "Rollback $CURRENT_VERSION -> $tag"

    # Build the new link beside the old one, then rename over it: readers see
    # either the old or the new target, never a missing link.
    log_info "Symlink swap: current -> $tag"
    if ! ln -sfn "$release_dir" "$CURRENT_LINK.tmp" \
            || ! mv -Tf "$CURRENT_LINK.tmp" "$CURRENT_LINK"; then
        log_error "Symlink swap to $tag failed — $CURRENT_LINK left unchanged"
        return 1
    fi
    # Best-effort SELinux relabel; absent or failing restorecon is not fatal.
    restorecon "$CURRENT_LINK" 2>/dev/null || true
    # The version file is bookkeeping only: a failed write must not stop the
    # rollback now that the link already points at the target.
    printf '%s\n' "$tag" > "$VERSION_FILE" \
        || log_warn "Could not record $tag in $VERSION_FILE"

    write_dropin "$release_dir" "$DROPIN_FILE" "$CURRENT_LINK" "$DATA_DIR" "$CURRENT_LINK/.next/cache" \
        || return 1
    if ! systemctl daemon-reload; then
        log_error "systemctl daemon-reload failed during rollback to $tag"
        return 1
    fi

    if ! restart_service "$SERVICE_NAME"; then
        log_error "Rollback target failed to restart — manual investigation required"
        mark_failed_release "$release_dir" "rollback target failed to restart"
        return 1
    fi

    if [[ "$SKIP_HEALTH" != "true" ]]; then
        local port
        # Resolve PORT the way the service sees it; 3000 when the env file does not set it.
        port=$(sudo -u "$SERVICE_USER" bash -c "set -a; source '$ENV_FILE'; set +a; printf '%s' \"\${PORT:-3000}\"")
        log_info "Health check on http://localhost:${port}${HEALTH_PATH}"
        if retry "$HEALTH_RETRIES" curl -fsS --max-time "$HEALTH_TIMEOUT" \
                "http://localhost:${port}${HEALTH_PATH}" -o /dev/null; then
            log_ok "Service healthy"
        else
            log_error "Health check failed after rollback to $tag"
            mark_failed_release "$release_dir" "rollback target also failed health check"
            return 1
        fi
    fi

    log_ok "Rolled back to $tag"
    log_end "Rollback"
}

# Attempt an automatic rollback after a failed forward deploy.
#
# Arguments: $1 - tag to fall back to; empty when no earlier release exists
# Logs an error when there is nothing to roll back to, or when rollback_to
# itself fails. Never exits the script — the caller picks the exit code.
dispatch_rollback() {
    local fallback_tag="$1"

    if [[ -z "$fallback_tag" ]]; then
        log_error "No previous build-on-prod release to roll back to"
        return
    fi

    if ! rollback_to "$fallback_tag"; then
        log_error "Rollback to $fallback_tag did not converge — service may be in a degraded state"
    fi
}

# ----------------------------------------------------------------------------
# Rollback short-circuit
# ----------------------------------------------------------------------------
# With --rollback the script ends here, exiting with rollback_to's status;
# nothing from the forward-deploy path below runs.
case "$ROLLBACK_TAG" in
    "")
        ;;  # no rollback requested — continue with the forward deploy
    *)
        rollback_to "$ROLLBACK_TAG"
        exit "$?"
        ;;
esac

# ----------------------------------------------------------------------------
# Forward deploy
# ----------------------------------------------------------------------------
# Sequence: disk check → clean clone → env validation → build → snapshot →
# pre-swap smoke → atomic symlink swap → drop-in + restart → health check →
# garbage-collect old releases. Up to and including the smoke test the live
# `current` symlink is never touched; after the swap, a restart or health
# failure triggers an automatic rollback to the previous release.
#
# Exit codes used below: 1 refusing to rebuild the live release, 6 clone,
# 9 required env, 12 build, 13 swap / drop-in, 14 restart or health,
# 15 pre-deploy snapshot, 16 pre-swap smoke.

require_disk_space "$RELEASES_DIR" "$BUILD_DISK_MB"

RELEASE_DIR="$RELEASES_DIR/$TARGET_TAG"
if [[ -d "$RELEASE_DIR" ]]; then
    # Never delete the directory the running service is served from: the
    # rebuild takes a while, and a failed build would leave `current` pointing
    # at a broken tree. The one exception is a release already marked failed
    # (rollback_to treats the .failed-deploy marker as "release failed"):
    # rebuilding in place is then the only way to recover that tag.
    if [[ "$CURRENT_LINK" -ef "$RELEASE_DIR" && ! -f "$RELEASE_DIR/.failed-deploy" ]]; then
        log_error "$TARGET_TAG is the live release ($CURRENT_LINK -> $RELEASE_DIR); refusing to delete it for a rebuild — deploy a different tag or roll back first"
        exit 1
    fi
    log_warn "Release dir exists; removing for clean clone: $RELEASE_DIR"
    rm -rf -- "$RELEASE_DIR"
fi
clone_tag "$REPO_URL" "$TARGET_TAG" "$RELEASE_DIR" "$DEPLOY_KEY" || exit 6
fix_ownership "$RELEASE_DIR" "$SERVICE_USER"

# Required env per contract §Environment variables — currently just
# SQLITE_PATH. Source of truth is src/lib/required-env.json in the release.
validate_required_env "$RELEASE_DIR" "$ENV_FILE" "$SERVICE_USER" || exit 9

# Build. npm run build runs prebuild (check:public-env, build:server),
# next build, postbuild real-boot smoke against hermetic stub env.
build_release "$RELEASE_DIR" "$SERVICE_USER" "$ENV_FILE" || {
    mark_failed_release "$RELEASE_DIR" "build failed (npm ci or npm run build)"
    exit 12
}
fix_selinux "$RELEASE_DIR"

PREV_TAG=$(select_prev_tag "$RELEASES_DIR" "$TARGET_TAG")
[[ -n "$PREV_TAG" ]] && log_info "Rollback target if needed: $PREV_TAG"

# SQLite snapshot. Skipped harmlessly on first deploy when the file doesn't
# exist yet (backup_database returns 0 in that case). A real snapshot failure
# aborts the deploy: swapping in new code without a restorable copy of the
# database defeats the purpose of the snapshot.
backup_database || {
    log_error "Pre-deploy snapshot failed — aborting deploy, current symlink untouched"
    exit 15
}

# Pre-swap smoke. No migration step to bracket — the smoke just confirms the
# new bundle binds and exits cleanly against the real env (real SQLITE_PATH,
# real Sentry transport, real SMTP if configured).
SMOKE_START=$(read_start_command "$RELEASE_DIR") || exit 16
# A failing resolver is treated like "nothing resolved" so the explicit
# message and exit code below are always what the operator sees.
NODE_BIN=$(resolve_node_binary "$RELEASE_DIR") || NODE_BIN=""
[[ -n "$NODE_BIN" ]] || { log_error "No node binary resolvable for smoke"; exit 16; }
SMOKE_RENDERED=$(render_exec_start "$NODE_BIN" "$SMOKE_START")
if ! smoke_test_release "$RELEASE_DIR" "$SERVICE_USER" "$ENV_FILE" "$SMOKE_RENDERED" "$HEALTH_PATH" "$SMOKE_BUDGET"; then
    mark_failed_release "$RELEASE_DIR" "pre-swap smoke test failed"
    log_error "Aborting deploy — current symlink untouched"
    exit 16
fi

# Atomic swap: create the new link under a temporary name, then rename it
# over `current`. The version file is only rewritten once the rename worked.
log_info "Atomic symlink swap: current -> $TARGET_TAG"
if ! ln -sfn "$RELEASE_DIR" "$CURRENT_LINK.tmp" \
        || ! mv -Tf "$CURRENT_LINK.tmp" "$CURRENT_LINK"; then
    log_error "Symlink swap to $TARGET_TAG failed — current symlink untouched"
    exit 13
fi
# Best-effort SELinux relabel; absent or failing restorecon is not fatal.
restorecon "$CURRENT_LINK" 2>/dev/null || true
printf '%s\n' "$TARGET_TAG" > "$VERSION_FILE"

write_dropin "$RELEASE_DIR" "$DROPIN_FILE" "$CURRENT_LINK" "$DATA_DIR" "$CURRENT_LINK/.next/cache" \
    || exit 13
systemctl daemon-reload

if ! restart_service "$SERVICE_NAME"; then
    log_error "Service failed to restart cleanly after swap to $TARGET_TAG"
    mark_failed_release "$RELEASE_DIR" "service failed to restart after swap"
    dispatch_rollback "$PREV_TAG"
    exit 14
fi

# Resolve PORT the way the service sees it; 3000 when the env file does not set it.
PORT=$(sudo -u "$SERVICE_USER" bash -c "set -a; source '$ENV_FILE'; set +a; printf '%s' \"\${PORT:-3000}\"")
if [[ "$SKIP_HEALTH" == "true" ]]; then
    log_info "Skipping health check (--skip-health)"
else
    log_info "Health check on http://localhost:${PORT}${HEALTH_PATH}"
    if retry "$HEALTH_RETRIES" curl -fsS --max-time "$HEALTH_TIMEOUT" \
            "http://localhost:${PORT}${HEALTH_PATH}" -o /dev/null; then
        log_ok "Service healthy"
    else
        log_error "Health check failed"
        mark_failed_release "$RELEASE_DIR" "post-restart health check failed at ${HEALTH_PATH}"
        dispatch_rollback "$PREV_TAG"
        exit 14
    fi
fi

gc_releases "$RELEASES_DIR" "$CURRENT_LINK" "$KEEP_RELEASES"

log_ok "Deployed $TARGET_TAG"
log_end "Website deploy"
