#!/bin/bash

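# EC2 LLM Gateway Deployment Script
# Download from: https://llmgateway.deep-cells.com/downloads/deployment/ec2-deploy.sh
# Usage: ./ec2-deploy.sh [--prod <domain_name>] | --reset-network | --check-dns <domain_name> | --help
#        ./ec2-deploy.sh                              # Development mode (default)
#        ./ec2-deploy.sh --prod your-domain.com       # Production mode with SSL
#        ./ec2-deploy.sh --reset-network              # Reset Docker network state
#        ./ec2-deploy.sh --check-dns your-domain.com  # Check DNS configuration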

# Abort the script as soon as an unchecked command fails.
set -o errexit

# ANSI colour escape sequences, stored unexpanded; the logging helpers
# expand them when they print.
RED='\033[0;31m'      # error messages
GREEN='\033[0;32m'    # progress messages
YELLOW='\033[1;33m'   # warnings
BLUE='\033[0;34m'     # informational notes
NC='\033[0m'          # reset to the terminal default (No Color)

# Print a timestamped progress message in green on stdout.
#   $1 - message text (backslash escapes in it are expanded)
log() {
    local message=$1
    printf '%b\n' "${GREEN}[$(date +'%Y-%m-%d %H:%M:%S')] ${message}${NC}"
}

# Print a timestamped "WARNING:" message in yellow on stdout.
#   $1 - message text (backslash escapes in it are expanded)
warn() {
    local message=$1
    printf '%b\n' "${YELLOW}[$(date +'%Y-%m-%d %H:%M:%S')] WARNING: ${message}${NC}"
}

# Print a timestamped "ERROR:" message in red on stdout, then terminate the
# script with exit status 1. This helper never returns to its caller.
#   $1 - message text (backslash escapes in it are expanded)
error() {
    local message=$1
    printf '%b\n' "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: ${message}${NC}"
    exit 1
}

# Print a timestamped "INFO:" message in blue on stdout.
#   $1 - message text (backslash escapes in it are expanded)
info() {
    local message=$1
    printf '%b\n' "${BLUE}[$(date +'%Y-%m-%d %H:%M:%S')] INFO: ${message}${NC}"
}

# URL encode function for passwords with special characters
#
# Percent-encodes every byte of the input except the unreserved set
# (A-Z a-z 0-9 - _ . ~). The input is processed byte by byte via od, so
# multi-byte (e.g. UTF-8) characters come out as one %xx per byte whatever
# the current locale is. Hex digits are lowercase.
#
# Arguments: $1 - string to encode (may be empty)
# Outputs:   the encoded string followed by a newline, on stdout
url_encode() {
    local string="${1}"
    local encoded=""
    local hex literal
    local IFS=$' \t\n'   # od separates byte tokens with blanks and newlines

    # od prints one two-digit hex token per input byte; -v disables the
    # "*" abbreviation it would otherwise use for repeated output lines.
    for hex in $(printf '%s' "${string}" | od -An -v -tx1); do
        case "${hex}" in
            2d|2e|5f|7e|3[0-9]|4[1-9a-f]|5[0-9a]|6[1-9a-f]|7[0-9a])
                # Unreserved byte: turn the hex pair back into its character.
                printf -v literal "\\x${hex}"
                encoded+="${literal}"
                ;;
            *)
                encoded+="%${hex}"
                ;;
        esac
    done

    # printf rather than echo: a result such as "-n" or "-e" would be
    # consumed by echo as an option instead of being printed.
    printf '%s\n' "${encoded}"
}

# Generate Redis-safe password (hexadecimal characters only)
#
# Asks openssl for 16 random bytes rendered as hex, i.e. a 32-character
# string, so the value can be embedded in a Redis connection URL without
# any escaping.
# Outputs: whatever `openssl rand -hex` prints, on stdout
generate_redis_password() {
    local -r random_bytes=16
    openssl rand -hex "${random_bytes}"
}

# Parse command line arguments
#
# Sets two globals read by the rest of the script:
#   PRODUCTION_MODE - "true" when --prod/--production was given, else "false"
#   DOMAIN_NAME     - the first bare argument; forced to "localhost" when
#                     production mode is off
#
# --help, --reset-network and --check-dns are one-shot commands that exit
# from inside the loop. Any other argument starting with "-" is rejected
# instead of being mistaken for the domain name.
PRODUCTION_MODE=false
DOMAIN_NAME=""

while [[ $# -gt 0 ]]; do
    case "$1" in
        --prod|--production)
            PRODUCTION_MODE=true
            shift
            ;;
        --help|-h)
            echo "LLM Gateway EC2 Deployment Script"
            echo ""
            echo "Usage:"
            echo "  $0                           # Development mode (default)"
            echo "  $0 --prod <domain_name>     # Production mode with SSL"
            echo "  $0 --reset-network          # Reset Docker network state"
            echo "  $0 --check-dns <domain>     # Check DNS configuration"
            echo ""
            echo "Examples:"
            echo "  $0                          # Deploy in development mode"
            echo "  $0 --prod example.com       # Deploy in production with SSL"
            echo "  $0 --reset-network          # Fix Docker network issues"
            echo "  $0 --check-dns example.com # Verify DNS setup"
            echo ""
            echo "Development mode:"
            echo "  - No domain name required"
            echo "  - No SSL/Nginx configuration"
            echo "  - Access via http://localhost:3000"
            echo ""
            echo "Production mode:"
            echo "  - Domain name required"
            echo "  - SSL certificate setup"
            echo "  - Nginx reverse proxy"
            echo "  - Access via https://your-domain.com"
            echo ""
            echo "Network reset:"
            echo "  - Stops all containers"
            echo "  - Cleans up networks and volumes"
            echo "  - Restarts Docker daemon"
            echo "  - Use when encountering network errors"
            exit 0
            ;;
        --reset-network)
            # NOTE(review): this parser runs at file level, before
            # reset_docker_network is defined further down the file, so the
            # name cannot be resolved here. Report that explicitly instead of
            # dying with bash's "command not found"; the proper fix is to
            # dispatch this option after all function definitions.
            if ! declare -F reset_docker_network > /dev/null; then
                error "--reset-network cannot run: reset_docker_network is not defined before argument parsing"
            fi
            echo "Resetting Docker network state..."
            reset_docker_network
            log "Network reset completed. You can now run the deployment again."
            exit 0
            ;;
        --check-dns)
            if [[ -z "${2:-}" ]]; then
                error "Usage: $0 --check-dns <domain_name>"
            fi
            # NOTE(review): same ordering problem as --reset-network above;
            # check_dns is defined later in the file.
            if ! declare -F check_dns > /dev/null; then
                error "--check-dns cannot run: check_dns is not defined before argument parsing"
            fi
            if check_dns "$2"; then
                log "DNS configuration is correct for $2"
            else
                error "DNS configuration issues found for $2"
            fi
            exit 0
            ;;
        -*)
            # An unrecognised option must not be swallowed as the domain name.
            error "Unknown option: $1 (see $0 --help)"
            ;;
        *)
            if [[ -z "$DOMAIN_NAME" ]]; then
                DOMAIN_NAME="$1"
            else
                error "Unknown parameter: $1"
            fi
            shift
            ;;
    esac
done

# Validate arguments
if [[ "$PRODUCTION_MODE" == true && -z "$DOMAIN_NAME" ]]; then
    error "Production mode requires a domain name. Usage: $0 --prod <domain_name>"
fi

# Set default values
if [[ "$PRODUCTION_MODE" == false ]]; then
    if [[ -n "$DOMAIN_NAME" ]]; then
        # Tell the user their argument has no effect rather than dropping it silently.
        warn "Domain name '$DOMAIN_NAME' is ignored in development mode (use --prod to enable it)"
    fi
    DOMAIN_NAME="localhost"
fi

# Configuration: fixed deployment settings
PROJECT_NAME='llm-gateway'                                       # project identifier
DEPLOY_DIR='/opt/llm-gateway'                                    # directory the deployment lives in
DOWNLOAD_BASE='https://llmgateway.deep-cells.com/v1/downloads'   # base URL for downloaded config files
DOCKER_IMAGE='deepcells/llm-gateway:latest'                      # image pulled before services start

# Refuse to run as root
#
# Returns 0 for any non-root effective UID; for UID 0 it hands over to
# error, which prints the message and exits the script.
check_root() {
    [[ $EUID -ne 0 ]] && return 0
    error "This script should not be run as root for security reasons"
}

# Check Docker installation and permissions
#
# Returns: 0 when the docker CLI exists and either `docker info` works or the
#            user is already in the "docker" group (in which case the socket
#            permissions are loosened for this session);
#          1 when docker is missing or the user is not in the "docker" group,
#            which the caller treats as "run install_docker".
check_docker() {
    if ! command -v docker &> /dev/null; then
        warn "Docker not found, will install it"
        return 1
    fi

    if ! docker info &> /dev/null; then
        warn "Docker daemon not accessible, checking permissions..."

        # Compare group names exactly: a substring match would also accept
        # groups such as "dockerroot". The lookup is by user name so that a
        # membership added earlier in this login session is still seen.
        local user_name="${USER:-$(id -un)}"
        local group in_docker_group=false
        for group in $(id -nG "$user_name" 2>/dev/null); do
            if [[ "$group" == "docker" ]]; then
                in_docker_group=true
                break
            fi
        done

        if [[ "$in_docker_group" == false ]]; then
            warn "User not in docker group, will fix permissions"
            return 1
        fi

        warn "User in docker group but still can't access Docker"
        warn "This usually happens after adding user to group"
        warn "Will temporarily fix socket permissions"
        # NOTE(review): mode 666 lets every local user talk to the Docker
        # daemon; kept because the rest of the script relies on it.
        sudo chmod 666 /var/run/docker.sock
    fi

    return 0
}

# Reset Docker network state
#
# Stops and removes every container on the host (not only this project's),
# prunes unused networks and volumes, restarts the Docker service and then
# pauses for five seconds. Failures of the individual docker clean-up
# commands are deliberately ignored.
reset_docker_network() {
    log "Resetting Docker network state..."

    # Anything currently running is stopped first
    if [[ -n "$(docker ps -q)" ]]; then
        warn "Stopping all running containers..."
        docker stop $(docker ps -q) 2>/dev/null || true
    fi

    # Then every container, running or not, is deleted
    if [[ -n "$(docker ps -aq)" ]]; then
        warn "Removing all containers..."
        docker rm $(docker ps -aq) 2>/dev/null || true
    fi

    # Prune networks first, then unused volumes (be careful with the latter)
    warn "Cleaning up Docker networks..."
    local resource
    for resource in network volume; do
        docker "${resource}" prune -f 2>/dev/null || true
    done

    # Bounce the daemon and give it a moment to come back
    warn "Restarting Docker daemon..."
    sudo systemctl restart docker
    sleep 5

    log "Docker network reset completed"
}

# Install Docker and Docker Compose
#
# Installs prerequisite packages, registers Docker's apt repository for
# Ubuntu, installs the engine with the buildx and compose plugins, adds the
# current user to the "docker" group, starts/enables the service and makes
# the socket world-accessible for the current session.
#
# Safe to run again on a host where it already ran: the keyring file is
# overwritten without prompting.
install_docker() {
    log "Installing Docker and Docker Compose..."

    local -r repo_url="https://download.docker.com/linux/ubuntu"
    local -r keyring="/etc/apt/keyrings/docker.gpg"
    local -a prerequisites=(
        apt-transport-https
        ca-certificates
        curl
        gnupg
        lsb-release
        git
        htop
        unzip
        dnsutils
    )

    # Update system
    sudo apt-get update

    # Install prerequisites
    sudo apt-get install -y "${prerequisites[@]}"

    # Add Docker's official GPG key. It is downloaded to a temp file first so
    # that a failed download aborts the script instead of feeding empty input
    # to gpg; --batch --yes lets gpg replace a keyring left by an earlier run.
    sudo mkdir -p /etc/apt/keyrings
    local key_file
    key_file=$(mktemp)
    curl -fsSL "${repo_url}/gpg" -o "${key_file}"
    sudo gpg --batch --yes --dearmor -o "${keyring}" "${key_file}"
    rm -f -- "${key_file}"
    sudo chmod a+r "${keyring}"

    # Set up the repository
    local arch codename
    arch=$(dpkg --print-architecture)
    codename=$(lsb_release -cs)
    echo "deb [arch=${arch} signed-by=${keyring}] ${repo_url} ${codename} stable" \
        | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null

    # Install Docker Engine
    sudo apt-get update
    sudo apt-get install -y docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin

    # Add current user to docker group
    sudo usermod -aG docker "${USER:-$(id -un)}"

    # Start and enable Docker
    sudo systemctl start docker
    sudo systemctl enable docker

    # The new group membership only applies to new login sessions, so open
    # the socket for the current one.
    # NOTE(review): mode 666 lets every local user talk to the Docker daemon.
    sudo chmod 666 /var/run/docker.sock

    log "Docker installation completed"
    warn "Docker group membership requires a new shell session to take effect"
    warn "Temporarily setting socket permissions for this session"
}

# Create deployment directory and download files
#
# Creates $DEPLOY_DIR (owned by the current user), changes into it and
# creates the data/log sub-directories. In production mode it also creates
# the ssl, certbot and nginx directories and downloads the two Nginx config
# files from $DOWNLOAD_BASE.
#
# Side effect: the working directory of the script becomes $DEPLOY_DIR.
setup_deployment_dir() {
    log "Setting up deployment directory..."

    sudo mkdir -p "$DEPLOY_DIR"
    sudo chown "$USER:$USER" "$DEPLOY_DIR"

    cd "$DEPLOY_DIR"

    # Download configuration files
    # log "Downloading configuration files..."
    # curl -O $DOWNLOAD_BASE/docker-compose/docker-compose.yml

    # Directories needed in both modes
    mkdir -p data/mysql data/redis data/llmgateway logs

    if [[ "$PRODUCTION_MODE" == true ]]; then
        mkdir -p ssl certbot nginx/conf.d

        # Download nginx configuration for production.
        # -f makes curl fail on an HTTP error instead of saving the error
        # page as the config file; -L follows redirects.
        log "Downloading Nginx configuration for production mode..."
        curl -fsSL -o nginx/nginx.conf "$DOWNLOAD_BASE/configs/nginx.conf"
        curl -fsSL -o nginx/conf.d/default.conf "$DOWNLOAD_BASE/configs/nginx-default.conf"
        echo "test" > ./certbot/test-file
    else
        info "Development mode: Skipping Nginx configuration"
    fi

    log "Deployment directory setup completed"
}

# Print the value stored for key $1 in env file $2 (first "KEY=" line wins).
# Returns non-zero when the key is not present.
_env_file_value() {
    local key=$1 file=$2 line
    line=$(grep -m1 -- "^${key}=" "$file") || return 1
    printf '%s\n' "${line#*=}"
}

# Generate environment file
#
# Writes ./.env (mode 600) containing the MySQL, Redis and session secrets
# plus the connection strings built from them, then prints the passwords.
#
# When ./.env already exists its secrets are reused instead of regenerated,
# because a re-run with fresh passwords would no longer match databases that
# were initialised with the old ones. The previous file is kept as .env.bak
# so manually added lines (e.g. API keys) are not lost.
#
# Globals written: MYSQL_ROOT_PASSWORD, MYSQL_PASSWORD, SESSION_SECRET,
#                  REDIS_PASSWORD, REDIS_PASSWORD_ENCODED
# Globals read:    DOMAIN_NAME
generate_env_file() {
    log "Generating environment configuration..."

    local env_file=".env"

    MYSQL_ROOT_PASSWORD=""
    MYSQL_PASSWORD=""
    SESSION_SECRET=""
    REDIS_PASSWORD=""

    if [[ -f "$env_file" ]]; then
        MYSQL_ROOT_PASSWORD=$(_env_file_value MYSQL_ROOT_PASSWORD "$env_file") || MYSQL_ROOT_PASSWORD=""
        MYSQL_PASSWORD=$(_env_file_value MYSQL_PASSWORD "$env_file") || MYSQL_PASSWORD=""
        SESSION_SECRET=$(_env_file_value SESSION_SECRET "$env_file") || SESSION_SECRET=""
        REDIS_PASSWORD=$(_env_file_value REDIS_PASSWORD "$env_file") || REDIS_PASSWORD=""
        cp -p "$env_file" "${env_file}.bak"
        chmod 600 "${env_file}.bak"
        warn "Existing .env found: reusing its credentials (previous file saved as .env.bak)"
    fi

    # Generate whatever is still missing.
    # For MySQL passwords, base64 is fine as they're used in connection strings
    [[ -n "$MYSQL_ROOT_PASSWORD" ]] || MYSQL_ROOT_PASSWORD=$(openssl rand -base64 32)
    [[ -n "$MYSQL_PASSWORD" ]] || MYSQL_PASSWORD=$(openssl rand -base64 32)
    [[ -n "$SESSION_SECRET" ]] || SESSION_SECRET=$(openssl rand -base64 32)
    # For Redis password, use our safe generator
    [[ -n "$REDIS_PASSWORD" ]] || REDIS_PASSWORD=$(generate_redis_password)

    # Identical to the raw value for generated (hex) passwords; only differs
    # when a reused password contains characters that need escaping in a URL.
    REDIS_PASSWORD_ENCODED=$(url_encode "$REDIS_PASSWORD")

    # Write under a restrictive umask so a newly created file is never
    # readable by other users, not even briefly.
    (
        umask 077
        {
            echo "# LLM Gateway Environment Configuration"
            echo "# Auto-generated on $(date)"
            echo ""
            echo "# MySQL Configuration"
            echo "MYSQL_ROOT_PASSWORD=$MYSQL_ROOT_PASSWORD"
            echo "MYSQL_USER=llmgateway"
            echo "MYSQL_PASSWORD=$MYSQL_PASSWORD"
            echo "MYSQL_DATABASE=llmgateway"
            echo ""
            echo "# Redis Configuration"
            echo "REDIS_PASSWORD=$REDIS_PASSWORD"
            echo ""
            echo "# Application Configuration"
            echo "SESSION_SECRET=$SESSION_SECRET"
            echo "DOMAIN_NAME=$DOMAIN_NAME"
            echo "GIN_MODE=release"
            echo "LOG_DIR=/app/logs"
            echo ""
            echo "# Database Connection"
            echo "SQL_DSN=llmgateway:$MYSQL_PASSWORD@tcp(mysql:3306)/llmgateway"
            echo ""
            echo "# Redis Connection (using hex password for URL safety)"
            echo "REDIS_CONN_STRING=redis://:$REDIS_PASSWORD_ENCODED@redis:6379/0"
            echo ""
            echo "# Optional: External API Keys (add your keys here)"
            echo "# OPENAI_API_KEY=your_openai_key_here"
            echo "# CLAUDE_API_KEY=your_claude_key_here"
            echo "# GEMINI_API_KEY=your_gemini_key_here"
        } > "$env_file"
    )

    # Secure the env file (also covers a pre-existing file with looser permissions)
    chmod 600 "$env_file"

    log "Environment file generated and secured"
    warn "Please save these credentials securely:"
    echo "MySQL Root Password: $MYSQL_ROOT_PASSWORD"
    echo "MySQL User Password: $MYSQL_PASSWORD"
    echo "Redis Password: $REDIS_PASSWORD (hex format for compatibility)"

    if [[ "$REDIS_PASSWORD_ENCODED" != "$REDIS_PASSWORD" ]]; then
        warn "Redis password contains special characters"
        warn "REDIS_CONN_STRING was written with the URL-encoded version:"
        warn "REDIS_CONN_STRING=redis://:$REDIS_PASSWORD_ENCODED@redis:6379/0"
    fi
}

# Check DNS configuration
#
# Compares the first IPv4 address the domain resolves to with this server's
# public IP as reported by ifconfig.me (falling back to ipinfo.io/ip).
#
# Arguments: $1 - domain name to check
# Returns:   0 when the addresses match, or when they differ and the user
#              answers "y" to the prompt;
#            1 when the domain does not resolve, or the user declines.
# This function never exits the script itself; callers decide what to do.
check_dns() {
    local domain=$1
    log "Checking DNS configuration for $domain..."

    # Get server's public IP (time-limited so a dead endpoint cannot hang us)
    local server_ip
    server_ip=$(curl -s --max-time 10 ifconfig.me 2>/dev/null \
        || curl -s --max-time 10 ipinfo.io/ip 2>/dev/null \
        || echo "unknown")
    [[ -n "$server_ip" ]] || server_ip="unknown"

    # Check if domain resolves to this server. `dig +short` may print CNAME
    # targets before the address, so keep only lines that look like IPv4.
    local domain_ip
    domain_ip=$(dig +short A "$domain" 2>/dev/null \
        | grep -E '^[0-9]+(\.[0-9]+){3}$' \
        | head -n1) || true

    if [[ -z "$domain_ip" ]]; then
        # Printed directly rather than through error(): that helper exits the
        # script, which would skip the guidance below and the return code.
        echo -e "${RED}[$(date +'%Y-%m-%d %H:%M:%S')] ERROR: DNS Error: Domain $domain does not resolve to any IP address${NC}"
        echo ""
        echo "Please configure DNS records:"
        echo "  A record: $domain → $server_ip"
        echo "  A record: www.$domain → $server_ip"
        echo ""
        echo "Wait 5-10 minutes for DNS propagation after creating records."
        echo "You can check DNS status with: dig $domain"
        return 1
    fi

    if [[ "$domain_ip" != "$server_ip" ]]; then
        warn "DNS Warning: Domain $domain resolves to $domain_ip but server IP is $server_ip"
        echo ""
        echo "Expected DNS configuration:"
        echo "  A record: $domain → $server_ip"
        echo "  A record: www.$domain → $server_ip"
        echo ""
        # No input available (EOF) counts as "no".
        read -p "Continue anyway? (y/N): " -n 1 -r || REPLY=""
        echo
        if [[ ! $REPLY =~ ^[Yy]$ ]]; then
            return 1
        fi
    else
        log "DNS configuration is correct: $domain → $server_ip"
    fi

    return 0
}

# Setup SSL with Let's Encrypt
#
# Verifies DNS, installs certbot through snap, requests a certificate for
# $DOMAIN_NAME and www.$DOMAIN_NAME using the webroot method, copies the
# certificate files into $DEPLOY_DIR/ssl and registers a daily renewal job in
# root's crontab.
#
# Returns: 0 when a certificate was obtained and installed,
#          1 when certbot failed (the caller continues without SSL).
# Exits the script (via error) when the DNS check fails.
#
# NOTE(review): the webroot challenge needs a web server answering on port 80
# from /var/www/certbot; this file does not show one being started before
# this function runs - confirm against the compose/nginx configuration.
setup_ssl() {
    log "Setting up SSL certificates with Let's Encrypt..."

    # Check DNS first
    if ! check_dns "$DOMAIN_NAME"; then
        error "Cannot proceed with SSL setup due to DNS issues"
    fi

    # Install certbot
    sudo apt-get install -y snapd
    sudo snap install core
    sudo snap refresh core
    sudo snap install --classic certbot
    sudo ln -sf /snap/bin/certbot /usr/bin/certbot

    # Create webroot directory for challenges
    sudo mkdir -p /var/www/certbot
    sudo chown -R www-data:www-data /var/www/certbot

    local live_dir="/etc/letsencrypt/live/$DOMAIN_NAME"
    local ssl_dir="$DEPLOY_DIR/ssl"

    # Generate certificates
    log "Requesting SSL certificate for $DOMAIN_NAME..."
    if ! sudo certbot certonly \
        --webroot \
        --webroot-path=/var/www/certbot \
        --email "admin@$DOMAIN_NAME" \
        --agree-tos \
        --no-eff-email \
        --domains "$DOMAIN_NAME,www.$DOMAIN_NAME"; then
        warn "SSL certificate generation failed"
        warn "The application will run without SSL (HTTP only)"
        return 1
    fi

    # Copy certificates to project directory
    sudo cp "$live_dir/fullchain.pem" "$live_dir/privkey.pem" "$ssl_dir/"
    sudo chown "$USER:$USER" "$ssl_dir"/*.pem

    # Setup automatic renewal.
    # The deploy hook runs only after a successful renewal: it refreshes the
    # copies nginx actually reads (the files in ssl/ are copies, not links)
    # and then restarts nginx.
    local deploy_hook="cp -L $live_dir/fullchain.pem $live_dir/privkey.pem $ssl_dir/ && cd $DEPLOY_DIR && docker compose --profile production restart nginx"
    local renew_job="0 12 * * * /usr/bin/certbot renew --quiet --deploy-hook \"$deploy_hook\""

    # Merge into root's crontab rather than replacing it: keep every existing
    # entry except a renewal job left by an earlier run of this script.
    {
        sudo crontab -l 2>/dev/null | grep -vF "/usr/bin/certbot renew" || true
        echo "$renew_job"
    } | sudo crontab -

    log "SSL certificates configured successfully"
    return 0
}

# Configure firewall
#
# Sets UFW to deny incoming / allow outgoing, allows SSH plus the ports the
# selected mode needs (80 and 443 in production, 3000 in development), and
# only then enables the firewall.
#
# The rules are added before `ufw enable` so that SSH is already permitted at
# the moment the default-deny policy becomes active; enabling first would
# leave the host unreachable if the script stopped part-way through.
setup_firewall() {
    log "Configuring firewall..."

    # Default policies
    sudo ufw default deny incoming
    sudo ufw default allow outgoing

    # Allow SSH
    sudo ufw allow ssh

    if [[ "$PRODUCTION_MODE" == true ]]; then
        # Production mode: Allow HTTP and HTTPS for Nginx
        sudo ufw allow 80/tcp
        sudo ufw allow 443/tcp
        info "Production mode: Opened ports 80 (HTTP) and 443 (HTTPS)"
    else
        # Development mode: Allow direct access to application port
        sudo ufw allow 3000/tcp
        info "Development mode: Opened port 3000 for direct access"
    fi

    # Enable UFW now that every rule is in place
    sudo ufw --force enable

    # Show status
    sudo ufw status verbose

    log "Firewall configured"
}

# Poll a URL with curl until it answers successfully.
#   $1 - URL, $2 - maximum number of attempts, $3 - seconds to wait before each attempt
# Returns 0 as soon as one request succeeds, 1 when every attempt fails.
_wait_for_health() {
    local url=$1 attempts=$2 interval=$3
    local try
    for (( try = 1; try <= attempts; try++ )); do
        sleep "$interval"
        if curl -f --max-time 5 "$url" > /dev/null 2>&1; then
            return 0
        fi
    done
    return 1
}

# Deploy application
#
# From inside $DEPLOY_DIR: makes sure the Docker daemon is reachable, tears
# down any existing compose containers, pulls $DOCKER_IMAGE, starts the
# services (up to three attempts with a Docker network reset in between) and
# waits for the health endpoint on localhost:3000 to respond.
#
# The health endpoint is polled every 10 seconds for up to 2 minutes instead
# of being probed once after a fixed delay, so a slow first start is not
# reported as a failed deployment.
#
# Exits the script (via error) when Docker is unreachable, the services
# cannot be started, or the health check never succeeds.
deploy_application() {
    log "Deploying LLM Gateway application..."

    cd "$DEPLOY_DIR"

    # Ensure Docker is accessible before deployment
    if ! docker info &> /dev/null; then
        warn "Docker still not accessible, attempting to fix permissions..."
        sudo chmod 666 /var/run/docker.sock

        # Test again
        if ! docker info &> /dev/null; then
            error "Unable to access Docker daemon. Please run 'newgrp docker' and try again"
        fi
    fi

    # Clean up any existing containers and networks to avoid conflicts
    log "Cleaning up existing containers and networks..."

    # Stop and remove any existing containers
    if docker compose ps -q 2>/dev/null | grep -q .; then
        warn "Found existing containers, stopping them..."
        docker compose down --remove-orphans 2>/dev/null || true
    fi

    # Clean up dangling networks
    docker network prune -f 2>/dev/null || true

    # Pull latest Docker image
    docker pull "$DOCKER_IMAGE"

    # Build the start command once; only the profile differs between modes.
    local -a compose_up=(docker compose)
    local start_message
    if [[ "$PRODUCTION_MODE" == true ]]; then
        # Production mode: Start with nginx profile
        compose_up+=(--profile production)
        start_message="Starting services in production mode with Nginx..."
    else
        # Development mode: Start basic services only
        start_message="Starting services in development mode..."
    fi
    compose_up+=(up -d)

    # Start services with retry logic
    local max_retries=3
    local attempt
    for (( attempt = 1; attempt <= max_retries; attempt++ )); do
        log "$start_message (attempt $attempt)"
        if "${compose_up[@]}" 2>&1; then
            break
        fi

        if (( attempt == max_retries )); then
            error "Failed to start services after $max_retries attempts"
        fi

        warn "Docker compose failed, attempting network reset..."
        reset_docker_network

        # Re-check Docker permissions after restart
        if ! docker info &> /dev/null; then
            sudo chmod 666 /var/run/docker.sock
        fi

        sleep 5
    done

    # Wait for services to be ready
    log "Waiting for services to start..."
    if ! _wait_for_health "http://localhost:3000/api/status" 12 10; then
        error "Application health check failed"
    fi

    log "Application deployed successfully!"

    if [[ "$PRODUCTION_MODE" == true ]]; then
        if [[ -f "ssl/fullchain.pem" ]]; then
            log "HTTPS is available at: https://$DOMAIN_NAME"
        else
            log "HTTP is available at: http://$DOMAIN_NAME"
            warn "SSL certificates not found. Please configure SSL manually."
        fi
    else
        log "Application is available at: http://localhost:3000"
        log "External access: http://$(curl -s ifconfig.me 2>/dev/null || echo 'YOUR_SERVER_IP'):3000"
    fi
}

# Setup log rotation
#
# Installs /etc/logrotate.d/llm-gateway (through `sudo tee`, which also
# echoes the file to stdout). The rule covers $DEPLOY_DIR/logs/*.log: daily
# rotation, 30 generations kept, compression delayed by one cycle, and a
# restart of the llm-gateway compose service after each rotation.
setup_log_rotation() {
    log "Setting up log rotation..."

    local -r rule_file="/etc/logrotate.d/llm-gateway"
    local -a rule_lines=(
        "$DEPLOY_DIR/logs/*.log {"
        "    daily"
        "    missingok"
        "    rotate 30"
        "    compress"
        "    delaycompress"
        "    notifempty"
        "    create 644 $USER $USER"
        "    postrotate"
        "        docker compose -f $DEPLOY_DIR/docker-compose.yml restart llm-gateway"
        "    endscript"
        "}"
    )

    printf '%s\n' "${rule_lines[@]}" | sudo tee "${rule_file}"

    log "Log rotation configured"
}

# Main deployment function
#
# Announces the selected mode, refuses to run as root, asks for confirmation
# and then runs the deployment steps in order: Docker check/installation,
# deployment directory, environment file, firewall, log rotation, SSL
# (production mode only; a failure there is not fatal) and the application
# itself. Finishes by printing how to reach the application.
#
# Globals read: PRODUCTION_MODE, DOMAIN_NAME, DEPLOY_DIR, DOWNLOAD_BASE, USER
main() {
    if [[ "$PRODUCTION_MODE" == true ]]; then
        log "Starting LLM Gateway deployment in PRODUCTION mode..."
        info "Domain: $DOMAIN_NAME"
        info "SSL and Nginx will be configured"
    else
        log "Starting LLM Gateway deployment in DEVELOPMENT mode..."
        info "Direct access via port 3000"
        info "No SSL or Nginx configuration"
    fi

    # Preflight checks
    check_root

    info "Deploy directory: $DEPLOY_DIR"
    info "Download base: $DOWNLOAD_BASE"

    # No input available (EOF) counts as "no" instead of aborting silently.
    read -p "Continue with deployment? (y/N): " -n 1 -r || REPLY=""
    echo
    if [[ ! $REPLY =~ ^[Yy]$ ]]; then
        error "Deployment cancelled"
    fi

    # Check and install Docker if needed
    if ! check_docker; then
        install_docker
    else
        log "Docker is already installed and accessible"
    fi
    setup_deployment_dir
    generate_env_file
    setup_firewall
    setup_log_rotation

    # Try to setup SSL if in production mode
    local ssl_configured=false
    if [[ "$PRODUCTION_MODE" == true ]] && setup_ssl; then
        ssl_configured=true
    fi

    deploy_application

    # Show appropriate next steps
    if [[ "$PRODUCTION_MODE" == true ]]; then
        if [[ "$ssl_configured" == false ]]; then
            warn "Production mode deployed without SSL"
            warn "To configure SSL later:"
            warn "1. Ensure DNS points to this server: A record $DOMAIN_NAME → $(curl -s ifconfig.me 2>/dev/null)"
            warn "2. Wait for DNS propagation (5-10 minutes)"
            warn "3. Run: sudo certbot certonly --webroot --webroot-path=/var/www/certbot -d $DOMAIN_NAME -d www.$DOMAIN_NAME"
            warn "4. Copy certificates: sudo cp /etc/letsencrypt/live/$DOMAIN_NAME/*.pem $DEPLOY_DIR/ssl/ && sudo chown $USER:$USER $DEPLOY_DIR/ssl/*.pem"
            warn "5. Restart nginx: cd $DEPLOY_DIR && docker compose --profile production restart nginx"
        fi

        log "Production deployment completed!"
        echo "Access your application:"
        if [[ "$ssl_configured" == true ]]; then
            echo "HTTPS: https://$DOMAIN_NAME"
            echo "HTTP:  http://$DOMAIN_NAME (redirects to HTTPS)"
        else
            echo "HTTP:  http://$DOMAIN_NAME"
            echo "HTTPS: https://$DOMAIN_NAME (after SSL setup)"
        fi
    else
        log "Development deployment completed!"
        echo "Access your application:"
        echo "Local:    http://localhost:3000"
        echo "External: http://$(curl -s ifconfig.me 2>/dev/null || echo 'YOUR_SERVER_IP'):3000"
        echo ""
        echo "To upgrade to production mode later:"
        echo "1. Run: $0 --prod your-domain.com"
        echo "2. Or manually configure SSL and Nginx"
        echo ""
        echo "Troubleshooting:"
        echo "- Docker permissions: newgrp docker (or logout and login again)"
        echo "- Network issues: $0 --reset-network"
        echo "- Container management: docker compose logs -f (view logs)"
    fi
}

# Run main function
# By this point the file-level argument parser has shifted away every
# positional parameter (or exited), so "$@" is empty here; main takes its
# settings from the globals PRODUCTION_MODE and DOMAIN_NAME instead.
main "$@"