#!/bin/bash
set -euo pipefail

# =============================================================================
# nginx charset_map utf-8 source charset NULL-deref / segfault reproduction
# =============================================================================
# Vulnerability: charset_map with utf-8 in the first column (source charset)
# creates wrong-format single-byte tables. When the charset filter's
# recode_from_utf8() path casts the 256-byte table to u_char** and dereferences
# table[n >> 8] as a pointer, it reads garbage bytes as a pointer address and
# segfaults (SIGSEGV / signal 11).
#
# Fix (commit 29c23ad846787e8baa1390b2edca479eb63ea8d7): rejects charset_map
# with utf-8 in the first column at config-parse time.
# =============================================================================

# Workspace root: $PRUVA_ROOT when set, otherwise the parent of the directory
# that contains this script. Logs and the runtime manifest live beneath it.
ROOT="${PRUVA_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
LOGS="$ROOT/logs"
REPRO_DIR="$ROOT/repro"
mkdir -p "$LOGS" "$REPRO_DIR"

cd "$ROOT"

# Commit carrying the fix; its first parent is built as the vulnerable revision.
FIXED_COMMIT="29c23ad846787e8baa1390b2edca479eb63ea8d7"

# Random port base to avoid TIME_WAIT conflicts between consecutive runs.
# The expression yields a value in 20100..22099.
PORT_BASE=$((20000 + (RANDOM % 2000) + 100))

# ----------------------------------------------------------------------------
# Read project cache context to locate pre-built nginx binaries
# ----------------------------------------------------------------------------
# The JSON path is handed to Python as an argument instead of being spliced
# into the Python source, so a quote or backslash in $ROOT cannot corrupt the
# snippet. Any failure (no python3, unreadable or malformed JSON) is treated as
# "no cache" and leaves CACHE_DIR empty.
CACHE_DIR=""
if [ -f "$ROOT/project_cache_context.json" ]; then
    CACHE_DIR=$(python3 -c \
        'import json, sys; print(json.load(open(sys.argv[1])).get("project_cache_dir", ""))' \
        "$ROOT/project_cache_context.json" 2>/dev/null || echo "")
fi

# ----------------------------------------------------------------------------
# Build nginx from source if pre-built binaries are not available
# ----------------------------------------------------------------------------
# build_nginx REPO_DIR COMMIT PREFIX
#   Force-checks out COMMIT in REPO_DIR, then configures, compiles and installs
#   nginx into PREFIX. Output of configure/make is appended to
#   $LOGS/build_<basename of PREFIX>.log. Leaves the working directory at $ROOT.
build_nginx() {
    local src="$1" rev="$2" dest="$3"
    local build_log
    build_log="$LOGS/build_$(basename "$dest").log"

    printf '[BUILD] Building nginx at commit %s -> %s\n' "$rev" "$dest"
    git -C "$src" checkout -f "$rev" >/dev/null 2>&1
    cd "$src"

    # Drop anything left from a previous build; `make clean` is allowed to fail
    # (its error output is discarded) and the generated files are removed anyway.
    make clean 2>/dev/null || true
    rm -rf objs Makefile

    ./auto/configure --prefix="$dest" \
        --without-http_rewrite_module --without-http_gzip_module \
        --with-cc-opt='-g -O0' >>"$build_log" 2>&1
    make -j"$(nproc)" >>"$build_log" 2>&1
    make install >>"$build_log" 2>&1

    cd "$ROOT"
}

# Paths of the two nginx binaries under test; filled in from the project cache
# when both pre-built binaries exist there, otherwise by building from source.
VULN_NGINX=""
FIXED_NGINX=""

if [ -n "$CACHE_DIR" ] && [ -x "$CACHE_DIR/nginx-vuln/sbin/nginx" ] && [ -x "$CACHE_DIR/nginx-fixed/sbin/nginx" ]; then
    VULN_NGINX="$CACHE_DIR/nginx-vuln/sbin/nginx"
    FIXED_NGINX="$CACHE_DIR/nginx-fixed/sbin/nginx"
    echo "[INFO] Using pre-built nginx binaries from project cache"
    echo "[INFO] Vuln:  $VULN_NGINX"
    echo "[INFO] Fixed: $FIXED_NGINX"
else
    echo "[INFO] Pre-built binaries not found; building from source"
    REPO=""
    if [ -n "$CACHE_DIR" ] && [ -d "$CACHE_DIR/repo" ]; then
        REPO="$CACHE_DIR/repo"
    else
        REPO="$ROOT/artifacts/nginx-src"
        mkdir -p "$ROOT/artifacts"
        # Reuse the checkout left by an earlier run: `git clone` refuses to
        # clone into an existing non-empty directory, which would abort the
        # script under `set -e` on every run after the first.
        if [ ! -d "$REPO/.git" ]; then
            git clone --quiet https://github.com/nginx/nginx.git "$REPO"
        fi
    fi

    # Install into the cache directory when one is configured, else under artifacts/.
    VULN_PREFIX="${CACHE_DIR:-$ROOT/artifacts}/nginx-vuln"
    FIXED_PREFIX="${CACHE_DIR:-$ROOT/artifacts}/nginx-fixed"
    mkdir -p "$VULN_PREFIX" "$FIXED_PREFIX"

    # The vulnerable revision is the first parent of the fixing commit.
    VULN_COMMIT=$(git -C "$REPO" rev-parse "${FIXED_COMMIT}^")
    build_nginx "$REPO" "$VULN_COMMIT" "$VULN_PREFIX"
    VULN_NGINX="$VULN_PREFIX/sbin/nginx"

    build_nginx "$REPO" "$FIXED_COMMIT" "$FIXED_PREFIX"
    FIXED_NGINX="$FIXED_PREFIX/sbin/nginx"
fi

[ -x "$VULN_NGINX" ] || { echo "[FAIL] Vulnerable nginx binary not found: $VULN_NGINX"; exit 2; }
[ -x "$FIXED_NGINX" ] || { echo "[FAIL] Fixed nginx binary not found: $FIXED_NGINX"; exit 2; }

# Print only the first line of `nginx -V`. `sed -n 1p` consumes the whole
# stream, unlike `head -1`, which may exit while nginx is still writing; with
# `pipefail` + `set -e` the resulting SIGPIPE status would abort the script.
echo "[INFO] Vuln nginx version:"
"$VULN_NGINX" -V 2>&1 | sed -n '1p'
echo "[INFO] Fixed nginx version:"
"$FIXED_NGINX" -V 2>&1 | sed -n '1p'
echo "[INFO] Port base: $PORT_BASE"

# ----------------------------------------------------------------------------
# Create HTML test content with real UTF-8 multi-byte characters.
# Content must contain bytes >= 0x80 to trigger the recode_from_utf8 path.
# ----------------------------------------------------------------------------
HTML_DIR="$LOGS/html"
mkdir -p "$HTML_DIR"
# The here-doc delimiter is quoted, so the shell passes the \u escapes through
# untouched; Python expands them and writes the page as raw UTF-8 bytes
# (no trailing newline).
python3 - > "$HTML_DIR/index.html" <<'PYEOF'
import sys
# Cyrillic: a(U+0430)=D0 B0, b(U+0431)=D0 B1, v(U+0432)=D0 B2
page = '<html><body>\u0430\u0431\u0432 test \u0430 ABCD</body></html>'
sys.stdout.buffer.write(page.encode('utf-8'))
PYEOF
echo "[INFO] Test HTML created with UTF-8 non-ASCII content"

# ----------------------------------------------------------------------------
# Cleanup helper: kill nginx master and its workers
# ----------------------------------------------------------------------------
# cleanup_nginx MASTER_PID
#   Stops the given master process and any direct children it had when the
#   function was entered. Sends SIGTERM first, waits up to ~2 seconds for the
#   master to disappear, then SIGKILLs whatever is left. Always returns 0 and
#   tolerates a PID that is already gone.
cleanup_nginx() {
    local pid=$1
    local children child tries

    # Snapshot the children while the master is still their parent. Once the
    # master has exited they are re-parented and a lookup by parent PID no
    # longer finds them, leaving them running and holding the listen port.
    children=$(pgrep -P "$pid" 2>/dev/null || true)

    # Graceful stop first.
    kill -TERM "$pid" 2>/dev/null || true

    # Poll rather than sleep a fixed interval so the common case returns quickly.
    for ((tries = 0; tries < 20; tries++)); do
        kill -0 "$pid" 2>/dev/null || break
        sleep 0.1
    done

    # Force-kill anything that ignored SIGTERM.
    kill -KILL "$pid" 2>/dev/null || true
    # shellcheck disable=SC2086 -- intentional word splitting of a PID list
    for child in $children; do
        kill -KILL "$child" 2>/dev/null || true
    done

    # Reap the master when it is a job of this shell so no zombie is left
    # behind; `wait` fails harmlessly for a PID that is not our child.
    wait "$pid" 2>/dev/null || true
}

# ----------------------------------------------------------------------------
# Vulnerable version test: start nginx, send HTTP request, check for segfault
# ----------------------------------------------------------------------------
VULN_SEGFAULTS=0

# test_vulnerable ATTEMPT
#   Runs one crash attempt against $VULN_NGINX on port PORT_BASE + ATTEMPT.
#   Every file it produces lives under $LOGS and carries ATTEMPT in its name.
#   Returns 0 only when the nginx error log records a process that
#   "exited on signal 11"; returns 1 when the master dies during startup or
#   no such record is found.
test_vulnerable() {
    local attempt=$1
    local tag="[VULN attempt $attempt]"
    local port=$((PORT_BASE + attempt))
    local cfg="$LOGS/vuln_conf_${attempt}.conf"
    local error_log="$LOGS/vuln_error_${attempt}.log"
    local access_log="$LOGS/vuln_access_${attempt}.log"
    local pidfile="$LOGS/vuln_pid_${attempt}.pid"
    local console_log="$LOGS/vuln_stdout_${attempt}.log"
    local body_file="$LOGS/vuln_response_${attempt}.txt"

    # Start from a clean slate so any evidence found belongs to this attempt.
    rm -f "$error_log" "$access_log" "$pidfile" "$console_log" "$body_file"

    # Config under test: charset_map with utf-8 in the first (source) column is
    # the trigger; charset windows-1251 + source_charset utf-8 is what routes
    # the response through the from-utf8 recoding described in the file header.
    cat > "$cfg" <<CONFEOF
worker_processes 1;
daemon off;
error_log $error_log;
pid $pidfile;
events { worker_connections 64; }
http {
    access_log $access_log;
    charset_map utf-8 windows-1251 { }
    charset windows-1251;
    source_charset utf-8;
    server {
        listen $port;
        server_name localhost;
        location / { root $HTML_DIR; index index.html; }
    }
}
CONFEOF

    local start_ts
    start_ts=$(date '+%Y/%m/%d %H:%M:%S')
    echo "$tag start_ts=$start_ts port=$port"

    # `daemon off` keeps the master in the foreground, so it is backgrounded
    # here and given a fixed two seconds to come up.
    "$VULN_NGINX" -c "$cfg" > "$console_log" 2>&1 &
    local master=$!
    sleep 2

    # Only liveness of the master is checked, not that the port is accepting.
    if ! kill -0 "$master" 2>/dev/null; then
        echo "$tag FAIL: master process died immediately"
        tail -5 "$console_log" 2>/dev/null
        return 1
    fi

    local worker
    worker=$(pgrep -P "$master" 2>/dev/null | head -1)
    echo "$tag master=$master worker=$worker"

    # One GET over TCP; a failed or timed-out request is reported as code 000.
    local status
    status=$(timeout 5 curl -s -o "$body_file" -w "%{http_code}" \
        "http://127.0.0.1:$port/" 2>/dev/null) || status="000"
    echo "$tag HTTP response code: $status"
    sleep 1

    # The crash is detected from the error log, before nginx is torn down.
    local crashed=0
    if [[ -f "$error_log" ]] && grep -q "exited on signal 11" "$error_log" 2>/dev/null; then
        crashed=1
        echo "$tag *** SEGFAULT DETECTED (signal 11) ***"
        grep "signal 11" "$error_log"
    fi

    cleanup_nginx "$master"

    if [[ "$crashed" -ne 1 ]]; then
        echo "$tag FAIL: no segfault detected"
        tail -5 "$error_log" 2>/dev/null
        return 1
    fi

    echo "$tag PASS: worker segfaulted on signal 11"
    return 0
}

# ----------------------------------------------------------------------------
# Fixed version test: verify config is rejected at parse time
# ----------------------------------------------------------------------------
FIXED_REJECTED=0

# test_fixed ATTEMPT
#   Writes the triggering config to $LOGS/fixed_conf_<ATTEMPT>.conf, runs
#   `$FIXED_NGINX -t` on it and stores the combined output in
#   $LOGS/fixed_test_<ATTEMPT>.log. Returns 0 when that output contains
#   'should be given in the second column', 1 otherwise. The exit status of
#   nginx itself is deliberately ignored; only the message decides.
test_fixed() {
    local attempt=$1
    local tag="[FIXED attempt $attempt]"
    local cfg="$LOGS/fixed_conf_${attempt}.conf"
    local report="$LOGS/fixed_test_${attempt}.log"
    local listen_port=$((PORT_BASE + 100 + attempt))

    cat > "$cfg" <<CONFEOF
worker_processes 1;
events { worker_connections 64; }
http {
    charset_map utf-8 windows-1251 { }
    charset windows-1251;
    source_charset utf-8;
    server { listen $listen_port; server_name localhost; location / { root $HTML_DIR; index index.html; } }
}
CONFEOF

    local verdict
    verdict=$("$FIXED_NGINX" -t -c "$cfg" 2>&1) || true
    echo "$verdict" > "$report"

    if ! grep -q 'should be given in the second column' <<<"$verdict"; then
        echo "$tag FAIL: config not rejected"
        echo "$verdict"
        return 1
    fi

    echo "$tag PASS: config rejected with patch message"
    # Show the offending directive line(s) as evidence.
    grep "charset_map" <<<"$verdict"
    return 0
}

# Also verify the vulnerable version ACCEPTS the config (contrast with fixed)
echo ""
echo "=== Verifying vulnerable version accepts the config ==="
VULN_CONF_ACCEPT_LOG="$LOGS/vuln_config_accept.log"
cat > "$LOGS/vuln_accept_test.conf" <<CONFEOF
worker_processes 1;
events { worker_connections 64; }
http {
    charset_map utf-8 windows-1251 { }
    charset windows-1251;
    source_charset utf-8;
    server { listen $((PORT_BASE + 200)); server_name localhost; location / { root $HTML_DIR; index index.html; } }
}
CONFEOF
# Record the exit status without tripping `set -e`. Running the command bare
# and reading $? on the next line cannot work here: a non-zero status would
# terminate the script before the assignment, so the variable could only ever
# be 0 and a rejection would skip the summary and manifest entirely.
VULN_CONFIG_ACCEPTED=0
"$VULN_NGINX" -t -c "$LOGS/vuln_accept_test.conf" > "$VULN_CONF_ACCEPT_LOG" 2>&1 \
    || VULN_CONFIG_ACCEPTED=$?
echo "[INFO] Vulnerable config test exit code: $VULN_CONFIG_ACCEPTED (0=accepted)"
cat "$VULN_CONF_ACCEPT_LOG"

# ----------------------------------------------------------------------------
# Run vulnerable tests (must segfault both times)
# ----------------------------------------------------------------------------
printf '%s\n' "" \
    "==========================================" \
    "=== VULNERABLE VERSION CRASH TESTS    ===" \
    "=========================================="
for run in 1 2; do
    # Count successes only; as the left side of `&&`, a failing attempt does
    # not terminate the script under `set -e`.
    test_vulnerable "$run" && VULN_SEGFAULTS=$((VULN_SEGFAULTS + 1))
    sleep 2
done
echo "Vulnerable segfaults: $VULN_SEGFAULTS / 2"

# ----------------------------------------------------------------------------
# Run fixed tests (must reject config both times)
# ----------------------------------------------------------------------------
printf '%s\n' "" \
    "==========================================" \
    "=== FIXED VERSION REJECTION TESTS    ===" \
    "=========================================="
for run in 1 2; do
    test_fixed "$run" && FIXED_REJECTED=$((FIXED_REJECTED + 1))
done
echo "Fixed rejections: $FIXED_REJECTED / 2"

# ----------------------------------------------------------------------------
# Determine overall result
# ----------------------------------------------------------------------------
printf '%s\n' "" \
    "==========================================" \
    "=== SUMMARY                          ===" \
    "=========================================="
echo "Vulnerable segfaults: $VULN_SEGFAULTS / 2"
echo "Fixed rejections:     $FIXED_REJECTED / 2"
echo "Vuln config accepted: $VULN_CONFIG_ACCEPTED (0=yes)"

# Success requires all three signals: both crash attempts segfaulted, both
# fixed-binary checks were rejected, and the vulnerable binary accepted the
# config. Anything less is reported as not fully reproduced.
OVERALL_SUCCESS=0
verdict="RESULT: VULNERABILITY NOT FULLY REPRODUCED"
if [ "$VULN_SEGFAULTS" -eq 2 ] && [ "$FIXED_REJECTED" -eq 2 ] && [ "$VULN_CONFIG_ACCEPTED" -eq 0 ]; then
    OVERALL_SUCCESS=1
    verdict="RESULT: VULNERABILITY CONFIRMED"
fi
echo "$verdict"

# ----------------------------------------------------------------------------
# Write runtime manifest (strict JSON via python)
# ----------------------------------------------------------------------------
# Shell values reach Python through argv and the here-doc delimiter is quoted,
# so the shell never rewrites the Python source. Interpolating paths directly
# into string literals would break on a quote or backslash in $ROOT.
python3 - "$ROOT" "$LOGS" "$REPRO_DIR" \
    "$VULN_SEGFAULTS" "$FIXED_REJECTED" "$OVERALL_SUCCESS" <<'PYEOF'
import json, os, sys

root, logs, repro = sys.argv[1:4]
vuln_segfaults, fixed_rejected, overall = (int(arg) for arg in sys.argv[4:7])

# Only evidence files that actually exist are listed, as paths relative to root.
artifacts = []
for name in ["vuln_error_1.log", "vuln_error_2.log",
             "vuln_conf_1.conf", "vuln_conf_2.conf",
             "vuln_response_1.txt", "vuln_response_2.txt",
             "fixed_test_1.log", "fixed_test_2.log",
             "vuln_config_accept.log"]:
    p = os.path.join(logs, name)
    if os.path.exists(p):
        artifacts.append(os.path.relpath(p, root))

# NOTE: the three boolean fields all mirror the overall verdict, and the
# runtime_stack entries are fixed strings rather than values read from the
# binaries that were run.
manifest = {
    "entrypoint_kind": "tcp_peer",
    "entrypoint_detail": "HTTP GET request to nginx TCP listener with charset_map utf-8 windows-1251 config",
    "service_started": overall == 1,
    "healthcheck_passed": overall == 1,
    "target_path_reached": overall == 1,
    "runtime_stack": ["nginx/1.31.3 (vulnerable: pre-fix commit 8f3465ac7)", "nginx/1.31.3 (fixed: commit 29c23ad84)"],
    "proof_artifacts": artifacts,
    "notes": "Vulnerable nginx worker segfaults (signal 11/SIGSEGV) when processing HTTP request with charset_map utf-8 in first column. Fixed version rejects the config at parse time. Vuln segfaults: {}/2, Fixed rejections: {}/2.".format(vuln_segfaults, fixed_rejected)
}
with open(os.path.join(repro, "runtime_manifest.json"), "w") as f:
    json.dump(manifest, f, indent=2)
print("Runtime manifest written to", os.path.join(repro, "runtime_manifest.json"))
PYEOF

echo ""
# Exit status mirrors the verdict: 0 when fully reproduced, 1 otherwise.
[ "$OVERALL_SUCCESS" -eq 1 ] && exit 0
exit 1
