#!/bin/bash
set -euo pipefail

# =============================================================================
# CVE-2026-54500 — VARIANT / BYPASS ANALYSIS
#
# Goal: Determine whether the fix (commit bbde91a, v3.17.3) — the one-character
# change rb_intern3(buf,...) -> rb_intern3(b,...) in intern.c form_attr() —
# fully closes the uninitialized-stack-memory-read bug, or whether a DIFFERENT
# entry point / mode / data path still triggers the same underlying bug on the
# FIXED version (a bypass), or on the VULNERABLE version in a way the fix misses
# (an alternate-trigger variant).
#
# The vulnerable sink is intern.c:form_attr() long-key path. Static analysis
# shows the ONLY Oj.load path that reaches it is :object mode (object.c ->
# oj_set_obj_ivar -> oj_attr_intern -> cache_intern -> intern.c form_attr).
# usual.c has its own copy of form_attr but it was fixed earlier (ec368db,
# ancestor of v3.17.2). The newer Oj::Parser API uses usual.c's form_attr (via
# get_attr_id) — also already fixed — and its :object mode is unimplemented (// TBD).
#
# This script EMPIRICALLY verifies that conclusion by testing every Oj parse
# mode + the newer Oj::Parser API with a long (300-byte) key on BOTH the
# vulnerable (495cc38) and fixed (bbde91a) versions.
#
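# Exit codes:
#   0 = a distinct variant/bypass IS confirmed: some mode leaks on the FIXED
#       version (bypass), or a mode other than the original :object path leaks
#       on the VULNERABLE version (alternate trigger, covered by the fix)
#   1 = no variant/bypass found (fix is complete); script still ran fully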
# =============================================================================

# ---- Workspace layout -------------------------------------------------------
# PRUVA_ROOT wins when set and non-empty; otherwise the workspace root is the
# parent of the directory holding this script.
if [[ -n "${PRUVA_ROOT:-}" ]]; then
  ROOT="$PRUVA_ROOT"
else
  ROOT="$(cd "$(dirname "$0")/.." && pwd)"
fi
LOGS="${ROOT}/logs"
VVDIR="${ROOT}/vuln_variant"
mkdir -p "$LOGS" "$VVDIR"

cd "$ROOT"

# ---- Resolve project cache (durable volume) ---------------------------------
# The directory named in project_cache_context.json is used only when that file
# reports prepared=true AND the directory exists; in every other case the cache
# falls back to a workspace-local directory.
PROJECT_CACHE_DIR=""
cache_ctx="${ROOT}/project_cache_context.json"
if [[ -f "$cache_ctx" ]]; then
  # A jq failure here (missing binary, malformed JSON) is treated as "not prepared".
  PREPARED=$(jq -r '.prepared // false' "$cache_ctx" 2>/dev/null || echo false)
  if [[ "$PREPARED" == "true" ]]; then
    PROJECT_CACHE_DIR=$(jq -r '.project_cache_dir // empty' "$cache_ctx")
  fi
fi
if [[ -z "$PROJECT_CACHE_DIR" || ! -d "$PROJECT_CACHE_DIR" ]]; then
  PROJECT_CACHE_DIR="${ROOT}/artifacts/oj-cache"
fi
mkdir -p "$PROJECT_CACHE_DIR"
REPO="${PROJECT_CACHE_DIR}/repo"

# ---- Logging ----------------------------------------------------------------
# From here on, stdout and stderr are shown on the console and appended to $LOG.
LOG="${LOGS}/vuln_variant_repro.log"
exec > >(tee -a "$LOG") 2>&1
echo "==== vuln_variant reproduction_steps.sh start $(date -u +%FT%TZ) ===="

# ---- Commits under test -----------------------------------------------------
# v3.17.2 — parent of the fix, still vulnerable
VULN_COMMIT="495cc38fc5a02681da2175960d4a667fae48f3c9"
# v3.17.3 — the fix
FIXED_COMMIT="bbde91a679728f94c4492ebc3683f4fa3309049f"

# ---- Probe parameters -------------------------------------------------------
KEY_LEN=300   # key length handed to the probe script
NRUNS=4       # probe invocations (separate processes) per mode

# The ORIGINAL vulnerable entry point (positive control)
ORIG_MODE="object"

# All candidate variant entry points / modes to test. The order below is the
# order in which modes are probed and listed in the result matrix.
ALL_MODES=(
  object compat_obj compat_hash
  rails strict null wab custom
  np_usual_obj np_usual_hash np_usual_obj_symcache
)

# ---- Probe script (must already exist) --------------------------------------
# Nothing in this script generates the probe. Without it every probe run fails,
# run_mode records neither a leak nor a correct result, and the sweep would
# misreport every mode as not_affected — so stop here instead.
PROBE="$VVDIR/probe_variant.rb"
if [ ! -f "$PROBE" ]; then
  echo "[!] Probe script not found: $PROBE" >&2
  echo "[!] Refusing to continue: results would be meaningless without it" >&2
  exit 2
fi

# ---- Install dependencies (idempotent) --------------------------------------
# ruby builds and runs the extension, git manages the checkout, and jq writes
# the runtime manifest at the very end; check all three up front so a missing
# tool cannot abort the script after the whole sweep has already run.
missing_pkgs=()
command -v ruby >/dev/null 2>&1 || missing_pkgs+=(ruby ruby-dev build-essential)
command -v git >/dev/null 2>&1 || missing_pkgs+=(git)
command -v jq >/dev/null 2>&1 || missing_pkgs+=(jq)
if [ "${#missing_pkgs[@]}" -gt 0 ]; then
  echo "[*] Installing missing dependencies: ${missing_pkgs[*]}"
  sudo apt-get update -qq 2>&1 | tail -1
  sudo apt-get install -y -qq "${missing_pkgs[@]}" 2>&1 | tail -3
fi
echo "[*] ruby=$(ruby --version 2>&1)"

# ---- Clone / reuse repo -----------------------------------------------------
if [ -d "$REPO/.git" ]; then
  echo "[*] Reusing existing repo at $REPO (HEAD=$(git -C "$REPO" rev-parse --short HEAD 2>/dev/null))"
  # Best-effort refresh: a failed fetch is tolerated; the later checkout will
  # fail if the pinned commits are genuinely unavailable.
  git -C "$REPO" fetch --quiet origin 2>&1 | tail -2 || true
else
  echo "[*] Cloning ohler55/oj into $REPO"
  git clone --quiet https://github.com/ohler55/oj.git "$REPO"
fi

# ---- Build helper (manual extconf + make, avoids rake/bundler) --------------
# Checks out a commit in $REPO, rebuilds the C extension from a clean tree and
# copies the resulting shared object into $REPO/lib/oj so that `ruby -Ilib`
# picks up the freshly built extension.
# Globals:   REPO (read)
# Arguments: $1 - commit to check out
#            $2 - human-readable label used in log messages
# Returns:   0 on success; 1 if the checkout or build fails or no shared
#            object is produced
build_oj() {
  local commit="$1"
  local label="$2"
  local ext_dir="$REPO/ext/oj"
  local lib_dir="$REPO/lib/oj"
  local resolved candidate
  local artifact=""

  echo "[*] Checking out $label commit $commit"
  if ! git -C "$REPO" checkout --quiet "$commit" 2>&1; then
    echo "[!] CHECKOUT FAILED for $label ($commit)"
    return 1
  fi
  resolved=$(git -C "$REPO" rev-parse HEAD) || return 1
  echo "[*] $label resolved HEAD=$resolved"

  # Best-effort removal of build products left over from the previous commit.
  git -C "$REPO" clean -fdx ext/oj lib/oj 2>/dev/null || true
  rm -rf "$REPO/tmp"

  echo "[*] Building $label C extension (manual extconf + make)"
  # With pipefail enabled by the caller, a failing extconf/make surfaces here
  # instead of silently aborting the script without a message.
  if ! ( cd "$ext_dir" && ruby extconf.rb && make ) 2>&1 | tail -4; then
    echo "[!] BUILD FAILED for $label — extconf/make returned non-zero"
    return 1
  fi

  for candidate in "$ext_dir/oj.so" "$ext_dir/oj.bundle"; do
    if [ -f "$candidate" ]; then
      artifact="$candidate"
      break
    fi
  done
  if [ -z "$artifact" ]; then
    echo "[!] BUILD FAILED for $label — no shared object found"
    ls -la "$ext_dir/" | tail -10 || true
    return 1
  fi

  mkdir -p "$lib_dir"
  # Keep the platform-specific file name: Ruby locates extensions by the
  # platform's shared-library suffix, so a .bundle renamed to .so is not loaded.
  cp -f "$artifact" "$lib_dir/${artifact##*/}"
  echo "[*] $label build OK (HEAD=$(git -C "$REPO" rev-parse --short HEAD))"
}

# ---- Run probe for one mode N times (separate processes) --------------------
# Runs the probe script $NRUNS times for a single mode and aggregates results.
# Globals read: REPO, PROBE, KEY_LEN, NRUNS, LOGS
# Sets globals: <label>_<mode>_leak (0/1), <label>_<mode>_encerr (0/1),
#               <label>_<mode>_correct (0/1)
# Arguments:    $1 - label ("vuln" / "fixed"); also selects the outcome file
#               $2 - mode name passed to the probe
# Outputs:      one tagged line per run, written to stdout AND appended once to
#               $LOGS/<label>_variant_outcomes.txt, followed by a summary line
run_mode() {
  local label="$1"
  local mode="$2"
  local outcomes_file="$LOGS/${label}_variant_outcomes.txt"
  local leak=0
  local encerr=0
  local correct=0
  local unknown=0
  local run line

  for (( run = 1; run <= NRUNS; run++ )); do
    # A non-zero probe exit is expected for some outcomes; classification is
    # done purely on the captured output.
    line=$(cd "$REPO" && ruby -Ilib "$PROBE" "$mode" "$KEY_LEN" 2>&1) || true
    printf '[%s %s run %d] %s\n' "$label" "$mode" "$run" "$line" | tee -a "$outcomes_file"
    case "$line" in
      *OUTCOME=encoding_error*) encerr=1; leak=1 ;;
      *OUTCOME=leak*)           leak=1 ;;
      *OUTCOME=correct*)        correct=1 ;;
      *)                        unknown=$((unknown + 1)) ;;
    esac
  done

  # printf -v assigns to the dynamically named globals without eval.
  printf -v "${label}_${mode}_leak" '%s' "$leak"
  printf -v "${label}_${mode}_encerr" '%s' "$encerr"
  printf -v "${label}_${mode}_correct" '%s' "$correct"

  if (( unknown > 0 )); then
    echo "  [!] [$label] $mode : $unknown/$NRUNS run(s) produced no recognised OUTCOME= marker (probe error or crash?)"
  fi
  echo "  [$label] $mode : leak=$leak encoding_error=$encerr correct=$correct"
}

# =============================================================================
# PHASE 1: VULNERABLE version (495cc38, v3.17.2)
# Build the pre-fix commit, print source-level evidence of which form_attr
# variants are present, then probe every mode.
# =============================================================================
printf '\n%s\n' "========== PHASE 1: VULNERABLE (495cc38, v3.17.2) =========="
build_oj "$VULN_COMMIT" "VULNERABLE"

# The greps below are informational only: a miss prints a warning but does not
# stop the run.
echo "[*] Verifying vulnerable form_attr uses buf (not b):"
if ! grep -n 'rb_intern3(buf' "$REPO/ext/oj/intern.c"; then
  echo "[!] Expected rb_intern3(buf,...) not found!"
fi
echo "[*] Verifying usual.c form_attr already fixed (uses b):"
if ! grep -n 'rb_intern3(b,' "$REPO/ext/oj/usual.c"; then
  echo "[!] usual.c form_attr NOT fixed on this commit!"
fi

# Start from an empty outcome file so results of earlier invocations do not mix in.
: > "$LOGS/vuln_variant_outcomes.txt"
echo "[*] Testing all modes on VULNERABLE version ($NRUNS runs each):"
for m in "${ALL_MODES[@]}"; do
  run_mode "vuln" "$m"
done

# =============================================================================
# PHASE 2: FIXED version (bbde91a, v3.17.3)
# Build the commit carrying the fix, print source-level evidence of the patched
# call, then probe every mode again.
# =============================================================================
printf '\n%s\n' "========== PHASE 2: FIXED (bbde91a, v3.17.3) =========="
build_oj "$FIXED_COMMIT" "FIXED"

# Informational only: a miss prints a warning but does not stop the run.
echo "[*] Verifying fixed form_attr uses b (not buf) in long-key path:"
if ! grep -n 'rb_intern3(b,' "$REPO/ext/oj/intern.c"; then
  echo "[!] Expected rb_intern3(b,...) not found!"
fi

# Start from an empty outcome file so results of earlier invocations do not mix in.
: > "$LOGS/fixed_variant_outcomes.txt"
echo "[*] Testing all modes on FIXED version ($NRUNS runs each):"
for m in "${ALL_MODES[@]}"; do
  run_mode "fixed" "$m"
done

# =============================================================================
# PHASE 3: Variant / Bypass matrix & verdict
# Classifies every mode from the <label>_<mode>_leak flags set by run_mode:
#   BYPASS          - leaks on the FIXED build
#   covered_by_fix  - leaks on the VULNERABLE build only
#   not_affected    - leaks on neither build
# Sets BYPASS_FOUND, ALT_TRIGGER and ORIG_CONFIRMED (0/1) for the verdict.
# =============================================================================
echo ""
echo "========== PHASE 3: VARIANT / BYPASS MATRIX =========="
printf "%-22s | %-10s | %-10s | %-12s\n" "MODE" "VULN_LEAK" "FIXED_LEAK" "CLASSIFICATION"
printf "%-22s-+-%-10s-+-%-10s-+-%-12s\n" "----------------------" "----------" "----------" "------------"

BYPASS_FOUND=0
ALT_TRIGGER=0
ORIG_CONFIRMED=0

for m in "${ALL_MODES[@]}"; do
  # Indirect expansion reads the per-mode flags without eval; a flag that was
  # never set (mode not run) counts as 0.
  vuln_flag="vuln_${m}_leak"
  fixed_flag="fixed_${m}_leak"
  vl="${!vuln_flag:-0}"
  fl="${!fixed_flag:-0}"

  classification="not_affected"
  if [ "$fl" = "1" ]; then
    classification="BYPASS"
    BYPASS_FOUND=1
  elif [ "$vl" = "1" ]; then
    classification="covered_by_fix"
    # The original entry point acts as the positive control; any other mode
    # that leaks only on the vulnerable build is an alternate trigger.
    if [ "$m" = "$ORIG_MODE" ]; then ORIG_CONFIRMED=1; else ALT_TRIGGER=1; fi
  fi
  printf "%-22s | %-10s | %-10s | %-12s\n" "$m" "$vl" "$fl" "$classification"
done

# ---- Verdict ----------------------------------------------------------------
# The sweep only supports a "fix is complete" conclusion when the positive
# control held, i.e. the original entry point leaked on the vulnerable build
# and not on the fixed one (ORIG_CONFIRMED=1). If no leak was observed in ANY
# mode on EITHER build, the harness proved nothing and the result is reported
# as inconclusive rather than as a clean bill of health.
#
# Exit status of this section:
#   0 = bypass or alternate trigger confirmed
#   1 = no variant/bypass found AND the positive control reproduced
#   2 = inconclusive: nothing leaked anywhere, positive control failed
POSITIVE_CONTROL="${ORIG_CONFIRMED:-0}"

echo ""
echo "=============================================="
echo "VERDICT"
echo "  orig_object_vuln_leak   = ${vuln_object_leak:-0}"
echo "  orig_object_fixed_clean = ${fixed_object_correct:-0}"
echo "  alt_trigger_found       = $ALT_TRIGGER  (mode leaks on vuln but NOT the original object path)"
echo "  bypass_found            = $BYPASS_FOUND (some mode leaks on the FIXED version)"
echo "  positive_control_ok     = $POSITIVE_CONTROL (original path leaks on vuln and not on fixed)"
echo "=============================================="

# ---- Write runtime manifest -------------------------------------------------
# target_path_reached is only claimed when at least one leak was actually
# observed during the sweep.
TARGET_PATH_REACHED=true
if [ "$BYPASS_FOUND" = "1" ]; then
  VERDICT_STR="bypass_confirmed"
  NOTES="A distinct entry point still leaks uninitialized stack memory on the FIXED (v3.17.3) version."
elif [ "$ALT_TRIGGER" = "1" ]; then
  VERDICT_STR="alt_trigger_confirmed"
  NOTES="An alternate entry point triggers the same bug on the vulnerable version (covered by the fix)."
elif [ "$POSITIVE_CONTROL" = "1" ]; then
  VERDICT_STR="no_variant_found"
  NOTES="No bypass or alternate trigger found. The fix (intern.c buf->b) fully closes the only path (Oj.load :object mode -> object.c -> oj_attr_intern -> intern.c form_attr). All other modes either do not reach form_attr or use usual.c's already-fixed copy (ec368db, pre-v3.17.2). The newer Oj::Parser API uses usual.c form_attr (already fixed) and its :object mode is unimplemented (// TBD)."
else
  VERDICT_STR="inconclusive"
  TARGET_PATH_REACHED=false
  NOTES="Inconclusive: the original trigger (Oj.load :object mode) did not leak on the vulnerable build and no other mode leaked on either build, so the sweep cannot show that the fix is complete. Check the per-run outcome logs for probe or build errors."
  echo "[!] Positive control failed: no leak observed on the vulnerable build — result is inconclusive"
fi

jq -n \
  --arg entrypoint_kind "library_api" \
  --arg entrypoint_detail "Multi-mode sweep: Oj.load {:object,:compat,:rails,:strict,:null,:wab,:custom} + Oj::Parser.new(:usual) with 300-byte key, tested on vulnerable 495cc38 and fixed bbde91a" \
  --argjson service_started false \
  --argjson healthcheck_passed false \
  --argjson target_path_reached "$TARGET_PATH_REACHED" \
  --argjson bypass_found "$BYPASS_FOUND" \
  --argjson alt_trigger_found "$ALT_TRIGGER" \
  --arg verdict "$VERDICT_STR" \
  --arg notes "$NOTES" \
  '{
    entrypoint_kind: $entrypoint_kind,
    entrypoint_detail: $entrypoint_detail,
    service_started: $service_started,
    healthcheck_passed: $healthcheck_passed,
    target_path_reached: $target_path_reached,
    runtime_stack: ["ruby","oj-c-extension"],
    bypass_found: $bypass_found,
    alt_trigger_found: $alt_trigger_found,
    verdict: $verdict,
    notes: $notes
  }' > "$VVDIR/runtime_manifest.json"
echo "[*] runtime_manifest.json written"

# ---- Restore repo to the fixed commit (the state used by repro) -------------
echo "[*] Restoring repo to fixed commit $FIXED_COMMIT"
git -C "$REPO" checkout --quiet "$FIXED_COMMIT" 2>&1
git -C "$REPO" clean -fdx ext/oj lib/oj 2>/dev/null || true
echo "[*] Final repo HEAD=$(git -C "$REPO" rev-parse HEAD)"

echo "==== vuln_variant reproduction_steps.sh end $(date -u +%FT%TZ) ===="

if [ "$BYPASS_FOUND" = "1" ] || [ "$ALT_TRIGGER" = "1" ]; then
  echo "[+] VARIANT/BYPASS FOUND (exit 0)"
  exit 0
elif [ "$VERDICT_STR" = "inconclusive" ]; then
  echo "[!] INCONCLUSIVE — positive control did not reproduce (exit 2)"
  exit 2
else
  echo "[*] No variant/bypass found (exit 1) — fix appears complete"
  exit 1
fi
