#!/usr/bin/env bash
set -euo pipefail

#######################################
# Print the command synopsis and a summary of what the script collects.
# Arguments: none
# Outputs:   help text on stdout
#######################################
usage() {
  printf '%s\n' \
    "Usage: $(basename "$0") <url> [wordlist]" \
    '' \
    'Lightweight web challenge reconnaissance:' \
    '- headers / redirects / cookies' \
    '- common files (robots, sitemap, .git/HEAD, .env, backup names)' \
    '- homepage links / forms / scripts' \
    '- JS keyword hunt' \
    '- optional ffuf path fuzzing if a wordlist is provided and ffuf exists'
}

#######################################
# Tell whether a command name can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   nothing (lookup output is discarded)
# Returns:   0 if the command is found, 1 otherwise
#######################################
have() {
  local tool="$1"
  if command -v "$tool" &>/dev/null; then
    return 0
  fi
  return 1
}

# Argument handling and output-directory setup.
# - An explicit help request prints the usage text on stdout and succeeds.
# - A missing (or empty) URL is an error: usage goes to stderr, exit status 1.
case "${1:-}" in
  -h | --help)
    usage
    exit 0
    ;;
  '')
    usage >&2
    exit 1
    ;;
esac
URL="$1"
WORDLIST="${2:-}" # optional; empty disables the ffuf step
TS="$(date +%Y%m%d_%H%M%S)"
# All artifacts of one run live under a timestamped directory in the cwd.
OUTDIR="web_recon_${TS}"
mkdir -p "$OUTDIR"/{responses,parsed,notes}

#######################################
# Download a URL with curl, saving response headers and body side by side.
# Globals:   OUTDIR (read)
# Arguments: $1 - URL to request
#            $2 - base name for the files written under $OUTDIR/responses
# Outputs:   writes <base>.headers and <base>.body via curl
# Returns:   always 0; a failed request is deliberately not fatal
#######################################
fetch() {
  local url="$1" outbase="$2"
  local dest="$OUTDIR/responses/${outbase}"
  # -s silent, -k skip TLS verification, -L follow redirects.
  local -a curl_args=(
    -skL
    --max-time 20
    -D "${dest}.headers"
    -o "${dest}.body"
    "$url"
  )
  if ! curl "${curl_args[@]}"; then
    # Best effort: the caller keeps going when a single request fails.
    return 0
  fi
}

#######################################
# Print a progress message prefixed with "[+] ".
# Arguments: $@ - message words, joined into a single line
# Outputs:   the formatted message on stdout
#######################################
log() {
  local message="$*"
  printf '[+] %s\n' "$message"
}

log "Fetching main page"
fetch "$URL" root
# fetch() never fails, so check the result here: when neither headers nor body
# were captured the target did not answer, and every later step (title grep,
# HTML parsing, header summary) would only operate on missing files.
if [[ ! -s "$OUTDIR/responses/root.headers" && ! -s "$OUTDIR/responses/root.body" ]]; then
  printf '[-] %s\n' "No response from $URL; aborting" >&2
  exit 1
fi
# A response without a body may leave no body file behind; create an empty one
# so the steps that read it do not trip over a missing path.
touch "$OUTDIR/responses/root.body" "$OUTDIR/responses/root.headers"

log "Saving basic metadata"
# notes/meta.txt holds KEY=value lines: the target URL, an ISO-8601 timestamp
# and the text of the first <title> element found in the homepage body.
# The title pipeline stays inside the printf argument on purpose: an empty
# match must yield "TITLE=" rather than abort the script.
{
  printf 'URL=%s\n' "$URL"
  printf 'DATE=%s\n' "$(date -Iseconds)"
  printf 'TITLE=%s\n' "$(
    grep -oim1 '<title[^>]*>.*</title>' "$OUTDIR/responses/root.body" \
      | sed -E 's#<[^>]+>##g'
  )"
} > "$OUTDIR/notes/meta.txt"

log "Checking common files"
# Well-known paths that frequently leak information when left exposed.
common_paths=(
  robots.txt sitemap.xml .git/HEAD .env .DS_Store server-status
  backup.zip backup.tar.gz backup.tgz backup.rar
  app.js main.js swagger.json openapi.json
)
for path in "${common_paths[@]}"; do
  # Flatten the path into a file-name-safe base: '/' and '.' become '_'.
  outbase="${path//\//_}"
  outbase="${outbase//./_}"
  fetch "${URL%/}/${path}" "$outbase"
done

log "Extracting links / forms / script sources"
# Parse the saved homepage with Python's html.parser and write one entry per
# line into parsed/{links,scripts,forms,inputs,keywords}.txt.
python3 - "$OUTDIR" <<'PY'
from pathlib import Path
from html.parser import HTMLParser
import re, sys

out = Path(sys.argv[1])
root_body = out / 'responses/root.body'
# A failed or empty fetch can leave no body file; treat that as an empty page
# so the (empty) output files are still produced instead of a traceback.
html = root_body.read_text(errors='ignore') if root_body.is_file() else ''


class P(HTMLParser):
    """Collect <a href>, <script src>, <form> and <input> attributes."""

    def __init__(self):
        super().__init__()
        self.links = []
        self.scripts = []
        self.forms = []
        self.inputs = []

    def handle_starttag(self, tag, attrs):
        d = dict(attrs)
        if tag == 'a' and d.get('href'):
            self.links.append(d['href'])
        if tag == 'script' and d.get('src'):
            self.scripts.append(d['src'])
        # `or` (rather than a .get default) also covers attributes that are
        # present without a value, which the parser reports as None.
        if tag == 'form':
            self.forms.append((d.get('method') or 'GET', d.get('action') or ''))
        if tag == 'input':
            self.inputs.append((d.get('name') or '', d.get('type') or 'text'))


def write_lines(name, lines):
    """Write `lines` to parsed/<name>, each terminated by a newline.

    The trailing newline matters: shell `read` loops and tools such as
    `wc -l` do not count a final line that lacks one.
    """
    (out / 'parsed' / name).write_text(''.join(f'{line}\n' for line in lines))


p = P()
p.feed(html)
p.close()  # flush anything still buffered by the parser

write_lines('links.txt', sorted(set(p.links)))
write_lines('scripts.txt', sorted(set(p.scripts)))
write_lines('forms.txt', (f'{m}\t{a}' for m, a in p.forms))
write_lines('inputs.txt', (f'{n}\t{t}' for n, t in p.inputs))

# Distinct keyword occurrences in the raw HTML (matching ignores case; the
# spelling found in the page is kept).
interesting = sorted(set(re.findall(r'(api|graphql|debug|admin|token|jwt|secret|flag|upload|internal|swagger)', html, flags=re.I)))
write_lines('keywords.txt', interesting)
PY

log "Fetching discovered scripts"

#######################################
# Turn a <script src> value into an absolute URL.
# Arguments: $1 - page URL the src was found on
#            $2 - raw src attribute value
# Outputs:   the resolved URL on stdout
# Notes:     - absolute http(s) URLs are returned unchanged
#            - protocol-relative (//host/...) URLs inherit the page scheme
#              (https when the page URL carries no scheme)
#            - root-relative (/path) URLs are joined to scheme://host[:port],
#              not to the full page URL
#            - anything else is appended to the page URL, which is treated
#              as a directory (same heuristic as the common-file checks)
#######################################
resolve_script_url() {
  local page="$1" src="$2"
  local scheme="https" origin
  local origin_re='^([A-Za-z][A-Za-z0-9+.-]*):(//[^/?#]*)'
  if [[ "$page" =~ $origin_re ]]; then
    scheme="${BASH_REMATCH[1]}"
    origin="${BASH_REMATCH[0]}"
  else
    # No scheme given: everything before the first '/' is the host part.
    origin="${page%%/*}"
  fi
  case "$src" in
    http://* | https://*) printf '%s\n' "$src" ;;
    //*) printf '%s\n' "${scheme}:${src}" ;;
    /*) printf '%s\n' "${origin}${src}" ;;
    *) printf '%s\n' "${page%/}/${src}" ;;
  esac
}

# `|| [[ -n "$src" ]]` keeps the final entry when the list does not end with a
# newline; a plain `read` loop would silently drop it.
while IFS= read -r src || [[ -n "$src" ]]; do
  [[ -z "$src" ]] && continue
  target="$(resolve_script_url "$URL" "$src")"
  # File-name-safe base derived from the original src value.
  base="script_$(printf '%s' "$src" | tr '/:?&.=' '_')"
  fetch "$target" "$base"
done < "$OUTDIR/parsed/scripts.txt"

log "JS keyword hunt"
# Search every downloaded script body for strings that hint at endpoints,
# credentials or debug features. Files are searched individually (not as one
# concatenated stream) so each hit is reported as file:line:text.
js_pattern='api|graphql|fetch\(|axios|token|jwt|secret|admin|debug|localhost|127\.0\.0\.1|internal|upload|csrf'
js_bodies=()
for body in "$OUTDIR"/responses/script_*.body; do
  # An unmatched glob stays literal; keep only paths that really exist.
  if [[ -f "$body" ]]; then
    js_bodies+=("$body")
  fi
done

# Always leave a (possibly empty) result file behind.
: > "$OUTDIR/parsed/js_hits.txt"
if ((${#js_bodies[@]} > 0)); then
  # Exit status 1 from rg/grep only means "no match", hence `|| true`.
  if have rg; then
    rg -niH --no-heading -e "$js_pattern" -- "${js_bodies[@]}" \
      > "$OUTDIR/parsed/js_hits.txt" || true
  else
    # ripgrep is optional; plain grep understands the same pattern with -E.
    grep -niHE -e "$js_pattern" -- "${js_bodies[@]}" \
      > "$OUTDIR/parsed/js_hits.txt" || true
  fi
fi

log "Header summary"
# parsed/header_summary.txt: the first 40 lines of the root response headers,
# followed by the headers most relevant for fingerprinting and security review.
root_headers="$OUTDIR/responses/root.headers"
header_pattern='server:|x-powered-by:|content-security-policy:|access-control-|strict-transport-security:|set-cookie:'
{
  echo '== root headers =='
  # Guarded so a missing headers file yields an empty section instead of
  # aborting the whole run under `set -e`.
  if [[ -r "$root_headers" ]]; then
    sed -n '1,40p' "$root_headers"
  fi
  echo
  echo '== security headers =='
  if [[ -r "$root_headers" ]]; then
    # Exit status 1 from rg/grep only means "no match", hence `|| true`.
    if have rg; then
      rg -ni -e "$header_pattern" -- "$root_headers" || true
    else
      # ripgrep is optional; grep -E produces the same line:text output.
      grep -niE -e "$header_pattern" -- "$root_headers" || true
    fi
  fi
} > "$OUTDIR/parsed/header_summary.txt"

# Optional path fuzzing: needs both a wordlist argument and an installed ffuf.
if [[ -z "$WORDLIST" ]] || ! have ffuf; then
  log "Skipping ffuf (no wordlist or ffuf missing)"
else
  log "Running ffuf with $WORDLIST"
  # Best effort: ffuf output is discarded and a failure does not stop the run;
  # 404 responses are filtered out and the report is written as Markdown.
  ffuf \
    -u "${URL%/}/FUZZ" \
    -w "$WORDLIST" \
    -fc 404 \
    -of md \
    -o "$OUTDIR/parsed/ffuf_paths.md" \
    > /dev/null 2>&1 || true
fi

# Leave a short checklist next to the results pointing at the files worth
# reading first.
printf '%s\n' \
  'Suggested next steps:' \
  '1. Read parsed/header_summary.txt' \
  '2. Review parsed/forms.txt and parsed/inputs.txt' \
  '3. Inspect parsed/scripts.txt and parsed/js_hits.txt' \
  '4. Check responses for robots.txt, .git/HEAD, .env, swagger.json' \
  '5. If ffuf ran, review parsed/ffuf_paths.md' \
  > "$OUTDIR/notes/next_steps.txt"

log "Done. Output: $OUTDIR"
