Add hayabusa smoke test (fetch sample-evtx → scan → eval → cleanup)

Verifies image present, EVTX corpus available (clones on demand), the container exits cleanly, all four output artefact types are produced non-empty, then prints detection count + MITRE TTP coverage. Default SUBSET=DeepBlueCLI (21 EVTX, ~30s). Documented alternatives: YamatoSecurity, EVTX-ATTACK-SAMPLES, EVTX-to-MITRE-Attack, or empty for the full 599-file bundle. KEEP_DATA=1 keeps the cloned corpus on disk for fast reruns. Validated end-to-end on amd64 Linux: 7/7 PASS, 8,626 detections from DeepBlueCLI subset, 31 distinct MITRE TTPs. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-07 11:45:33 +02:00
parent 6267ae15c5
commit d5f4ba8862
1 changed files with 92 additions and 0 deletions
@@ -0,0 +1,92 @@
 #!/bin/bash
 # Smoke test for hayabusa: fetch sample EVTX, run the container, check that
 # all expected output artefacts landed and contain detections, print stats.
 #
 # Defaults to the DeepBlueCLI subset (21 files, ~30s).
 #
 # Available SUBSET values (under test-data/sample-evtx/):
 #   DeepBlueCLI           21 files, ~30s   — well-known PowerShell + auth attacks
 #   YamatoSecurity        16 files, ~20s   — Yamato Security's own samples
 #   EVTX-ATTACK-SAMPLES  278 files, ~5min  — sbousseaden's MITRE-mapped corpus
 #   EVTX-to-MITRE-Attack 284 files, ~5min  — mdecrevoisier's MITRE-mapped corpus
 #   ""                   599 files, ~10min — full bundle (set SUBSET="" or empty)
 #
 # Env vars:
 #   TAG=ls-hayabusa:test       which image to test
 #   SUBSET=DeepBlueCLI         which subdir of sample-evtx to scan (or "" for all)
 #   KEEP_DATA=1                keep the fetched corpus (test-data/) after the run; by default it is deleted on exit
 # Abort on use of an unset variable.  Command failures are not fatal by
 # default (no -e); the steps that matter are checked explicitly.
 set -u
 TAG="${TAG:-ls-hayabusa:test}"
 # Plain "-" (not ":-"): only an *unset* SUBSET falls back to the default, so
 # an explicitly empty SUBSET="" is preserved and can select the full bundle.
 SUBSET="${SUBSET-DeepBlueCLI}"
 KEEP_DATA="${KEEP_DATA:-0}"
 # Work from the script's own directory.  The exit handler runs rm -rf
 # beneath ROOT, so never carry on from an unexpected working directory.
 cd -- "$(dirname -- "$0")" || { echo "cannot cd to script directory" >&2; exit 1; }
 ROOT="$(pwd)"
 DATA="$ROOT/test-data/sample-evtx"
 # Scratch directory that receives the scan output.
 OUT="$(mktemp -d)" || { echo "mktemp -d failed" >&2; exit 1; }
 # Exit handler: always remove the scratch directory; remove test-data/ as
 # well when KEEP_DATA is 0 (any other value keeps it).
 cleanup() {
   rm -rf -- "$OUT"
   if [ "$KEEP_DATA" = 0 ]; then
     rm -rf -- "${ROOT:?}/test-data"
   fi
 }
 trap cleanup EXIT
 # Running totals, incremented by ok() and bad().
 pass=0; fail=0
 # Report a passed check ("PASS  <message>") and count it.
 #   $1 - message describing the check
 ok() {
   printf '%s\n' "PASS  $1"
   pass=$((pass + 1))
 }
 # Report a failed check ("FAIL  <message>") and count it.
 #   $1 - message describing the failure
 bad() {
   printf '%s\n' "FAIL  $1"
   fail=$((fail + 1))
 }
 # 1. Image present?  Without it there is nothing to test, so stop right away.
 if ! docker image inspect "$TAG" >/dev/null 2>&1; then
   bad "image $TAG not present (build it first)"
   exit 1
 fi
 ok "image $TAG present"
 # 2. Sample EVTX available (clone on demand).
 # The corpus is fetched only when the sample-evtx directory is absent.
 if [ ! -d "$DATA" ]; then
   echo "Fetching sample EVTX (Yamato bundle, ~250 MB)..."
   # Report a failed fetch as such instead of letting it surface later as a
   # less specific "no EVTX" failure.
   if ! ./fetch-test-data.sh >/dev/null; then
     bad "fetch-test-data.sh failed"
     exit 1
   fi
 fi
 # An empty SUBSET means "scan the whole bundle"; otherwise one subdirectory.
 if [ -z "$SUBSET" ]; then
   SCAN_PATH="$DATA"
 else
   SCAN_PATH="$DATA/$SUBSET"
 fi
 # Count the .evtx files under the scan path; a missing directory counts as
 # zero rather than making find print an error.
 n_evtx=0
 if [ -d "$SCAN_PATH" ]; then
   n_evtx=$(find "$SCAN_PATH" -name "*.evtx" | wc -l | tr -d ' ')
 fi
 if [ "$n_evtx" -gt 0 ]; then
   ok "found $n_evtx EVTX files in ${SUBSET:-<full bundle>}"
 else
   bad "no EVTX in $SCAN_PATH"
   exit 1
 fi
 # 3. Run scan.
 # The scan path is mounted read-only at /data, the scratch directory at
 # /output, and the container gets no network.  Combined stdout/stderr goes
 # to $OUT/.run.log so its tail can be shown when the run fails.
 # An empty SUBSET is shown as "<full bundle>", matching the file-count line.
 echo "Running scan against ${SUBSET:-<full bundle>} → $OUT ..."
 docker run --rm --network=none \
     -v "$SCAN_PATH:/data:ro" \
     -v "$OUT:/output" \
     "$TAG" >"$OUT/.run.log" 2>&1
 scan_status=$?
 if [ "$scan_status" -eq 0 ]; then
   ok "container exited cleanly"
 else
   # Counted as a failure but not fatal here: the script carries on.
   bad "container exited non-zero"
   tail -20 "$OUT/.run.log"
 fi
 # 4. Expected artefacts.
 # Print the first existing path among the arguments, or nothing if none
 # exists.  Fed with a glob this selects the first match without parsing
 # `ls`; an unmatched glob stays literal and is rejected by the -e test.
 first_match() {
   local candidate
   for candidate in "$@"; do
     if [ -e "$candidate" ]; then
       printf '%s\n' "$candidate"
       return 0
     fi
   done
   return 0
 }
 # Record one file artefact.
 #   $1 - label used in the PASS line ("<label> exists (<size>)")
 #   $2 - path to check (may be empty when nothing matched)
 #   $3 - message for the FAIL line
 check_artefact() {
   if [ -s "$2" ]; then
     ok "$1 exists ($(du -h "$2" | cut -f1))"
   else
     bad "$3"
   fi
 }
 html=$(first_match "$OUT"/hayabusa_*.html)
 csv=$(first_match "$OUT"/hayabusa_*.ts.csv)
 jsonl=$(first_match "$OUT"/hayabusa_*_takajo.jsonl)
 takajo="$OUT/takajo"
 check_artefact "HTML report" "$html" "HTML report missing/empty"
 check_artefact "Timesketch CSV" "$csv" "CSV missing/empty"
 check_artefact "Takajo JSONL" "$jsonl" "JSONL missing/empty"
 # Count the non-hidden entries of takajo/; an absent or empty directory fails.
 takajo_entries=0
 if [ -d "$takajo" ]; then
   for entry in "$takajo"/*; do
     # Skip the literal pattern left behind by an unmatched glob.
     if [ -e "$entry" ] || [ -L "$entry" ]; then
       takajo_entries=$((takajo_entries + 1))
     fi
   done
 fi
 if [ "$takajo_entries" -gt 0 ]; then
   ok "takajo/ directory has $takajo_entries files"
 else
   bad "takajo/ missing"
 fi
 # 5. Detection count: lines in the CSV minus one for the header row.
 # Printed only when a non-empty CSV was found.
 if [ -s "$csv" ]; then
   csv_lines=$(wc -l < "$csv")
   detections=$((csv_lines - 1))
   printf '\n%s\n' "Detections in timeline: $detections"
 fi
 # 6. TTP coverage: number of distinct "T<digits> tokens in the heatmap JSON.
 # Printed only when the heatmap file exists and is non-empty.
 heatmap="$takajo/MitreTTP-Heatmap.json"
 if [ -s "$heatmap" ]; then
   ttp_count=$(grep -oE '"T[0-9]+' "$heatmap" | sort -u | wc -l | tr -d ' ')
   printf '%s\n' "Distinct MITRE TTPs surfaced: $ttp_count"
 fi
 # Final tally.  The last command sets the script's exit status: zero only
 # when no check failed.
 printf '\n%s\n' "Summary: $pass pass, $fail fail"
 test "$fail" -eq 0