#!/bin/bash
# Smoke test for hayabusa: fetch sample EVTX, run the container, check that
# all expected output artefacts landed and contain detections, print stats.
#
# Defaults to the DeepBlueCLI subset (21 files, ~30s).
#
# Available SUBSET values (under test-data/sample-evtx/):
#   DeepBlueCLI            21 files, ~30s   — well-known PowerShell + auth attacks
#   YamatoSecurity         16 files, ~20s   — Yamato Security's own samples
#   EVTX-ATTACK-SAMPLES   278 files, ~5min  — sbousseaden's MITRE-mapped corpus
#   EVTX-to-MITRE-Attack  284 files, ~5min  — mdecrevoisier's MITRE-mapped corpus
#   ""                    599 files, ~10min — full bundle (set SUBSET="" or empty)
#
# Env vars:
#   TAG=ls-hayabusa:test   which image to test
#   SUBSET=DeepBlueCLI     which subdir of sample-evtx to scan (or "" for all)
#   KEEP_DATA=1            don't delete test-data/sample-evtx after run

set -u

TAG="${TAG:-ls-hayabusa:test}"
# Use `-` (not `:-`) so that only an *unset* SUBSET falls back to the default;
# an explicitly empty SUBSET="" must survive to select the full bundle.
SUBSET="${SUBSET-DeepBlueCLI}"
KEEP_DATA="${KEEP_DATA:-0}"

# Human-readable name of what is being scanned, for log messages only.
SUBSET_LABEL="${SUBSET:-full bundle}"

# Work relative to the script's own directory. Abort if that fails: every path
# below (including the ones removed on exit) is derived from the current dir.
cd "$(dirname "$0")" || {
  echo "cannot cd to script directory" >&2
  exit 1
}
ROOT="$(pwd)"
DATA="$ROOT/test-data/sample-evtx"
OUT="$(mktemp -d)" || {
  echo "mktemp -d failed" >&2
  exit 1
}

# Exit handler: always drop the scratch output directory; unless KEEP_DATA is
# set to something other than 0, also remove the whole test-data/ tree.
cleanup() {
  rm -rf -- "${OUT:?}"
  if [ "$KEEP_DATA" = 0 ]; then
    rm -rf -- "${ROOT:?}/test-data"
  fi
}
trap cleanup EXIT

pass=0
fail=0

# Print a PASS line for check "$1" and bump the pass counter.
ok() {
  echo "PASS $1"
  pass=$((pass + 1))
}

# Print a FAIL line for check "$1" and bump the fail counter.
bad() {
  echo "FAIL $1"
  fail=$((fail + 1))
}

# 1. Image present?
if docker image inspect "$TAG" >/dev/null 2>&1; then
  ok "image $TAG present"
else
  bad "image $TAG not present (build it first)"
  exit 1
fi

# 2. Sample EVTX available (clone on demand).
if [ ! -d "$DATA" ]; then
  echo "Fetching sample EVTX (Yamato bundle, ~250 MB)..."
  # A failed fetch is caught by the EVTX count check just below.
  ./fetch-test-data.sh >/dev/null
fi

if [ -n "$SUBSET" ]; then
  SCAN_PATH="$DATA/$SUBSET"
else
  SCAN_PATH="$DATA"
fi

n_evtx=$(find "$SCAN_PATH" -name "*.evtx" | wc -l | tr -d ' ')
if [ "$n_evtx" -gt 0 ]; then
  ok "found $n_evtx EVTX files in $SUBSET_LABEL"
else
  bad "no EVTX in $SCAN_PATH"
  exit 1
fi

# 3. Run scan.
echo "Running scan against $SUBSET_LABEL → $OUT ..."
# Run the image with networking disabled, the sample EVTX mounted read-only at
# /data and the scratch directory mounted at /output. All container output is
# captured in .run.log; its tail is shown only when the container fails.
if docker run --rm --network=none \
  -v "$SCAN_PATH:/data:ro" \
  -v "$OUT:/output" \
  "$TAG" >"$OUT/.run.log" 2>&1; then
  ok "container exited cleanly"
else
  bad "container exited non-zero"
  tail -20 "$OUT/.run.log"
fi

# Print the first argument that names an existing path, or nothing at all.
# Intended to be called with an unquoted glob: when nothing matches, bash
# passes the literal pattern, which fails the existence test.
first_match() {
  local candidate
  for candidate in "$@"; do
    if [ -e "$candidate" ]; then
      printf '%s\n' "$candidate"
      return 0
    fi
  done
  return 0
}

# Record PASS (with the file's size) when "$1" is a non-empty file, else FAIL.
#   $1 - path to check (may be empty when no artefact was found)
#   $2 - label for the PASS line
#   $3 - message for the FAIL line
check_artefact() {
  local path=$1 pass_label=$2 fail_msg=$3
  if [ -s "$path" ]; then
    ok "$pass_label ($(du -h "$path" | cut -f1))"
  else
    bad "$fail_msg"
  fi
}

# Print the number of non-hidden entries directly inside directory "$1"
# (0 when the directory does not exist or is empty).
count_entries() {
  local dir=$1 entry total=0
  if [ -d "$dir" ]; then
    for entry in "$dir"/*; do
      if [ -e "$entry" ] || [ -L "$entry" ]; then
        total=$((total + 1))
      fi
    done
  fi
  printf '%s\n' "$total"
}

# 4. Expected artefacts.
html=$(first_match "$OUT"/hayabusa_*.html)
csv=$(first_match "$OUT"/hayabusa_*.ts.csv)
jsonl=$(first_match "$OUT"/hayabusa_*_takajo.jsonl)
takajo="$OUT/takajo"

check_artefact "$html" "HTML report exists" "HTML report missing/empty"
check_artefact "$csv" "Timesketch CSV exists" "CSV missing/empty"
check_artefact "$jsonl" "Takajo JSONL exists" "JSONL missing/empty"

takajo_files=$(count_entries "$takajo")
if [ "$takajo_files" -gt 0 ]; then
  ok "takajo/ directory has $takajo_files files"
else
  bad "takajo/ missing"
fi

# 5. Detection count from CSV (skip header row).
if [ -s "$csv" ]; then
  detections=$(($(wc -l <"$csv") - 1))
  echo
  echo "Detections in timeline: $detections"
fi

# 6. TTP coverage.
# Counts distinct quoted tokens that start with "T" followed by digits.
if [ -s "$takajo/MitreTTP-Heatmap.json" ]; then
  ttp_count=$(grep -oE '"T[0-9]+' "$takajo/MitreTTP-Heatmap.json" | sort -u | wc -l | tr -d ' ')
  echo "Distinct MITRE TTPs surfaced: $ttp_count"
fi

echo
echo "Summary: $pass pass, $fail fail"
# The final test's status is the last command's status: zero only when no
# check failed.
[ "$fail" -eq 0 ]