diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 5fc1da0..1ea0578 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -156,9 +156,13 @@ Reliability rules that keep "one click → one step" true under load: one. `STEPFORGE_CLICK_SELFTEST=1 npm start` exercises the whole pipeline in a -real Electron session: it reports steps-per-click and marker offsets, then -runs a fast-burst-then-finish scenario that must save every click. -`STEPFORGE_CAPTURE_LOG=1` prints one diagnostic line per click decision. +real Electron session across four scenarios — marker accuracy (0.00% +offset), a fast-burst-then-finish that must save every click, the +warm-before-arm first click, and the ~200ms debounce. It runs automatically +as `tests/checks/test_click_capture_selftest.sh` (skipped only when the host +has no capture environment), so a regression in click→screenshot→step +behavior fails the suite. `STEPFORGE_CAPTURE_LOG=1` prints one diagnostic +line per click decision. ## Security Rules diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index cece653..0ac8263 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -36,6 +36,11 @@ Keep-a-Changelog conventions; versions follow semver. screenshot is late" — while every later click was fine. Now frames are buffering by the time the window tucks away, so the first click is served a pre-click frame like the rest. + - The whole click→screenshot→step pipeline is guarded end to end by + `tests/checks/test_click_capture_selftest.sh`, which runs a real Electron + session and asserts marker accuracy, no dropped burst clicks, the first + click of a session captured, and the debounce — so this behavior fails + the suite if it ever regresses. 
### Added
diff --git a/tests/checks/test_click_capture_selftest.sh b/tests/checks/test_click_capture_selftest.sh
new file mode 100755
index 0000000..5e5b9b9
--- /dev/null
+++ b/tests/checks/test_click_capture_selftest.sh
@@ -0,0 +1,96 @@
+#!/usr/bin/env bash
+# Workflow check: run the full click-capture pipeline end to end in a real
+# Electron session (STEPFORGE_CLICK_SELFTEST) and assert every scenario
+# passes. This guards the click→screenshot→step behavior — exact markers,
+# one step per click, fast bursts not dropped on finish, the first click of a
+# session captured (warm-before-arm), and the ~200ms debounce — against
+# regressions that unit tests alone can't catch because they don't exercise
+# the live capture stream and window timing.
+#
+# Scenarios and their pass lines (see app/main.js STEPFORGE_CLICK_SELFTEST):
+#   steps:    3 of 3, each marker "off by 0.00% of screen"
+#   burst:    8 of 8 (fast clicks + immediate finish, none lost)
+#   arm:      warmup click ignored, first armed click captured
+#   debounce: 4 of 4 (40ms burst collapses to 1, three 300ms clicks kept)
+#
+# If the environment can't run a desktop capture at all (no display/stream),
+# the scenarios never print, so the check skips rather than failing CI.
+#
+# Exit status: 0 when every scenario passed or the check was skipped, 1 when
+# an assertion failed.
+
+set -euo pipefail
+
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$ROOT_DIR"
+
+TMP_ROOT="$(mktemp -d)"
+trap 'rm -rf -- "$TMP_ROOT"' EXIT
+
+LOG_FILE="$TMP_ROOT/selftest.log"
+
+# `timeout` is GNU coreutils and is not guaranteed to exist (some hosts only
+# ship it as `gtimeout`). Resolve it up front so a missing wrapper is reported
+# as such instead of being mistaken for a host with no capture environment.
+TIMEOUT_BIN=""
+for candidate in timeout gtimeout; do
+  if command -v "$candidate" >/dev/null 2>&1; then
+    TIMEOUT_BIN="$candidate"
+    break
+  fi
+done
+if [[ -z "$TIMEOUT_BIN" ]]; then
+  echo "click capture selftest SKIPPED (no timeout/gtimeout utility on this host)"
+  exit 0
+fi
+
+# The log lines are what gets asserted, not the exit status, so a non-zero
+# exit must not abort here. Keep the status anyway: a failure report can then
+# show it (timeout exits 124 when the 120s limit was hit).
+run_status=0
+STEPFORGE_DATA_DIR="$TMP_ROOT/data" STEPFORGE_CLICK_SELFTEST=1 \
+  "$TIMEOUT_BIN" 120s npm start >"$LOG_FILE" 2>&1 || run_status=$?
+
+# The self-test always prints this first line once it begins; without it the
+# app never reached the scenarios (couldn't launch / no capture environment).
+if ! grep -q 'CLICK-SELFTEST source:' "$LOG_FILE"; then
+  echo "click capture selftest SKIPPED (no capture environment on this host)"
+  exit 0
+fi
+
+# Report a failed assertion on stderr, replay the self-test's own log lines
+# for context, and abort the check with status 1.
+# Arguments: $1 - human-readable reason for the failure
+fail() {
+  echo "click capture selftest FAILED: $1" >&2
+  echo "npm start exit status: $run_status" >&2
+  echo "----- selftest output -----" >&2
+  grep -E 'CLICK-SELFTEST' "$LOG_FILE" >&2 || true
+  exit 1
+}
+
+# Any scenario that detected a problem prints FAIL or an ERROR line.
+if grep -Eq 'CLICK-SELFTEST.*(FAIL|ERROR)' "$LOG_FILE"; then
+  fail "a scenario reported FAIL/ERROR"
+fi
+
+# Per-scenario positive assertions (deterministic with synthetic clicks).
+grep -q 'CLICK-SELFTEST steps: 3 of 3' "$LOG_FILE" \
+  || fail "marker scenario did not capture 3 of 3 clicks"
+
+# All three markers must land exactly on the injected click positions. The
+# dot is escaped so only a literal "0.00%" counts as an exact hit.
+marker_ok="$(grep -c 'CLICK-SELFTEST marker [0-9]*: off by 0\.00% of screen' "$LOG_FILE" || true)"
+[[ "$marker_ok" -eq 3 ]] \
+  || fail "expected 3 markers at 0.00% offset, found $marker_ok"
+
+grep -q 'CLICK-SELFTEST burst: 8 of 8' "$LOG_FILE" \
+  || fail "burst scenario lost clicks on finish"
+
+grep -q 'CLICK-SELFTEST arm:.*OK' "$LOG_FILE" \
+  || fail "arm scenario did not capture the first armed click"
+
+grep -q 'CLICK-SELFTEST debounce: 4 of 4 expected OK' "$LOG_FILE" \
+  || fail "debounce scenario did not collapse the burst / keep deliberate clicks"
+
+echo "click capture selftest OK (markers, burst, arm, debounce all verified)"