From aafed7eca7250179401740e94429ce93ed65cc4e Mon Sep 17 00:00:00 2001
From: Iisyourdad <tyler.westbrook1@gmail.com>
Date: Fri, 12 Jun 2026 09:25:39 -0500
Subject: [PATCH] Guard the click-capture pipeline with an automated end-to-end
 check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add tests/checks/test_click_capture_selftest.sh: runs the real Electron
STEPFORGE_CLICK_SELFTEST session and asserts every scenario passes — 3/3
markers at 0.00% offset, 8/8 burst clicks kept on finish, the first armed
click captured (warmup click ignored), and the debounce (4/4). Picked up
automatically by tests/run_test.sh. Skips cleanly when the host has no
capture environment so it never falsely fails CI, but fails the suite on any
real regression in click->screenshot->step behavior.

Document the guard in ARCHITECTURE.md and CHANGELOG.md.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
---
 docs/ARCHITECTURE.md                        | 10 ++-
 docs/CHANGELOG.md                           |  5 ++
 tests/checks/test_click_capture_selftest.sh | 70 +++++++++++++++++++++
 3 files changed, 82 insertions(+), 3 deletions(-)
 create mode 100755 tests/checks/test_click_capture_selftest.sh

diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 5fc1da0..1ea0578 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -156,9 +156,13 @@ Reliability rules that keep "one click → one step" true under load:
   one.
 
 `STEPFORGE_CLICK_SELFTEST=1 npm start` exercises the whole pipeline in a
-real Electron session: it reports steps-per-click and marker offsets, then
-runs a fast-burst-then-finish scenario that must save every click.
-`STEPFORGE_CAPTURE_LOG=1` prints one diagnostic line per click decision.
+real Electron session across four scenarios — marker accuracy (0.00%
+offset), a fast-burst-then-finish that must save every click, the
+warm-before-arm first click, and the ~200ms debounce. It runs automatically
+as `tests/checks/test_click_capture_selftest.sh` (skipped only when the host
+has no capture environment), so a regression in click→screenshot→step
+behavior fails the suite. `STEPFORGE_CAPTURE_LOG=1` prints one diagnostic
+line per click decision.
 
 ## Security Rules
 
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
index cece653..0ac8263 100644
--- a/docs/CHANGELOG.md
+++ b/docs/CHANGELOG.md
@@ -36,6 +36,11 @@ Keep-a-Changelog conventions; versions follow semver.
     screenshot is late" — while every later click was fine. Now frames are
     buffering by the time the window tucks away, so the first click is
     served a pre-click frame like the rest.
+  - The whole click→screenshot→step pipeline is guarded end to end by
+    `tests/checks/test_click_capture_selftest.sh`, which runs a real Electron
+    session and asserts marker accuracy, no dropped burst clicks, the first
+    click of a session captured, and the debounce — so the suite fails if
+    this behavior ever regresses.
 
 ### Added
 
diff --git a/tests/checks/test_click_capture_selftest.sh b/tests/checks/test_click_capture_selftest.sh
new file mode 100755
index 0000000..5e5b9b9
--- /dev/null
+++ b/tests/checks/test_click_capture_selftest.sh
@@ -0,0 +1,70 @@
+#!/usr/bin/env bash
+# Workflow check: run the full click-capture pipeline end to end in a real
+# Electron session (STEPFORGE_CLICK_SELFTEST) and assert every scenario
+# passes. This guards the click→screenshot→step behavior — exact markers,
+# one step per click, fast bursts not dropped on finish, the first click of a
+# session captured (warm-before-arm), and the ~200ms debounce — against
+# regressions that unit tests alone can't catch because they don't exercise
+# the live capture stream and window timing.
+#
+# Scenarios and their pass lines (see app/main.js STEPFORGE_CLICK_SELFTEST):
+#   steps:    3 of 3, each marker "off by 0.00% of screen"
+#   burst:    8 of 8  (fast clicks + immediate finish, none lost)
+#   arm:      warmup click ignored, first armed click captured
+#   debounce: 4 of 4  (40ms burst collapses to 1, three 300ms clicks kept)
+#
+# If the environment can't run a desktop capture at all (no display/stream),
+# the scenarios never print, so the check skips rather than failing CI.
+
+set -euo pipefail
+
+# Work from the repository root, two directories above this script.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+cd "$ROOT_DIR"
+
+# Scratch space for the app data dir and the captured log; removed on exit.
+TMP_ROOT="$(mktemp -d)"
+trap 'rm -rf "$TMP_ROOT"' EXIT
+LOG_FILE="$TMP_ROOT/selftest.log"
+# The exit status is ignored on purpose: the verdict is read from the log.
+STEPFORGE_DATA_DIR="$TMP_ROOT/data" STEPFORGE_CLICK_SELFTEST=1 \
+  timeout 120s npm start >"$LOG_FILE" 2>&1 || true
+
+# The self-test prints a "source:" line as soon as it begins. Without it the
+# app never reached the scenarios (couldn't launch / no capture environment).
+if ! grep -q 'CLICK-SELFTEST source:' "$LOG_FILE"; then
+  echo "click capture selftest SKIPPED (no capture environment on this host)"
+  exit 0
+fi
+
+# Print the reason ($1) and the self-test log lines to stderr, then exit 1.
+fail() {
+  printf '%s\n' "click capture selftest FAILED: $1" "----- selftest output -----" >&2
+  grep -E 'CLICK-SELFTEST' "$LOG_FILE" >&2 || true
+  exit 1
+}
+
+# Any scenario that detected a problem prints FAIL or an ERROR line.
+if grep -Eq 'CLICK-SELFTEST.*(FAIL|ERROR)' "$LOG_FILE"; then
+  fail "a scenario reported FAIL/ERROR"
+fi
+
+# Per-scenario positive assertions (deterministic with synthetic clicks).
+grep -q 'CLICK-SELFTEST steps: 3 of 3' "$LOG_FILE" \
+  || fail "marker scenario did not capture 3 of 3 clicks"
+
+# All three markers must land exactly on the injected click positions; the
+# dot is escaped so only a literal "0.00%" counts as an exact hit.
+marker_ok="$(grep -c 'CLICK-SELFTEST marker [0-9]*: off by 0\.00% of screen' "$LOG_FILE" || true)"
+[[ "$marker_ok" -eq 3 ]] || fail "expected 3 markers at 0.00% offset, found $marker_ok"
+
+grep -q 'CLICK-SELFTEST burst: 8 of 8' "$LOG_FILE" \
+  || fail "burst scenario lost clicks on finish"
+
+grep -q 'CLICK-SELFTEST arm:.*OK' "$LOG_FILE" \
+  || fail "arm scenario did not capture the first armed click"
+
+grep -q 'CLICK-SELFTEST debounce: 4 of 4 expected OK' "$LOG_FILE" \
+  || fail "debounce scenario did not collapse the burst / keep deliberate clicks"
+
+echo "click capture selftest OK (markers, burst, arm, debounce all verified)"