Fix dropped clicks and late screenshots in fast recording
Template tests / tests (push) Successful in 2m13s
Template tests / tests (pull_request) Successful in 1m48s

Root cause of 'I clicked many times but only got two screenshots':
finishing/pausing a session called backend.stop(), which cancelled every
in-flight frame request to null. Clicks whose PNG had not finished
encoding yet were then dropped — only the first few survived.

Fixes:
- Stream backend now *drains* on stop: it stops accepting new requests but
  keeps the worker alive until frames already selected for queued clicks
  finish encoding. stop({ immediate: true }) keeps the old abandon-now
  behavior for an unhealthy worker.
- Two-stage worker reply: a fast 'frame-selected' ack pins the pairing and
  proves liveness; the slow PNG payload follows. A slow encode (seconds on
  software-rendered hosts) is no longer mistaken for a dead worker, which
  had been forcing the post-click fresh-shot fallback (late screenshots).
- Queued clicks carry their guide id and are stored even if the session
  ends while they wait in the queue.
- The tray gesture that stops a session is discarded by matching its
  recorded screen position, not a time window — a fast workflow click near
  the stop is no longer collateral damage. (Replaces the earlier grace
  window, which dropped whole bursts.)
- A click on a display with no ready stream resolves null so the caller
  fresh-shots the correct monitor instead of returning another screen.
- STEPFORGE_CAPTURE_LOG=1 prints one line per click decision; the
  second-instance handler now surfaces the running window instead of
  exiting silently.
- Self-test gains a fast-burst-then-finish scenario (8/8 saved) and the
  marker/coordinate checks remain at 0.00% offset.

Tests: 133 unit + all repo checks passing.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Iisyourdad
2026-06-12 07:56:31 -05:00
parent 5ca59805dc
commit 951bba7a21
7 changed files with 464 additions and 62 deletions
+105 -16
View File
@@ -73,6 +73,20 @@ const CLICK_CAPTURE_HIDE_DELAY_MS = 25;
// window wide enough to outlast any processing hiccup but the count low. // window wide enough to outlast any processing hiccup but the count low.
const RECENT_FRAME_RETENTION_MS = 4000; const RECENT_FRAME_RETENTION_MS = 4000;
const RECENT_FRAME_LIMIT = 4; const RECENT_FRAME_LIMIT = 4;
// The click that stops/pauses a session via the tray reaches the OS hook at
// almost the same instant the tray handler fires. We discard at most that
// one click — and only when it matches the recorded gesture in *both* time
// and position, so a fast workflow click that merely happens to land near
// the stop is never mistaken for the stop itself.
const SESSION_STOP_CLICK_WINDOW_MS = 700;
const SESSION_STOP_CLICK_RADIUS_PX = 8;
// Per-click diagnostics, enabled with STEPFORGE_CAPTURE_LOG=1. Cheap enough
// to leave in: one line per click/frame decision, nothing per frame-loop tick.
const CAPTURE_LOG = Boolean(process.env.STEPFORGE_CAPTURE_LOG);
function clog(...args) {
if (CAPTURE_LOG) console.log('[capture]', ...args);
}
function hasBinary(name) { function hasBinary(name) {
try { try {
@@ -198,23 +212,25 @@ class CaptureService {
{ label: 'Capture now', click: () => this.sessionCapture('manual').then(rebuild).catch(() => {}) }, { label: 'Capture now', click: () => this.sessionCapture('manual').then(rebuild).catch(() => {}) },
{ {
label: this.session && this.session.paused ? 'Resume capturing' : 'Pause capturing', label: this.session && this.session.paused ? 'Resume capturing' : 'Pause capturing',
click: () => { this.togglePause(); rebuild(); }, click: () => { this.noteUiStopGesture(); this.togglePause(); rebuild(); },
}, },
{ {
label: 'Open StepForge (pauses capture)', label: 'Open StepForge (pauses capture)',
click: () => { click: () => {
this.noteUiStopGesture();
this.togglePause(true); this.togglePause(true);
this.showWindow(); this.showWindow();
rebuild(); rebuild();
}, },
}, },
{ type: 'separator' }, { type: 'separator' },
{ label: 'Finish session', click: () => this.finishSession() }, { label: 'Finish session', click: () => { this.noteUiStopGesture(); this.finishSession(); } },
])); ]));
}; };
rebuild(); rebuild();
this.rebuildTrayMenu = rebuild; this.rebuildTrayMenu = rebuild;
this.tray.on('click', () => { this.tray.on('click', () => {
this.noteUiStopGesture();
this.togglePause(true); this.togglePause(true);
this.showWindow(); this.showWindow();
rebuild(); rebuild();
@@ -230,6 +246,36 @@ class CaptureService {
this.rebuildTrayMenu = null; this.rebuildTrayMenu = null;
} }
/**
* Record that the user just stopped/paused capture from StepForge's own UI
* (tray icon or its menu). The physical click that did so is also seen by
* the OS hook and would otherwise queue as a workflow step; isStopGesture
* uses this to discard exactly that one click — matched by position, not
* just time, so a real fast click elsewhere is never lost.
*/
noteUiStopGesture() {
let pos = null;
try { pos = this.screen.getCursorScreenPoint(); } catch { pos = null; }
this.uiStopGesture = { at: Date.now(), pos };
}
/** True when a queued click is the tray gesture that stopped the session. */
isStopGesture(clickPos, clickAt) {
const g = this.uiStopGesture;
if (!g) return false;
if (Math.abs((clickAt || Date.now()) - g.at) > SESSION_STOP_CLICK_WINDOW_MS) return false;
// No position to compare (e.g. cursor read failed): fall back to the
// time window alone, but only consume the gesture once.
if (!g.pos || !clickPos) {
this.uiStopGesture = null;
return true;
}
const near = Math.abs(clickPos.x - g.pos.x) <= SESSION_STOP_CLICK_RADIUS_PX
&& Math.abs(clickPos.y - g.pos.y) <= SESSION_STOP_CLICK_RADIUS_PX;
if (near) this.uiStopGesture = null; // one stop click per gesture
return near;
}
showWindow() { showWindow() {
const win = this.getWindow(); const win = this.getWindow();
if (win && !win.isDestroyed()) { if (win && !win.isDestroyed()) {
@@ -319,10 +365,19 @@ class CaptureService {
/** One capture inside the active session (hotkey/click/interval/manual). */ /** One capture inside the active session (hotkey/click/interval/manual). */
async sessionCapture(trigger = 'hotkey', clickPos = null, clickMeta = null) { async sessionCapture(trigger = 'hotkey', clickPos = null, clickMeta = null) {
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' }; // A click that was registered while recording carries its guide id
// (see enqueueClickCapture) and must become a step even if the session
// was paused or finished while it sat behind slower clicks in the
// queue. Dropping queued clicks at stop time is how "I clicked five
// times and only got two steps" happens on hosts with slow encodes.
const queuedClickGuide = trigger === 'click' && clickMeta && clickMeta.guideId
? clickMeta.guideId
: null;
if (!this.session || this.session.paused) {
if (!queuedClickGuide) return { ok: false, reason: 'no active capture session' };
} else if (trigger !== 'manual' && this.userIsInApp()) {
// Automatic triggers stand down while the user is in StepForge, so the // Automatic triggers stand down while the user is in StepForge, so the
// app stays clickable mid-session and never screenshots itself. // app stays clickable mid-session and never screenshots itself.
if (trigger !== 'manual' && this.userIsInApp()) {
return { ok: false, reason: 'skipped — StepForge is focused' }; return { ok: false, reason: 'skipped — StepForge is focused' };
} }
@@ -338,14 +393,29 @@ class CaptureService {
const frame = clickMeta && clickMeta.framePromise const frame = clickMeta && clickMeta.framePromise
? await clickMeta.framePromise ? await clickMeta.framePromise
: await this.frameForClick(clickPos, clickAt); : await this.frameForClick(clickPos, clickAt);
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' }; const sessionLive = this.session && !this.session.paused;
const guideId = sessionLive ? this.session.guideId : queuedClickGuide;
if (!guideId) return { ok: false, reason: 'no active capture session' };
// The tray gesture that stopped the session is itself a hook click in
// the queue — storing it would append a junk step of the menu. Discard
// only that one click, matched by position so a fast workflow click is
// never collateral damage.
if (!sessionLive && this.isStopGesture(clickPos, clickAt)) {
clog('click@', clickAt, 'discarded — it triggered the session stop');
return { ok: false, reason: 'click stopped the session' };
}
if (frame) { if (frame) {
const result = this.storeFrameAsStep(this.session.guideId, frame.mode, frame, clickPos); clog('click@', clickAt, 'frame', frame.source || 'loop',
if (result.ok) this.noteStepAdded(result.step, trigger); 'started', frame.startedAt - clickAt, 'ms, captured', frame.capturedAt - clickAt, 'ms rel. click');
const result = this.storeFrameAsStep(guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger, guideId);
return result; return result;
} }
// No usable frame (loop not running or grab failing): fall through // No usable frame: fall through to a one-off fresh shot — but only
// to a one-off fresh shot. // while still recording. After a stop, a fresh shot would show
// whatever replaced the user's workflow on screen.
clog('click@', clickAt, 'no frame qualified — falling back to a fresh (post-click) shot');
if (!sessionLive) return { ok: false, reason: 'session ended before the fallback shot' };
} }
if (this.shooting) return { ok: false, reason: 'capture already in progress' }; if (this.shooting) return { ok: false, reason: 'capture already in progress' };
@@ -368,9 +438,14 @@ class CaptureService {
} }
} }
noteStepAdded(step, trigger) { noteStepAdded(step, trigger, guideId = null) {
this.session.count += 1; // Steps from queued clicks can land after the session object is gone.
this.notify('capture:added', { guideId: this.session.guideId, step, trigger }); if (this.session) this.session.count += 1;
this.notify('capture:added', {
guideId: guideId || (this.session && this.session.guideId),
step,
trigger,
});
this.notify('capture:state', this.state()); this.notify('capture:state', this.state());
if (this.rebuildTrayMenu) this.rebuildTrayMenu(); // refresh step counter if (this.rebuildTrayMenu) this.rebuildTrayMenu(); // refresh step counter
} }
@@ -573,13 +648,20 @@ class CaptureService {
}); });
if (!ok || !this.session || this.session.paused) { if (!ok || !this.session || this.session.paused) {
backend.stop(); backend.stop();
if (this.session && !this.session.paused) this.startFrameLoop(); if (this.session && !this.session.paused) {
console.error('[stepforge] stream capture backend failed to start — using in-process frame loop');
this.startFrameLoop();
}
return; return;
} }
this.streamBackend = backend; this.streamBackend = backend;
clog('stream capture backend active');
this.notify('capture:state', this.state()); this.notify('capture:state', this.state());
} catch { } catch (err) {
if (this.session && !this.session.paused) this.startFrameLoop(); if (this.session && !this.session.paused) {
console.error(`[stepforge] stream capture backend error (${err && err.message}) — using in-process frame loop`);
this.startFrameLoop();
}
} finally { } finally {
this.streamBackendStarting = false; this.streamBackendStarting = false;
} }
@@ -982,7 +1064,10 @@ public static class SFMouseHook {
// click however fast it follows the previous one. Only an *identical* // click however fast it follows the previous one. Only an *identical*
// event a few ms later — duplicate delivery of one physical press — is // event a few ms later — duplicate delivery of one physical press — is
// suppressed. // suppressed.
if (this.isDuplicateClickEvent(clickAt, osPoint, button)) return; if (this.isDuplicateClickEvent(clickAt, osPoint, button)) {
clog('click@', clickAt, button, 'suppressed as duplicate delivery');
return;
}
// Prefer the position the watcher sampled with the button-down event // Prefer the position the watcher sampled with the button-down event
// (physical px -> DIP); otherwise read the cursor synchronously, right // (physical px -> DIP); otherwise read the cursor synchronously, right
// now, so the marker lands where the user clicked even if the shot // now, so the marker lands where the user clicked even if the shot
@@ -991,6 +1076,7 @@ public static class SFMouseHook {
// window focus — WSLg reports focus unreliably.) // window focus — WSLg reports focus unreliably.)
let clickPos = osPoint ? this.osPointToDip(osPoint) : null; let clickPos = osPoint ? this.osPointToDip(osPoint) : null;
if (!clickPos) clickPos = this.screen.getCursorScreenPoint(); if (!clickPos) clickPos = this.screen.getCursorScreenPoint();
clog('click@', clickAt, button, 'os', osPoint, '-> dip', clickPos);
this.enqueueClickCapture(clickPos, clickAt, button || 'mouse'); this.enqueueClickCapture(clickPos, clickAt, button || 'mouse');
} }
@@ -1047,6 +1133,9 @@ public static class SFMouseHook {
enqueueClickCapture(clickPos, clickAt = Date.now(), button = 'mouse') { enqueueClickCapture(clickPos, clickAt = Date.now(), button = 'mouse') {
const clickMeta = { at: Number.isFinite(clickAt) ? clickAt : Date.now(), button: button || 'mouse' }; const clickMeta = { at: Number.isFinite(clickAt) ? clickAt : Date.now(), button: button || 'mouse' };
if (this.session && !this.session.paused && !this.userIsInApp()) { if (this.session && !this.session.paused && !this.userIsInApp()) {
// The guide id pins the click to its recording so it can still be
// stored if the session stops while this click waits in the queue.
clickMeta.guideId = this.session.guideId;
clickMeta.framePromise = this.frameForClick(clickPos, clickMeta.at) clickMeta.framePromise = this.frameForClick(clickPos, clickMeta.at)
.catch(() => null); .catch(() => null);
} }
+37 -2
View File
@@ -161,6 +161,34 @@ function createWindow() {
console.log(`CLICK-SELFTEST marker ${i}: off by ${(offBy * 100).toFixed(2)}% of screen`); console.log(`CLICK-SELFTEST marker ${i}: off by ${(offBy * 100).toFixed(2)}% of screen`);
}); });
capture.finishSession(); capture.finishSession();
// Second scenario, reproducing the "I clicked many times but only
// got two screenshots" report: a fast burst of clicks immediately
// followed by finishing the session, so most clicks are still
// queued (frames still encoding) when the stop lands.
const burstGuide = store.createGuide({ title: 'burst selftest' });
capture.startSession(burstGuide.guideId, { intervalSec: 0 });
capture.stopClickWatcher();
capture.togglePause(false);
mainWindow.hide();
await capture.startClickFrameBackend();
await new Promise((res) => setTimeout(res, 1500));
const burstCount = 8;
for (let i = 0; i < burstCount; i++) {
const p = {
x: Math.round(bounds.x + bounds.width * (0.15 + 0.08 * i)),
y: Math.round(bounds.y + bounds.height * 0.5),
};
capture.onOsClick(Date.now(), toPhysical(p), 'button-1');
await new Promise((res) => setTimeout(res, 30)); // very fast clicking
}
// Finish right away — clicks are still mid-encode in the queue.
capture.finishSession();
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 1000));
const burstSteps = store.getGuide(burstGuide.guideId).stepsOrder.length;
console.log('CLICK-SELFTEST burst:', burstSteps, 'of', burstCount,
burstSteps === burstCount ? 'OK — no clicks dropped on finish' : 'FAIL — clicks lost');
} catch (err) { } catch (err) {
console.log('CLICK-SELFTEST ERROR', err.message); console.log('CLICK-SELFTEST ERROR', err.message);
} finally { } finally {
@@ -533,13 +561,20 @@ function setupIpc() {
const gotLock = app.requestSingleInstanceLock(); const gotLock = app.requestSingleInstanceLock();
if (!gotLock) { if (!gotLock) {
// Exiting silently here looks like a broken install ("npm start does
// nothing") — say why, and let the running instance surface itself.
console.error('[stepforge] already running — surfacing the existing window (check the tray).');
app.quit(); app.quit();
} else { } else {
app.on('second-instance', () => { app.on('second-instance', () => {
if (mainWindow) { if (!mainWindow) return;
// The window may be tucked away by a recording session; opening the
// app again is an explicit request to see it, so pause and show, the
// same as the tray's "Open StepForge".
if (capture) capture.togglePause(true);
if (mainWindow.isMinimized()) mainWindow.restore(); if (mainWindow.isMinimized()) mainWindow.restore();
mainWindow.show();
mainWindow.focus(); mainWindow.focus();
}
}); });
app.whenReady().then(() => { app.whenReady().then(() => {
+9
View File
@@ -150,6 +150,15 @@
strict: cmd.strict !== false, strict: cmd.strict !== false,
}); });
if (!frame) return reply({ ok: false, reason: 'no frame at or before the click' }); if (!frame) return reply({ ok: false, reason: 'no frame at or before the click' });
// Stage one: confirm the selection immediately. The encode below can
// take seconds on software-rendered hosts; without this ack the main
// process couldn't tell a slow encode from a dead worker.
send({
type: 'frame-selected',
requestId: cmd.requestId,
startedAt: frame.startedAt,
capturedAt: frame.capturedAt,
});
try { try {
const canvas = new OffscreenCanvas(frame.width, frame.height); const canvas = new OffscreenCanvas(frame.width, frame.height);
canvas.getContext('2d').drawImage(frame.bitmap, 0, 0); canvas.getContext('2d').drawImage(frame.bitmap, 0, 0);
+95 -28
View File
@@ -1,7 +1,7 @@
'use strict'; 'use strict';
const path = require('node:path'); const path = require('node:path');
const { displayForDipPoint } = require('./coords'); const { displayForDipPoint, pointInBounds } = require('./coords');
/** /**
* Off-main-process click-frame backend. * Off-main-process click-frame backend.
@@ -31,14 +31,17 @@ const { displayForDipPoint } = require('./coords');
*/ */
const DEFAULT_SAMPLE_MS = 100; const DEFAULT_SAMPLE_MS = 100;
// Generous on purpose: the worker selects the frame the moment the request // The reply protocol is two-stage so a *slow* worker is never mistaken for a
// arrives (that pins the click↔frame pairing), but PNG-encoding a 4K-class // *dead* one: the worker acknowledges frame selection within milliseconds
// frame can take seconds on software-rendered hosts (WSLg, VMs). A slow // (that pins the click↔frame pairing and proves liveness), then ships the
// reply is still the *correct* frame; only a worker that never answers // PNG whenever the encode finishes — which can take seconds per frame on
// should count as unhealthy. // software-rendered hosts (WSLg, VMs). Only a missing ack marks the worker
const DEFAULT_FRAME_TIMEOUT_MS = 10_000; // unhealthy; a slow payload merely arrives late but is still the exact
// frame chosen at click time.
const DEFAULT_ACK_TIMEOUT_MS = 2000;
const DEFAULT_ENCODE_TIMEOUT_MS = 30_000;
const DEFAULT_START_TIMEOUT_MS = 8000; const DEFAULT_START_TIMEOUT_MS = 8000;
// Consecutive frame-request timeouts before the backend declares itself // Consecutive unanswered requests before the backend declares itself
// unhealthy and the capture service degrades to the in-process loop. // unhealthy and the capture service degrades to the in-process loop.
const MAX_CONSECUTIVE_FAILURES = 2; const MAX_CONSECUTIVE_FAILURES = 2;
@@ -50,10 +53,17 @@ class StreamCaptureBackend {
* production, a fake in tests). * production, a fake in tests).
* @param {(reason: string) => void} [opts.onUnhealthy] * @param {(reason: string) => void} [opts.onUnhealthy]
*/ */
constructor({ createHost, onUnhealthy = null, frameTimeoutMs = DEFAULT_FRAME_TIMEOUT_MS, startTimeoutMs = DEFAULT_START_TIMEOUT_MS } = {}) { constructor({
createHost,
onUnhealthy = null,
ackTimeoutMs = DEFAULT_ACK_TIMEOUT_MS,
encodeTimeoutMs = DEFAULT_ENCODE_TIMEOUT_MS,
startTimeoutMs = DEFAULT_START_TIMEOUT_MS,
} = {}) {
this.createHost = createHost; this.createHost = createHost;
this.onUnhealthy = onUnhealthy; this.onUnhealthy = onUnhealthy;
this.frameTimeoutMs = frameTimeoutMs; this.ackTimeoutMs = ackTimeoutMs;
this.encodeTimeoutMs = encodeTimeoutMs;
this.startTimeoutMs = startTimeoutMs; this.startTimeoutMs = startTimeoutMs;
this.host = null; this.host = null;
this.active = false; this.active = false;
@@ -62,6 +72,7 @@ class StreamCaptureBackend {
this.nextRequestId = 1; this.nextRequestId = 1;
this.consecutiveFailures = 0; this.consecutiveFailures = 0;
this.startWaiters = []; this.startWaiters = [];
this.draining = false;
} }
isActive() { isActive() {
@@ -146,18 +157,26 @@ class StreamCaptureBackend {
for (const check of [...this.startWaiters]) check(); for (const check of [...this.startWaiters]) check();
return; return;
} }
if (msg.type === 'frame-selected') {
// Stage one: the worker picked a frame for this click. The pairing is
// now pinned and the worker is provably alive — swap the short ack
// deadline for the long encode deadline and wait for the pixels.
const pending = this.requests.get(msg.requestId);
if (!pending) return;
this.consecutiveFailures = 0;
clearTimeout(pending.timer);
pending.timer = setTimeout(() => {
this.settleRequest(msg.requestId, null);
this.noteFailure();
}, this.encodeTimeoutMs);
return;
}
if (msg.type === 'frame-response') { if (msg.type === 'frame-response') {
const pending = this.requests.get(msg.requestId); const pending = this.requests.get(msg.requestId);
if (!pending) return; // late reply after timeout — already handled if (!pending) return; // late reply after timeout — already handled
this.requests.delete(msg.requestId);
clearTimeout(pending.timer);
// Any answer — even "no qualifying frame" — proves the worker is alive. // Any answer — even "no qualifying frame" — proves the worker is alive.
this.consecutiveFailures = 0; this.consecutiveFailures = 0;
if (!msg.ok || !msg.png) { const value = (!msg.ok || !msg.png) ? null : {
pending.resolve(null);
return;
}
pending.resolve({
mode: 'fullscreen', mode: 'fullscreen',
png: Buffer.from(msg.png), png: Buffer.from(msg.png),
size: { width: msg.width, height: msg.height }, size: { width: msg.width, height: msg.height },
@@ -165,10 +184,27 @@ class StreamCaptureBackend {
startedAt: msg.startedAt, startedAt: msg.startedAt,
capturedAt: msg.capturedAt, capturedAt: msg.capturedAt,
source: 'stream', source: 'stream',
}); };
this.settleRequest(msg.requestId, value);
} }
} }
/**
* Resolve one pending request and clean it up. When the backend is
* draining (stop() was called while requests were still in flight), the
* last settled request triggers the deferred worker teardown — this is
* what lets clicks queued at finish time still receive their frames
* instead of being cancelled to null.
*/
settleRequest(requestId, value) {
const pending = this.requests.get(requestId);
if (!pending) return;
this.requests.delete(requestId);
clearTimeout(pending.timer);
pending.resolve(value);
if (this.draining && this.requests.size === 0) this.finalizeTeardown();
}
/** /**
* Frame for one click, selected in the worker under the given strictness. * Frame for one click, selected in the worker under the given strictness.
* Resolves null when no frame qualifies (caller falls back) — and also on * Resolves null when no frame qualifies (caller falls back) — and also on
@@ -179,14 +215,19 @@ class StreamCaptureBackend {
const displays = [...this.streams.values()].filter((s) => s.ready).map((s) => s.display); const displays = [...this.streams.values()].filter((s) => s.ready).map((s) => s.display);
const display = clickPos ? displayForDipPoint(clickPos, displays) : (displays[0] || null); const display = clickPos ? displayForDipPoint(clickPos, displays) : (displays[0] || null);
if (!display) return Promise.resolve(null); if (!display) return Promise.resolve(null);
// Never serve a click from another monitor's stream: if the clicked
// display has no ready stream, a "nearest display" frame would show the
// wrong screen entirely and the marker fractions would be meaningless.
// Resolve null instead so the caller's fallback captures the right one.
if (clickPos && !pointInBounds(clickPos, display.bounds)) return Promise.resolve(null);
const requestId = this.nextRequestId++; const requestId = this.nextRequestId++;
return new Promise((resolve) => { return new Promise((resolve) => {
const timer = setTimeout(() => { const pending = { resolve, display, timer: null };
this.requests.delete(requestId); pending.timer = setTimeout(() => {
resolve(null); this.settleRequest(requestId, null);
this.noteFailure(); this.noteFailure();
}, this.frameTimeoutMs); }, this.ackTimeoutMs);
this.requests.set(requestId, { resolve, timer, display }); this.requests.set(requestId, pending);
this.hostSend({ this.hostSend({
type: 'frame-request', type: 'frame-request',
requestId, requestId,
@@ -201,20 +242,45 @@ class StreamCaptureBackend {
this.consecutiveFailures += 1; this.consecutiveFailures += 1;
if (this.consecutiveFailures < MAX_CONSECUTIVE_FAILURES) return; if (this.consecutiveFailures < MAX_CONSECUTIVE_FAILURES) return;
const notify = this.onUnhealthy; const notify = this.onUnhealthy;
this.stop(); this.stop({ immediate: true });
if (notify) notify('frame requests timing out'); if (notify) notify('frame requests timing out');
} }
stop() { /**
* Stop the backend. By default this *drains*: it stops accepting new
* requests but keeps the worker alive so frames already selected for
* queued clicks finish encoding and resolve — without this, finishing a
* recording right after a fast click burst cancels every still-encoding
* frame to null and those clicks are lost ("only two screenshots saved").
* Pass { immediate: true } to abandon in-flight requests (used when the
* worker is already unhealthy).
*/
stop({ immediate = false } = {}) {
this.active = false; this.active = false;
for (const check of [...this.startWaiters]) check();
this.startWaiters = [];
if (immediate) {
for (const [, pending] of this.requests) { for (const [, pending] of this.requests) {
clearTimeout(pending.timer); clearTimeout(pending.timer);
pending.resolve(null); pending.resolve(null);
} }
this.requests.clear(); this.requests.clear();
this.finalizeTeardown();
return;
}
if (this.requests.size === 0) {
this.finalizeTeardown();
return;
}
// Let pending requests resolve naturally (their own encode timers still
// bound the wait); finalizeTeardown fires from settleRequest when the
// last one completes.
this.draining = true;
}
finalizeTeardown() {
this.draining = false;
this.streams.clear(); this.streams.clear();
for (const check of [...this.startWaiters]) check();
this.startWaiters = [];
if (this.host) { if (this.host) {
try { this.host.destroy(); } catch { /* already gone */ } try { this.host.destroy(); } catch { /* already gone */ }
this.host = null; this.host = null;
@@ -289,6 +355,7 @@ module.exports = {
createElectronHost, createElectronHost,
pairDisplaysToSources, pairDisplaysToSources,
DEFAULT_SAMPLE_MS, DEFAULT_SAMPLE_MS,
DEFAULT_FRAME_TIMEOUT_MS, DEFAULT_ACK_TIMEOUT_MS,
DEFAULT_ENCODE_TIMEOUT_MS,
MAX_CONSECUTIVE_FAILURES, MAX_CONSECUTIVE_FAILURES,
}; };
+24 -1
View File
@@ -134,8 +134,31 @@ click position. Three pieces make that hold:
click-time screen. Storing is serialized per click; pairing is not, so click-time screen. Storing is serialized per click; pairing is not, so
slow encodes never skew later clicks. slow encodes never skew later clicks.
Reliability rules that keep "one click → one step" true under load:
- **The worker reply is two-stage.** It acknowledges frame *selection*
within milliseconds (proving liveness and pinning the pairing), then
ships the PNG whenever the encode finishes — seconds later on
software-rendered hosts. A slow payload is never mistaken for a dead
worker; only a missing ack degrades the backend.
- **Stopping drains.** Finishing or pausing a recording keeps the worker
alive until frames already selected for queued clicks finish encoding.
Without this, ending a session right after a fast click burst cancelled
every still-encoding frame and those clicks vanished (the "I clicked ten
times but only got two screenshots" bug).
- **Queued clicks outlive the session.** A click registered while recording
carries its guide id and still becomes a step if the session ends while it
waits in the store queue. The lone exception is the tray gesture that
stopped the session, discarded by matching its recorded screen position.
- **A click is never served another monitor's frame.** If the clicked
display has no ready stream the backend returns null and the caller
fresh-shots the correct screen, rather than circling a point on the wrong
one.
`STEPFORGE_CLICK_SELFTEST=1 npm start` exercises the whole pipeline in a `STEPFORGE_CLICK_SELFTEST=1 npm start` exercises the whole pipeline in a
real Electron session and reports steps-per-click and marker offsets. real Electron session: it reports steps-per-click and marker offsets, then
runs a fast-burst-then-finish scenario that must save every click.
`STEPFORGE_CAPTURE_LOG=1` prints one diagnostic line per click decision.
## Security Rules ## Security Rules
+86
View File
@@ -276,6 +276,92 @@ test('fast clicks are paired with their frames at event time, not behind the sto
assert.equal(service.session.count, 2); assert.equal(service.session.count, 2);
}); });
test('clicks still queued when the session finishes are stored, not dropped', async () => {
// Reported as "I clicked N times but only got two screenshots": with slow
// encodes the queue lags, and finishing the session used to discard every
// click still waiting in it.
const service = makeService();
service.session = { guideId: 'guide-finish', paused: false, count: 0, intervalSec: 0 };
service.userIsInApp = () => false;
let releaseFrame;
const frameGate = new Promise((r) => { releaseFrame = r; });
service.frameForClick = () => frameGate.then(() => makeFrame('late-stored-frame'));
const added = [];
service.store.addStep = (guideId, fields, png) => {
added.push({ guideId, png: png.toString() });
return { stepId: 'step-late' };
};
const events = [];
service.notify = (channel, payload) => events.push({ channel, payload });
// Click happened comfortably before the user reached for the stop button.
const queue = service.enqueueClickCapture({ x: 5, y: 5 }, Date.now() - 2000, 'left');
service.finishSession();
releaseFrame();
await queue;
assert.deepEqual(added, [{ guideId: 'guide-finish', png: 'late-stored-frame' }],
'the click was recorded while the session was live — it must become a step');
const addedEvent = events.find((e) => e.channel === 'capture:added');
assert.equal(addedEvent.payload.guideId, 'guide-finish');
});
test('the tray click that stops the session does not become a junk step', async () => {
// The tray gesture that stops capture is also seen by the OS hook; storing
// it would append a step of the tray/menu to every recording. It is
// matched by position so only that exact click is dropped.
const service = makeService({
screenApi: {
getCursorScreenPoint: () => ({ x: 1900, y: 12 }), // over the tray
getAllDisplays: () => [],
},
});
service.session = { guideId: 'guide-stop', paused: false, count: 0, intervalSec: 0 };
service.userIsInApp = () => false;
service.frameForClick = async () => makeFrame('stop-click-frame');
const added = [];
service.store.addStep = (guideId, fields, png) => {
added.push(png.toString());
return { stepId: 'step-stop' };
};
// The hook reports the tray click at the tray position.
const queue = service.enqueueClickCapture({ x: 1900, y: 12 }, Date.now(), 'left');
service.noteUiStopGesture(); // tray handler records where it was clicked
service.finishSession();
await queue;
assert.deepEqual(added, [], 'the stop click must be discarded');
});
test('a fast workflow click near the stop time but elsewhere is NOT dropped', async () => {
// Position matching is what makes this safe: the user clicks their
// workflow, then reaches up to the tray. The last workflow click lands
// far from the tray and must survive even though it is close in time.
const service = makeService({
screenApi: {
getCursorScreenPoint: () => ({ x: 1900, y: 12 }), // tray location
getAllDisplays: () => [],
},
});
service.session = { guideId: 'guide-near', paused: false, count: 0, intervalSec: 0 };
service.userIsInApp = () => false;
service.frameForClick = async () => makeFrame('workflow-frame');
const added = [];
service.store.addStep = (guideId, fields, png) => {
added.push(png.toString());
return { stepId: 'step-near' };
};
// Workflow click in the middle of the screen, then the tray stop.
const queue = service.enqueueClickCapture({ x: 600, y: 500 }, Date.now(), 'left');
service.noteUiStopGesture();
service.finishSession();
await queue;
assert.deepEqual(added, ['workflow-frame'], 'a click away from the tray must be kept');
});
test('queued click captures preserve the original event time and button', async () => { test('queued click captures preserve the original event time and button', async () => {
const service = makeService(); const service = makeService();
const seen = []; const seen = [];
+100 -7
View File
@@ -31,7 +31,8 @@ function makeBackend({ autoReady = true, ...opts } = {}) {
destroy() { destroyed = true; }, destroy() { destroyed = true; },
}; };
}, },
frameTimeoutMs: 40, ackTimeoutMs: 40,
encodeTimeoutMs: 120,
startTimeoutMs: 100, startTimeoutMs: 100,
...opts, ...opts,
}); });
@@ -133,12 +134,12 @@ test('clicks on a multi-monitor setup route to the stream of the clicked display
backend.stop(); backend.stop();
}); });
test('repeated frame-request timeouts mark the backend unhealthy exactly once', async () => { test('repeated unanswered frame requests mark the backend unhealthy exactly once', async () => {
let unhealthy = 0; let unhealthy = 0;
const { backend, isDestroyed } = makeBackend({ onUnhealthy: () => { unhealthy += 1; } }); const { backend, isDestroyed } = makeBackend({ onUnhealthy: () => { unhealthy += 1; } });
await backend.start({ displays: oneDisplay, sources: oneSource }); await backend.start({ displays: oneDisplay, sources: oneSource });
// Two consecutive timeouts (the worker never answers). // Two consecutive ack timeouts (the worker never answers at all).
assert.equal(await backend.frameForClick({ clickAt: 1 }), null); assert.equal(await backend.frameForClick({ clickAt: 1 }), null);
assert.equal(await backend.frameForClick({ clickAt: 2 }), null); assert.equal(await backend.frameForClick({ clickAt: 2 }), null);
@@ -147,6 +148,67 @@ test('repeated frame-request timeouts mark the backend unhealthy exactly once',
assert.equal(isDestroyed(), true); assert.equal(isDestroyed(), true);
}); });
test('a slow PNG encode after a prompt selection ack is not mistaken for a dead worker', async () => {
// The ack window is 40ms here; the payload arrives at ~80ms — well past
// the ack deadline but inside the encode deadline. The frame must land
// and the failure counter must stay clean.
let unhealthy = 0;
const { backend, sent, worker } = makeBackend({ onUnhealthy: () => { unhealthy += 1; } });
await backend.start({ displays: oneDisplay, sources: oneSource });
const promise = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 5000 });
const request = sent.find((m) => m.type === 'frame-request');
worker({ type: 'frame-selected', requestId: request.requestId, startedAt: 4900, capturedAt: 4910 });
setTimeout(() => {
worker({
type: 'frame-response',
requestId: request.requestId,
ok: true,
png: Uint8Array.from([7]),
width: 1920,
height: 1080,
startedAt: 4900,
capturedAt: 4910,
});
}, 80);
const frame = await promise;
assert.ok(frame, 'the slowly-encoded frame must still be delivered');
assert.deepEqual([...frame.png], [7]);
assert.equal(unhealthy, 0);
assert.equal(backend.isActive(), true);
backend.stop();
});
test('an acked request whose payload never arrives resolves null after the encode deadline', async () => {
const { backend, sent, worker } = makeBackend();
await backend.start({ displays: oneDisplay, sources: oneSource });
const promise = backend.frameForClick({ clickAt: 5000 });
const request = sent.find((m) => m.type === 'frame-request');
worker({ type: 'frame-selected', requestId: request.requestId });
assert.equal(await promise, null, 'a stuck encode must not hang the click forever');
backend.stop();
});
test('a click on a display without a ready stream is not served from another display', async () => {
// Only display 1 has a screen source; a click on display 2 must resolve
// null (the caller falls back to a fresh shot of the correct monitor)
// rather than returning display 1 pixels with meaningless marker math.
const displays = [display(1, 0, 0, 1920, 1080), display(2, 1920, 0, 1920, 1080)];
const { backend, sent } = makeBackend();
await backend.start({ displays, sources: [{ id: 'screen:1:0', display_id: '1' }] });
const frame = await backend.frameForClick({ clickPos: { x: 2500, y: 400 }, clickAt: 1 });
assert.equal(frame, null);
assert.equal(sent.some((m) => m.type === 'frame-request'), false,
'no request should even be sent for the wrong display');
backend.stop();
});
test('a late worker reply after the timeout is ignored', async () => { test('a late worker reply after the timeout is ignored', async () => {
const { backend, sent, worker } = makeBackend(); const { backend, sent, worker } = makeBackend();
await backend.start({ displays: oneDisplay, sources: oneSource }); await backend.start({ displays: oneDisplay, sources: oneSource });
@@ -159,15 +221,46 @@ test('a late worker reply after the timeout is ignored', async () => {
backend.stop(); backend.stop();
}); });
test('stop() resolves all in-flight requests with null', async () => { test('stop() drains: a frame already selected at finish time still resolves', async () => {
const { backend } = makeBackend(); // This is the "I clicked many times but only got two screenshots" fix.
// The session finishes (stop) while a click's frame is still encoding;
// the frame must still come back, not be cancelled to null.
const { backend, sent, worker, isDestroyed } = makeBackend();
await backend.start({ displays: oneDisplay, sources: oneSource }); await backend.start({ displays: oneDisplay, sources: oneSource });
const pending = backend.frameForClick({ clickAt: 1 }); const pending = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 1 });
backend.stop(); const request = sent.find((m) => m.type === 'frame-request');
worker({ type: 'frame-selected', requestId: request.requestId, startedAt: 0, capturedAt: 0 });
backend.stop(); // user finishes the session while the encode is in flight
assert.equal(backend.isActive(), false);
assert.equal(isDestroyed(), false, 'the worker stays alive to finish encoding');
worker({
type: 'frame-response',
requestId: request.requestId,
ok: true,
png: Uint8Array.from([5]),
width: 1,
height: 1,
});
const frame = await pending;
assert.ok(frame, 'the in-flight frame must survive the stop');
assert.deepEqual([...frame.png], [5]);
assert.equal(isDestroyed(), true, 'the worker tears down once draining completes');
});
test('stop({ immediate: true }) abandons in-flight requests at once', async () => {
const { backend, isDestroyed } = makeBackend();
await backend.start({ displays: oneDisplay, sources: oneSource });
const pending = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 1 });
backend.stop({ immediate: true });
assert.equal(await pending, null); assert.equal(await pending, null);
assert.equal(backend.isActive(), false); assert.equal(backend.isActive(), false);
assert.equal(isDestroyed(), true);
}); });
test('displays pair to screen sources by display_id; single display pairs to a lone source', () => { test('displays pair to screen sources by display_id; single display pairs to a lone source', () => {