Fix dropped clicks and late screenshots in fast recording
Template tests / tests (push) Successful in 2m13s
Template tests / tests (pull_request) Successful in 1m48s

Root cause of 'I clicked many times but only got two screenshots':
finishing/pausing a session called backend.stop(), which cancelled every
in-flight frame request to null. Clicks whose PNG had not finished
encoding yet were then dropped — only the first few survived.

Fixes:
- Stream backend now *drains* on stop: it stops accepting new requests but
  keeps the worker alive until frames already selected for queued clicks
  finish encoding. stop({ immediate: true }) keeps the old abandon-now
  behavior for an unhealthy worker.
- Two-stage worker reply: a fast 'frame-selected' ack pins the pairing and
  proves liveness; the slow PNG payload follows. A slow encode (seconds on
  software-rendered hosts) is no longer mistaken for a dead worker, which
  had been forcing the post-click fresh-shot fallback (late screenshots).
- Queued clicks carry their guide id and are stored even if the session
  ends while they wait in the queue.
- The tray gesture that stops a session is discarded by matching its
  recorded screen position, not a time window — a fast workflow click near
  the stop is no longer collateral damage. (Replaces the earlier grace
  window, which dropped whole bursts.)
- A click on a display with no ready stream resolves null so the caller
  fresh-shots the correct monitor instead of returning another screen.
- STEPFORGE_CAPTURE_LOG=1 prints one line per click decision; the
  second-instance handler now surfaces the running window instead of
  exiting silently.
- Self-test gains a fast-burst-then-finish scenario (8/8 saved) and the
  marker/coordinate checks remain at 0.00% offset.

Tests: 133 unit + all repo checks passing.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Iisyourdad
2026-06-12 07:56:31 -05:00
parent 5ca59805dc
commit 951bba7a21
7 changed files with 464 additions and 62 deletions
+107 -18
View File
@@ -73,6 +73,20 @@ const CLICK_CAPTURE_HIDE_DELAY_MS = 25;
// window wide enough to outlast any processing hiccup but the count low.
const RECENT_FRAME_RETENTION_MS = 4000;
const RECENT_FRAME_LIMIT = 4;
// The click that stops/pauses a session via the tray reaches the OS hook at
// almost the same instant the tray handler fires. We discard at most that
// one click — and only when it matches the recorded gesture in *both* time
// and position, so a fast workflow click that merely happens to land near
// the stop is never mistaken for the stop itself.
const SESSION_STOP_CLICK_WINDOW_MS = 700;
const SESSION_STOP_CLICK_RADIUS_PX = 8;
// Per-click diagnostics, enabled with STEPFORGE_CAPTURE_LOG=1. Cheap enough
// to leave in: one line per click/frame decision, nothing per frame-loop tick.
const CAPTURE_LOG = Boolean(process.env.STEPFORGE_CAPTURE_LOG);
function clog(...args) {
if (CAPTURE_LOG) console.log('[capture]', ...args);
}
function hasBinary(name) {
try {
@@ -198,23 +212,25 @@ class CaptureService {
{ label: 'Capture now', click: () => this.sessionCapture('manual').then(rebuild).catch(() => {}) },
{
label: this.session && this.session.paused ? 'Resume capturing' : 'Pause capturing',
click: () => { this.togglePause(); rebuild(); },
click: () => { this.noteUiStopGesture(); this.togglePause(); rebuild(); },
},
{
label: 'Open StepForge (pauses capture)',
click: () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
},
},
{ type: 'separator' },
{ label: 'Finish session', click: () => this.finishSession() },
{ label: 'Finish session', click: () => { this.noteUiStopGesture(); this.finishSession(); } },
]));
};
rebuild();
this.rebuildTrayMenu = rebuild;
this.tray.on('click', () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
@@ -230,6 +246,36 @@ class CaptureService {
this.rebuildTrayMenu = null;
}
/**
* Record that the user just stopped/paused capture from StepForge's own UI
* (tray icon or its menu). The physical click that did so is also seen by
* the OS hook and would otherwise queue as a workflow step; isStopGesture
* uses this to discard exactly that one click — matched by position, not
* just time, so a real fast click elsewhere is never lost.
*/
noteUiStopGesture() {
let pos = null;
try { pos = this.screen.getCursorScreenPoint(); } catch { pos = null; }
this.uiStopGesture = { at: Date.now(), pos };
}
/** True when a queued click is the tray gesture that stopped the session. */
isStopGesture(clickPos, clickAt) {
const g = this.uiStopGesture;
if (!g) return false;
if (Math.abs((clickAt || Date.now()) - g.at) > SESSION_STOP_CLICK_WINDOW_MS) return false;
// No position to compare (e.g. cursor read failed): fall back to the
// time window alone, but only consume the gesture once.
if (!g.pos || !clickPos) {
this.uiStopGesture = null;
return true;
}
const near = Math.abs(clickPos.x - g.pos.x) <= SESSION_STOP_CLICK_RADIUS_PX
&& Math.abs(clickPos.y - g.pos.y) <= SESSION_STOP_CLICK_RADIUS_PX;
if (near) this.uiStopGesture = null; // one stop click per gesture
return near;
}
showWindow() {
const win = this.getWindow();
if (win && !win.isDestroyed()) {
@@ -319,10 +365,19 @@ class CaptureService {
/** One capture inside the active session (hotkey/click/interval/manual). */
async sessionCapture(trigger = 'hotkey', clickPos = null, clickMeta = null) {
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' };
// Automatic triggers stand down while the user is in StepForge, so the
// app stays clickable mid-session and never screenshots itself.
if (trigger !== 'manual' && this.userIsInApp()) {
// A click that was registered while recording carries its guide id
// (see enqueueClickCapture) and must become a step even if the session
// was paused or finished while it sat behind slower clicks in the
// queue. Dropping queued clicks at stop time is how "I clicked five
// times and only got two steps" happens on hosts with slow encodes.
const queuedClickGuide = trigger === 'click' && clickMeta && clickMeta.guideId
? clickMeta.guideId
: null;
if (!this.session || this.session.paused) {
if (!queuedClickGuide) return { ok: false, reason: 'no active capture session' };
} else if (trigger !== 'manual' && this.userIsInApp()) {
// Automatic triggers stand down while the user is in StepForge, so the
// app stays clickable mid-session and never screenshots itself.
return { ok: false, reason: 'skipped — StepForge is focused' };
}
@@ -338,14 +393,29 @@ class CaptureService {
const frame = clickMeta && clickMeta.framePromise
? await clickMeta.framePromise
: await this.frameForClick(clickPos, clickAt);
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' };
const sessionLive = this.session && !this.session.paused;
const guideId = sessionLive ? this.session.guideId : queuedClickGuide;
if (!guideId) return { ok: false, reason: 'no active capture session' };
// The tray gesture that stopped the session is itself a hook click in
// the queue — storing it would append a junk step of the menu. Discard
// only that one click, matched by position so a fast workflow click is
// never collateral damage.
if (!sessionLive && this.isStopGesture(clickPos, clickAt)) {
clog('click@', clickAt, 'discarded — it triggered the session stop');
return { ok: false, reason: 'click stopped the session' };
}
if (frame) {
const result = this.storeFrameAsStep(this.session.guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger);
clog('click@', clickAt, 'frame', frame.source || 'loop',
'started', frame.startedAt - clickAt, 'ms, captured', frame.capturedAt - clickAt, 'ms rel. click');
const result = this.storeFrameAsStep(guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger, guideId);
return result;
}
// No usable frame (loop not running or grab failing): fall through
// to a one-off fresh shot.
// No usable frame: fall through to a one-off fresh shot — but only
// while still recording. After a stop, a fresh shot would show
// whatever replaced the user's workflow on screen.
clog('click@', clickAt, 'no frame qualified — falling back to a fresh (post-click) shot');
if (!sessionLive) return { ok: false, reason: 'session ended before the fallback shot' };
}
if (this.shooting) return { ok: false, reason: 'capture already in progress' };
@@ -368,9 +438,14 @@ class CaptureService {
}
}
noteStepAdded(step, trigger) {
this.session.count += 1;
this.notify('capture:added', { guideId: this.session.guideId, step, trigger });
noteStepAdded(step, trigger, guideId = null) {
// Steps from queued clicks can land after the session object is gone.
if (this.session) this.session.count += 1;
this.notify('capture:added', {
guideId: guideId || (this.session && this.session.guideId),
step,
trigger,
});
this.notify('capture:state', this.state());
if (this.rebuildTrayMenu) this.rebuildTrayMenu(); // refresh step counter
}
@@ -573,13 +648,20 @@ class CaptureService {
});
if (!ok || !this.session || this.session.paused) {
backend.stop();
if (this.session && !this.session.paused) this.startFrameLoop();
if (this.session && !this.session.paused) {
console.error('[stepforge] stream capture backend failed to start — using in-process frame loop');
this.startFrameLoop();
}
return;
}
this.streamBackend = backend;
clog('stream capture backend active');
this.notify('capture:state', this.state());
} catch {
if (this.session && !this.session.paused) this.startFrameLoop();
} catch (err) {
if (this.session && !this.session.paused) {
console.error(`[stepforge] stream capture backend error (${err && err.message}) — using in-process frame loop`);
this.startFrameLoop();
}
} finally {
this.streamBackendStarting = false;
}
@@ -982,7 +1064,10 @@ public static class SFMouseHook {
// click however fast it follows the previous one. Only an *identical*
// event a few ms later — duplicate delivery of one physical press — is
// suppressed.
if (this.isDuplicateClickEvent(clickAt, osPoint, button)) return;
if (this.isDuplicateClickEvent(clickAt, osPoint, button)) {
clog('click@', clickAt, button, 'suppressed as duplicate delivery');
return;
}
// Prefer the position the watcher sampled with the button-down event
// (physical px -> DIP); otherwise read the cursor synchronously, right
// now, so the marker lands where the user clicked even if the shot
@@ -991,6 +1076,7 @@ public static class SFMouseHook {
// window focus — WSLg reports focus unreliably.)
let clickPos = osPoint ? this.osPointToDip(osPoint) : null;
if (!clickPos) clickPos = this.screen.getCursorScreenPoint();
clog('click@', clickAt, button, 'os', osPoint, '-> dip', clickPos);
this.enqueueClickCapture(clickPos, clickAt, button || 'mouse');
}
@@ -1047,6 +1133,9 @@ public static class SFMouseHook {
enqueueClickCapture(clickPos, clickAt = Date.now(), button = 'mouse') {
const clickMeta = { at: Number.isFinite(clickAt) ? clickAt : Date.now(), button: button || 'mouse' };
if (this.session && !this.session.paused && !this.userIsInApp()) {
// The guide id pins the click to its recording so it can still be
// stored if the session stops while this click waits in the queue.
clickMeta.guideId = this.session.guideId;
clickMeta.framePromise = this.frameForClick(clickPos, clickMeta.at)
.catch(() => null);
}
+39 -4
View File
@@ -161,6 +161,34 @@ function createWindow() {
console.log(`CLICK-SELFTEST marker ${i}: off by ${(offBy * 100).toFixed(2)}% of screen`);
});
capture.finishSession();
// Second scenario, reproducing the "I clicked many times but only
// got two screenshots" report: a fast burst of clicks immediately
// followed by finishing the session, so most clicks are still
// queued (frames still encoding) when the stop lands.
const burstGuide = store.createGuide({ title: 'burst selftest' });
capture.startSession(burstGuide.guideId, { intervalSec: 0 });
capture.stopClickWatcher();
capture.togglePause(false);
mainWindow.hide();
await capture.startClickFrameBackend();
await new Promise((res) => setTimeout(res, 1500));
const burstCount = 8;
for (let i = 0; i < burstCount; i++) {
const p = {
x: Math.round(bounds.x + bounds.width * (0.15 + 0.08 * i)),
y: Math.round(bounds.y + bounds.height * 0.5),
};
capture.onOsClick(Date.now(), toPhysical(p), 'button-1');
await new Promise((res) => setTimeout(res, 30)); // very fast clicking
}
// Finish right away — clicks are still mid-encode in the queue.
capture.finishSession();
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 1000));
const burstSteps = store.getGuide(burstGuide.guideId).stepsOrder.length;
console.log('CLICK-SELFTEST burst:', burstSteps, 'of', burstCount,
burstSteps === burstCount ? 'OK — no clicks dropped on finish' : 'FAIL — clicks lost');
} catch (err) {
console.log('CLICK-SELFTEST ERROR', err.message);
} finally {
@@ -533,13 +561,20 @@ function setupIpc() {
const gotLock = app.requestSingleInstanceLock();
if (!gotLock) {
// Exiting silently here looks like a broken install ("npm start does
// nothing") — say why, and let the running instance surface itself.
console.error('[stepforge] already running — surfacing the existing window (check the tray).');
app.quit();
} else {
app.on('second-instance', () => {
if (mainWindow) {
if (mainWindow.isMinimized()) mainWindow.restore();
mainWindow.focus();
}
if (!mainWindow) return;
// The window may be tucked away by a recording session; opening the
// app again is an explicit request to see it, so pause and show, the
// same as the tray's "Open StepForge".
if (capture) capture.togglePause(true);
if (mainWindow.isMinimized()) mainWindow.restore();
mainWindow.show();
mainWindow.focus();
});
app.whenReady().then(() => {
+9
View File
@@ -150,6 +150,15 @@
strict: cmd.strict !== false,
});
if (!frame) return reply({ ok: false, reason: 'no frame at or before the click' });
// Stage one: confirm the selection immediately. The encode below can
// take seconds on software-rendered hosts; without this ack the main
// process couldn't tell a slow encode from a dead worker.
send({
type: 'frame-selected',
requestId: cmd.requestId,
startedAt: frame.startedAt,
capturedAt: frame.capturedAt,
});
try {
const canvas = new OffscreenCanvas(frame.width, frame.height);
canvas.getContext('2d').drawImage(frame.bitmap, 0, 0);
+99 -32
View File
@@ -1,7 +1,7 @@
'use strict';
const path = require('node:path');
const { displayForDipPoint } = require('./coords');
const { displayForDipPoint, pointInBounds } = require('./coords');
/**
* Off-main-process click-frame backend.
@@ -31,14 +31,17 @@ const { displayForDipPoint } = require('./coords');
*/
const DEFAULT_SAMPLE_MS = 100;
// Generous on purpose: the worker selects the frame the moment the request
// arrives (that pins the click↔frame pairing), but PNG-encoding a 4K-class
// frame can take seconds on software-rendered hosts (WSLg, VMs). A slow
// reply is still the *correct* frame; only a worker that never answers
// should count as unhealthy.
const DEFAULT_FRAME_TIMEOUT_MS = 10_000;
// The reply protocol is two-stage so a *slow* worker is never mistaken for a
// *dead* one: the worker acknowledges frame selection within milliseconds
// (that pins the click↔frame pairing and proves liveness), then ships the
// PNG whenever the encode finishes — which can take seconds per frame on
// software-rendered hosts (WSLg, VMs). Only a missing ack marks the worker
// unhealthy; a slow payload merely arrives late but is still the exact
// frame chosen at click time.
const DEFAULT_ACK_TIMEOUT_MS = 2000;
const DEFAULT_ENCODE_TIMEOUT_MS = 30_000;
const DEFAULT_START_TIMEOUT_MS = 8000;
// Consecutive frame-request timeouts before the backend declares itself
// Consecutive unanswered requests before the backend declares itself
// unhealthy and the capture service degrades to the in-process loop.
const MAX_CONSECUTIVE_FAILURES = 2;
@@ -50,10 +53,17 @@ class StreamCaptureBackend {
* production, a fake in tests).
* @param {(reason: string) => void} [opts.onUnhealthy]
*/
constructor({ createHost, onUnhealthy = null, frameTimeoutMs = DEFAULT_FRAME_TIMEOUT_MS, startTimeoutMs = DEFAULT_START_TIMEOUT_MS } = {}) {
constructor({
createHost,
onUnhealthy = null,
ackTimeoutMs = DEFAULT_ACK_TIMEOUT_MS,
encodeTimeoutMs = DEFAULT_ENCODE_TIMEOUT_MS,
startTimeoutMs = DEFAULT_START_TIMEOUT_MS,
} = {}) {
this.createHost = createHost;
this.onUnhealthy = onUnhealthy;
this.frameTimeoutMs = frameTimeoutMs;
this.ackTimeoutMs = ackTimeoutMs;
this.encodeTimeoutMs = encodeTimeoutMs;
this.startTimeoutMs = startTimeoutMs;
this.host = null;
this.active = false;
@@ -62,6 +72,7 @@ class StreamCaptureBackend {
this.nextRequestId = 1;
this.consecutiveFailures = 0;
this.startWaiters = [];
this.draining = false;
}
isActive() {
@@ -146,18 +157,26 @@ class StreamCaptureBackend {
for (const check of [...this.startWaiters]) check();
return;
}
if (msg.type === 'frame-selected') {
// Stage one: the worker picked a frame for this click. The pairing is
// now pinned and the worker is provably alive — swap the short ack
// deadline for the long encode deadline and wait for the pixels.
const pending = this.requests.get(msg.requestId);
if (!pending) return;
this.consecutiveFailures = 0;
clearTimeout(pending.timer);
pending.timer = setTimeout(() => {
this.settleRequest(msg.requestId, null);
this.noteFailure();
}, this.encodeTimeoutMs);
return;
}
if (msg.type === 'frame-response') {
const pending = this.requests.get(msg.requestId);
if (!pending) return; // late reply after timeout — already handled
this.requests.delete(msg.requestId);
clearTimeout(pending.timer);
// Any answer — even "no qualifying frame" — proves the worker is alive.
this.consecutiveFailures = 0;
if (!msg.ok || !msg.png) {
pending.resolve(null);
return;
}
pending.resolve({
const value = (!msg.ok || !msg.png) ? null : {
mode: 'fullscreen',
png: Buffer.from(msg.png),
size: { width: msg.width, height: msg.height },
@@ -165,10 +184,27 @@ class StreamCaptureBackend {
startedAt: msg.startedAt,
capturedAt: msg.capturedAt,
source: 'stream',
});
};
this.settleRequest(msg.requestId, value);
}
}
/**
* Resolve one pending request and clean it up. When the backend is
* draining (stop() was called while requests were still in flight), the
* last settled request triggers the deferred worker teardown — this is
* what lets clicks queued at finish time still receive their frames
* instead of being cancelled to null.
*/
settleRequest(requestId, value) {
const pending = this.requests.get(requestId);
if (!pending) return;
this.requests.delete(requestId);
clearTimeout(pending.timer);
pending.resolve(value);
if (this.draining && this.requests.size === 0) this.finalizeTeardown();
}
/**
* Frame for one click, selected in the worker under the given strictness.
* Resolves null when no frame qualifies (caller falls back) — and also on
@@ -179,14 +215,19 @@ class StreamCaptureBackend {
const displays = [...this.streams.values()].filter((s) => s.ready).map((s) => s.display);
const display = clickPos ? displayForDipPoint(clickPos, displays) : (displays[0] || null);
if (!display) return Promise.resolve(null);
// Never serve a click from another monitor's stream: if the clicked
// display has no ready stream, a "nearest display" frame would show the
// wrong screen entirely and the marker fractions would be meaningless.
// Resolve null instead so the caller's fallback captures the right one.
if (clickPos && !pointInBounds(clickPos, display.bounds)) return Promise.resolve(null);
const requestId = this.nextRequestId++;
return new Promise((resolve) => {
const timer = setTimeout(() => {
this.requests.delete(requestId);
resolve(null);
const pending = { resolve, display, timer: null };
pending.timer = setTimeout(() => {
this.settleRequest(requestId, null);
this.noteFailure();
}, this.frameTimeoutMs);
this.requests.set(requestId, { resolve, timer, display });
}, this.ackTimeoutMs);
this.requests.set(requestId, pending);
this.hostSend({
type: 'frame-request',
requestId,
@@ -201,20 +242,45 @@ class StreamCaptureBackend {
this.consecutiveFailures += 1;
if (this.consecutiveFailures < MAX_CONSECUTIVE_FAILURES) return;
const notify = this.onUnhealthy;
this.stop();
this.stop({ immediate: true });
if (notify) notify('frame requests timing out');
}
stop() {
/**
* Stop the backend. By default this *drains*: it stops accepting new
* requests but keeps the worker alive so frames already selected for
* queued clicks finish encoding and resolve — without this, finishing a
* recording right after a fast click burst cancels every still-encoding
* frame to null and those clicks are lost ("only two screenshots saved").
* Pass { immediate: true } to abandon in-flight requests (used when the
* worker is already unhealthy).
*/
stop({ immediate = false } = {}) {
this.active = false;
for (const [, pending] of this.requests) {
clearTimeout(pending.timer);
pending.resolve(null);
}
this.requests.clear();
this.streams.clear();
for (const check of [...this.startWaiters]) check();
this.startWaiters = [];
if (immediate) {
for (const [, pending] of this.requests) {
clearTimeout(pending.timer);
pending.resolve(null);
}
this.requests.clear();
this.finalizeTeardown();
return;
}
if (this.requests.size === 0) {
this.finalizeTeardown();
return;
}
// Let pending requests resolve naturally (their own encode timers still
// bound the wait); finalizeTeardown fires from settleRequest when the
// last one completes.
this.draining = true;
}
finalizeTeardown() {
this.draining = false;
this.streams.clear();
if (this.host) {
try { this.host.destroy(); } catch { /* already gone */ }
this.host = null;
@@ -289,6 +355,7 @@ module.exports = {
createElectronHost,
pairDisplaysToSources,
DEFAULT_SAMPLE_MS,
DEFAULT_FRAME_TIMEOUT_MS,
DEFAULT_ACK_TIMEOUT_MS,
DEFAULT_ENCODE_TIMEOUT_MS,
MAX_CONSECUTIVE_FAILURES,
};