Merge pull request 'Fix/mouse click screenshot align' (#2) from fix/mouse_click_screenshot_align into main
Template tests / tests (push) Waiting to run

Reviewed-on: #2
This commit was merged in pull request #2.
This commit is contained in:
2026-06-12 14:41:42 +00:00
16 changed files with 3186 additions and 187 deletions
+17
View File
@@ -0,0 +1,17 @@
'use strict';
const { contextBridge, ipcRenderer } = require('electron');
/**
* Bridge for the hidden capture-worker window. The worker only ever talks to
* the StreamCaptureBackend in the main process: commands in (start streams,
* frame requests), events out (stream health, PNG-encoded frames).
*/
contextBridge.exposeInMainWorld('captureWorkerBridge', {
  /**
   * Subscribe to commands arriving on the 'capture-worker:command' channel.
   * Only the message payload is forwarded; the IPC event object is withheld
   * from the worker page.
   *
   * @param {(msg: unknown) => void} fn - called once per incoming command.
   * @returns {() => void} disposer that removes this subscription, so a
   *   caller that re-subscribes does not accumulate listeners.
   * @throws {TypeError} when `fn` is not a function (fail at registration
   *   instead of inside the IPC handler on the first command).
   */
  onCommand(fn) {
    if (typeof fn !== 'function') {
      throw new TypeError('captureWorkerBridge.onCommand expects a function');
    }
    const listener = (_event, msg) => fn(msg);
    ipcRenderer.on('capture-worker:command', listener);
    return () => {
      ipcRenderer.removeListener('capture-worker:command', listener);
    };
  },

  /**
   * Send one event message on the 'capture-worker:event' channel.
   *
   * @param {unknown} msg - payload handed to ipcRenderer.send as-is.
   */
  send(msg) {
    ipcRenderer.send('capture-worker:event', msg);
  },
});
+602 -130
View File
@@ -6,51 +6,91 @@ const { desktopCapturer, screen, BrowserWindow, nativeImage, Tray, Menu, Notific
const { expandPlaceholders } = require('../core/placeholders');
const raster = require('../core/raster');
const { encodePng } = require('../core/png');
const {
selectFrameForClick,
frameUsableForClick,
pointInBounds,
DEFAULT_MAX_AGE_MS,
DEFAULT_START_SLACK_MS,
} = require('./click-frames');
const { physicalToDip } = require('./coords');
/**
* Capture service: full-screen, active-window, and region capture via
* Electron's desktopCapturer, plus a click-marker annotation at the cursor
* position and a capture session (start/pause/resume/finish).
* Capture service: full-screen, active-window, and region capture, plus a
* click-marker annotation at the click position and a capture session
* (start/pause/resume/finish).
*
* A session captures continuously, with three triggers layered by what the
* platform supports:
* - click-capture via an OS adapter (xinput on X11, PowerShell on Windows),
* - click-capture via an OS adapter (xinput on X11, a low-level mouse hook
* on Windows),
* - a global hotkey (unreliable on some Wayland compositors),
* - interval auto-capture as the always-works fallback.
*
* Click captures are served from one of two frame recorders:
* - the stream backend (app/stream-backend.js): a hidden worker window
* samples a desktop media stream per display into a timestamped ring
* buffer, entirely off the main process. This is the preferred path —
* the main-process event loop stays free, so OS click events arrive on
* time, and the tight sampling cadence keeps a genuinely fresh pre-click
* frame available for every click;
* - the legacy in-process frame loop below, kept as the fallback when
* streams can't start (portal-less Wayland, exotic drivers).
*
* Either way the pairing rule is the same (click-frames.js): in strict mode
* a click only ever gets a frame captured at or before the click — never one
* whose grab started after it.
*
* Note: under Wayland/WSLg, screen capture may require portal support; all
* failures surface as { ok: false, reason } instead of crashing.
*/
// Dedupe duplicate watcher events for one physical click while still
// allowing intentionally fast clicking.
const CLICK_DEBOUNCE_MS = 40;
// Idle gap between frame-loop grabs. Must stay well above zero: grabbing
// back-to-back starves the main-process event loop, which delays delivery
// of click events from the OS watcher by whole seconds. The frame history
// plus hook-side click timestamps tolerate the coarser cadence.
// Leading-edge click debounce: the first click of a button is captured, and
// further clicks of that button within this window of the last *accepted*
// click are ignored. This collapses accidental fast / double clicks into one
// step, while any two deliberate clicks spaced more than the window apart
// each register. Tunable via capture.clickDebounceMs; this is only the
// default when the setting is absent.
const DEFAULT_CLICK_DEBOUNCE_MS = 200;
// How long a Linux raw button event waits for its regular twin (the
// representation that carries root coordinates) before firing without them.
const LINUX_CLICK_TWIN_MS = 25;
// Longest the window stays visible warming up the recorder at recording
// start. A slow capture-stream start (Windows can take several seconds) must
// not keep the window up and recording un-armed indefinitely.
const WARMUP_MAX_MS = 1500;
// Idle gap between legacy frame-loop grabs. Must stay well above zero:
// grabbing back-to-back starves the main-process event loop, which delays
// delivery of click events from the OS watcher by whole seconds. (The
// stream backend exists precisely because of this constraint.)
const FRAME_LOOP_IDLE_MS = 200;
// A buffered frame older than this is too stale to pass off as "the screen
// at the instant of the click".
const CLICK_FRAME_MAX_AGE_MS = 600;
// at the instant of the click". Shared with click-frames.js.
const CLICK_FRAME_MAX_AGE_MS = DEFAULT_MAX_AGE_MS;
// How long a click waits for the in-flight grab before falling back to a
// one-off fresh shot.
const CLICK_FRAME_WAIT_MS = 2000;
// A loop grab that started at most this long after the click still shows
// the screen the user clicked on (UI reactions render slower than this).
const CLICK_FRAME_START_SLACK_MS = 300;
// Balanced (non-strict) mode only: a loop grab that started at most this
// long after the click is still accepted. Strict mode never does this.
const CLICK_FRAME_START_SLACK_MS = DEFAULT_START_SLACK_MS;
const CLICK_CAPTURE_HIDE_DELAY_MS = 25;
// Frames now hold raw images (~20MB each at 2880x1800), so keep the history
// Frames hold raw images (~20MB each at 2880x1800), so keep the history
// window wide enough to outlast any processing hiccup but the count low.
const RECENT_FRAME_RETENTION_MS = 4000;
const RECENT_FRAME_LIMIT = 4;
// The click that stops/pauses a session via the tray reaches the OS hook at
// almost the same instant the tray handler fires. We discard at most that
// one click — and only when it matches the recorded gesture in *both* time
// and position, so a fast workflow click that merely happens to land near
// the stop is never mistaken for the stop itself.
const SESSION_STOP_CLICK_WINDOW_MS = 700;
const SESSION_STOP_CLICK_RADIUS_PX = 8;
function pointInBounds(point, bounds) {
if (!point || !bounds) return false;
return point.x >= bounds.x
&& point.x <= bounds.x + bounds.width
&& point.y >= bounds.y
&& point.y <= bounds.y + bounds.height;
// Per-click diagnostics, enabled with STEPFORGE_CAPTURE_LOG=1. Cheap enough
// to leave in: one line per click/frame decision, nothing per frame-loop tick.

/**
 * Decide whether an environment flag value switches diagnostics on.
 * Unset, empty, and the usual "off" spellings (0/false/off/no, any case)
 * are disabled; every other value enables. A plain Boolean() cast would
 * treat the string "0" as enabled.
 *
 * @param {string|undefined|null} value - raw environment variable value.
 * @returns {boolean} true when diagnostics should be printed.
 */
function captureLogEnabled(value) {
  if (value == null) return false;
  const text = String(value).trim().toLowerCase();
  return text !== '' && !['0', 'false', 'off', 'no'].includes(text);
}

const CAPTURE_LOG = captureLogEnabled(process.env.STEPFORGE_CAPTURE_LOG);

/**
 * Print one diagnostic line prefixed with '[capture]' when capture logging
 * is enabled; otherwise do nothing.
 *
 * @param {...unknown} args - values forwarded to console.log.
 */
function clog(...args) {
  if (CAPTURE_LOG) console.log('[capture]', ...args);
}
function hasBinary(name) {
@@ -63,11 +103,14 @@ function hasBinary(name) {
}
class CaptureService {
constructor({ store, settings, getWindow, notify }) {
constructor({ store, settings, getWindow, notify, screenApi = screen }) {
this.store = store;
this.settings = settings;
this.getWindow = getWindow;
this.notify = notify;
// Injectable for tests; the click/coordinate paths must never reach for
// the global `screen` directly so coordinate handling stays testable.
this.screen = screenApi;
this.session = null; // { guideId, paused, count, intervalSec }
this.intervalTimer = null;
this.clickWatcher = null;
@@ -76,14 +119,21 @@ class CaptureService {
this.frameWaiters = [];
this.latestFrame = null;
this.clickWatcherBuf = '';
this.clickWatcherPendingPress = false;
this.clickWatcherErrTail = '';
this.linuxEvent = null; // event block currently being parsed
this.pendingRawClick = null; // raw press waiting for its coordinate twin
this.clickQueue = Promise.resolve();
this.frameLoopInFlight = false;
this.frameLoopGrabStartedAt = null;
this.recentFrames = [];
this.shooting = false;
this.lastClickCaptureByButton = new Map();
this.lastAcceptedClickByButton = new Map();
this.streamBackend = null;
this.streamBackendStarting = false;
this.captureGen = 0; // bumped on stop to invalidate in-flight backend starts
// True only while a resume is warming up (window still visible, buffer
// not yet primed). Clicks are ignored until it clears — see armRecording.
this.warmingUp = false;
}
state() {
@@ -96,10 +146,24 @@ class CaptureService {
intervalSec: this.session.intervalSec || 0,
clickCapture: Boolean(this.clickWatcher),
clickCaptureAvailable: this.clickCaptureAvailable(),
clickFrameSource: this.streamBackend ? 'stream' : (this.frameLoopRunning ? 'loop' : 'idle'),
strictClickFrames: this.strictClickFrames(),
}
: { active: false, clickCaptureAvailable: this.clickCaptureAvailable() };
}
/**
* Strict is the default: a stored step must never show the screen *after*
* its click (a frame whose grab started post-click can already contain the
* click's effects). The setting exists as an explicit escape hatch for
* machines where capture is too slow to keep pre-click frames buffered —
* there, the legacy slack heuristics trade accuracy for fewer fresh-shot
* fallbacks.
*/
strictClickFrames() {
return this.settings.get('capture.strictClickFrames') !== false;
}
clickCaptureAvailable() {
if (this._clickAvail === undefined) {
this._clickAvail = process.platform === 'win32' || (process.platform === 'linux' && hasBinary('xinput'));
@@ -157,23 +221,25 @@ class CaptureService {
{ label: 'Capture now', click: () => this.sessionCapture('manual').then(rebuild).catch(() => {}) },
{
label: this.session && this.session.paused ? 'Resume capturing' : 'Pause capturing',
click: () => { this.togglePause(); rebuild(); },
click: () => { this.noteUiStopGesture(); this.togglePause(); rebuild(); },
},
{
label: 'Open StepForge (pauses capture)',
click: () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
},
},
{ type: 'separator' },
{ label: 'Finish session', click: () => this.finishSession() },
{ label: 'Finish session', click: () => { this.noteUiStopGesture(); this.finishSession(); } },
]));
};
rebuild();
this.rebuildTrayMenu = rebuild;
this.tray.on('click', () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
@@ -189,6 +255,36 @@ class CaptureService {
this.rebuildTrayMenu = null;
}
/**
* Record that the user just stopped/paused capture from StepForge's own UI
* (tray icon or its menu). The physical click that did so is also seen by
* the OS hook and would otherwise queue as a workflow step; isStopGesture
* uses this to discard exactly that one click — matched by position, not
* just time, so a real fast click elsewhere is never lost.
*/
noteUiStopGesture() {
let pos = null;
try { pos = this.screen.getCursorScreenPoint(); } catch { pos = null; }
this.uiStopGesture = { at: Date.now(), pos };
}
/** True when a queued click is the tray gesture that stopped the session. */
isStopGesture(clickPos, clickAt) {
const g = this.uiStopGesture;
if (!g) return false;
if (Math.abs((clickAt || Date.now()) - g.at) > SESSION_STOP_CLICK_WINDOW_MS) return false;
// No position to compare (e.g. cursor read failed): fall back to the
// time window alone, but only consume the gesture once.
if (!g.pos || !clickPos) {
this.uiStopGesture = null;
return true;
}
const near = Math.abs(clickPos.x - g.pos.x) <= SESSION_STOP_CLICK_RADIUS_PX
&& Math.abs(clickPos.y - g.pos.y) <= SESSION_STOP_CLICK_RADIUS_PX;
if (near) this.uiStopGesture = null; // one stop click per gesture
return near;
}
showWindow() {
const win = this.getWindow();
if (win && !win.isDestroyed()) {
@@ -223,34 +319,88 @@ class CaptureService {
const wasPaused = this.session.paused;
this.session.paused = typeof force === 'boolean' ? force : !this.session.paused;
// Starting/resuming tucks the window away again for clean shots (after
// a brief delay so the user sees it happen) and starts the frame loop
// that serves click captures. Pausing stops the loop and discards the
// buffered frame, so a resume can never serve a pre-pause screen.
// a brief delay so the user sees it happen) and starts the frame
// recorder that serves click captures. Pausing stops it and discards
// buffered frames, so a resume can never serve a pre-pause screen.
if (wasPaused && !this.session.paused) {
const win = this.getWindow();
const arm = () => {
if (!this.session || this.session.paused) return;
if (this.hiddenForSession && win && !win.isDestroyed() && win.isVisible()) win.hide();
if (this.settings.get('capture.captureOutsideClicks') !== false && this.clickCaptureAvailable()) {
this.startFrameLoop();
}
};
if (this.hiddenForSession && win && !win.isDestroyed()) setTimeout(arm, 400);
else arm();
this.armRecording();
} else if (!wasPaused && this.session.paused) {
this.warmingUp = false; // cancel any in-flight warmup
this.stopFrameLoop();
this.stopClickFrameBackend();
}
if (this.rebuildTrayMenu) this.rebuildTrayMenu();
this.notify('capture:state', this.state());
}
/**
* Bring a session from paused to recording. The order matters for the
* first click: the frame recorder is warmed up *while the window is still
* visible*, then the window is hidden. Warming after the hide (the old
* order) left a ~1s gap where the worker had no buffered frame yet, so the
* first click fell back to a post-click fresh shot — "the first screenshot
* is late". By the time the window tucks away here, frames are already
* being buffered, so the first click is served a pre-click frame like
* every other.
*/
armRecording() {
const win = this.getWindow();
const wantHide = Boolean(this.hiddenForSession && win && !win.isDestroyed());
const recorderWanted = this.settings.get('capture.captureOutsideClicks') !== false
&& this.clickCaptureAvailable();
// Recording is not "live" until the window is hidden and the buffer is
// primed. While warming up, the window is still visible and over the
// user's work, so clicks in this period are ignored (onOsClick checks
// warmingUp) instead of being skipped erratically or shot post-click —
// the bug that made a restarted recording "stop after one click".
this.warmingUp = Boolean(wantHide || recorderWanted);
const settleMs = Number(this.settings.get('capture.postHideSettleMs'));
const run = async () => {
if (!this.session || this.session.paused) { this.warmingUp = false; return; }
const startedAt = Date.now();
if (recorderWanted) {
// Warm the recorder, but never let a slow backend start (it waits up
// to several seconds for the capture stream) keep the window visible
// and recording un-armed. Cap the wait; if it isn't ready by then,
// hide anyway and let the first click or two take the fresh-shot
// fallback while the stream finishes coming up in the background.
const warm = this.startClickFrameBackend().catch(() => {});
let capTimer = null;
const cap = new Promise((r) => { capTimer = setTimeout(r, WARMUP_MAX_MS); });
await Promise.race([warm, cap]);
if (capTimer) clearTimeout(capTimer);
if (!this.session || this.session.paused) { this.warmingUp = false; return; }
}
// Keep the window visible briefly so the user sees the transition even
// when warmup was instant; warmup time counts toward this.
const minVisibleMs = wantHide ? 400 : 0;
const elapsed = Date.now() - startedAt;
if (elapsed < minVisibleMs) {
await new Promise((r) => setTimeout(r, minVisibleMs - elapsed));
if (!this.session || this.session.paused) { this.warmingUp = false; return; }
}
if (wantHide && win && !win.isDestroyed() && win.isVisible()) {
win.hide();
// Let a couple of frames of the now-unobscured screen land before
// the user's first click, so that frame shows their work, not the
// app window that was just dismissed.
await new Promise((r) => setTimeout(r, Number.isFinite(settleMs) ? settleMs : 150));
}
// Window hidden and buffer primed — clicks now count.
this.warmingUp = false;
};
run().catch(() => { this.warmingUp = false; });
}
finishSession() {
if (this.intervalTimer) {
clearInterval(this.intervalTimer);
this.intervalTimer = null;
}
this.warmingUp = false;
this.stopClickWatcher();
this.stopFrameLoop();
this.stopClickFrameBackend();
this.destroySessionTray();
this.session = null;
if (this.hiddenForSession) {
@@ -269,36 +419,64 @@ class CaptureService {
userIsInApp() {
const win = this.getWindow();
if (!win || win.isDestroyed() || !win.isVisible() || win.isMinimized()) return false;
const cur = screen.getCursorScreenPoint();
const cur = this.screen.getCursorScreenPoint();
const b = win.getBounds();
return cur.x >= b.x && cur.x <= b.x + b.width && cur.y >= b.y && cur.y <= b.y + b.height;
}
/** One capture inside the active session (hotkey/click/interval/manual). */
async sessionCapture(trigger = 'hotkey', clickPos = null, clickMeta = null) {
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' };
// A click that was registered while recording carries its guide id
// (see enqueueClickCapture) and must become a step even if the session
// was paused or finished while it sat behind slower clicks in the
// queue. Dropping queued clicks at stop time is how "I clicked five
// times and only got two steps" happens on hosts with slow encodes.
const queuedClickGuide = trigger === 'click' && clickMeta && clickMeta.guideId
? clickMeta.guideId
: null;
if (!this.session || this.session.paused) {
if (!queuedClickGuide) return { ok: false, reason: 'no active capture session' };
} else if (trigger !== 'manual' && this.userIsInApp()) {
// Automatic triggers stand down while the user is in StepForge, so the
// app stays clickable mid-session and never screenshots itself.
if (trigger !== 'manual' && this.userIsInApp()) {
return { ok: false, reason: 'skipped — StepForge is focused' };
}
// Clicks are served from the frame loop: the buffered frame was grabbed
// at (or moments before) the click instant, so the background matches
// what the user clicked on. A click that lands while a grab is in
// flight waits for that frame instead of being dropped, so fast
// Clicks are served from the frame recorder: the chosen frame was
// captured at (or moments before) the click instant, so the background
// matches what the user clicked on. A click that lands while a grab is
// in flight waits for that frame instead of being dropped, so fast
// clicking still yields one step per click.
if (trigger === 'click') {
const clickAt = clickMeta && Number.isFinite(clickMeta.at) ? clickMeta.at : Date.now();
const frame = await this.frameForClick(clickPos, clickAt);
if (!this.session || this.session.paused) return { ok: false, reason: 'no active capture session' };
// Prefer the frame the click was paired with at event time (see
// enqueueClickCapture); ask now only when no eager pairing happened.
const frame = clickMeta && clickMeta.framePromise
? await clickMeta.framePromise
: await this.frameForClick(clickPos, clickAt);
const sessionLive = this.session && !this.session.paused;
const guideId = sessionLive ? this.session.guideId : queuedClickGuide;
if (!guideId) return { ok: false, reason: 'no active capture session' };
// The tray gesture that stopped the session is itself a hook click in
// the queue — storing it would append a junk step of the menu. Discard
// only that one click, matched by position so a fast workflow click is
// never collateral damage.
if (!sessionLive && this.isStopGesture(clickPos, clickAt)) {
clog('click@', clickAt, 'discarded — it triggered the session stop');
return { ok: false, reason: 'click stopped the session' };
}
if (frame) {
const result = this.storeFrameAsStep(this.session.guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger);
clog('click@', clickAt, 'frame', frame.source || 'loop',
'started', frame.startedAt - clickAt, 'ms, captured', frame.capturedAt - clickAt, 'ms rel. click');
const result = this.storeFrameAsStep(guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger, guideId);
return result;
}
// No usable frame (loop not running or grab failing): fall through
// to a one-off fresh shot.
// No usable frame: fall through to a one-off fresh shot — but only
// while still recording. After a stop, a fresh shot would show
// whatever replaced the user's workflow on screen.
clog('click@', clickAt, 'no frame qualified — falling back to a fresh (post-click) shot');
if (!sessionLive) return { ok: false, reason: 'session ended before the fallback shot' };
}
if (this.shooting) return { ok: false, reason: 'capture already in progress' };
@@ -321,9 +499,14 @@ class CaptureService {
}
}
noteStepAdded(step, trigger) {
this.session.count += 1;
this.notify('capture:added', { guideId: this.session.guideId, step, trigger });
noteStepAdded(step, trigger, guideId = null) {
// Steps from queued clicks can land after the session object is gone.
if (this.session) this.session.count += 1;
this.notify('capture:added', {
guideId: guideId || (this.session && this.session.guideId),
step,
trigger,
});
this.notify('capture:state', this.state());
if (this.rebuildTrayMenu) this.rebuildTrayMenu(); // refresh step counter
}
@@ -335,11 +518,14 @@ class CaptureService {
// ---- click-triggered capture --------------------------------------------
/**
* Continuous screen-grab loop that runs while recording. It keeps the most
* recent frame in `latestFrame` so a click can be served from a frame
* grabbed at (or moments before) the instant of the click — a fresh grab
* started after the click would land hundreds of ms late and show the
* click's effects instead of what the user clicked on.
* Fallback frame recorder: a continuous screen-grab loop in the main
* process, used only when the stream backend can't run. It keeps the most
* recent frames buffered so a click can be served from a frame grabbed at
* (or moments before) the instant of the click — a fresh grab started
* after the click would land hundreds of ms late and show the click's
* effects instead of what the user clicked on. Its cadence is capped at
* FRAME_LOOP_IDLE_MS because tighter grabbing here starves the event loop
* and delays the very click events it serves.
*/
startFrameLoop() {
if (this.frameLoopRunning) return;
@@ -416,45 +602,77 @@ class CaptureService {
}
/**
* Freshest frame usable for a click capture: the buffered frame when it's
* recent enough, otherwise the next frame the loop delivers. Null when the
* loop isn't running or can't deliver in time.
* Frame representing the screen at the instant of one click.
*
* Order of preference:
* 1. the stream backend's ring buffer (off-main-process, tight cadence);
* 2. the legacy loop's buffered frames;
* 3. waiting for the loop grab that was already in flight when the user
* clicked.
* Selection semantics live in click-frames.js. In strict mode every path
* obeys the same rule — never a frame whose grab started after the click —
* and when nothing qualifies this returns null so the caller takes the
* *explicit* fresh-shot fallback rather than silently passing a post-click
* frame off as the click-time screen.
*/
async frameForClick(clickPos = null, clickAt = Date.now()) {
const mode = this.settings.get('capture.mode') || 'fullscreen';
const grabMode = mode === 'region' ? 'fullscreen' : mode;
const clickTime = Number.isFinite(clickAt) ? clickAt : Date.now();
// Fast clicks can move to another monitor before the buffered frame is
// consumed; only reuse frames from the clicked display.
const usable = (f, { allowInFlight = false } = {}) => {
const sameDisplay = !clickPos || pointInBounds(clickPos, f && f.display && f.display.bounds);
const startedAt = Number.isFinite(f && f.startedAt) ? f.startedAt : (f && f.capturedAt);
const completedBeforeClick = Number.isFinite(f && f.capturedAt) && f.capturedAt <= clickTime;
// A grab that began within the slack window after the click still
// shows the click-instant screen (UI reactions take longer than the
// slack to render), and it beats the alternative — a fresh shot that
// both starts later and stalls the loop for every queued click.
const startedNearClick = Number.isFinite(startedAt)
&& startedAt <= clickTime + CLICK_FRAME_START_SLACK_MS;
const timingMatches = completedBeforeClick
? clickTime - f.capturedAt <= CLICK_FRAME_MAX_AGE_MS
: allowInFlight && startedNearClick;
return Boolean(f)
&& f.mode === grabMode
&& timingMatches
&& sameDisplay;
// Click lead: prefer a frame captured a little *before* the hook
// timestamp. The hook fires on button-down, but the visible UI often
// starts reacting within a frame or two (hover→press states, the cursor
// settling) and capture-stream pixels lag the real screen slightly, so a
// frame timestamped right at the click can still show the click's onset.
// The lead is a *preference*: selection falls back to any pre-click
// frame when none is old enough, so it never forces a post-click fresh
// shot. Tunable via capture.clickLeadMs.
const leadMs = Math.max(0, Number(this.settings.get('capture.clickLeadMs')) || 0);
const strict = this.strictClickFrames();
const opts = {
clickAt: clickTime,
leadMs,
clickPos,
mode: grabMode,
strict,
maxAgeMs: CLICK_FRAME_MAX_AGE_MS,
startSlackMs: CLICK_FRAME_START_SLACK_MS,
};
const buffered = [...this.recentFrames, this.latestFrame]
.filter((f, i, arr) => f && arr.indexOf(f) === i && usable(f))
.sort((a, b) => b.capturedAt - a.capturedAt)[0];
if (this.streamBackend && this.streamBackend.isActive() && grabMode === 'fullscreen') {
const frame = await this.streamBackend.frameForClick({ clickPos, clickAt: clickTime, strict, leadMs });
if (frame) return frame;
// No qualifying frame (or the backend just went unhealthy): fall
// through to the loop buffer / fresh-shot fallbacks below.
}
const buffered = selectFrameForClick(
[...this.recentFrames, this.latestFrame].filter((f, i, arr) => f && arr.indexOf(f) === i),
opts,
);
if (buffered) return buffered;
// As long as the loop is running, the next grab is at most one idle gap
// away — wait for it rather than racing it with a one-off shot.
if (!this.frameLoopRunning) return null;
if (strict) {
// Only a grab already in flight when the user clicked can still
// qualify: its pixels predate the click even though it completes
// after. Any grab starting later is post-click by definition, so
// don't wait around for one — return immediately and let the caller
// take the fresh-shot fallback.
const inFlightStartedBeforeClick = this.frameLoopInFlight
&& Number.isFinite(this.frameLoopGrabStartedAt)
&& this.frameLoopGrabStartedAt <= clickTime;
if (!inFlightStartedBeforeClick) return null;
const next = await this.nextFrame(CLICK_FRAME_WAIT_MS);
return frameUsableForClick(next, { ...opts, allowInFlight: true }) ? next : null;
}
// Balanced (legacy) mode: wait for the next loop frame and accept it if
// its grab started within the slack window after the click.
const deadline = Date.now() + CLICK_FRAME_WAIT_MS;
while (this.frameLoopRunning && Date.now() < deadline) {
const next = await this.nextFrame(Math.max(1, deadline - Date.now()));
if (usable(next, { allowInFlight: true })) return next;
if (frameUsableForClick(next, { ...opts, allowInFlight: true })) return next;
if (next && Number.isFinite(next.startedAt)
&& next.startedAt > clickTime + CLICK_FRAME_START_SLACK_MS) {
// Grabs only get later from here; let the fresh-shot path handle it.
@@ -464,11 +682,97 @@ class CaptureService {
return null;
}
// ---- click-frame backends -------------------------------------------------
/**
* Bring up the frame recorder for a recording run. The stream backend is
* the architecture path (capture entirely off the main process); the
* in-process frame loop is the fallback when streams can't start — and the
* automatic degradation target if the worker stops answering mid-session.
*/
async startClickFrameBackend() {
const mode = this.settings.get('capture.mode') || 'fullscreen';
// The worker streams screens; window-mode grabs need the loop's
// source-filtering logic.
if (this.settings.get('capture.streamCapture') === false || mode === 'window') {
this.startFrameLoop();
return;
}
if (this.streamBackend || this.streamBackendStarting) return;
// Generation token: a stop/finish/pause bumps it. If it changes while
// this async start is in flight (e.g. the user finishes and restarts
// before a slow start resolves), the backend we built belongs to a dead
// session — discard it instead of installing it, and never leave
// streamBackendStarting stuck so the new session can start its own.
const gen = this.captureGen;
this.streamBackendStarting = true;
try {
// eslint-disable-next-line global-require
const { StreamCaptureBackend, createElectronHost } = require('./stream-backend');
const backend = new StreamCaptureBackend({
createHost: createElectronHost,
onUnhealthy: () => this.degradeToFrameLoop(),
});
const displays = this.screen.getAllDisplays();
const sources = await desktopCapturer.getSources({
types: ['screen'],
thumbnailSize: { width: 1, height: 1 }, // ids only — skip thumbnail work
});
const ok = await backend.start({
displays,
sources: sources.map((s) => ({ id: s.id, display_id: s.display_id })),
sampleMs: this.settings.get('capture.frameSampleMs') || 100,
});
const stale = gen !== this.captureGen;
if (!ok || stale || !this.session || this.session.paused) {
backend.stop();
if (!stale && this.session && !this.session.paused) {
console.error('[stepforge] stream capture backend failed to start — using in-process frame loop');
this.startFrameLoop();
}
return;
}
this.streamBackend = backend;
clog('stream capture backend active');
this.notify('capture:state', this.state());
} catch (err) {
if (gen === this.captureGen && this.session && !this.session.paused) {
console.error(`[stepforge] stream capture backend error (${err && err.message}) — using in-process frame loop`);
this.startFrameLoop();
}
} finally {
if (gen === this.captureGen) this.streamBackendStarting = false;
}
}
stopClickFrameBackend() {
// Invalidate any in-flight start (see captureGen above) and free the
// guard so the next session can start a fresh backend immediately.
this.captureGen += 1;
this.streamBackendStarting = false;
if (!this.streamBackend) return;
const backend = this.streamBackend;
this.streamBackend = null;
backend.stop();
}
/**
* The worker stopped answering frame requests. Capture must not silently
* stop mid-session: drop the backend and run the in-process loop for the
* rest of the recording.
*/
degradeToFrameLoop() {
this.streamBackend = null;
console.error('[stepforge] stream capture backend unhealthy — falling back to in-process frame loop');
if (this.session && !this.session.paused) this.startFrameLoop();
this.notify('capture:state', this.state());
}
startClickWatcher() {
this.stopClickWatcher();
try {
this.clickWatcherBuf = '';
this.clickWatcherPendingPress = false;
this.linuxEvent = null;
if (process.platform === 'linux' && hasBinary('xinput')) {
// Stream raw button events from the X server; one capture per press.
// xinput block-buffers stdout when piped, so a press event can sit
@@ -660,7 +964,8 @@ public static class SFMouseHook {
* the session to interval captures, and tell the UI.
*/
handleClickWatcherLoss(reason) {
this.clickWatcherPendingPress = false;
this.linuxEvent = null;
this.discardPendingRawClick();
const detail = [reason, this.clickWatcherErrTail].filter(Boolean).join(' — ');
console.error(`[stepforge] click watcher stopped${detail ? `: ${detail}` : ''}`);
if (!this.session) return;
@@ -677,8 +982,9 @@ public static class SFMouseHook {
this.clickWatcher = null;
}
this.clickWatcherBuf = '';
this.clickWatcherPendingPress = false;
this.lastClickCaptureByButton.clear();
this.linuxEvent = null;
this.discardPendingRawClick();
this.lastAcceptedClickByButton.clear();
}
/**
@@ -698,29 +1004,58 @@ public static class SFMouseHook {
processClickWatcherData(text, platform = process.platform) {
const lines = String(text).split(/\r?\n/);
if (platform === 'linux') {
// xinput prints each event as a multi-line block: an "EVENT type …
// (RawButtonPress)" header followed by a "detail: N" line carrying the
// button number. Fire on the detail line so scroll-wheel ticks (X11
// reports them as buttons 4-7) neither create steps nor debounce away
// the real clicks that follow them.
// xinput test-xi2 --root prints each event as a multi-line block:
//
// EVENT type 4 (ButtonPress) EVENT type 15 (RawButtonPress)
// device: 11 (10) device: 11 (11)
// detail: 1 detail: 1
// root: 644.52/343.55 valuators: …
//
// Regular (non-raw) blocks carry the event-time root coordinates —
// exactly what the click marker needs, because a cursor read at parse
// time drifts whenever delivery is delayed or the pointer keeps
// moving after the click. Raw blocks have no coordinates, but on many
// servers they are the only representation delivered for the root
// window, so both kinds must fire. One physical press can produce
// *both* representations; that duplication is resolved structurally
// in fireLinuxClick (raw press briefly waits for its regular twin and
// they merge into one click), never by a time-only debounce that
// could swallow legitimate fast clicks.
for (const line of lines) {
if (!line) continue;
if (/RawButtonPress|ButtonPress/.test(line)) {
if (this.clickWatcherPendingPress) this.onOsClick();
this.clickWatcherPendingPress = true;
const header = /EVENT type \d+ \(([A-Za-z]+)\)/.exec(line);
if (header) {
this.finishLinuxEvent();
const name = header[1];
this.linuxEvent = /ButtonPress$/.test(name)
? { name, raw: /^Raw/.test(name), button: null, at: Date.now(), fired: false }
: null;
continue;
}
if (!this.clickWatcherPendingPress) continue;
const detail = line.match(/detail:\s*(\d+)/);
const ev = this.linuxEvent;
if (!ev || ev.fired) continue;
const detail = /detail:\s*(\d+)/.exec(line);
if (detail) {
this.clickWatcherPendingPress = false;
const button = Number(detail[1]);
if (button < 4 || button > 7) this.onOsClick(Date.now(), null, `button-${button}`);
} else if (line.includes('EVENT type')) {
// Next event arrived without a detail line in between — treat the
// pending press as a plain click rather than dropping it.
this.clickWatcherPendingPress = false;
this.onOsClick();
ev.button = Number(detail[1]);
if (ev.button >= 4 && ev.button <= 7) {
// Scroll-wheel ticks (X11 buttons 4-7) are not clicks.
this.linuxEvent = null;
} else if (ev.raw) {
// Raw blocks never carry coordinates; this one is complete.
ev.fired = true;
this.linuxEvent = null;
this.fireLinuxClick(ev.at, null, ev.button, { raw: true });
}
continue;
}
const root = /root:\s*(-?[\d.]+)\/(-?[\d.]+)/.exec(line);
if (root && !ev.raw && ev.button != null) {
ev.fired = true;
this.linuxEvent = null;
this.fireLinuxClick(ev.at, {
x: Math.round(parseFloat(root[1])),
y: Math.round(parseFloat(root[2])),
}, ev.button, { raw: false });
}
}
return;
@@ -737,27 +1072,148 @@ public static class SFMouseHook {
}
}
/**
* A new event header arrived while a press block was still open: the block
* ended without the line we fire on. Old xinput builds sometimes omit
* detail lines entirely — treat such a press as a plain click rather than
* dropping it.
*/
finishLinuxEvent() {
const ev = this.linuxEvent;
this.linuxEvent = null;
if (!ev || ev.fired) return;
if (ev.button == null) {
this.onOsClick(ev.at, null, 'mouse');
} else if (!ev.raw) {
// Regular press whose root line never showed up — fire without
// coordinates; onOsClick falls back to a cursor read.
this.fireLinuxClick(ev.at, null, ev.button, { raw: false });
}
}
/**
* Funnel for parsed Linux button presses. Raw and regular blocks for the
* same physical press are merged here: a raw press (no coordinates) is
* held for LINUX_CLICK_TWIN_MS; if the regular twin (with root
* coordinates) arrives inside that window the pair fires once, with the
* raw block's earlier timestamp and the regular block's coordinates.
* Distinct presses always fire — there is no time-based dropping.
*/
fireLinuxClick(at, osPoint, button, { raw = false } = {}) {
const pending = this.pendingRawClick;
if (raw) {
// Two raw presses can't be one click — release the held one first.
this.flushPendingRawClick();
const entry = { button, at, timer: null };
entry.timer = setTimeout(() => {
if (this.pendingRawClick !== entry) return;
this.pendingRawClick = null;
this.onOsClick(entry.at, null, `button-${entry.button}`);
}, LINUX_CLICK_TWIN_MS);
if (entry.timer.unref) entry.timer.unref();
this.pendingRawClick = entry;
return;
}
if (pending && pending.button === button) {
// The regular twin of the held raw press: one physical click.
this.pendingRawClick = null;
clearTimeout(pending.timer);
this.onOsClick(Math.min(pending.at, at), osPoint, `button-${button}`);
return;
}
this.onOsClick(at, osPoint, `button-${button}`);
}
/** Fire the held raw press immediately (its twin is not coming). */
flushPendingRawClick() {
const pending = this.pendingRawClick;
if (!pending) return;
this.pendingRawClick = null;
clearTimeout(pending.timer);
this.onOsClick(pending.at, null, `button-${pending.button}`);
}
discardPendingRawClick() {
if (!this.pendingRawClick) return;
clearTimeout(this.pendingRawClick.timer);
this.pendingRawClick = null;
}
/** Debounce window in ms (capture.clickDebounceMs, default 200). */
clickDebounceMs() {
const raw = this.settings.get('capture.clickDebounceMs');
const v = Number(raw);
return raw != null && Number.isFinite(v) && v >= 0 ? v : DEFAULT_CLICK_DEBOUNCE_MS;
}
onOsClick(at = Date.now(), osPoint = null, button = 'mouse') {
if (!this.session || this.session.paused) return;
// Recording isn't live until the window is hidden and the buffer primed
// (see armRecording). Clicks during warmup land on the still-visible app
// window, not the user's work, so ignore them rather than capturing junk.
if (this.warmingUp) {
clog('click@', Number.isFinite(at) ? at : Date.now(), button, 'ignored — still warming up');
return;
}
const clickAt = Number.isFinite(at) ? at : Date.now();
const debounceKey = button || 'mouse';
const last = this.lastClickCaptureByButton.get(debounceKey) || 0;
if (clickAt >= last && clickAt - last < CLICK_DEBOUNCE_MS) return;
this.lastClickCaptureByButton.set(debounceKey, clickAt);
// Leading-edge debounce: ignore a click that lands within the debounce
// window of the last accepted click of the same button. This makes fast
// / accidental repeat clicks register once, while two deliberate clicks
// spaced more than the window apart each register (one step per click).
if (this.isDebouncedClick(clickAt, button)) {
clog('click@', clickAt, button, 'debounced (within', this.clickDebounceMs(), 'ms of last accepted)');
return;
}
// Prefer the position the watcher sampled with the button-down event
// (physical px -> DIP); otherwise read the cursor synchronously,
// right now, so the marker lands where the user clicked even if the
// shot itself takes a moment to grab. (Clicks on StepForge itself are
// (physical px -> DIP); otherwise read the cursor synchronously, right
// now, so the marker lands where the user clicked even if the shot
// itself takes a moment to grab. (Clicks on StepForge itself are
// filtered by the cursor-position check in sessionCapture, not by
// window focus — WSLg reports focus unreliably.)
let clickPos = null;
if (osPoint) {
clickPos = typeof screen.screenToDipPoint === 'function'
? screen.screenToDipPoint(osPoint)
: osPoint;
let clickPos = osPoint ? this.osPointToDip(osPoint) : null;
if (!clickPos) clickPos = this.screen.getCursorScreenPoint();
clog('click@', clickAt, button, 'os', osPoint, '-> dip', clickPos);
this.enqueueClickCapture(clickPos, clickAt, button || 'mouse');
}
if (!clickPos) clickPos = screen.getCursorScreenPoint();
this.enqueueClickCapture(clickPos, clickAt, debounceKey);
/**
* Whether this click should be dropped by the debounce. A click is dropped
* only when it follows the last *accepted* click of the same button by
* less than the debounce window — so the window is measured from accepted
* clicks, never from dropped ones, and a run of fast clicks can't push the
* next deliberate click out indefinitely. Accepting a click records it as
* the new reference point. Different buttons debounce independently.
*/
isDebouncedClick(at, button) {
const key = button || 'mouse';
const windowMs = this.clickDebounceMs();
const last = this.lastAcceptedClickByButton.get(key);
if (last != null && at >= last && at - last < windowMs) return true;
this.lastAcceptedClickByButton.set(key, at);
return false;
}
/**
* Physical (OS event) pixels -> DIP. Windows exposes the canonical
* conversion; on Linux/X11 it is reconstructed from display geometry (see
* app/coords.js). Without this, the click marker drifts on any display
* scaled away from 100% and on secondary monitors.
*/
osPointToDip(osPoint) {
if (this.screen && typeof this.screen.screenToDipPoint === 'function') {
try {
const dip = this.screen.screenToDipPoint(osPoint);
if (dip && Number.isFinite(dip.x) && Number.isFinite(dip.y)) return dip;
} catch { /* fall through to manual conversion */ }
}
try {
const displays = this.screen && typeof this.screen.getAllDisplays === 'function'
? this.screen.getAllDisplays()
: [];
const dip = physicalToDip(osPoint, displays);
if (dip) return dip;
} catch { /* no display geometry available */ }
return osPoint;
}
/**
@@ -765,9 +1221,23 @@ public static class SFMouseHook {
* still being stored queues behind it instead of being dropped by the
* "capture already in progress" guard. The marker position was already
* read at click time, so a queued step still circles the right spot.
*
* Crucially, only the *storing* is serialized. The click is paired with
* its frame right here, at event time: behind a slow store or PNG encode
* the queue can run seconds late, and a frame request issued that late
* could find the click-time frame already evicted from the ring buffer.
* Eager pairing keeps one-click-one-frame semantics intact no matter how
* fast the user clicks or how slow the encoder is.
*/
enqueueClickCapture(clickPos, clickAt = Date.now(), button = 'mouse') {
const clickMeta = { at: Number.isFinite(clickAt) ? clickAt : Date.now(), button: button || 'mouse' };
if (this.session && !this.session.paused && !this.userIsInApp()) {
// The guide id pins the click to its recording so it can still be
// stored if the session stops while this click waits in the queue.
clickMeta.guideId = this.session.guideId;
clickMeta.framePromise = this.frameForClick(clickPos, clickMeta.at)
.catch(() => null);
}
this.clickQueue = this.clickQueue
.then(() => this.sessionCapture('click', clickPos, clickMeta))
.catch(() => {});
@@ -795,8 +1265,10 @@ public static class SFMouseHook {
storeFrameAsStep(guideId, mode, frame, clickPos = null) {
if (!frame) return { ok: false, reason: 'no capture frame available' };
const annotations = [];
const cursor = clickPos || frame.cursor;
if (mode !== 'window' && this.settings.get('capture.clickMarker')) {
// The click position (DIP, read at event time) wins over the frame's
// grab-time cursor; stream-backend frames carry no cursor at all.
const cursor = clickPos || frame.cursor || null;
if (cursor && mode !== 'window' && this.settings.get('capture.clickMarker')) {
const fx = (cursor.x - frame.display.bounds.x) / frame.display.bounds.width;
const fy = (cursor.y - frame.display.bounds.y) / frame.display.bounds.height;
if (fx >= 0 && fx <= 1 && fy >= 0 && fy <= 1) {
@@ -837,8 +1309,8 @@ public static class SFMouseHook {
/** Grab the screen/window image as { image, display } or throw. */
async grab(mode, cursorPoint = null) {
const cursor = cursorPoint || screen.getCursorScreenPoint();
const display = screen.getDisplayNearestPoint(cursor);
const cursor = cursorPoint || this.screen.getCursorScreenPoint();
const display = this.screen.getDisplayNearestPoint(cursor);
const { width, height } = display.size;
const scale = display.scaleFactor || 1;
// Ask for both kinds: some compositors (WSLg/Wayland portals) expose no
+183
View File
@@ -0,0 +1,183 @@
'use strict';
/**
* Click ↔ frame correlation logic, shared by the main process and the
* capture-worker renderer (loaded there via a plain <script> tag, hence the
* UMD-style export at the bottom and the total absence of dependencies).
*
* The model: a recorder keeps a ring buffer of timestamped frames, each with
* { startedAt, capturedAt } — when the grab began and when it completed.
* A click carries its own hook-time timestamp. Pairing the two answers
* "what did the screen look like when the user clicked?".
*
* Strict mode encodes the product requirement (Folge-like recording): a step
* must show the screen *at or before* the click, never after it. A frame
* whose grab started after the click can already contain the click's effects
* (menus opened, pages navigated), so strict mode rejects it outright — the
* caller falls back to an explicit fresh shot instead of silently passing a
* post-click frame off as the click-time screen. Balanced mode keeps the old
* slack-window behavior for platforms where capture is too slow to keep a
* pre-click frame buffered.
*/
const DEFAULT_FRAME_LIMIT = 6;
const DEFAULT_RETENTION_MS = 4000;
// A frame older than this is too stale to pass off as "the screen at the
// instant of the click".
const DEFAULT_MAX_AGE_MS = 600;
// Balanced mode only: a grab that began within this window after the click
// is accepted on the assumption that UI reactions render slower than this.
const DEFAULT_START_SLACK_MS = 300;
/**
 * Whether `point` lies inside `bounds`, all four edges inclusive.
 * Returns false when either argument is missing.
 */
function pointInBounds(point, bounds) {
  if (!point || !bounds) return false;
  const { x, y } = point;
  const right = bounds.x + bounds.width;
  const bottom = bounds.y + bounds.height;
  const insideX = x >= bounds.x && x <= right;
  const insideY = y >= bounds.y && y <= bottom;
  return insideX && insideY;
}
/**
* Ring buffer of recent frames, bounded by both count and age. Frames are
* raw images (potentially tens of MB each), so eviction is eager and an
* optional onEvict hook lets callers release native resources (e.g.
* ImageBitmap.close() in the capture worker).
*/
class FrameRing {
  constructor({
    limit = DEFAULT_FRAME_LIMIT,
    retentionMs = DEFAULT_RETENTION_MS,
    now = Date.now,
    onEvict = null,
  } = {}) {
    this.limit = limit;
    this.retentionMs = retentionMs;
    this.now = now;
    this.onEvict = onEvict;
    this.items = [];
  }

  /** Append a frame and prune; a falsy frame is ignored and yields null. */
  push(frame) {
    if (frame) {
      this.items.push(frame);
      this.prune();
      return frame;
    }
    return null;
  }

  /**
   * Evict from the oldest end while the ring is over its count limit or its
   * oldest frame is not provably inside the retention window. The negated
   * `>=` means a missing or NaN capturedAt counts as expired.
   */
  prune() {
    const cutoff = this.now() - this.retentionMs;
    const mustEvictOldest = () => this.items.length > this.limit
      || !(this.items[0].capturedAt >= cutoff);
    while (this.items.length > 0 && mustEvictOldest()) {
      const evicted = this.items.shift();
      if (this.onEvict) this.onEvict(evicted);
    }
  }

  /** Shallow copy of the buffered frames, oldest first. */
  frames() {
    return this.items.slice();
  }

  /** Most recently pushed frame still buffered, or null when empty. */
  latest() {
    const count = this.items.length;
    return count ? this.items[count - 1] : null;
  }

  /** Empty the ring, passing every dropped frame to onEvict when set. */
  clear() {
    const dropped = this.items;
    this.items = [];
    if (!this.onEvict) return;
    for (const frame of dropped) {
      this.onEvict(frame);
    }
  }
}
/**
* Whether one frame may represent one click.
*
* Strict mode accepts only:
* - a frame completed at or before the click (and not older than maxAgeMs), or
* - when allowInFlight is set, a frame whose grab *started* at or before the
* click — its pixels predate the click's effects even though encoding
* finished after.
* A frame whose grab started after the click is never acceptable in strict
* mode, no matter how close: that is exactly the "screenshot shows the menu
* already open" failure.
*
* Balanced mode additionally accepts in-flight frames that started within
* startSlackMs after the click (the legacy heuristic).
*/
function frameUsableForClick(frame, {
  clickAt,
  clickPos = null,
  mode = null,
  strict = true,
  allowInFlight = false,
  maxAgeMs = DEFAULT_MAX_AGE_MS,
  startSlackMs = DEFAULT_START_SLACK_MS,
} = {}) {
  if (!frame) return false;

  // A requested capture mode must match the frame's.
  const wrongMode = Boolean(mode) && frame.mode !== mode;
  if (wrongMode) return false;

  // When both a click position and a frame display are known, the click
  // must have landed on that display.
  const offDisplay = Boolean(clickPos && frame.display)
    && !pointInBounds(clickPos, frame.display.bounds);
  if (offDisplay) return false;

  const clickTime = Number.isFinite(clickAt) ? clickAt : Date.now();
  const { capturedAt } = frame;

  // Completed at or before the click: usable unless it is too stale.
  if (Number.isFinite(capturedAt) && capturedAt <= clickTime) {
    return clickTime - capturedAt <= maxAgeMs;
  }

  // Everything below is an in-flight frame, judged by when its grab began
  // (falling back to capturedAt when startedAt is not a finite number).
  if (!allowInFlight) return false;
  const startedAt = Number.isFinite(frame.startedAt) ? frame.startedAt : capturedAt;
  if (!Number.isFinite(startedAt)) return false;

  // Strict: the grab must have begun by the click. Balanced: allow slack.
  const latestAcceptableStart = strict ? clickTime : clickTime + startSlackMs;
  return startedAt <= latestAcceptableStart;
}
/**
 * Among `frames`, the one with the greatest capturedAt that
 * frameUsableForClick accepts for `opts`, with in-flight acceptance forced
 * off. Returns null when none qualifies; on equal capturedAt the earlier
 * entry wins.
 */
function newestUsableFrame(frames, opts) {
  const completedOnly = { ...opts, allowInFlight: false };
  const candidates = [...(frames || [])]
    .filter((frame) => frameUsableForClick(frame, completedOnly));
  return candidates.reduce(
    (best, frame) => (!best || frame.capturedAt > best.capturedAt ? frame : best),
    null,
  );
}
/**
* Best already-buffered frame for a click, in two tiers:
* 1. with a click lead (opts.leadMs > 0): the newest frame captured at least
* leadMs *before* the click, so the step shows the screen the user was
* about to act on — clear of the click's own onset;
* 2. failing that, the newest frame captured before the click at all.
*
* The two tiers matter for correctness, not just polish: the lead is a
* *preference*, never a hard gate. If it were a gate, a click with no frame
* old enough to satisfy the lead would fall through to the caller's fresh
* shot — which captures the screen *after* the click. The tier-2 fallback
* guarantees that as long as any pre-click frame exists, we use it rather
* than shooting post-click. Buffered frames are always completed, so
* in-flight acceptance never applies here.
*/
function selectFrameForClick(frames, opts = {}) {
  // Non-numeric, NaN, or negative leads all collapse to "no lead".
  const requestedLead = Number(opts.leadMs) || 0;
  const leadMs = requestedLead > 0 ? requestedLead : 0;
  const clickAt = Number.isFinite(opts.clickAt) ? opts.clickAt : Date.now();

  // Tier 2: newest frame usable for the click time itself.
  const preClickFrame = () => newestUsableFrame(frames, { ...opts, clickAt });
  if (leadMs <= 0) return preClickFrame();

  // Tier 1: aim leadMs ahead of the click, with the staleness budget
  // widened by the same amount.
  const baseMaxAge = opts.maxAgeMs == null ? DEFAULT_MAX_AGE_MS : opts.maxAgeMs;
  const led = newestUsableFrame(frames, {
    ...opts,
    clickAt: clickAt - leadMs,
    maxAgeMs: baseMaxAge + leadMs,
  });

  // The lead is a preference, not a gate: fall back to tier 2.
  return led || preClickFrame();
}
// Public surface of this file: the ring buffer, the two selection
// functions, the bounds helper, and the default tuning constants.
const api = {
FrameRing,
frameUsableForClick,
selectFrameForClick,
pointInBounds,
DEFAULT_FRAME_LIMIT,
DEFAULT_RETENTION_MS,
DEFAULT_MAX_AGE_MS,
DEFAULT_START_SLACK_MS,
};
// UMD-style export. With a CommonJS `module` object present the API is
// exported through module.exports; otherwise it is attached as
// StepForgeClickFrames to `self` when that global exists, else to `window`.
/* eslint-disable no-undef */
if (typeof module === 'object' && module.exports) {
module.exports = api;
} else if (typeof self !== 'undefined') {
self.StepForgeClickFrames = api;
} else if (typeof window !== 'undefined') {
window.StepForgeClickFrames = api;
}
+110
View File
@@ -0,0 +1,110 @@
'use strict';
const { pointInBounds } = require('./click-frames');
/**
* Coordinate-space conversion between physical (OS event) pixels and
* Electron DIP points.
*
* Why this exists: OS-level click hooks report *physical* pixels (the X11
* root window space on Linux, virtual-screen pixels on Windows), while
* everything Electron-side — display bounds, cursor reads, the click-marker
* math in storeFrameAsStep — is in DIP. Mixing the two spaces is exactly the
* bug that makes the red marker drift on scaled displays: at 150% scaling a
* physical click at (1500, 900) is the DIP point (1000, 600), and a marker
* drawn at the physical values lands well below-right of the real click.
*
* On Windows, Electron exposes screen.screenToDipPoint() and the capture
* service prefers it. On Linux/X11 there is no such API, so we reconstruct
* the mapping from display geometry: each display's DIP bounds plus its
* scaleFactor give its physical rectangle, and a physical point inside that
* rectangle maps back linearly. With mixed-DPI multi-monitor X11 setups the
* origin reconstruction is an approximation (X11 itself has a single global
* coordinate space), but it is exact for the overwhelmingly common cases:
* single display at any scale, and multi-display with a uniform scale.
*/
/** Physical-pixel rectangle a display occupies, derived from DIP bounds. */
function physicalBoundsOf(display) {
  if (!display || !display.bounds) return null;
  const { bounds } = display;
  // A missing or zero scaleFactor is treated as 1 (unscaled).
  const factor = display.scaleFactor || 1;
  const toPhysical = (dipValue) => Math.round(dipValue * factor);
  return {
    x: toPhysical(bounds.x),
    y: toPhysical(bounds.y),
    width: toPhysical(bounds.width),
    height: toPhysical(bounds.height),
  };
}
/** Squared distance from `point` to the center of `rect`. */
function centerDistanceSq(point, rect) {
  const dx = point.x - (rect.x + rect.width / 2);
  const dy = point.y - (rect.y + rect.height / 2);
  return dx ** 2 + dy ** 2;
}
/**
 * Display whose physical rectangle contains the point, or the nearest one
 * (clicks on the very edge of a screen can round to one pixel outside it).
 *
 * "Nearest" is measured to the rectangle itself, not to its center: with
 * displays of different sizes, a point one pixel outside a large display
 * can be closer to a small neighbour's center than to the large display's
 * own center. Center distance is used only to break exact ties.
 *
 * Returns null when there is no point or no usable display.
 */
function displayForPhysicalPoint(point, displays) {
  if (!point || !Array.isArray(displays) || !displays.length) return null;
  // Squared distance from the point to the closest edge of `rect`
  // (0 along an axis on which the point lies within the rectangle's span).
  const edgeDistanceSq = (rect) => {
    const dx = Math.max(rect.x - point.x, 0, point.x - (rect.x + rect.width));
    const dy = Math.max(rect.y - point.y, 0, point.y - (rect.y + rect.height));
    return dx * dx + dy * dy;
  };
  let nearest = null;
  let nearestEdge = Infinity;
  let nearestCenter = Infinity;
  for (const display of displays) {
    const phys = physicalBoundsOf(display);
    if (!phys) continue;
    if (pointInBounds(point, phys)) return display;
    const edge = edgeDistanceSq(phys);
    const center = centerDistanceSq(point, phys);
    if (edge < nearestEdge || (edge === nearestEdge && center < nearestCenter)) {
      nearest = display;
      nearestEdge = edge;
      nearestCenter = center;
    }
  }
  return nearest;
}
/**
* Convert a physical-pixel point (OS click hook) to DIP. Returns null when
* no display geometry is available — the caller should then fall back to a
* live cursor read rather than guessing.
*/
function physicalToDip(point, displays) {
  const hasFiniteCoords = Boolean(point)
    && Number.isFinite(point.x)
    && Number.isFinite(point.y);
  if (!hasFiniteCoords) return null;

  const target = displayForPhysicalPoint(point, displays);
  if (!target) return null;

  const { bounds: dipBounds } = target;
  const physBounds = physicalBoundsOf(target);
  const factor = target.scaleFactor || 1;
  // Linear map: offset from the display's physical origin, divided by the
  // scale, added to the display's DIP origin.
  const toDip = (physValue, physStart, dipStart) => dipStart + (physValue - physStart) / factor;
  return {
    x: toDip(point.x, physBounds.x, dipBounds.x),
    y: toDip(point.y, physBounds.y, dipBounds.y),
  };
}
/**
 * Display whose DIP bounds contain the point, or the nearest one. Used to
 * route a click to the capture stream of the monitor it landed on.
 *
 * "Nearest" is measured to the bounds rectangle itself, not to its center:
 * with displays of different sizes, a point just outside a large display
 * can be closer to a small neighbour's center than to the large display's
 * own center. Center distance is used only to break exact ties.
 *
 * Returns null when there is no point or no display with bounds.
 */
function displayForDipPoint(point, displays) {
  if (!point || !Array.isArray(displays) || !displays.length) return null;
  // Squared distance from the point to the closest edge of `rect`
  // (0 along an axis on which the point lies within the rectangle's span).
  const edgeDistanceSq = (rect) => {
    const dx = Math.max(rect.x - point.x, 0, point.x - (rect.x + rect.width));
    const dy = Math.max(rect.y - point.y, 0, point.y - (rect.y + rect.height));
    return dx * dx + dy * dy;
  };
  let nearest = null;
  let nearestEdge = Infinity;
  let nearestCenter = Infinity;
  for (const display of displays) {
    if (!display || !display.bounds) continue;
    if (pointInBounds(point, display.bounds)) return display;
    const edge = edgeDistanceSq(display.bounds);
    const center = centerDistanceSq(point, display.bounds);
    if (edge < nearestEdge || (edge === nearestEdge && center < nearestCenter)) {
      nearest = display;
      nearestEdge = edge;
      nearestCenter = center;
    }
  }
  return nearest;
}
// Exported helpers. pointInBounds is not defined in this file: it is the
// function required from ./click-frames above, re-exported here.
module.exports = {
physicalBoundsOf,
displayForPhysicalPoint,
displayForDipPoint,
physicalToDip,
pointInBounds,
};
+190 -2
View File
@@ -98,6 +98,187 @@ function createWindow() {
}
}, 1500);
}
// Dev-only self-test: exercise the full click-capture pipeline — resume
// session, wait for the frame recorder, inject OS-level clicks the way
// the watcher would, and verify one stored step per click.
if (process.env.STEPFORGE_CLICK_SELFTEST) {
setTimeout(async () => {
try {
// The marker/drain scenarios inject clicks faster than the default
// debounce to stress the frame pipeline; turn the debounce off for
// them so every injected click is captured. A dedicated scenario
// at the end re-enables it and verifies the debounce itself.
settings.set('capture.clickDebounceMs', 0);
const guide = store.createGuide({ title: 'click selftest' });
capture.startSession(guide.guideId, { intervalSec: 0 });
// Isolate the test from the user's real mouse: the session starts
// the live OS click watcher, and a stray real click (dismissing
// the toast, focusing the terminal) would add an extra step and
// shift every marker comparison below.
capture.stopClickWatcher();
capture.togglePause(false);
mainWindow.hide();
// Arm the frame recorder directly: this host may lack the click
// watcher binary (xinput), which normally gates the recorder, but
// the recorder itself must still be testable end to end.
await capture.startClickFrameBackend();
// Let the stream backend (or the fallback loop) come up and buffer.
await new Promise((res) => setTimeout(res, 3000));
console.log('CLICK-SELFTEST source:', capture.state().clickFrameSource);
// Targets are chosen in DIP; the OS hook reports *physical* pixels,
// so convert before injecting (identity on unscaled displays).
const { bounds } = screen.getPrimaryDisplay();
const dipTargets = [
{ x: Math.round(bounds.x + bounds.width * 0.2), y: Math.round(bounds.y + bounds.height * 0.2) },
{ x: Math.round(bounds.x + bounds.width * 0.5), y: Math.round(bounds.y + bounds.height * 0.5) },
{ x: Math.round(bounds.x + bounds.width * 0.8), y: Math.round(bounds.y + bounds.height * 0.8) },
];
const toPhysical = (p) => (typeof screen.dipToScreenPoint === 'function'
? screen.dipToScreenPoint(p)
: p);
for (const point of dipTargets) {
capture.onOsClick(Date.now(), toPhysical(point), 'button-1');
await new Promise((res) => setTimeout(res, 120)); // fast clicking
}
// Wait for the queue to drain (encodes can take seconds on WSLg).
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 500));
const stepIds = store.getGuide(guide.guideId).stepsOrder;
const steps = store.listSteps(guide.guideId);
const markers = stepIds.map((id) => (steps.get(id).annotations || []).length);
console.log('CLICK-SELFTEST steps:', stepIds.length, 'of', dipTargets.length,
'markers:', JSON.stringify(markers));
if (stepIds.length !== dipTargets.length) {
console.log('CLICK-SELFTEST step count mismatch — marker offsets below are unreliable');
}
// Marker accuracy: each oval's center (fractional) must match the
// injected click position relative to the display bounds.
stepIds.forEach((id, i) => {
const a = (steps.get(id).annotations || [])[0];
const expectedClick = dipTargets[i];
if (!a || !expectedClick) return;
const center = { x: a.x + a.w / 2, y: a.y + a.h / 2 };
const expected = {
x: (expectedClick.x - bounds.x) / bounds.width,
y: (expectedClick.y - bounds.y) / bounds.height,
};
const offBy = Math.hypot(center.x - expected.x, center.y - expected.y);
console.log(`CLICK-SELFTEST marker ${i}: off by ${(offBy * 100).toFixed(2)}% of screen`);
});
capture.finishSession();
// Second scenario, reproducing the "I clicked many times but only
// got two screenshots" report: a fast burst of clicks immediately
// followed by finishing the session, so most clicks are still
// queued (frames still encoding) when the stop lands.
const burstGuide = store.createGuide({ title: 'burst selftest' });
capture.startSession(burstGuide.guideId, { intervalSec: 0 });
capture.stopClickWatcher();
capture.togglePause(false);
mainWindow.hide();
await capture.startClickFrameBackend();
await new Promise((res) => setTimeout(res, 1500));
const burstCount = 8;
for (let i = 0; i < burstCount; i++) {
const p = {
x: Math.round(bounds.x + bounds.width * (0.15 + 0.08 * i)),
y: Math.round(bounds.y + bounds.height * 0.5),
};
capture.onOsClick(Date.now(), toPhysical(p), 'button-1');
await new Promise((res) => setTimeout(res, 30)); // very fast clicking
}
// Finish right away — clicks are still mid-encode in the queue.
capture.finishSession();
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 1000));
const burstSteps = store.getGuide(burstGuide.guideId).stepsOrder.length;
console.log('CLICK-SELFTEST burst:', burstSteps, 'of', burstCount,
burstSteps === burstCount ? 'OK — no clicks dropped on finish' : 'FAIL — clicks lost');
// Helper: wait until armRecording has finished warming (window
// hidden, buffer primed) so an injected click counts as a real
// recording click rather than being ignored as a warmup click.
const waitArmed = async () => {
for (let i = 0; i < 80 && capture.warmingUp; i++) {
await new Promise((res) => setTimeout(res, 50));
}
};
// Third scenario: the real "Start recording" path. armRecording
// warms the recorder while the window is visible and only arms the
// session once it hides; the first click *after* arming must get a
// pre-click frame (not the post-click shot that made "the first
// screenshot late"), and a click *during* warmup must be ignored,
// not mishandled. (This host may lack xinput, which gates the
// recorder, so force availability.)
const armGuide = store.createGuide({ title: 'arm selftest' });
mainWindow.show();
await new Promise((res) => setTimeout(res, 300));
capture.startSession(armGuide.guideId, { intervalSec: 0 });
capture.stopClickWatcher();
capture.clickCaptureAvailable = () => true;
capture.hiddenForSession = true; // window was visible at session start
capture.togglePause(false); // armRecording: warm → hide → arm
// A click during warmup must be ignored (window still visible).
await new Promise((res) => setTimeout(res, 200));
const warmupClicks = store.getGuide(armGuide.guideId).stepsOrder.length;
capture.onOsClick(Date.now(), toPhysical({ x: bounds.x + 100, y: bounds.y + 100 }), 'button-1');
await waitArmed();
const armPoint = {
x: Math.round(bounds.x + bounds.width * 0.4),
y: Math.round(bounds.y + bounds.height * 0.4),
};
capture.onOsClick(Date.now(), toPhysical(armPoint), 'button-1');
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 800));
const armSteps = store.getGuide(armGuide.guideId).stepsOrder.length;
console.log('CLICK-SELFTEST arm: warmup-click steps', warmupClicks,
'-> after-arm steps', armSteps,
armSteps === 1 ? 'OK — warmup click ignored, first armed click captured' : 'FAIL');
capture.finishSession();
// Fourth scenario: the debounce itself, exercised end to end through
// onOsClick. A fast burst (40ms apart) must collapse to one step,
// and deliberate clicks (300ms apart) must each register.
settings.set('capture.clickDebounceMs', 200);
const dbGuide = store.createGuide({ title: 'debounce selftest' });
mainWindow.show();
await new Promise((res) => setTimeout(res, 200));
capture.startSession(dbGuide.guideId, { intervalSec: 0 });
capture.stopClickWatcher();
capture.clickCaptureAvailable = () => true;
capture.hiddenForSession = true;
capture.togglePause(false);
await capture.startClickFrameBackend();
await waitArmed();
await new Promise((res) => setTimeout(res, 300));
const dbPoint = {
x: Math.round(bounds.x + bounds.width * 0.55),
y: Math.round(bounds.y + bounds.height * 0.55),
};
// 4 clicks 40ms apart — accidental fast clicking → expect 1 step.
for (let i = 0; i < 4; i++) {
capture.onOsClick(Date.now(), toPhysical(dbPoint), 'button-1');
await new Promise((res) => setTimeout(res, 40));
}
// 3 deliberate clicks 300ms apart → expect 3 more steps.
for (let i = 0; i < 3; i++) {
await new Promise((res) => setTimeout(res, 300));
capture.onOsClick(Date.now(), toPhysical(dbPoint), 'button-1');
}
await capture.clickQueue;
await new Promise((res) => setTimeout(res, 800));
const dbSteps = store.getGuide(dbGuide.guideId).stepsOrder.length;
console.log('CLICK-SELFTEST debounce:', dbSteps, 'of 4 expected',
dbSteps === 4 ? 'OK — burst collapsed to 1, three deliberate clicks kept' : 'FAIL');
capture.finishSession();
} catch (err) {
console.log('CLICK-SELFTEST ERROR', err.message);
} finally {
app.quit();
}
}, 1500);
}
// Dev-only self-test: exercise the exact hotkey-session capture path
// (hide window -> grab -> showInactive) several times, then exit.
if (process.env.STEPFORGE_CAPTURE_SELFTEST) {
@@ -463,13 +644,20 @@ function setupIpc() {
const gotLock = app.requestSingleInstanceLock();
if (!gotLock) {
// Exiting silently here looks like a broken install ("npm start does
// nothing") — say why, and let the running instance surface itself.
console.error('[stepforge] already running — surfacing the existing window (check the tray).');
app.quit();
} else {
app.on('second-instance', () => {
if (mainWindow) {
if (!mainWindow) return;
// The window may be tucked away by a recording session; opening the
// app again is an explicit request to see it, so pause and show, the
// same as the tray's "Open StepForge".
if (capture) capture.togglePause(true);
if (mainWindow.isMinimized()) mainWindow.restore();
mainWindow.show();
mainWindow.focus();
}
});
app.whenReady().then(() => {
+11
View File
@@ -0,0 +1,11 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>StepForge capture worker</title>
<!-- Shared click↔frame selection logic; sets window.StepForgeClickFrames. -->
<script src="../click-frames.js" defer></script>
<script src="capture-worker.js" defer></script>
</head>
<body><!-- hidden window; frames live in JS, nothing renders here --></body>
</html>
+210
View File
@@ -0,0 +1,210 @@
'use strict';
/**
* Capture worker: runs in a hidden renderer window and owns all continuous
* screen capture during a recording session.
*
* Per display it opens a desktop media stream (the desktopCapturer source id
* comes from the main process) and samples it on a fixed cadence into a
* timestamped ring buffer of ImageBitmaps. Sampling and PNG encoding happen
* entirely in this process, so the main-process event loop — which must stay
* responsive to deliver OS click events on time — never blocks on capture
* work. ImageBitmaps are GPU-backed and cheap to create from a <video>
* element, which is what lets the cadence be much tighter than the old
* 200ms main-process desktopCapturer loop.
*
* On a frame request the worker applies the shared strict selection rule
* (newest frame captured at or before the click; never one whose grab
* started after it), encodes that single frame to PNG, and ships the bytes
* to the main process.
*/
/* global StepForgeClickFrames, captureWorkerBridge */
(() => {
// Sampling interval (ms) used when a start-stream command carries no sampleMs.
const FALLBACK_SAMPLE_MS = 50;
// Tight cadence means more frames per second; keep enough of them to span
// the click-lead window plus any encode/IPC hiccup, without hoarding GPU
// memory. 16 frames at the 50ms cadence is ~800ms of history.
const FALLBACK_FRAME_LIMIT = 16;
// Retention window (ms) handed to the frame ring when the start-stream
// command carries no retentionMs.
const FALLBACK_RETENTION_MS = 2000;
// Live streams owned by this worker, one entry per display that was started.
const streams = new Map(); // displayId(string) -> stream state
/**
 * Forward one event to the main process through the preload bridge.
 *
 * Never throws: a bridge failure is logged and reported through the return
 * value so callers can keep going.
 *
 * @param {object} msg Event payload.
 * @returns {boolean} true when the bridge accepted the message, false when
 *   handing it over threw.
 */
function send(msg) {
  let delivered = true;
  try {
    captureWorkerBridge.send(msg);
  } catch (err) {
    // The main process may be gone, or the payload could not cross the
    // bridge. Log it: a frame-response that vanishes silently would be
    // indistinguishable from a hung worker on the other side.
    console.error('capture-worker send failed:', err && err.message, 'type:', msg && msg.type);
    delivered = false;
  }
  return delivered;
}
/**
 * Open a desktop media stream for one display and start sampling it into a
 * fresh frame ring.
 *
 * Any stream already registered for the display is stopped first. On success
 * a `stream-ready` event is sent; on failure the stream is torn down and a
 * `stream-error` event carries the reason.
 *
 * The function awaits several times (getUserMedia, video.play, the first
 * sample). If the stream is stopped or replaced during one of those awaits,
 * this call no longer owns the display: it releases whatever it acquired and
 * returns silently, so it can neither leak a running capture nor report on
 * (or tear down) a newer stream for the same display.
 *
 * @param {object} cmd start-stream command: displayId, sourceId, optional
 *   display ({ bounds, scaleFactor }), sampleMs, retentionMs, frameLimit.
 * @returns {Promise<void>}
 */
async function startStream(cmd) {
  const key = String(cmd.displayId);
  stopStream(key);
  const display = cmd.display || {};
  const scale = display.scaleFactor || 1;
  const bounds = display.bounds || { width: 1280, height: 720 };
  const physWidth = Math.round(bounds.width * scale);
  const physHeight = Math.round(bounds.height * scale);
  const state = {
    displayId: cmd.displayId,
    media: null,
    video: null,
    timer: null,
    sampling: false,
    ring: new StepForgeClickFrames.FrameRing({
      limit: cmd.frameLimit || FALLBACK_FRAME_LIMIT,
      retentionMs: cmd.retentionMs || FALLBACK_RETENTION_MS,
      onEvict: (frame) => {
        if (frame && frame.bitmap && frame.bitmap.close) frame.bitmap.close();
      },
    }),
  };
  streams.set(key, state);

  // True while this call's state is still the one registered for the display.
  const isCurrent = () => streams.get(key) === state;
  // Free everything this call acquired without touching the registry entry,
  // which by now belongs to nobody or to a newer start.
  const releaseOrphan = () => {
    if (state.timer) clearInterval(state.timer);
    state.timer = null;
    if (state.media) {
      for (const track of state.media.getTracks()) {
        try { track.stop(); } catch { /* already stopped */ }
      }
    }
    state.ring.clear();
  };

  try {
    // The chromeMediaSource constraint set is Electron's documented bridge
    // from a desktopCapturer source id to a live media stream.
    state.media = await navigator.mediaDevices.getUserMedia({
      audio: false,
      video: {
        mandatory: {
          chromeMediaSource: 'desktop',
          chromeMediaSourceId: cmd.sourceId,
          minWidth: physWidth,
          maxWidth: physWidth,
          minHeight: physHeight,
          maxHeight: physHeight,
          maxFrameRate: 30,
        },
      },
    });
    if (!isCurrent()) {
      releaseOrphan();
      return;
    }
    const video = document.createElement('video');
    video.muted = true;
    video.srcObject = state.media;
    state.video = video;
    await video.play();
    if (!isCurrent()) {
      releaseOrphan();
      return;
    }
    const sampleMs = cmd.sampleMs || FALLBACK_SAMPLE_MS;
    state.timer = setInterval(() => sampleFrame(state), sampleMs);
    // Buffer a frame immediately so a click right after "Start recording"
    // already has something captured before it.
    await sampleFrame(state);
    if (!isCurrent()) {
      releaseOrphan();
      return;
    }
    send({ type: 'stream-ready', displayId: cmd.displayId });
  } catch (err) {
    if (!isCurrent()) {
      // Superseded or stopped: stopStream(key) here would hit someone else's
      // stream, and a stream-error would misreport it as failed.
      releaseOrphan();
      return;
    }
    stopStream(key);
    send({ type: 'stream-error', displayId: cmd.displayId, reason: String(err && err.message || err) });
  }
}
/**
 * Grab one frame from the stream's video element into its ring.
 *
 * Skips when a grab is already in flight or the video has no current frame
 * yet (readyState < 2). A failed grab is swallowed on purpose: it only means
 * the best available frame is slightly older.
 *
 * If the stream was stopped (or replaced) while the grab was in flight, the
 * bitmap is closed instead of pushed: the ring has already been cleared, so
 * a late push would leave a bitmap that nothing ever closes.
 *
 * @param {object} state Stream state ({ displayId, video, ring, sampling }).
 * @returns {Promise<void>}
 */
async function sampleFrame(state) {
  if (state.sampling || !state.video || state.video.readyState < 2) return;
  state.sampling = true;
  // startedAt/capturedAt bracket the grab so strict selection can tell
  // pre-click frames from post-click ones.
  const startedAt = Date.now();
  try {
    const bitmap = await createImageBitmap(state.video);
    if (streams.get(String(state.displayId)) !== state) {
      // Stopped mid-grab: release the bitmap rather than orphaning it.
      if (bitmap && bitmap.close) bitmap.close();
      return;
    }
    state.ring.push({
      mode: 'fullscreen',
      bitmap,
      width: bitmap.width,
      height: bitmap.height,
      startedAt,
      capturedAt: Date.now(),
    });
  } catch {
    // A failed sample only means a slightly older best frame.
  } finally {
    state.sampling = false;
  }
}
/**
 * Tear down the stream registered under `key`, if any: cancel its sampling
 * interval, stop every media track, clear its frame ring and drop it from
 * the registry. Unknown keys are a no-op.
 *
 * @param {string} key Display id as a string.
 */
function stopStream(key) {
  const entry = streams.get(key);
  if (entry) {
    if (entry.timer) clearInterval(entry.timer);
    const tracks = entry.media ? entry.media.getTracks() : [];
    for (const track of tracks) {
      try {
        track.stop();
      } catch { /* already stopped */ }
    }
    entry.ring.clear();
    streams.delete(key);
  }
}
/**
 * Answer one frame-request: pick the frame for the click, acknowledge the
 * selection, then PNG-encode that frame and ship it.
 *
 * Replies are always `frame-response` events tagged with the request id;
 * `ok: false` carries a `reason`. A successful selection additionally emits
 * a `frame-selected` event before the encode starts.
 *
 * @param {object} cmd frame-request command: requestId, displayId, clickAt,
 *   optional leadMs (defaults to 0) and strict (anything but false is strict).
 * @returns {Promise<boolean>} Result of sending the final response.
 */
async function handleFrameRequest(cmd) {
  const { requestId } = cmd;
  const respond = (fields) => send({ type: 'frame-response', requestId, ...fields });

  const state = streams.get(String(cmd.displayId));
  if (!state) return respond({ ok: false, reason: 'no stream for display' });

  // Take one more sample first: a video frame delivered since the last tick
  // can only improve the match, because its startedAt is still checked
  // against the click by the selection below.
  await sampleFrame(state);

  const selection = {
    clickAt: cmd.clickAt,
    leadMs: cmd.leadMs || 0,
    mode: 'fullscreen',
    strict: cmd.strict !== false,
  };
  const frame = StepForgeClickFrames.selectFrameForClick(state.ring.frames(), selection);
  if (!frame) return respond({ ok: false, reason: 'no frame at or before the click' });

  const { width, height, startedAt, capturedAt } = frame;
  // Stage one: acknowledge the selection right away. The encode can take
  // seconds on software-rendered hosts, and without this ack the main
  // process could not tell a slow encode from a dead worker.
  send({ type: 'frame-selected', requestId, startedAt, capturedAt });

  // Stage two: encode the selected bitmap and ship the bytes.
  let png;
  try {
    const canvas = new OffscreenCanvas(width, height);
    canvas.getContext('2d').drawImage(frame.bitmap, 0, 0);
    const blob = await canvas.convertToBlob({ type: 'image/png' });
    png = new Uint8Array(await blob.arrayBuffer());
  } catch (err) {
    return respond({ ok: false, reason: String(err && err.message || err) });
  }
  return respond({ ok: true, png, width, height, startedAt, capturedAt });
}
/**
 * Send a `stats` event describing every registered stream: buffered frame
 * count, capture time of the newest frame, the video element's ready state
 * and size, and whether a sample is currently in flight.
 *
 * @param {object} [cmd] stats-request command; its requestId is echoed back.
 */
function reportStats(cmd) {
  const stats = {};
  streams.forEach((state, key) => {
    const frameCount = state.ring.frames().length;
    const newest = state.ring.latest();
    const { video } = state;
    stats[key] = {
      frames: frameCount,
      latestCapturedAt: newest ? newest.capturedAt : null,
      videoReadyState: video ? video.readyState : null,
      videoSize: video ? `${video.videoWidth}x${video.videoHeight}` : null,
      sampling: state.sampling,
    };
  });
  send({ type: 'stats', requestId: cmd && cmd.requestId, stats });
}
// Command dispatcher: routes each message from the main process to its
// handler. Non-object messages and unknown types are ignored.
captureWorkerBridge.onCommand((msg) => {
  if (!msg || typeof msg !== 'object') return;
  if (msg.type === 'start-stream') {
    // startStream reports most failures itself, but an exception raised
    // before its own try block would otherwise become an unhandled rejection
    // and leave the main process waiting out its start timeout with no
    // stream-error to explain why.
    startStream(msg).catch((err) => {
      console.error('capture-worker start-stream failed:', err && err.message);
      send({ type: 'stream-error', displayId: msg.displayId, reason: String(err && err.message || err) });
    });
  } else if (msg.type === 'stop-stream') stopStream(String(msg.displayId));
  else if (msg.type === 'frame-request') {
    // A request must always produce a response — an unanswered click
    // counts toward backend unhealthiness in the main process.
    handleFrameRequest(msg).catch((err) => {
      console.error('capture-worker frame-request failed:', err && err.message);
      send({ type: 'frame-response', requestId: msg.requestId, ok: false, reason: String(err && err.message || err) });
    });
  } else if (msg.type === 'stats-request') reportStats(msg);
});
})();
+362
View File
@@ -0,0 +1,362 @@
'use strict';
const path = require('node:path');
const { displayForDipPoint, pointInBounds } = require('./coords');
/**
* Off-main-process click-frame backend.
*
* The legacy design ran desktopCapturer.getSources() in a 200ms loop on the
* main process. That had two structural problems this backend removes:
* - every grab (and the occasional PNG encode) blocked the main-process
* event loop, which delayed delivery of OS click events — the very events
* the loop existed to serve — by up to whole seconds under load;
* - getSources() is a heavy thumbnail API, so the loop had to idle 200ms
* between grabs, leaving clicks to be matched against frames that could
* be hundreds of ms stale.
*
* Here, a hidden worker window opens a desktop media *stream* per display
* and samples it on a tight cadence into a timestamped ring buffer — all in
* the worker's renderer process. On click, the main process sends only a tiny
* IPC request carrying the hook-time click timestamp; the worker picks the
* newest frame captured at or before that instant (strict semantics from
* click-frames.js), PNG-encodes it off the main process, and ships the bytes
* back. The main process never grabs or encodes a frame while recording.
*
* Failure handling: the backend is an optimization, never a single point of
* failure. If streams don't come up (Wayland portals, WSLg quirks) start()
* reports false and the capture service falls back to the legacy loop; if
* frame requests start timing out mid-session, the backend declares itself
* unhealthy once and the service degrades the same way.
*/
// Sampling cadence (ms) forwarded to the worker when start() is called
// without an explicit sampleMs.
const DEFAULT_SAMPLE_MS = 100;
// The reply protocol is two-stage so a *slow* worker is never mistaken for a
// *dead* one: the worker acknowledges frame selection within milliseconds
// (that pins the click↔frame pairing and proves liveness), then ships the
// PNG whenever the encode finishes — which can take seconds per frame on
// software-rendered hosts (WSLg, VMs). Only a missing ack marks the worker
// unhealthy; a slow payload merely arrives late but is still the exact
// frame chosen at click time.
// Deadline for the first reply (selection ack or response) to a frame request.
const DEFAULT_ACK_TIMEOUT_MS = 2000;
// Deadline for the PNG payload once the selection ack has arrived.
const DEFAULT_ENCODE_TIMEOUT_MS = 30_000;
// How long start() waits for a stream to report ready before giving up.
const DEFAULT_START_TIMEOUT_MS = 8000;
// Consecutive unanswered requests before the backend declares itself
// unhealthy and the capture service degrades to the in-process loop.
const MAX_CONSECUTIVE_FAILURES = 2;
/**
 * Main-process controller for the capture worker.
 *
 * Owns the worker host (created through the injected factory), tracks one
 * stream per display, and turns click timestamps into frame requests whose
 * replies resolve promises. All worker traffic arrives through
 * handleWorkerEvent.
 *
 * Lifecycle races are guarded by a generation counter that every stop()
 * bumps: a start() that was in flight when stop() ran notices the change,
 * resolves false promptly, and never touches the state of a later session.
 */
class StreamCaptureBackend {
  /**
   * @param {object} opts
   * @param {(onEvent: (msg) => void) => Promise<{send,destroy}>} opts.createHost
   *   Factory for the worker transport (the hidden BrowserWindow in
   *   production, a fake in tests).
   * @param {(reason: string) => void} [opts.onUnhealthy]
   *   Called once when consecutive request timeouts mark the worker dead.
   * @param {number} [opts.ackTimeoutMs] Deadline for the first reply to a
   *   frame request.
   * @param {number} [opts.encodeTimeoutMs] Deadline for the PNG payload after
   *   the selection ack.
   * @param {number} [opts.startTimeoutMs] How long start() waits for a ready
   *   stream.
   */
  constructor({
    createHost,
    onUnhealthy = null,
    ackTimeoutMs = DEFAULT_ACK_TIMEOUT_MS,
    encodeTimeoutMs = DEFAULT_ENCODE_TIMEOUT_MS,
    startTimeoutMs = DEFAULT_START_TIMEOUT_MS,
  } = {}) {
    this.createHost = createHost;
    this.onUnhealthy = onUnhealthy;
    this.ackTimeoutMs = ackTimeoutMs;
    this.encodeTimeoutMs = encodeTimeoutMs;
    this.startTimeoutMs = startTimeoutMs;
    this.host = null;
    this.active = false;
    this.requests = new Map(); // requestId -> { resolve, display, timer }
    this.streams = new Map(); // displayId(string) -> { display, ready, failed }
    this.nextRequestId = 1;
    this.consecutiveFailures = 0;
    this.startWaiters = [];
    this.draining = false;
    // Bumped by every stop(); lets an in-flight start() see it was cancelled.
    this.generation = 0;
  }

  /** @returns {boolean} true while the backend is serving frame requests. */
  isActive() {
    return this.active;
  }

  /**
   * Spin up the worker and one stream per display that has a matching screen
   * source. Resolves true when at least one stream is delivering frames.
   *
   * Resolves false without side effects on the current state when stop() is
   * called while the start is still in flight. When a host already exists
   * (running or draining) it resolves the current active flag.
   *
   * @param {object} [opts]
   * @param {object[]} [opts.displays] Displays to record.
   * @param {object[]} [opts.sources] desktopCapturer sources to pair them with.
   * @param {number} [opts.sampleMs] Worker sampling cadence.
   * @param {?number} [opts.retentionMs] Worker ring retention window.
   * @param {?number} [opts.frameLimit] Worker ring capacity.
   * @returns {Promise<boolean>}
   */
  async start({ displays = [], sources = [], sampleMs = DEFAULT_SAMPLE_MS, retentionMs = null, frameLimit = null } = {}) {
    if (this.host) return this.active;
    const pairs = pairDisplaysToSources(displays, sources);
    if (!pairs.length) return false;
    const generation = this.generation;
    let host;
    try {
      host = await this.createHost((msg) => this.handleWorkerEvent(msg));
    } catch {
      return false;
    }
    if (generation !== this.generation || this.host) {
      // stop() ran, or a concurrent start() installed its host, while this
      // one was loading: discard the surplus host instead of leaking it or
      // reviving a backend that was told to stop.
      try { host.destroy(); } catch { /* already gone */ }
      return this.active;
    }
    this.host = host;
    // A fresh worker starts with a clean health record; otherwise a backend
    // restarted after going unhealthy would degrade again on its first
    // timeout.
    this.consecutiveFailures = 0;
    for (const { display, sourceId } of pairs) {
      this.streams.set(String(display.id), { display, ready: false, failed: false });
      this.hostSend({
        type: 'start-stream',
        displayId: display.id,
        sourceId,
        // The worker needs the physical pixel size to request a full-res
        // stream; bounds stay in DIP for marker math back in the main process.
        display: {
          id: display.id,
          bounds: display.bounds,
          scaleFactor: display.scaleFactor || 1,
        },
        sampleMs,
        retentionMs,
        frameLimit,
      });
    }
    const anyReady = await this.waitForStreams();
    // stop() already tore this start down (and a newer session may be
    // running by now): leave the current state alone.
    if (generation !== this.generation) return false;
    this.active = anyReady;
    if (!anyReady) this.stop();
    return this.active;
  }

  /**
   * Resolves true as soon as one stream reports ready; false on timeout, when
   * every stream failed, or when stop() cancels the start.
   *
   * @returns {Promise<boolean>}
   */
  waitForStreams() {
    const generation = this.generation;
    return new Promise((resolve) => {
      const finish = (ok) => {
        clearTimeout(timer);
        this.startWaiters = this.startWaiters.filter((w) => w !== check);
        resolve(ok);
      };
      const check = () => {
        // stop() bumps the generation before re-running the waiters, so a
        // cancelled start resolves now instead of sitting out the timeout.
        if (generation !== this.generation) return finish(false);
        const states = [...this.streams.values()];
        if (states.some((s) => s.ready)) return finish(true);
        if (states.length && states.every((s) => s.failed)) return finish(false);
        return null;
      };
      const timer = setTimeout(() => finish(false), this.startTimeoutMs);
      this.startWaiters.push(check);
      check();
    });
  }

  /**
   * Send a command to the worker, ignoring a missing or broken host.
   *
   * @param {object} msg Command payload.
   */
  hostSend(msg) {
    if (!this.host) return;
    try {
      this.host.send(msg);
    } catch {
      // A dead host surfaces as request timeouts → unhealthy → degrade.
    }
  }

  /**
   * Entry point for every event the worker sends: stream health updates, the
   * selection ack and the final frame response.
   *
   * @param {object} msg Worker event.
   */
  handleWorkerEvent(msg) {
    if (!msg || typeof msg !== 'object') return;
    if (msg.type === 'stream-ready' || msg.type === 'stream-error') {
      const stream = this.streams.get(String(msg.displayId));
      if (stream) {
        stream.ready = msg.type === 'stream-ready';
        stream.failed = msg.type === 'stream-error';
      }
      for (const check of [...this.startWaiters]) check();
      return;
    }
    if (msg.type === 'frame-selected') {
      // Stage one: the worker picked a frame for this click. The pairing is
      // now pinned and the worker is provably alive — swap the short ack
      // deadline for the long encode deadline and wait for the pixels.
      const pending = this.requests.get(msg.requestId);
      if (!pending) return;
      this.consecutiveFailures = 0;
      clearTimeout(pending.timer);
      pending.timer = setTimeout(() => {
        this.settleRequest(msg.requestId, null);
        this.noteFailure();
      }, this.encodeTimeoutMs);
      return;
    }
    if (msg.type === 'frame-response') {
      const pending = this.requests.get(msg.requestId);
      if (!pending) return; // late reply after timeout — already handled
      // Any answer — even "no qualifying frame" — proves the worker is alive.
      this.consecutiveFailures = 0;
      const value = (!msg.ok || !msg.png) ? null : {
        mode: 'fullscreen',
        png: Buffer.from(msg.png),
        size: { width: msg.width, height: msg.height },
        display: pending.display,
        startedAt: msg.startedAt,
        capturedAt: msg.capturedAt,
        source: 'stream',
      };
      this.settleRequest(msg.requestId, value);
    }
  }

  /**
   * Resolve one pending request and clean it up. When the backend is
   * draining (stop() was called while requests were still in flight), the
   * last settled request triggers the deferred worker teardown — this is
   * what lets clicks queued at finish time still receive their frames
   * instead of being cancelled to null.
   *
   * @param {number} requestId Id of the pending request.
   * @param {?object} value Frame record, or null when no frame is available.
   */
  settleRequest(requestId, value) {
    const pending = this.requests.get(requestId);
    if (!pending) return;
    this.requests.delete(requestId);
    clearTimeout(pending.timer);
    pending.resolve(value);
    if (this.draining && this.requests.size === 0) this.finalizeTeardown();
  }

  /**
   * Frame for one click, selected in the worker under the given strictness.
   * Resolves null when no frame qualifies (caller falls back) — and also on
   * timeout, which additionally counts toward unhealthiness.
   *
   * @param {object} [opts]
   * @param {?{x:number,y:number}} [opts.clickPos] Click position in DIP; when
   *   omitted the first ready display is used.
   * @param {number} [opts.clickAt] Click timestamp (ms since epoch).
   * @param {boolean} [opts.strict] Strict selection flag for the worker.
   * @param {number} [opts.leadMs] Lead passed through to the worker.
   * @returns {Promise<?object>}
   */
  frameForClick({ clickPos = null, clickAt = Date.now(), strict = true, leadMs = 0 } = {}) {
    if (!this.active || !this.host) return Promise.resolve(null);
    const displays = [...this.streams.values()].filter((s) => s.ready).map((s) => s.display);
    const display = clickPos ? displayForDipPoint(clickPos, displays) : (displays[0] || null);
    if (!display) return Promise.resolve(null);
    // Never serve a click from another monitor's stream: if the clicked
    // display has no ready stream, a "nearest display" frame would show the
    // wrong screen entirely and the marker fractions would be meaningless.
    // Resolve null instead so the caller's fallback captures the right one.
    if (clickPos && !pointInBounds(clickPos, display.bounds)) return Promise.resolve(null);
    const requestId = this.nextRequestId++;
    return new Promise((resolve) => {
      const pending = { resolve, display, timer: null };
      pending.timer = setTimeout(() => {
        this.settleRequest(requestId, null);
        this.noteFailure();
      }, this.ackTimeoutMs);
      this.requests.set(requestId, pending);
      this.hostSend({
        type: 'frame-request',
        requestId,
        displayId: display.id,
        clickAt,
        strict,
        leadMs,
      });
    });
  }

  /**
   * Record one timed-out request; at MAX_CONSECUTIVE_FAILURES in a row the
   * backend stops immediately and notifies onUnhealthy.
   */
  noteFailure() {
    this.consecutiveFailures += 1;
    if (this.consecutiveFailures < MAX_CONSECUTIVE_FAILURES) return;
    const notify = this.onUnhealthy;
    this.stop({ immediate: true });
    if (notify) notify('frame requests timing out');
  }

  /**
   * Stop the backend. By default this *drains*: it stops accepting new
   * requests but keeps the worker alive so frames already selected for
   * queued clicks finish encoding and resolve — without this, finishing a
   * recording right after a fast click burst cancels every still-encoding
   * frame to null and those clicks are lost ("only two screenshots saved").
   * Pass { immediate: true } to abandon in-flight requests (used when the
   * worker is already unhealthy).
   *
   * A start() still in flight is cancelled: it resolves false right away.
   *
   * @param {object} [opts]
   * @param {boolean} [opts.immediate] Abandon in-flight requests.
   */
  stop({ immediate = false } = {}) {
    this.active = false;
    // Invalidate any start() in flight before waking its waiter, so the
    // waiter resolves false now rather than at its timeout.
    this.generation += 1;
    for (const check of [...this.startWaiters]) check();
    this.startWaiters = [];
    if (immediate) {
      for (const [, pending] of this.requests) {
        clearTimeout(pending.timer);
        pending.resolve(null);
      }
      this.requests.clear();
      this.finalizeTeardown();
      return;
    }
    if (this.requests.size === 0) {
      this.finalizeTeardown();
      return;
    }
    // Let pending requests resolve naturally (their own encode timers still
    // bound the wait); finalizeTeardown fires from settleRequest when the
    // last one completes.
    this.draining = true;
  }

  /** Forget every stream and destroy the worker host, if one exists. */
  finalizeTeardown() {
    this.draining = false;
    this.streams.clear();
    if (this.host) {
      try { this.host.destroy(); } catch { /* already gone */ }
      this.host = null;
    }
  }
}
/**
 * Pair every display with its desktopCapturer screen source.
 *
 * Only sources whose id is a string starting with `screen:` are considered,
 * and each source is handed out at most once. A display matches the source
 * whose `display_id` equals its id (compared as strings). When there is
 * exactly one display and exactly one screen source, that source is used
 * even without a matching `display_id`. Displays with no source are left out.
 *
 * @param {object[]} displays Displays, each with an `id`.
 * @param {object[]} sources desktopCapturer sources (`id`, `display_id`).
 * @returns {{display: object, sourceId: string}[]}
 */
function pairDisplaysToSources(displays, sources) {
  const isScreenSource = (s) => s && typeof s.id === 'string' && s.id.startsWith('screen:');
  const screenSources = (sources || []).filter(isScreenSource);
  const displayList = displays || [];
  // Single display, single source: some platforms leave display_id empty.
  const soleFallback = displayList.length === 1 && screenSources.length === 1
    ? screenSources[0]
    : null;

  const claimed = new Set();
  const pairs = [];
  for (const display of displayList) {
    const wantedId = String(display.id);
    const match = screenSources.find(
      (s) => !claimed.has(s.id) && String(s.display_id) === wantedId,
    ) || soleFallback;
    if (match) {
      claimed.add(match.id);
      pairs.push({ display, sourceId: match.id });
    }
  }
  return pairs;
}
/**
 * Production worker host: creates a hidden BrowserWindow, loads the
 * capture-worker page into it, and relays worker events to `onEvent`.
 * Electron is required lazily so this module stays loadable under plain
 * node for unit tests.
 *
 * Only events sent by this window's own webContents are forwarded. If the
 * page fails to load, the listener and window are released and the load
 * error is rethrown.
 *
 * @param {(msg: object) => void} onEvent Receives each worker event.
 * @returns {Promise<{send: (msg: object) => void, destroy: () => void}>}
 */
async function createElectronHost(onEvent) {
  // eslint-disable-next-line global-require
  const electron = require('electron');
  const { BrowserWindow, ipcMain } = electron;

  const webPreferences = {
    preload: path.join(__dirname, 'capture-worker-preload.js'),
    contextIsolation: true,
    nodeIntegration: false,
    // The worker must keep sampling while hidden — throttling a hidden
    // window is exactly the wrong default for a frame recorder.
    backgroundThrottling: false,
  };
  const win = new BrowserWindow({
    show: false,
    width: 320,
    height: 240,
    skipTaskbar: true,
    webPreferences,
  });

  // The channel is shared, so drop anything not sent by this window.
  const listener = (event, msg) => {
    if (event.sender !== win.webContents) return;
    onEvent(msg);
  };
  // Detach the listener and close the window; safe to call more than once.
  const release = () => {
    ipcMain.removeListener('capture-worker:event', listener);
    if (!win.isDestroyed()) win.destroy();
  };

  ipcMain.on('capture-worker:event', listener);
  try {
    await win.loadFile(path.join(__dirname, 'renderer', 'capture-worker.html'));
  } catch (err) {
    release();
    throw err;
  }

  return {
    send(msg) {
      if (win.isDestroyed()) return;
      win.webContents.send('capture-worker:command', msg);
    },
    destroy() {
      release();
    },
  };
}
module.exports = {
StreamCaptureBackend,
createElectronHost,
pairDisplaysToSources,
DEFAULT_SAMPLE_MS,
DEFAULT_ACK_TIMEOUT_MS,
DEFAULT_ENCODE_TIMEOUT_MS,
MAX_CONSECUTIVE_FAILURES,
};
+23
View File
@@ -18,7 +18,30 @@ const DEFAULT_SETTINGS = {
hotkeyPauseResume: 'CommandOrControl+Shift+2',
captureOutsideClicks: true,
confirmSimpleCapture: false,
// Leading-edge click debounce (ms): clicks of the same button closer
// together than this collapse into one step, so accidental fast/double
// clicks don't each become a step. Clicks spaced further apart always
// register. Set to 0 to capture every click.
clickDebounceMs: 200,
autoIntervalSec: 5, // session fallback when click capture is unavailable
// Strict click timing: a step never uses a frame whose grab started
// after the click. Turn off only if captures are too slow to keep a
// pre-click frame buffered (re-enables the legacy slack heuristics).
strictClickFrames: true,
// Off-main-process frame recorder (hidden worker window sampling a
// desktop media stream). Falls back to the in-process loop when false
// or when streams cannot start on this desktop.
streamCapture: true,
frameSampleMs: 50, // stream backend sampling cadence (finer = fresher frames)
// Target the screen this many ms *before* each click. The hook fires on
// button-down but the UI/cursor often start reacting within a frame, and
// stream pixels lag slightly; a small lead keeps the saved screenshot
// clear of the click's onset. Raise it if screenshots still feel late.
clickLeadMs: 120,
// After the window hides at recording start, wait this long before the
// user is likely to click so the buffer holds frames of the now-visible
// screen rather than the just-dismissed app window.
postHideSettleMs: 150,
},
editor: {
focusedViewDefaultForNewSteps: false,
+62
View File
@@ -102,6 +102,68 @@ IPC API (`stepforge.*`), and `app/main.js` routes calls into `core/`. Screen
capture uses Electron's `desktopCapturer` (full screen, window) and an
overlay window for region selection; hotkeys use `globalShortcut`.
## Click-Capture Pipeline
Workflow recording must behave like one click → one step, with the
screenshot showing the screen *at* the click and the marker on the exact
click position. Three pieces make that hold:
1. **OS click events** (`app/capture.js`): a low-level mouse hook on Windows
(`CLICK x y button unixMs` lines), an `xinput test-xi2 --root` watcher on
X11. The Linux parser carries event-time `root:` coordinates and merges
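raw/regular twin blocks structurally — the parser itself has no time-based
debounce that could drop fast clicks, only suppression of identical duplicate
deliveries (the configurable `capture.clickDebounceMs` debounce is separate
from this parser-level merging). Physical coordinates convert to DIP via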
`screen.screenToDipPoint` on Windows or display-geometry math in
`app/coords.js` elsewhere (multi-monitor and scale-factor aware).
2. **Frame recorders**: while recording, a hidden worker window
(`app/stream-backend.js` + `app/renderer/capture-worker.js`) samples a
desktop media stream per display into a timestamped ring buffer —
entirely off the main process, so click delivery is never delayed by
capture work, and PNG encoding happens in the worker. If streams can't
start (portal-less Wayland), or the worker stops answering, the service
degrades to the legacy in-process `desktopCapturer` loop.
3. **Click ↔ frame pairing** (`app/click-frames.js`, shared by the main
process, the worker, and tests): each click is paired *at event time*
with the newest frame captured at or before its hook timestamp. In strict
mode (`capture.strictClickFrames`, default on) a frame whose grab started
after the click is never used — when nothing qualifies, the service takes
an explicit fresh shot instead of passing a post-click frame off as the
click-time screen. Storing is serialized per click; pairing is not, so
slow encodes never skew later clicks.
Reliability rules that keep "one click → one step" true under load:
- **The worker reply is two-stage.** It acknowledges frame *selection*
within milliseconds (proving liveness and pinning the pairing), then
ships the PNG whenever the encode finishes — seconds later on
software-rendered hosts. A slow payload is never mistaken for a dead
worker; only a missing ack degrades the backend.
- **Stopping drains.** Finishing or pausing a recording keeps the worker
alive until frames already selected for queued clicks finish encoding.
Without this, ending a session right after a fast click burst cancelled
every still-encoding frame and those clicks vanished (the "I clicked ten
times but only got two screenshots" bug).
- **Queued clicks outlive the session.** A click registered while recording
carries its guide id and still becomes a step if the session ends while it
waits in the store queue. The lone exception is the tray gesture that
stopped the session, discarded by matching its recorded screen position.
- **A click is never served another monitor's frame.** If the clicked
display has no ready stream the backend returns null and the caller
fresh-shots the correct screen, rather than circling a point on the wrong
one.
`STEPFORGE_CLICK_SELFTEST=1 npm start` exercises the whole pipeline in a
real Electron session across four scenarios — marker accuracy (0.00%
offset), a fast-burst-then-finish that must save every click, the
warm-before-arm first click, and the ~200ms debounce. It runs automatically
as `tests/checks/test_click_capture_selftest.sh` (skipped only when the host
has no capture environment), so a regression in click→screenshot→step
behavior fails the suite. `STEPFORGE_CAPTURE_LOG=1` prints one diagnostic
line per click decision.
## Security Rules
- Zero network code paths: no sockets, no telemetry, no update or license
+71
View File
@@ -5,8 +5,79 @@ Keep-a-Changelog conventions; versions follow semver.
## [Unreleased]
### Changed
- **Click-capture pipeline rearchitected for Folge-like recording.** This is
the milestone where fast, real-world recording works end to end: every
mouse click during a session becomes exactly one saved step, the red
marker lands on the exact click position (verified at 0.00% offset across
scaled and multi-monitor displays), and the screenshot shows the screen at
the click rather than after it.
- Continuous capture now runs in a hidden worker process that samples a
desktop media stream per display into a timestamped ring buffer, so the
main process stays responsive and OS click events are never delayed by
capture work. Falls back to the legacy in-process loop where streams
cannot start (portal-less Wayland/WSLg).
- Each click is paired with the newest frame captured at or before its
hook timestamp (strict timing, `capture.strictClickFrames`, default on):
a frame whose grab started after the click is never used.
- Physical→DIP coordinate conversion is multi-monitor and scale-factor
aware (`screen.screenToDipPoint` on Windows, display-geometry math
elsewhere), fixing marker drift on displays scaled away from 100%.
- A configurable click-lead (`capture.clickLeadMs`, default 120ms) prefers
a frame captured a little before each click so the saved step shows what
the user was about to act on, not the click's onset; the stream sampling
cadence was tightened to 50ms so a frame near that target always exists.
The lead is a preference, not a gate: selection falls back to the newest
frame still before the click, so it never forces a post-click screenshot.
- The frame recorder now warms up *before* the window hides at recording
start, instead of after. Previously the first click of a session could
beat the ~1s warmup and fall back to a post-click shot — "the first
screenshot is late" — while every later click was fine. Now frames are
buffering by the time the window tucks away, so the first click is
served a pre-click frame like the rest.
- The whole click→screenshot→step pipeline is guarded end to end by
`tests/checks/test_click_capture_selftest.sh`, which runs a real Electron
session and asserts marker accuracy, no dropped burst clicks, the first
click of a session captured, and the debounce — so this behavior fails
the suite if it ever regresses.
### Added
- **Click debounce (`capture.clickDebounceMs`, default 200ms).** Clicks of
the same mouse button closer together than the window collapse into one
step, so accidental fast or double clicks don't each become a step, while
any two deliberate clicks spaced further apart both register. It is a
leading-edge debounce measured from the last *accepted* click, so a run of
fast clicks can't push the next real click out. Set it to 0 to capture
every click. Backed by behavioral unit tests that drive click sequences
through real timestamps (not keyword checks) plus an end-to-end self-test
scenario.
### Fixed
- **Restarting a recording no longer drops clicks or "stops after one
click."** While the recorder warmed up at the start of a session the
window was still visible, so clicks over it were skipped and clicks
elsewhere were shot post-click — and on a restart the backend start could
take several seconds, stretching that bad window out. Recording is now
"armed" only once the window is actually hidden and the buffer is primed:
clicks during warmup are cleanly ignored (the window is covering the
user's work anyway), the window hides within a bounded time even if the
backend is slow to start, and a slow start left over from a finished
session can no longer block the next session from starting its own.
- **Fast click bursts no longer lose screenshots.** Finishing or pausing a
recording used to cancel every screenshot still being encoded, so a quick
series of clicks saved only the first two or three. The capture worker now
drains on stop — frames already captured for queued clicks finish encoding
and are saved — so all clicks are recorded even on machines where PNG
encoding takes seconds. Verified end to end: an 8-click burst followed by
an immediate finish saves all 8.
- **Screenshots taken after the click instead of at it.** A slow PNG encode
was being mistaken for a dead capture worker, which kicked the click over
to a fallback that shot the screen after the click. The worker now
acknowledges frame selection immediately and ships the encoded image
separately, so a slow encode no longer triggers the post-click fallback.
- Windows continuous click capture now uses a low-level mouse hook instead
of timer polling, so normal left-clicks are not missed when the app or
target system is under load. Click captures also preserve the original
+70
View File
@@ -0,0 +1,70 @@
#!/usr/bin/env bash
# Workflow check: run the full click-capture pipeline end to end in a real
# Electron session (STEPFORGE_CLICK_SELFTEST) and assert every scenario
# passes. This guards the click→screenshot→step behavior — exact markers,
# one step per click, fast bursts not dropped on finish, the first click of a
# session captured (warm-before-arm), and the ~200ms debounce — against
# regressions that unit tests alone can't catch because they don't exercise
# the live capture stream and window timing.
#
# Scenarios and their pass lines (see app/main.js STEPFORGE_CLICK_SELFTEST):
# steps: 3 of 3, each marker "off by 0.00% of screen"
# burst: 8 of 8 (fast clicks + immediate finish, none lost)
# arm: warmup click ignored, first armed click captured
# debounce: 4 of 4 (40ms burst collapses to 1, three 300ms clicks kept)
#
# If the environment can't run a desktop capture at all (no display/stream),
# the scenarios never print, so the check skips rather than failing CI.
set -euo pipefail
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
cd "$ROOT_DIR"
TMP_ROOT="$(mktemp -d)"
trap 'rm -rf "$TMP_ROOT"' EXIT
LOG_FILE="$TMP_ROOT/selftest.log"
set +e
STEPFORGE_DATA_DIR="$TMP_ROOT/data" STEPFORGE_CLICK_SELFTEST=1 \
timeout 120s npm start >"$LOG_FILE" 2>&1
set -e
# The self-test always prints this first line once it begins; without it the
# app never reached the scenarios (couldn't launch / no capture environment).
if ! grep -q 'CLICK-SELFTEST source:' "$LOG_FILE"; then
echo "click capture selftest SKIPPED (no capture environment on this host)"
exit 0
fi
# Report a failed check and abort.
#   $1  one-line reason, printed to stderr after the "FAILED:" prefix.
# Also dumps every CLICK-SELFTEST line from the captured log to stderr so the
# failing scenario is visible in the output, then exits with status 1.
fail() {
echo "click capture selftest FAILED: $1" >&2
echo "----- selftest output -----" >&2
# "|| true": under `set -e` a grep with no matches must not mask the exit 1.
grep -E 'CLICK-SELFTEST' "$LOG_FILE" >&2 || true
exit 1
}
# Any scenario that detected a problem prints FAIL or an ERROR line.
if grep -Eq 'CLICK-SELFTEST.*(FAIL|ERROR)' "$LOG_FILE"; then
fail "a scenario reported FAIL/ERROR"
fi
# Per-scenario positive assertions (deterministic with synthetic clicks).
grep -q 'CLICK-SELFTEST steps: 3 of 3' "$LOG_FILE" \
|| fail "marker scenario did not capture 3 of 3 clicks"
# All three markers must land exactly on the injected click positions.
marker_ok="$(grep -c 'CLICK-SELFTEST marker [0-9]*: off by 0.00% of screen' "$LOG_FILE" || true)"
[[ "$marker_ok" -eq 3 ]] \
|| fail "expected 3 markers at 0.00% offset, found $marker_ok"
grep -q 'CLICK-SELFTEST burst: 8 of 8' "$LOG_FILE" \
|| fail "burst scenario lost clicks on finish"
grep -q 'CLICK-SELFTEST arm:.*OK' "$LOG_FILE" \
|| fail "arm scenario did not capture the first armed click"
grep -q 'CLICK-SELFTEST debounce: 4 of 4 expected OK' "$LOG_FILE" \
|| fail "debounce scenario did not collapse the burst / keep deliberate clicks"
echo "click capture selftest OK (markers, burst, arm, debounce all verified)"
+729 -48
View File
@@ -5,17 +5,20 @@ const assert = require('node:assert/strict');
const CaptureService = require('../../app/capture');
function makeService() {
function makeService({ settings: settingsOverrides, screenApi } = {}) {
const store = {
addStep() {
throw new Error('not used in this test');
},
};
const settingsData = {
'capture.mode': 'fullscreen',
'capture.delayMs': 0,
...settingsOverrides,
};
const settings = {
get(key) {
if (key === 'capture.mode') return 'fullscreen';
if (key === 'capture.delayMs') return 0;
return null;
return key in settingsData ? settingsData[key] : null;
},
};
return new CaptureService({
@@ -23,9 +26,31 @@ function makeService() {
settings,
getWindow: () => null,
notify: () => {},
screenApi: screenApi || {
getCursorScreenPoint: () => ({ x: 0, y: 0 }),
getAllDisplays: () => [],
},
});
}
/**
 * Default wait, in milliseconds, used by `settle`: the raw/regular twin
 * window plus a margin, i.e. how long a test must wait for a held Linux raw
 * press to fire when no coordinate twin arrives.
 */
const TWIN_FLUSH_MS = 60;

/**
 * Resolve after `ms` milliseconds.
 *
 * @param {number} [ms=TWIN_FLUSH_MS] delay before the promise resolves
 * @returns {Promise<void>}
 */
const settle = (ms = TWIN_FLUSH_MS) => {
  const elapsed = new Promise((resolve) => setTimeout(resolve, ms));
  return elapsed;
};
/**
 * Build a synthetic captured-frame record for tests.
 *
 * @param {string} name text stored as the frame's `png` buffer, so a test can
 *   tell frames apart with `png.toString()`
 * @param {number} [ageMs=0] how far in the past `capturedAt` is placed
 * @param {object} [overrides] optional `mode`, `size`, `display`, `cursor`;
 *   any falsy or missing entry falls back to a 100x100 fullscreen default
 * @returns {object} frame with mode, png, size, display, cursor, capturedAt
 */
function makeFrame(name, ageMs = 0, overrides = {}) {
  const { mode, size, display, cursor } = overrides;
  const frame = {
    mode: mode || 'fullscreen',
    png: Buffer.from(name),
    size: size || { width: 100, height: 100 },
    display: display || { bounds: { x: 0, y: 0, width: 100, height: 100 } },
    cursor: cursor || { x: 50, y: 50 },
    capturedAt: Date.now() - ageMs,
  };
  return frame;
}
// ---- fresh-shot fallback path ----------------------------------------------
test('click-triggered session capture uses the low-latency hide pause', async () => {
const service = makeService();
service.session = { guideId: 'guide-1', paused: false, count: 0, intervalSec: 0 };
@@ -50,16 +75,7 @@ test('click-triggered session capture uses the low-latency hide pause', async ()
});
});
function makeFrame(name, ageMs = 0, overrides = {}) {
return {
mode: overrides.mode || 'fullscreen',
png: Buffer.from(name),
size: overrides.size || { width: 100, height: 100 },
display: overrides.display || { bounds: { x: 0, y: 0, width: 100, height: 100 } },
cursor: overrides.cursor || { x: 50, y: 50 },
capturedAt: Date.now() - ageMs,
};
}
// ---- Linux watcher parsing ---------------------------------------------------
test('rapid click watcher bursts are parsed one click at a time', () => {
const service = makeService();
@@ -78,7 +94,7 @@ test('rapid click watcher bursts are parsed one click at a time', () => {
assert.equal(clicks, 2);
});
test('button presses fire on the detail line; scroll-wheel ticks are ignored', () => {
test('raw button presses fire; scroll-wheel ticks (buttons 4-7) are ignored', async () => {
const service = makeService();
let clicks = 0;
service.onOsClick = () => {
@@ -101,9 +117,78 @@ test('button presses fire on the detail line; scroll-wheel ticks are ignored', (
' detail: 3',
].join('\n'), 'linux');
await settle(); // raw presses hold briefly for a coordinate twin
assert.equal(clicks, 2, 'buttons 4-7 are scroll ticks, not clicks');
});
test('regular ButtonPress blocks carry their root coordinates into onOsClick', () => {
  // The position recorded with the event itself is what pins the marker to
  // the real click: the pointer may keep moving after the press, so reading
  // the live cursor while parsing would drift away from it.
  const service = makeService();
  const received = [];
  service.onOsClick = (_at, osPoint, button) => {
    received.push({ osPoint, button });
  };

  const watcherOutput = [
    'EVENT type 4 (ButtonPress)',
    ' device: 11 (10)',
    ' detail: 1',
    ' flags:',
    ' root: 644.52/343.55',
    ' event: 644.52/343.55',
  ].join('\n');
  service.processClickWatcherData(watcherOutput, 'linux');

  // The fractional root coordinates arrive rounded to whole pixels.
  const expected = [{ osPoint: { x: 645, y: 344 }, button: 'button-1' }];
  assert.deepEqual(received, expected);
});
test('a raw press and its regular twin merge into a single click with coordinates', async () => {
  // A single physical press may show up twice in the watcher output: as a
  // RawButtonPress block and as a ButtonPress block. The duplicate has to be
  // resolved from the block structure — a time debounce here could swallow
  // genuinely fast clicks.
  const service = makeService();
  const delivered = [];
  service.onOsClick = (_at, osPoint, button) => {
    delivered.push({ osPoint, button });
  };

  const rawBlock = [
    'EVENT type 15 (RawButtonPress)',
    ' device: 11 (11)',
    ' detail: 1',
    ' valuators:',
  ];
  const regularBlock = [
    'EVENT type 4 (ButtonPress)',
    ' device: 11 (10)',
    ' detail: 1',
    ' root: 100.00/200.00',
  ];
  service.processClickWatcherData([...rawBlock, ...regularBlock].join('\n'), 'linux');
  await settle();

  const expected = [{ osPoint: { x: 100, y: 200 }, button: 'button-1' }];
  assert.deepEqual(delivered, expected,
    'exactly one click, carrying the regular twin\'s coordinates');
});
test('two genuine fast presses of the same button both fire', async () => {
  const service = makeService();
  const presses = [];
  service.onOsClick = (_at, osPoint, button) => {
    presses.push({ osPoint, button });
  };

  // Two complete ButtonPress blocks for the same button, back to back.
  const blocks = [
    ['EVENT type 4 (ButtonPress)', ' detail: 1', ' root: 10.00/10.00'],
    ['EVENT type 4 (ButtonPress)', ' detail: 1', ' root: 12.00/11.00'],
  ];
  service.processClickWatcherData(blocks.flat().join('\n'), 'linux');
  await settle();

  assert.equal(presses.length, 2, 'fast clicking must never be dropped by the parser');
});
test('motion events with detail lines do not fire clicks', () => {
const service = makeService();
let clicks = 0;
@@ -121,7 +206,7 @@ test('motion events with detail lines do not fire clicks', () => {
assert.equal(clicks, 0);
});
test('event lines split across stdout chunks are reassembled before parsing', () => {
test('event lines split across stdout chunks are reassembled before parsing', async () => {
const service = makeService();
let clicks = 0;
service.onOsClick = () => {
@@ -132,9 +217,12 @@ test('event lines split across stdout chunks are reassembled before parsing', ()
assert.equal(clicks, 0, 'a partial line must not be parsed yet');
service.ingestClickWatcherChunk('onPress)\n detail: 1\n', 'linux');
await settle();
assert.equal(clicks, 1);
});
// ---- click queue --------------------------------------------------------------
test('clicks queue behind an in-progress capture instead of being dropped', async () => {
const service = makeService();
const order = [];
@@ -156,6 +244,143 @@ test('clicks queue behind an in-progress capture instead of being dropped', asyn
'the second click must run after the first, not be dropped');
});
test('fast clicks are paired with their frames at event time, not behind the store queue', async () => {
  // A slow PNG encode or store can make the click queue run seconds late.
  // Each click must still ask for its frame the moment it is enqueued;
  // otherwise the second click's frame would be chosen (and possibly already
  // evicted) far too late.
  const service = makeService();
  service.session = { guideId: 'guide-eager', paused: false, count: 0, intervalSec: 0 };
  service.userIsInApp = () => false;

  const requestedAt = [];
  let openGate;
  const gate = new Promise((resolve) => { openGate = resolve; });
  // The frame for the click at t=1000 is held behind the gate; every other
  // frame resolves immediately.
  service.frameForClick = (_clickPos, clickAt) => {
    requestedAt.push(clickAt);
    const frame = makeFrame(`frame-${clickAt}`);
    if (clickAt === 1000) {
      return gate.then(() => frame);
    }
    return Promise.resolve(frame);
  };
  let storedCount = 0;
  service.storeFrameAsStep = () => {
    storedCount += 1;
    return { ok: true, step: { stepId: `step-${storedCount}` } };
  };

  service.enqueueClickCapture({ x: 1, y: 1 }, 1000, 'left');
  const pending = service.enqueueClickCapture({ x: 2, y: 2 }, 1040, 'left');
  assert.deepEqual(requestedAt, [1000, 1040],
    'both frames must be requested immediately, while the first store is still pending');

  openGate();
  await pending;
  assert.equal(storedCount, 2);
  assert.equal(service.session.count, 2);
});
test('clicks still queued when the session finishes are stored, not dropped', async () => {
  // The bug report behind this test: "I clicked N times but only got two
  // screenshots". Slow encodes make the queue lag, and finishing the session
  // used to throw away every click still waiting in it.
  const service = makeService();
  service.session = { guideId: 'guide-finish', paused: false, count: 0, intervalSec: 0 };
  service.userIsInApp = () => false;

  // The frame is withheld until after finishSession() has been called.
  let deliverFrame;
  const framePending = new Promise((resolve) => { deliverFrame = resolve; });
  service.frameForClick = () => framePending.then(() => makeFrame('late-stored-frame'));

  const storedSteps = [];
  service.store.addStep = (guideId, _fields, png) => {
    storedSteps.push({ guideId, png: png.toString() });
    return { stepId: 'step-late' };
  };
  const notifications = [];
  service.notify = (channel, payload) => notifications.push({ channel, payload });

  // The click is timestamped two seconds back — comfortably before the user
  // reached for the stop button.
  const clickAt = Date.now() - 2000;
  const pending = service.enqueueClickCapture({ x: 5, y: 5 }, clickAt, 'left');
  service.finishSession();
  deliverFrame();
  await pending;

  assert.deepEqual(storedSteps, [{ guideId: 'guide-finish', png: 'late-stored-frame' }],
    'the click was recorded while the session was live — it must become a step');
  const added = notifications.find((entry) => entry.channel === 'capture:added');
  assert.equal(added.payload.guideId, 'guide-finish');
});
test('the tray click that stops the session does not become a junk step', async () => {
  // The OS hook also sees the tray gesture that stops capture. Storing it
  // would tack a step showing the tray/menu onto every recording, so that one
  // click is dropped — identified by its position, nothing else.
  const trayPoint = { x: 1900, y: 12 };
  const screenApi = {
    getCursorScreenPoint: () => ({ x: 1900, y: 12 }), // over the tray
    getAllDisplays: () => [],
  };
  const service = makeService({ screenApi });
  service.session = { guideId: 'guide-stop', paused: false, count: 0, intervalSec: 0 };
  service.userIsInApp = () => false;
  service.frameForClick = async () => makeFrame('stop-click-frame');
  const storedPngs = [];
  service.store.addStep = (_guideId, _fields, png) => {
    storedPngs.push(png.toString());
    return { stepId: 'step-stop' };
  };

  // The hook reports the tray click at the tray position ...
  const pending = service.enqueueClickCapture(trayPoint, Date.now(), 'left');
  // ... and the tray handler then records where it was clicked.
  service.noteUiStopGesture();
  service.finishSession();
  await pending;

  assert.deepEqual(storedPngs, [], 'the stop click must be discarded');
});
test('a fast workflow click near the stop time but elsewhere is NOT dropped', async () => {
  // Matching the stop click by position is what keeps this case safe: the
  // user clicks in their workflow and then reaches up to the tray. That last
  // workflow click is close in time but far from the tray, so it must be kept.
  const screenApi = {
    getCursorScreenPoint: () => ({ x: 1900, y: 12 }), // tray location
    getAllDisplays: () => [],
  };
  const service = makeService({ screenApi });
  service.session = { guideId: 'guide-near', paused: false, count: 0, intervalSec: 0 };
  service.userIsInApp = () => false;
  service.frameForClick = async () => makeFrame('workflow-frame');
  const storedPngs = [];
  service.store.addStep = (_guideId, _fields, png) => {
    storedPngs.push(png.toString());
    return { stepId: 'step-near' };
  };

  // A workflow click in the middle of the screen, followed by the tray stop.
  const workflowPoint = { x: 600, y: 500 };
  const pending = service.enqueueClickCapture(workflowPoint, Date.now(), 'left');
  service.noteUiStopGesture();
  service.finishSession();
  await pending;

  assert.deepEqual(storedPngs, ['workflow-frame'], 'a click away from the tray must be kept');
});
test('queued click captures preserve the original event time and button', async () => {
  const service = makeService();
  // Record exactly what the queue hands to sessionCapture.
  const calls = [];
  service.sessionCapture = async (trigger, clickPos, clickMeta) => {
    calls.push({ trigger, clickPos, clickMeta });
    return { ok: true };
  };

  const eventTime = 1770000000456;
  await service.enqueueClickCapture({ x: 7, y: 8 }, eventTime, 'left');

  const expectedCall = {
    trigger: 'click',
    clickPos: { x: 7, y: 8 },
    clickMeta: { at: 1770000000456, button: 'left' },
  };
  assert.deepEqual(calls, [expectedCall]);
});
// ---- Windows watcher parsing ---------------------------------------------------
test('windows click watcher output is counted line by line', () => {
const service = makeService();
let clicks = 0;
@@ -197,6 +422,343 @@ test('windows hook click lines carry button and event timestamp', () => {
}]);
});
// ---- click debounce (~200ms) ---------------------------------------------------
//
// These tests drive real onOsClick calls with controlled timestamps and
// record which clicks survive to enqueueClickCapture, so they exercise the
// actual debounce arithmetic rather than asserting on comments or constants.
// The behavior they lock in: clicks of one button closer together than the
// debounce window collapse to one; clicks spaced further apart all register.
/**
 * Feed a sequence of click timestamps through `service.onOsClick` and report
 * which of them reached `enqueueClickCapture`.
 *
 * Gives the service a default session if it has none and replaces its
 * `enqueueClickCapture` with a recorder.
 *
 * @param {object} service object exposing `onOsClick(at, point, button)`
 * @param {number[]} times click timestamps, delivered in order
 * @param {object} [options]
 * @param {string} [options.button='left'] button passed with every click
 * @param {{x: number, y: number}} [options.point] position passed with every click
 * @returns {number[]} timestamps of the clicks that were enqueued
 */
function runClickSequence(service, times, { button = 'left', point = { x: 10, y: 20 } } = {}) {
  service.session = service.session || { guideId: 'g', paused: false, count: 0, intervalSec: 0 };
  const acceptedTimes = [];
  service.enqueueClickCapture = (_clickPos, at) => {
    acceptedTimes.push(at);
  };
  for (const clickTime of times) {
    service.onOsClick(clickTime, point, button);
  }
  return acceptedTimes;
}
test('default debounce is 200ms when the setting is absent', () => {
  // No clickDebounceMs override is passed, so the settings stub has no value
  // for it and the service has to fall back to its own default.
  const debounceMs = makeService().clickDebounceMs();
  assert.equal(debounceMs, 200);
});
test('two deliberate clicks 400ms apart both register (the reported case)', () => {
  const settings = { 'capture.clickDebounceMs': 200 };
  const service = makeService({ settings });
  // 400ms is twice the configured window, so neither click may be dropped.
  const accepted = runClickSequence(service, [0, 400]);
  assert.deepEqual(accepted, [0, 400], 'clicks well outside the window must not be dropped');
});
test('clicks 400–500ms apart all register across a longer sequence', () => {
  // Title fix: the range separator had been lost ("400500ms"); the gaps
  // exercised below are 450 and 500 ms.
  const service = makeService({ settings: { 'capture.clickDebounceMs': 200 } });
  const times = [0, 450, 950, 1400, 1900]; // 450/500/450/500 ms gaps
  assert.deepEqual(runClickSequence(service, times), times,
    'every click spaced beyond the window is captured');
});
test('a click just past the window (250ms) registers; just inside (150ms) does not', () => {
  // Each half of the check gets its own service so accepted-click state from
  // one sequence cannot influence the other.
  const newService = () => makeService({ settings: { 'capture.clickDebounceMs': 200 } });
  const insideService = newService();
  const outsideService = newService();

  const outside = runClickSequence(outsideService, [0, 250]);
  assert.deepEqual(outside, [0, 250]);

  const inside = runClickSequence(insideService, [0, 150]);
  assert.deepEqual(inside, [0], '150ms < 200ms window is debounced away');
});
test('exactly 200ms apart registers (window is exclusive at the boundary)', () => {
  const settings = { 'capture.clickDebounceMs': 200 };
  const service = makeService({ settings });
  // A gap equal to the window counts as outside it.
  const accepted = runClickSequence(service, [0, 200]);
  assert.deepEqual(accepted, [0, 200]);
});
test('a fast burst collapses to a single step', () => {
  const service = makeService({ settings: { 'capture.clickDebounceMs': 200 } });
  // Accidental fast clicking: five clicks, 30ms apart.
  const burst = [0, 30, 60, 90, 120];
  const accepted = runClickSequence(service, burst);
  assert.deepEqual(accepted, [0], 'rapid repeats within the window are one click');
});
test('the window is measured from the last ACCEPTED click, not the last dropped one', () => {
  // Dropped clicks must never move the reference point, or a run of fast
  // clicks would keep pushing the next real click out forever. Walking the
  // sequence with a 200ms window:
  //   0   accepted
  //   100 dropped, 150 dropped
  //   250 accepted (250ms after 0)
  //   300 dropped
  //   500 accepted (250ms after 250)
  const settings = { 'capture.clickDebounceMs': 200 };
  const service = makeService({ settings });
  const sequence = [0, 100, 150, 250, 300, 500];
  const accepted = runClickSequence(service, sequence);
  assert.deepEqual(accepted, [0, 250, 500]);
});
test('different mouse buttons debounce independently', () => {
  const service = makeService({ settings: { 'capture.clickDebounceMs': 200 } });
  service.session = { guideId: 'g', paused: false, count: 0, intervalSec: 0 };
  const accepted = [];
  service.enqueueClickCapture = (_clickPos, at, button) => {
    accepted.push(`${button}@${at}`);
  };

  // Left at 0, then right at 50: different buttons, so both register. The
  // second left, 50ms after the first left, falls inside the window.
  const presses = [
    [0, 'left'],
    [50, 'right'],
    [50, 'left'],
  ];
  for (const [at, button] of presses) {
    service.onOsClick(at, { x: 1, y: 1 }, button);
  }

  assert.deepEqual(accepted, ['left@0', 'right@50']);
});
test('the debounce window is configurable', () => {
  const wideWindow = { 'capture.clickDebounceMs': 500 };

  // Under a 500ms window, clicks 400ms apart collapse into one.
  const collapsing = makeService({ settings: wideWindow });
  assert.deepEqual(runClickSequence(collapsing, [0, 400]), [0]);

  // A 600ms gap clears the larger window, so both clicks survive.
  const clearing = makeService({ settings: wideWindow });
  assert.deepEqual(runClickSequence(clearing, [0, 600]), [0, 600]);
});
test('a debounce of 0 captures every click', () => {
  const service = makeService({ settings: { 'capture.clickDebounceMs': 0 } });
  // Even clicks 1ms apart must all get through when the window is zero.
  const times = [0, 1, 2, 3];
  const accepted = runClickSequence(service, times);
  assert.deepEqual(accepted, [0, 1, 2, 3]);
});
test('duplicate hook deliveries of one press collapse under the debounce', () => {
  // One physical press reported twice, 3ms apart, sits far inside any
  // sensible window and must still produce exactly one step.
  const service = makeService({ settings: { 'capture.clickDebounceMs': 200 } });
  const deliveries = [1770000000000, 1770000000003];
  const accepted = runClickSequence(service, deliveries);
  assert.deepEqual(accepted, [1770000000000]);
});
test('debounce state resets between sessions so the first click always registers', () => {
  const service = makeService({ settings: { 'capture.clickDebounceMs': 200 } });

  // First session: one accepted click at t=0, then the watcher is stopped,
  // which clears the per-button accepted times.
  runClickSequence(service, [0]);
  service.stopClickWatcher();

  // Second session with its own recorder.
  const acceptedInNewSession = [];
  service.session = { guideId: 'g2', paused: false, count: 0, intervalSec: 0 };
  service.enqueueClickCapture = (_clickPos, at) => {
    acceptedInNewSession.push(at);
  };

  // Only 50ms after the earlier click, but in a fresh session — it must not
  // be debounced away.
  service.onOsClick(50, { x: 10, y: 20 }, 'left');
  assert.deepEqual(acceptedInNewSession, [50]);
});
test('debounced clicks never reach the capture queue (end to end through onOsClick)', async () => {
  // Integration check over the real onOsClick → enqueueClickCapture →
  // clickQueue → sessionCapture → store path. Only the frame source and the
  // store are stubbed; the number of stored steps must equal the number of
  // accepted clicks.
  const settings = { 'capture.clickDebounceMs': 200, 'capture.clickMarker': false };
  const service = makeService({ settings });
  service.session = { guideId: 'g-e2e', paused: false, count: 0, intervalSec: 0 };
  service.userIsInApp = () => false;
  service.frameForClick = async () => makeFrame('frame');
  const storedPngs = [];
  service.store.addStep = (_guideId, _fields, png) => {
    storedPngs.push(png.toString());
    return { stepId: `s${storedPngs.length}` };
  };

  // Two deliberate clicks 450ms apart plus an accidental repeat 40ms after
  // the first: two steps are expected, not three and not one.
  service.onOsClick(0, { x: 10, y: 20 }, 'left');
  service.onOsClick(40, { x: 10, y: 20 }, 'left'); // debounced
  service.onOsClick(450, { x: 30, y: 40 }, 'left');
  await service.clickQueue;

  assert.equal(storedPngs.length, 2, 'one step per accepted click — burst repeat dropped, real clicks kept');
  assert.equal(service.session.count, 2);
});
// ---- warmup gating (recording goes live only after the window hides) -----------
test('clicks during warmup are ignored, not captured as junk', () => {
  const service = makeService();
  service.session = { guideId: 'g-warm', paused: false, count: 0, intervalSec: 0 };
  service.warmingUp = true;
  const capturedAt = [];
  service.enqueueClickCapture = (_clickPos, at) => {
    capturedAt.push(at);
  };

  // While the warmup flag is set the click must go nowhere.
  service.onOsClick(1000, { x: 10, y: 20 }, 'left');
  assert.deepEqual(capturedAt, [], 'a click while warming up must not be captured');

  // With the flag cleared (armed), the next click is captured again.
  service.warmingUp = false;
  service.onOsClick(2000, { x: 10, y: 20 }, 'left');
  assert.deepEqual(capturedAt, [2000]);
});
test('pausing and finishing clear the warmup flag', () => {
  const service = makeService();
  // Put the service into a live session that is still warming up.
  const beginWarmup = (guideId) => {
    service.session = { guideId, paused: false, count: 0, intervalSec: 0 };
    service.warmingUp = true;
  };

  beginWarmup('g-warm2');
  service.togglePause(true);
  assert.equal(service.warmingUp, false, 'pause cancels an in-flight warmup');

  beginWarmup('g-warm3');
  service.finishSession();
  assert.equal(service.warmingUp, false, 'finish cancels an in-flight warmup');
});
test('armRecording warms while visible, then hides and arms the session', async () => {
  // Reproduces the restart path: recording must not be "live" until the
  // window is hidden. A click injected during warmup is ignored; a click
  // after arming is captured.
  const service = makeService();
  // Minimal stand-in for the app window; only visibility is tracked.
  const win = {
    destroyed: false,
    visible: true,
    isDestroyed() { return this.destroyed; },
    isVisible() { return this.visible; },
    isMinimized() { return false; },
    hide() { this.visible = false; },
    show() { this.visible = true; },
    focus() {},
    getTitle() { return 'StepForge'; },
    getBounds() { return { x: 0, y: 0, width: 800, height: 600 }; },
  };
  service.getWindow = () => win;
  service.clickCaptureAvailable = () => true;
  // Stub the recorder so warmup resolves fast without real Electron.
  service.startClickFrameBackend = async () => {};
  service.session = { guideId: 'g-arm', paused: false, count: 0, intervalSec: 0 };
  service.hiddenForSession = true;
  const captured = [];
  service.enqueueClickCapture = (clickPos, at) => { captured.push(at); };

  try {
    service.armRecording();
    assert.equal(service.warmingUp, true, 'warming up begins immediately');
    service.onOsClick(1, { x: 10, y: 10 }, 'left');
    assert.deepEqual(captured, [], 'a click during warmup is ignored');

    // Wait out the warmup (min-visible 400ms + settle 150ms here).
    for (let i = 0; i < 40 && service.warmingUp; i++) {
      await new Promise((r) => setTimeout(r, 50));
    }
    assert.equal(service.warmingUp, false, 'warmup clears');
    assert.equal(win.visible, false, 'the window is hidden once armed');
    service.onOsClick(2, { x: 10, y: 10 }, 'left');
    assert.deepEqual(captured, [2], 'a click after arming is captured');
  } finally {
    // Always end the session, even when an assertion above throws, so an
    // armed session (and whatever armRecording scheduled) cannot leak into
    // later tests.
    service.finishSession();
  }
});
test('a slow recorder start still arms within the warmup cap', async () => {
  // If the backend start hangs (Windows can take seconds), the window must
  // still hide and recording must still arm — the restart bug was the
  // window staying up for many seconds, dropping every click over it.
  const service = makeService();
  // Minimal stand-in for the app window; only visibility is tracked.
  const win = {
    destroyed: false,
    visible: true,
    isDestroyed() { return this.destroyed; },
    isVisible() { return this.visible; },
    isMinimized() { return false; },
    hide() { this.visible = false; },
    show() { this.visible = true; },
    focus() {},
    getTitle() { return 'StepForge'; },
    getBounds() { return { x: 0, y: 0, width: 800, height: 600 }; },
  };
  service.getWindow = () => win;
  service.clickCaptureAvailable = () => true;
  // Backend start that never resolves within the test.
  service.startClickFrameBackend = () => new Promise(() => {});
  service.session = { guideId: 'g-slow', paused: false, count: 0, intervalSec: 0 };
  service.hiddenForSession = true;

  try {
    service.armRecording();
    // The cap is 1500ms; wait a bit beyond it.
    for (let i = 0; i < 60 && service.warmingUp; i++) {
      await new Promise((r) => setTimeout(r, 50));
    }
    assert.equal(service.warmingUp, false, 'a hung backend start must not keep recording un-armed');
    assert.equal(win.visible, false, 'the window hides even when the recorder is slow to start');
  } finally {
    // Always end the session, even when an assertion above throws, so a
    // still-warming session cannot leak into later tests.
    service.finishSession();
  }
});
test('a slow backend start from a finished session does not install itself or block a restart', async () => {
// Rapid restart: session A starts a backend that resolves slowly; the user
// finishes A and starts B before it resolves. A's late backend must not
// install into B, and the starting-guard must not block B from starting
// its own.
const service = makeService();
service.session = { guideId: 'A', paused: false, count: 0, intervalSec: 0 };
// Gate that keeps A's backend start pending until the test releases it.
let releaseA;
const aReady = new Promise((r) => { releaseA = r; });
// Ordered log of what happened to the backend: 'installed' and/or 'stopped'.
const built = [];
// Model the generation guard at the seam: a slow start that checks
// captureGen before installing the backend it built.
service.startClickFrameBackend = async function patched() {
// Snapshot the generation this start belongs to.
const gen = this.captureGen;
this.streamBackendStarting = true;
try {
await aReady; // slow start
const backend = { isActive: () => true, stop: () => built.push('stopped') };
// Stale generation, no session, or a paused session: discard the backend.
if (gen !== this.captureGen || !this.session || this.session.paused) {
backend.stop();
return;
}
this.streamBackend = backend;
built.push('installed');
} finally {
// Only a start from the current generation releases the guard itself.
// NOTE(review): for a stale start the final assertion below presumably
// relies on finishSession() having cleared the flag — confirm.
if (gen === this.captureGen) this.streamBackendStarting = false;
}
};
const startA = service.startClickFrameBackend();
service.finishSession(); // bumps captureGen, A is now stale
releaseA();
await startA;
assert.deepEqual(built, ['stopped'], 'the stale backend is stopped, never installed');
assert.equal(service.streamBackend, null);
assert.equal(service.streamBackendStarting, false, 'the guard is freed for the next session');
});
// ---- coordinate conversion ------------------------------------------------------
test('hook coordinates are converted physical → DIP via screenToDipPoint when available', () => {
  // The stub halves both axes; the cursor read throws so the test fails
  // loudly if the service falls back to it.
  const screenApi = {
    screenToDipPoint: ({ x, y }) => ({ x: x / 2, y: y / 2 }),
    getCursorScreenPoint: () => { throw new Error('must not fall back to a cursor read'); },
  };
  const service = makeService({ screenApi });
  service.session = { guideId: 'guide-dip', paused: false, count: 0, intervalSec: 0 };
  const enqueuedAt = [];
  service.enqueueClickCapture = (clickPos) => {
    enqueuedAt.push(clickPos);
  };

  service.onOsClick(1770000000000, { x: 1280, y: 640 }, 'left');

  assert.deepEqual(enqueuedAt, [{ x: 640, y: 320 }]);
});
test('without screenToDipPoint, coordinates convert via display geometry (Linux/X11)', () => {
  // A single 2x display and no screenToDipPoint: the conversion has to come
  // from the display list. The cursor read throws to rule out that fallback.
  const hiDpiDisplay = {
    id: 1,
    scaleFactor: 2,
    bounds: { x: 0, y: 0, width: 1440, height: 900 },
  };
  const screenApi = {
    getAllDisplays: () => [hiDpiDisplay],
    getCursorScreenPoint: () => { throw new Error('must not fall back to a cursor read'); },
  };
  const service = makeService({ screenApi });
  service.session = { guideId: 'guide-x11', paused: false, count: 0, intervalSec: 0 };
  const enqueuedAt = [];
  service.enqueueClickCapture = (clickPos) => {
    enqueuedAt.push(clickPos);
  };

  service.onOsClick(1770000000000, { x: 1500, y: 900 }, 'button-1');

  assert.deepEqual(enqueuedAt, [{ x: 750, y: 450 }],
    'a physical click on a 2x display must land at the halved DIP point');
});
test('clicks without event coordinates fall back to a live cursor read', () => {
  const cursorPoint = { x: 11, y: 22 };
  const screenApi = {
    getCursorScreenPoint: () => ({ ...cursorPoint }),
    getAllDisplays: () => [],
  };
  const service = makeService({ screenApi });
  service.session = { guideId: 'guide-cursor', paused: false, count: 0, intervalSec: 0 };
  const enqueuedAt = [];
  service.enqueueClickCapture = (clickPos) => {
    enqueuedAt.push(clickPos);
  };

  // No OS point is supplied, so the position has to come from the cursor.
  service.onOsClick(1770000000000, null, 'mouse');

  assert.deepEqual(enqueuedAt, [{ x: 11, y: 22 }]);
});
// ---- watcher loss -----------------------------------------------------------------
test('losing the click watcher mid-session falls back to interval capture', () => {
const service = makeService();
service.settings.get = (key) => (key === 'capture.autoIntervalSec' ? 3 : null);
@@ -217,6 +779,8 @@ test('losing the click watcher mid-session falls back to interval capture', () =
}
});
// ---- strict frame selection -----------------------------------------------------
test('a click is served instantly from the freshly buffered frame', async () => {
const service = makeService();
service.session = { guideId: 'guide-2', paused: false, count: 0, intervalSec: 0 };
@@ -264,36 +828,25 @@ test('click capture uses the newest frame completed before the click time', asyn
assert.deepEqual(added, ['before-click']);
});
test('queued click captures preserve the original event time and button', async () => {
const service = makeService();
const seen = [];
service.sessionCapture = async (trigger, clickPos, clickMeta) => {
seen.push({ trigger, clickPos, clickMeta });
return { ok: true };
};
await service.enqueueClickCapture({ x: 7, y: 8 }, 1770000000456, 'left');
assert.deepEqual(seen, [{
trigger: 'click',
clickPos: { x: 7, y: 8 },
clickMeta: { at: 1770000000456, button: 'left' },
}]);
});
test('a buffered frame from a different display is ignored for click capture', async () => {
const service = makeService();
const clickAt = Date.now();
service.session = { guideId: 'guide-display', paused: false, count: 0, intervalSec: 0 };
service.frameLoopRunning = true;
service.frameLoopInFlight = true;
service.frameLoopGrabStartedAt = clickAt - 10; // the in-flight grab predates the click
service.latestFrame = makeFrame('wrong-display', 0, {
display: { bounds: { x: 0, y: 0, width: 100, height: 100 } },
});
service.nextFrame = async () => makeFrame('right-display', 0, {
service.nextFrame = async () => {
const f = makeFrame('right-display', 0, {
display: { bounds: { x: 100, y: 0, width: 100, height: 100 } },
cursor: { x: 150, y: 10 },
});
f.startedAt = clickAt - 10;
return f;
};
service.shoot = async () => {
throw new Error('click capture should not fall back when a matching frame arrives');
};
@@ -304,7 +857,7 @@ test('a buffered frame from a different display is ignored for click capture', a
return { stepId: 'step-display' };
};
const result = await service.sessionCapture('click', { x: 150, y: 10 });
const result = await service.sessionCapture('click', { x: 150, y: 10 }, { at: clickAt });
assert.equal(result.ok, true);
assert.deepEqual(added, ['right-display']);
@@ -328,12 +881,35 @@ test('a stale buffered frame is not reused — the click falls back to a fresh s
assert.equal(shootCalled, true, 'a stale buffered frame must not be reused');
});
test('an idle click capture waits for the imminent loop frame instead of racing it', async () => {
// Grabs take seconds while the idle gap is ~200ms, so the loop's next
// frame both starts sooner and avoids stalling the loop the way a
// competing one-off shot would.
test('strict mode: a frame whose grab started after the click is rejected', async () => {
// This replaces the old "idle click waits for the imminent loop frame"
// behavior: a grab that begins after the click can already show the
// click's effects, so strict mode takes the explicit fresh-shot fallback
// instead of passing it off as the click-time screen.
const service = makeService();
service.session = { guideId: 'guide-idle', paused: false, count: 0, intervalSec: 0 };
service.session = { guideId: 'guide-strict', paused: false, count: 0, intervalSec: 0 };
service.frameLoopRunning = true;
service.frameLoopInFlight = false; // nothing in flight at click time
const clickAt = Date.now();
service.nextFrame = async () => {
throw new Error('strict idle clicks must not wait for a post-click frame');
};
let shootCalled = false;
service.shoot = async () => {
shootCalled = true;
return { ok: true, step: { stepId: 'fresh-step' } };
};
const result = await service.sessionCapture('click', { x: 1, y: 1 }, { at: clickAt });
assert.equal(result.ok, true);
assert.equal(shootCalled, true);
});
test('balanced mode keeps the legacy slack: an imminent post-click frame is accepted', async () => {
const service = makeService({ settings: { 'capture.strictClickFrames': false } });
service.session = { guideId: 'guide-balanced', paused: false, count: 0, intervalSec: 0 };
service.frameLoopRunning = true;
service.frameLoopInFlight = false;
@@ -345,12 +921,12 @@ test('an idle click capture waits for the imminent loop frame instead of racing
return f;
};
service.shoot = async () => {
throw new Error('idle clicks must wait for the loop frame, not take a fresh shot');
throw new Error('balanced idle clicks wait for the loop frame');
};
const added = [];
service.store.addStep = (guideId, fields, png) => {
added.push(png.toString());
return { stepId: 'idle-step' };
return { stepId: 'balanced-step' };
};
const result = await service.sessionCapture('click', { x: 1, y: 1 }, { at: clickAt });
@@ -359,8 +935,8 @@ test('an idle click capture waits for the imminent loop frame instead of racing
assert.deepEqual(added, ['next-loop-frame']);
});
test('a loop frame started too long after the click falls back to a fresh shot', async () => {
const service = makeService();
test('balanced mode: a loop frame started too long after the click still falls back', async () => {
const service = makeService({ settings: { 'capture.strictClickFrames': false } });
service.session = { guideId: 'guide-late', paused: false, count: 0, intervalSec: 0 };
service.frameLoopRunning = true;
service.frameLoopInFlight = false;
@@ -384,7 +960,7 @@ test('a loop frame started too long after the click falls back to a fresh shot',
assert.equal(shootCalled, true, 'late frames must not be passed off as the click-time screen');
});
test('clicks during an in-flight grab wait for the frame instead of being dropped', async () => {
test('clicks during an in-flight pre-click grab wait for the frame instead of being dropped', async () => {
const service = makeService();
service.session = { guideId: 'guide-fast', paused: false, count: 0, intervalSec: 0 };
service.frameLoopRunning = true; // a grab is in flight, no frame buffered yet
@@ -395,10 +971,16 @@ test('clicks during an in-flight grab wait for the frame instead of being droppe
throw new Error('waiting clicks must use the loop frame, not a competing shot');
};
const added = [];
const frames = [];
service.store.addStep = (guideId, fields, png) => {
added.push(png.toString());
return { stepId: `step-${added.length}` };
};
const origStore = service.storeFrameAsStep.bind(service);
service.storeFrameAsStep = (guideId, mode, frame, clickPos) => {
frames.push(frame);
return origStore(guideId, mode, frame, clickPos);
};
// Two rapid clicks land before the grab completes.
const first = service.sessionCapture('click', { x: 1, y: 1 }, { at: clickAt });
@@ -410,23 +992,122 @@ test('clicks during an in-flight grab wait for the frame instead of being droppe
assert.equal(r1.ok, true);
assert.equal(r2.ok, true);
assert.deepEqual(added, ['loop-frame', 'loop-frame'],
'both clicks must become steps from the frame that was in flight');
assert.equal(added.length, 2, 'one step per click — fast clicks are never dropped');
assert.equal(service.session.count, 2);
for (const frame of frames) {
assert.ok(frame.startedAt <= clickAt,
'strict mode: no step may use a frame whose grab started after its click');
}
});
test('pausing stops the frame loop and discards the buffered frame', () => {
// ---- stream backend integration ---------------------------------------------------
// Stream path: when the backend reports itself active and hands back a frame,
// that frame becomes the step and the fresh-shot path is never taken.
test('click frames come from the stream backend when it is active', async () => {
  const service = makeService();
  const clickAt = Date.now();
  service.session = { guideId: 'guide-stream', paused: false, count: 0, intervalSec: 0 };

  // Every request the service forwards to the backend is recorded here.
  const seenRequests = [];
  const buildStreamFrame = () => ({
    mode: 'fullscreen',
    png: Buffer.from('stream-frame'),
    size: { width: 200, height: 100 },
    display: { bounds: { x: 0, y: 0, width: 100, height: 100 } },
    startedAt: clickAt - 50,
    capturedAt: clickAt - 40,
    source: 'stream',
  });
  service.streamBackend = {
    isActive: () => true,
    frameForClick: async (req) => {
      seenRequests.push(req);
      return buildStreamFrame();
    },
    stop: () => {},
  };

  // Reaching the fresh-shot path is a failure for this scenario.
  service.shoot = async () => {
    throw new Error('the stream frame must be used, not a fresh shot');
  };

  const storedPngs = [];
  service.store.addStep = (guideId, fields, png) => {
    storedPngs.push(png.toString());
    return { stepId: 'stream-step' };
  };

  const result = await service.sessionCapture('click', { x: 10, y: 10 }, { at: clickAt });

  assert.equal(result.ok, true);
  assert.deepEqual(storedPngs, ['stream-frame']);
  assert.deepEqual(
    seenRequests,
    [{ clickPos: { x: 10, y: 10 }, clickAt, strict: true, leadMs: 0 }],
    'the worker receives the hook-time click timestamp, strictness, and lead',
  );
});
// Stream miss: the backend answers null, so the capture has to come from
// service.shoot instead.
test('a stream backend with no qualifying frame falls through to the fresh-shot path', async () => {
  const service = makeService();
  service.session = { guideId: 'guide-stream-miss', paused: false, count: 0, intervalSec: 0 };
  service.streamBackend = {
    isActive: () => true,
    frameForClick: async () => null,
    stop: () => {},
  };

  let tookFreshShot = false;
  service.shoot = async () => {
    tookFreshShot = true;
    return { ok: true, step: { stepId: 'fresh-step' } };
  };

  const result = await service.sessionCapture('click', { x: 1, y: 1 });

  assert.equal(result.ok, true);
  assert.equal(tookFreshShot, true);
});
// Pause contract: the loop flag is cleared, the buffered frame is dropped, and
// the stream backend is stopped and detached from the service.
test('pausing stops the frame loop, drops buffered frames, and stops the stream backend', () => {
  const service = makeService();
  service.session = { guideId: 'guide-pause', paused: false, count: 0, intervalSec: 0 };
  service.frameLoopRunning = true;
  service.latestFrame = makeFrame('pre-pause');

  let stopSeen = false;
  service.streamBackend = {
    isActive: () => true,
    stop: () => { stopSeen = true; },
  };

  service.togglePause(true);

  assert.equal(service.frameLoopRunning, false);
  assert.equal(service.latestFrame, null, 'a resume must never serve a pre-pause frame');
  assert.equal(stopSeen, true);
  assert.equal(service.streamBackend, null);
});
// Degradation: dropping the stream backend must start the in-process frame
// loop and publish a state notification.
test('an unhealthy stream backend degrades to the in-process frame loop', () => {
  const service = makeService();
  service.session = { guideId: 'guide-degrade', paused: false, count: 0, intervalSec: 0 };
  service.streamBackend = { isActive: () => true, stop: () => {} };

  let loopStarted = false;
  service.startFrameLoop = () => { loopStarted = true; };

  // Channels passed to notify() are collected for the final assertion.
  const notifiedChannels = [];
  service.notify = (channel) => notifiedChannels.push(channel);

  service.degradeToFrameLoop();

  assert.equal(service.streamBackend, null);
  assert.equal(loopStarted, true, 'capture must not silently stop when the worker dies');
  assert.ok(notifiedChannels.includes('capture:state'));
});
// state() must name the recorder currently serving clicks: 'idle' with neither
// recorder, 'loop' once the frame loop runs, 'stream' once a backend is set.
test('session state reports which frame recorder is serving clicks', () => {
  const service = makeService();
  service.session = { guideId: 'guide-state', paused: false, count: 0, intervalSec: 0 };

  assert.equal(service.state().clickFrameSource, 'idle');
  assert.equal(service.state().strictClickFrames, true);

  service.frameLoopRunning = true;
  assert.equal(service.state().clickFrameSource, 'loop');

  service.streamBackend = { isActive: () => true, stop: () => {} };
  assert.equal(service.state().clickFrameSource, 'stream');

  // Leave the service without a recorder attached.
  service.streamBackend = null;
  service.frameLoopRunning = false;
});
// ---- marker + session lifecycle ------------------------------------------------
test('click capture marks the click-time cursor position', async () => {
const service = makeService();
service.settings.get = (key) => {
+167
View File
@@ -0,0 +1,167 @@
'use strict';
const test = require('node:test');
const assert = require('node:assert/strict');
const {
FrameRing,
frameUsableForClick,
selectFrameForClick,
} = require('../../app/click-frames');
/**
 * Build a minimal frame record for the selection tests.
 *
 * @param {string} name - label used by the assertions to identify the frame
 * @param {object} [options]
 * @param {number} [options.startedAt] - left undefined when omitted
 * @param {number} [options.capturedAt] - left undefined when omitted
 * @param {string} [options.mode='fullscreen']
 * @param {object|null} [options.display=null]
 * @returns {{name: string, mode: string, startedAt: (number|undefined), capturedAt: (number|undefined), display: (object|null)}}
 */
function frame(name, options = {}) {
  const { startedAt, capturedAt, mode = 'fullscreen', display = null } = options;
  const record = { name, mode, startedAt, capturedAt, display };
  return record;
}
// ---- FrameRing --------------------------------------------------------------
// Capacity bound: with limit 2, pushing a third frame evicts the oldest one
// and hands it to onEvict.
test('the ring keeps at most `limit` frames and drops the oldest first', () => {
  let clock = 1000;
  const released = [];
  const ring = new FrameRing({
    limit: 2,
    retentionMs: 60_000,
    now: () => clock,
    onEvict: (f) => released.push(f.name),
  });

  ring.push(frame('a', { capturedAt: 1000 }));
  ring.push(frame('b', { capturedAt: 1100 }));
  clock = 1200;
  ring.push(frame('c', { capturedAt: 1200 }));

  const keptNames = ring.frames().map((f) => f.name);
  assert.deepEqual(keptNames, ['b', 'c']);
  assert.deepEqual(released, ['a'], 'eviction must release the dropped frame');
  assert.equal(ring.latest().name, 'c');
});
// Retention bound: a frame captured 1000ms ago is gone when the window is 500ms.
test('the ring evicts frames older than the retention window', () => {
  let clock = 1000;
  const ring = new FrameRing({ limit: 10, retentionMs: 500, now: () => clock });

  ring.push(frame('old', { capturedAt: 1000 }));
  clock = 2000;
  ring.push(frame('new', { capturedAt: 2000 }));

  const survivors = ring.frames().map((f) => f.name);
  assert.deepEqual(survivors, ['new']);
});
// clear() must empty the ring and pass each held frame to onEvict; the order
// of release is not asserted, only the set.
test('clear() releases every frame through onEvict', () => {
  const released = [];
  const ring = new FrameRing({ onEvict: (f) => released.push(f.name) });

  ring.push(frame('a', { capturedAt: Date.now() }));
  ring.push(frame('b', { capturedAt: Date.now() }));
  ring.clear();

  assert.deepEqual(ring.frames(), []);
  const releasedSorted = [...released].sort();
  assert.deepEqual(releasedSorted, ['a', 'b']);
});
// ---- strict selection -------------------------------------------------------
// Of two frames finished before the click and one started after it, strict
// selection must return the later of the two pre-click frames.
test('strict mode picks the newest frame completed at or before the click', () => {
  const clickAt = 10_000;
  const candidates = [
    frame('older', { startedAt: 9300, capturedAt: 9400 }),
    frame('best', { startedAt: 9800, capturedAt: 9900 }),
    frame('post-click', { startedAt: 10_050, capturedAt: 10_150 }),
  ];

  const picked = selectFrameForClick(candidates, { clickAt, mode: 'fullscreen', strict: true });

  assert.equal(picked.name, 'best');
});
test('strict mode never accepts a frame whose grab started after the click', () => {
  const clickAt = 10_000;
  // A grab that begins a single millisecond after the click may already show
  // the click's effects, so it is rejected even on the in-flight path.
  const lateFrame = frame('late', { startedAt: 10_001, capturedAt: 10_200 });

  const usable = frameUsableForClick(lateFrame, { clickAt, strict: true, allowInFlight: true });
  assert.equal(usable, false);

  const picked = selectFrameForClick([lateFrame], { clickAt, strict: true });
  assert.equal(picked, null);
});
// A grab started before the click but finished after it is usable only when
// the caller opts into in-flight frames.
test('strict mode accepts an in-flight frame whose grab started before the click', () => {
  const clickAt = 10_000;
  const inFlight = frame('in-flight', { startedAt: 9950, capturedAt: 10_300 });

  const withInFlight = frameUsableForClick(inFlight, { clickAt, strict: true, allowInFlight: true });
  assert.equal(withInFlight, true);

  const withoutInFlight = frameUsableForClick(inFlight, { clickAt, strict: true, allowInFlight: false });
  assert.equal(
    withoutInFlight,
    false,
    'a not-yet-needed in-flight frame must not be selected from the buffer path',
  );
});
// The same frame is rejected under a 600ms age limit and accepted under 2000ms.
test('a frame older than maxAgeMs is too stale for the click', () => {
  const clickAt = 10_000;
  const staleFrame = frame('stale', { startedAt: 9000, capturedAt: 9100 });

  const underTightLimit = frameUsableForClick(staleFrame, { clickAt, strict: true, maxAgeMs: 600 });
  assert.equal(underTightLimit, false);

  const underLooseLimit = frameUsableForClick(staleFrame, { clickAt, strict: true, maxAgeMs: 2000 });
  assert.equal(underLooseLimit, true);
});
// A grab started 100ms after the click is inside the 300ms slack: accepted
// with strict off, rejected with strict on.
test('balanced mode accepts a grab started within the slack window after the click', () => {
  const clickAt = 10_000;
  const slackFrame = frame('slack', { startedAt: 10_100, capturedAt: 10_350 });
  const shared = { clickAt, allowInFlight: true, startSlackMs: 300 };

  const balanced = frameUsableForClick(slackFrame, { ...shared, strict: false });
  assert.equal(balanced, true);

  const strict = frameUsableForClick(slackFrame, { ...shared, strict: true });
  assert.equal(strict, false, 'slack acceptance is balanced-mode only');
});
// Two side-by-side 1920x1080 displays: a frame is only usable for clicks that
// fall inside its own display bounds.
test('frames from another display are rejected when the click position is known', () => {
  const clickAt = 10_000;
  const leftDisplay = { bounds: { x: 0, y: 0, width: 1920, height: 1080 } };
  const rightDisplay = { bounds: { x: 1920, y: 0, width: 1920, height: 1080 } };
  const onRight = { x: 2500, y: 500 };
  const onLeft = { x: 500, y: 500 };

  const leftFrame = frame('left-screen', { startedAt: 9900, capturedAt: 9950, display: leftDisplay });
  assert.equal(frameUsableForClick(leftFrame, { clickAt, clickPos: onRight }), false);
  assert.equal(frameUsableForClick(leftFrame, { clickAt, clickPos: onLeft }), true);

  const rightFrame = frame('right-screen', { startedAt: 9960, capturedAt: 9980, display: rightDisplay });
  const picked = selectFrameForClick([leftFrame, rightFrame], { clickAt, clickPos: { x: 2500, y: 500 } });
  assert.equal(picked.name, 'right-screen');
});
// A 'window' frame must not satisfy a 'fullscreen' request, and vice versa.
test('frames of the wrong capture mode are rejected', () => {
  const clickAt = 10_000;
  const windowFrame = frame('window-grab', { startedAt: 9900, capturedAt: 9950, mode: 'window' });

  const forFullscreen = frameUsableForClick(windowFrame, { clickAt, mode: 'fullscreen' });
  assert.equal(forFullscreen, false);

  const forWindow = frameUsableForClick(windowFrame, { clickAt, mode: 'window' });
  assert.equal(forWindow, true);
});
// With a 120ms lead, the frame captured 200ms before the click beats the one
// captured only 15ms before it.
test('the click lead prefers a frame captured at least leadMs before the click', () => {
  const clickAt = 10_000;
  const withMargin = frame('with-margin', { startedAt: 9780, capturedAt: 9800 });
  const rightAtClick = frame('right-at-click', { startedAt: 9970, capturedAt: 9985 });

  const picked = selectFrameForClick(
    [withMargin, rightAtClick],
    { clickAt, mode: 'fullscreen', strict: true, leadMs: 120 },
  );

  assert.equal(
    picked.name,
    'with-margin',
    'with a lead, the frame clear of the click onset wins over the one right at it',
  );
});
test('the click lead falls back to any pre-click frame rather than forcing a post-click shot', () => {
  // Two-tier rule: the only candidate sits 20ms before the click, which does
  // not satisfy the 120ms lead. It must still be returned, because the
  // alternative for the caller is a fresh shot taken after the click.
  const clickAt = 10_000;
  const onlyCandidate = frame('right-before', { startedAt: 9960, capturedAt: 9980 });

  const picked = selectFrameForClick(
    [onlyCandidate],
    { clickAt, mode: 'fullscreen', strict: true, leadMs: 120 },
  );

  assert.equal(picked.name, 'right-before');
});
// The lead fallback only widens the choice among pre-click frames; a frame
// started after the click stays unselectable.
test('the click lead still returns null when no frame precedes the click at all', () => {
  const clickAt = 10_000;
  const postClickOnly = [frame('after', { startedAt: 10_050, capturedAt: 10_080 })];

  const picked = selectFrameForClick(postClickOnly, { clickAt, strict: true, leadMs: 120 });

  assert.equal(
    picked,
    null,
    'a post-click frame is never selected; the caller takes the fresh-shot fallback',
  );
});
// Frames that carry only capturedAt are judged by that timestamp in strict mode.
test('a frame without startedAt falls back to capturedAt for the strict check', () => {
  const clickAt = 10_000;
  const capturedBefore = frame('legacy-before', { capturedAt: 9950 });
  const capturedAfter = frame('legacy-after', { capturedAt: 10_050 });

  const beforeUsable = frameUsableForClick(capturedBefore, { clickAt, strict: true });
  assert.equal(beforeUsable, true);

  const afterUsable = frameUsableForClick(capturedAfter, { clickAt, strict: true, allowInFlight: true });
  assert.equal(afterUsable, false);
});
+88
View File
@@ -0,0 +1,88 @@
'use strict';
const test = require('node:test');
const assert = require('node:assert/strict');
const {
physicalBoundsOf,
physicalToDip,
displayForPhysicalPoint,
displayForDipPoint,
} = require('../../app/coords');
/**
 * Build a display descriptor for the coordinate tests.
 *
 * @param {number} id
 * @param {number} x - bounds origin x
 * @param {number} y - bounds origin y
 * @param {number} width
 * @param {number} height
 * @param {number} [scaleFactor=1]
 * @returns {{id: number, scaleFactor: number, bounds: {x: number, y: number, width: number, height: number}}}
 */
const display = (id, x, y, width, height, scaleFactor = 1) => {
  const bounds = { x, y, width, height };
  return { id, scaleFactor, bounds };
};
// Identity case: with scaleFactor 1 the converted point equals the input.
test('at 100% scale, physical and DIP coordinates are identical', () => {
  const layout = [display(1, 0, 0, 1920, 1080, 1)];

  const converted = physicalToDip({ x: 640, y: 360 }, layout);

  assert.deepEqual(converted, { x: 640, y: 360 });
});
test('at 200% scale, physical pixels halve into DIP', () => {
  // The classic marker-offset bug: on a 2x display the physical click at
  // (1500, 900) is the DIP point (750, 450). Drawing the marker at the raw
  // values would put it far below-right of the real click.
  const layout = [display(1, 0, 0, 1440, 900, 2)];

  const converted = physicalToDip({ x: 1500, y: 900 }, layout);

  assert.deepEqual(converted, { x: 750, y: 450 });
});
// 1.5x scale: (960, 600) physical must come out as exactly (640, 400) DIP.
test('fractional scale factors convert exactly', () => {
  const layout = [display(1, 0, 0, 1280, 800, 1.5)];

  const converted = physicalToDip({ x: 960, y: 600 }, layout);

  assert.deepEqual(converted, { x: 640, y: 400 });
});
// Origin and size are both multiplied by the display's scale factor.
test('physical bounds are DIP bounds times the scale factor', () => {
  const scaled = display(1, 100, 50, 1280, 800, 2);

  const physical = physicalBoundsOf(scaled);

  assert.deepEqual(physical, { x: 200, y: 100, width: 2560, height: 1600 });
});
test('multi-monitor: a click on the secondary display converts in that display space', () => {
  // Two 1920x1080 displays side by side, both at 2x. Physical x=4800 lies on
  // the second display, and its DIP x must be 1920 + 480.
  const primary = display(1, 0, 0, 1920, 1080, 2);
  const secondary = display(2, 1920, 0, 1920, 1080, 2);
  const layout = [primary, secondary];

  const owner = displayForPhysicalPoint({ x: 4800, y: 500 }, layout);
  assert.equal(owner.id, 2);

  const converted = physicalToDip({ x: 4800, y: 540 }, layout);
  assert.deepEqual(converted, { x: 2400, y: 270 });
});
// A display placed left of the primary has a negative x origin; points on it
// must convert and route like any other.
test('multi-monitor with negative origin (display left of primary)', () => {
  const layout = [
    display(1, 0, 0, 1920, 1080, 1),
    display(2, -1920, 0, 1920, 1080, 1),
  ];

  const converted = physicalToDip({ x: -960, y: 540 }, layout);
  assert.deepEqual(converted, { x: -960, y: 540 });

  const owner = displayForPhysicalPoint({ x: -960, y: 540 }, layout);
  assert.equal(owner.id, 2);
});
test('a point just outside every display maps via the nearest one', () => {
  // A click on the outermost pixel row can round to one pixel past the display
  // bounds; it must neither be dropped nor mapped to the wrong screen.
  const layout = [display(1, 0, 0, 1920, 1080, 1)];

  const converted = physicalToDip({ x: 1921, y: 540 }, layout);

  assert.deepEqual(converted, { x: 1921, y: 540 });
});
// Three unusable inputs: no displays, no point, and a non-finite coordinate.
// Each must yield null rather than a made-up position.
test('no display geometry means no conversion (caller falls back to a cursor read)', () => {
  const tinyLayout = () => [display(1, 0, 0, 100, 100)];

  const withoutDisplays = physicalToDip({ x: 10, y: 10 }, []);
  assert.equal(withoutDisplays, null);

  const withoutPoint = physicalToDip(null, tinyLayout());
  assert.equal(withoutPoint, null);

  const withNaN = physicalToDip({ x: Number.NaN, y: 10 }, tinyLayout());
  assert.equal(withNaN, null);
});
// Points inside a display resolve to it; a point beyond every display resolves
// to the closest one.
test('displayForDipPoint routes a click to the containing display, else the nearest', () => {
  const layout = [
    display(1, 0, 0, 1920, 1080, 1),
    display(2, 1920, 0, 1920, 1080, 1),
  ];

  const onSecond = displayForDipPoint({ x: 2000, y: 10 }, layout);
  assert.equal(onSecond.id, 2);

  const onFirst = displayForDipPoint({ x: 10, y: 10 }, layout);
  assert.equal(onFirst.id, 1);

  const beyondAll = displayForDipPoint({ x: 5000, y: 10 }, layout);
  assert.equal(beyondAll.id, 2, 'nearest display wins for out-of-bounds points');
});
+284
View File
@@ -0,0 +1,284 @@
'use strict';
const test = require('node:test');
const assert = require('node:assert/strict');
const { StreamCaptureBackend, pairDisplaysToSources } = require('../../app/stream-backend');
/**
 * Build a display descriptor for the stream-backend tests.
 *
 * @param {number} id
 * @param {number} x - bounds origin x
 * @param {number} y - bounds origin y
 * @param {number} width
 * @param {number} height
 * @param {number} [scaleFactor=1]
 * @returns {{id: number, scaleFactor: number, bounds: {x: number, y: number, width: number, height: number}}}
 */
const display = (id, x, y, width, height, scaleFactor = 1) => {
  const bounds = { x, y, width, height };
  return { id, scaleFactor, bounds };
};
/**
 * Test harness around StreamCaptureBackend with a fake worker host.
 *
 * The fake host records every command it is sent. Unless `autoReady` is
 * false, it answers each 'start-stream' with a 'stream-ready' event on the
 * next microtask, so start() resolves without a real worker window. Short
 * timeouts (ack 40ms, encode 120ms, start 100ms) keep the suite fast; any
 * extra option passed in overrides these defaults.
 *
 * @param {object} [options]
 * @param {boolean} [options.autoReady=true] - auto-ack 'start-stream' commands
 * @returns {{backend: object, sent: object[], worker: function(object): *, isDestroyed: function(): boolean}}
 *   `sent` is the live command log, `worker(msg)` delivers an event to the
 *   backend as if the worker had sent it, `isDestroyed()` reports whether the
 *   host's destroy() was called.
 */
function makeBackend({ autoReady = true, ...opts } = {}) {
  const sent = [];
  let emit = null;
  let destroyed = false;

  // Called by the backend with its event handler; returns the fake host.
  const createHost = async (onEvent) => {
    emit = onEvent;
    return {
      send(msg) {
        sent.push(msg);
        const shouldAck = autoReady && msg.type === 'start-stream';
        if (shouldAck) {
          queueMicrotask(() => emit({ type: 'stream-ready', displayId: msg.displayId }));
        }
      },
      destroy() {
        destroyed = true;
      },
    };
  };

  const backend = new StreamCaptureBackend({
    createHost,
    ackTimeoutMs: 40,
    encodeTimeoutMs: 120,
    startTimeoutMs: 100,
    ...opts,
  });

  return {
    backend,
    sent,
    worker: (msg) => emit(msg),
    isDestroyed: () => destroyed,
  };
}
// Shared single-monitor fixture: one 1920x1080 display with id 7 and the one
// screen source whose display_id ('7') refers to it.
const oneDisplay = [display(7, 0, 0, 1920, 1080, 1)];
const oneSource = [{ id: 'screen:1:0', display_id: '7' }];
// Happy path: one display yields exactly one 'start-stream' command carrying
// the paired source id, the sample interval, and the display bounds.
test('start() opens one stream per display and reports active once ready', async () => {
  const { backend, sent } = makeBackend();

  const started = await backend.start({ displays: oneDisplay, sources: oneSource, sampleMs: 50 });

  assert.equal(started, true);
  assert.equal(backend.isActive(), true);
  assert.equal(sent.length, 1);

  const [command] = sent;
  assert.equal(command.type, 'start-stream');
  assert.equal(command.sourceId, 'screen:1:0');
  assert.equal(command.sampleMs, 50);
  assert.deepEqual(command.display.bounds, { x: 0, y: 0, width: 1920, height: 1080 });

  backend.stop();
});
// Failure path: with auto-ack disabled the test plays the worker and reports a
// stream error, so start() must resolve false and destroy the host.
test('start() fails cleanly when every stream errors', async () => {
  const { backend, sent, worker, isDestroyed } = makeBackend({ autoReady: false });

  const starting = backend.start({ displays: oneDisplay, sources: oneSource });
  // Yield once so the backend can create its host and send the command.
  await new Promise((resolve) => setImmediate(resolve));
  assert.equal(sent.length, 1);

  worker({ type: 'stream-error', displayId: 7, reason: 'no permission' });
  const started = await starting;

  assert.equal(started, false);
  assert.equal(backend.isActive(), false);
  assert.equal(isDestroyed(), true, 'a failed start must tear the worker down');
});
// Round trip: the request carries the click data to the worker, and the
// worker's response comes back as a frame with timestamps, size and display.
test('a frame request resolves with the worker frame, carrying its timestamps and display', async () => {
  const { backend, sent, worker } = makeBackend();
  await backend.start({ displays: oneDisplay, sources: oneSource });

  const pendingFrame = backend.frameForClick({ clickPos: { x: 100, y: 100 }, clickAt: 5000, strict: true });

  const request = sent.find((m) => m.type === 'frame-request');
  assert.ok(request, 'a frame-request must be sent to the worker');
  assert.equal(request.clickAt, 5000);
  assert.equal(request.strict, true);
  assert.equal(request.displayId, 7);

  // Play the worker: answer the request with an encoded frame.
  worker({
    type: 'frame-response',
    requestId: request.requestId,
    ok: true,
    png: Uint8Array.from([1, 2, 3]),
    width: 1920,
    height: 1080,
    startedAt: 4900,
    capturedAt: 4910,
  });

  const frame = await pendingFrame;
  assert.equal(frame.mode, 'fullscreen');
  assert.deepEqual([...frame.png], [1, 2, 3]);
  assert.deepEqual(frame.size, { width: 1920, height: 1080 });
  assert.equal(frame.startedAt, 4900);
  assert.equal(frame.capturedAt, 4910);
  assert.equal(frame.display.id, 7);
  assert.equal(frame.source, 'stream');

  backend.stop();
});
// An explicit ok:false answer is a valid reply: the click resolves null and
// the backend stays active.
test('a "no qualifying frame" reply resolves null without counting as a failure', async () => {
  const { backend, sent, worker } = makeBackend();
  await backend.start({ displays: oneDisplay, sources: oneSource });

  const pendingFrame = backend.frameForClick({ clickAt: 5000 });
  const request = sent.find((m) => m.type === 'frame-request');
  worker({
    type: 'frame-response',
    requestId: request.requestId,
    ok: false,
    reason: 'click predates first frame',
  });

  assert.equal(await pendingFrame, null);
  assert.equal(backend.isActive(), true, 'an honest empty answer is healthy');

  backend.stop();
});
// Routing: each click's frame request must be addressed to the display that
// contains the click position.
test('clicks on a multi-monitor setup route to the stream of the clicked display', async () => {
  const displays = [display(1, 0, 0, 1920, 1080), display(2, 1920, 0, 1920, 1080)];
  const sources = [
    { id: 'screen:1:0', display_id: '1' },
    { id: 'screen:2:0', display_id: '2' },
  ];
  const { backend, sent } = makeBackend();
  await backend.start({ displays, sources });

  // Nothing answers these requests in this test, so the promises are kept and
  // settled explicitly below instead of being left floating.
  const pending = [
    backend.frameForClick({ clickPos: { x: 2500, y: 400 }, clickAt: 1 }),
    backend.frameForClick({ clickPos: { x: 300, y: 400 }, clickAt: 2 }),
  ];

  try {
    const requests = sent.filter((m) => m.type === 'frame-request');
    assert.deepEqual(requests.map((r) => r.displayId), [2, 1]);
  } finally {
    // An immediate stop abandons the unanswered requests at once, so the test
    // does not end while they are still waiting on their ack timeout.
    backend.stop({ immediate: true });
    await Promise.all(pending);
  }
});
// The fake worker never answers, so both clicks time out. The unhealthy
// callback must fire once in total and the backend must shut down.
test('repeated unanswered frame requests mark the backend unhealthy exactly once', async () => {
  let unhealthyCalls = 0;
  const { backend, isDestroyed } = makeBackend({
    onUnhealthy: () => { unhealthyCalls += 1; },
  });
  await backend.start({ displays: oneDisplay, sources: oneSource });

  // Two consecutive ack timeouts, awaited one after the other.
  const firstClick = await backend.frameForClick({ clickAt: 1 });
  assert.equal(firstClick, null);
  const secondClick = await backend.frameForClick({ clickAt: 2 });
  assert.equal(secondClick, null);

  assert.equal(unhealthyCalls, 1, 'degradation must fire once, not per click');
  assert.equal(backend.isActive(), false);
  assert.equal(isDestroyed(), true);
});
// Two-phase reply: the worker first acknowledges the selection with
// 'frame-selected', then delivers the encoded payload in 'frame-response'.
test('a slow PNG encode after a prompt selection ack is not mistaken for a dead worker', async () => {
// The ack window is 40ms here; the payload arrives at ~80ms — well past
// the ack deadline but inside the encode deadline. The frame must land
// and the failure counter must stay clean.
let unhealthy = 0;
const { backend, sent, worker } = makeBackend({ onUnhealthy: () => { unhealthy += 1; } });
await backend.start({ displays: oneDisplay, sources: oneSource });
const promise = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 5000 });
const request = sent.find((m) => m.type === 'frame-request');
// Prompt ack, sent synchronously right after the request went out.
worker({ type: 'frame-selected', requestId: request.requestId, startedAt: 4900, capturedAt: 4910 });
// Late payload: delivered by a timer so it lands after the ack deadline.
setTimeout(() => {
worker({
type: 'frame-response',
requestId: request.requestId,
ok: true,
png: Uint8Array.from([7]),
width: 1920,
height: 1080,
startedAt: 4900,
capturedAt: 4910,
});
}, 80);
const frame = await promise;
assert.ok(frame, 'the slowly-encoded frame must still be delivered');
assert.deepEqual([...frame.png], [7]);
assert.equal(unhealthy, 0);
assert.equal(backend.isActive(), true);
backend.stop();
});
// The worker acks the selection but never sends the payload; the click must
// still settle (with null) instead of staying pending.
test('an acked request whose payload never arrives resolves null after the encode deadline', async () => {
  const { backend, sent, worker } = makeBackend();
  await backend.start({ displays: oneDisplay, sources: oneSource });

  const pendingFrame = backend.frameForClick({ clickAt: 5000 });
  const request = sent.find((m) => m.type === 'frame-request');
  worker({ type: 'frame-selected', requestId: request.requestId });

  const settled = await pendingFrame;
  assert.equal(settled, null, 'a stuck encode must not hang the click forever');

  backend.stop();
});
test('a click on a display without a ready stream is not served from another display', async () => {
  // Only display 1 gets a screen source. A click on display 2 must resolve
  // null, so the caller falls back to a fresh shot of the right monitor,
  // rather than returning display 1 pixels with meaningless marker math.
  const displays = [display(1, 0, 0, 1920, 1080), display(2, 1920, 0, 1920, 1080)];
  const onlyFirstSource = [{ id: 'screen:1:0', display_id: '1' }];
  const { backend, sent } = makeBackend();
  await backend.start({ displays, sources: onlyFirstSource });

  const frame = await backend.frameForClick({ clickPos: { x: 2500, y: 400 }, clickAt: 1 });

  assert.equal(frame, null);
  const requestWasSent = sent.some((m) => m.type === 'frame-request');
  assert.equal(
    requestWasSent,
    false,
    'no request should even be sent for the wrong display',
  );

  backend.stop();
});
// A reply that arrives after its request already timed out must be dropped
// without throwing; the click has already resolved null by then.
test('a late worker reply after the timeout is ignored', async () => {
  const { backend, sent, worker } = makeBackend();
  await backend.start({ displays: oneDisplay, sources: oneSource });

  try {
    const result = await backend.frameForClick({ clickAt: 1 }); // times out at 40ms
    // Checked before the late reply: the outcome is already fixed here.
    assert.equal(result, null);

    const request = sent.find((m) => m.type === 'frame-request');
    assert.ok(request, 'a frame-request must have been sent before the timeout');

    // The actual subject of the test: delivering the stale reply is harmless.
    assert.doesNotThrow(() => {
      worker({
        type: 'frame-response',
        requestId: request.requestId,
        ok: true,
        png: Uint8Array.from([9]),
        width: 1,
        height: 1,
      });
    }, 'a reply for an already-settled request must be ignored, not throw');
  } finally {
    // Stop the backend even when an assertion above fails.
    backend.stop();
  }
});
// Drain-on-stop: a default stop() leaves the worker host alive until the
// already-acknowledged request has received its payload.
test('stop() drains: a frame already selected at finish time still resolves', async () => {
// This is the "I clicked many times but only got two screenshots" fix.
// The session finishes (stop) while a click's frame is still encoding;
// the frame must still come back, not be cancelled to null.
const { backend, sent, worker, isDestroyed } = makeBackend();
await backend.start({ displays: oneDisplay, sources: oneSource });
const pending = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 1 });
const request = sent.find((m) => m.type === 'frame-request');
// The worker acks the selection; the encoded payload is still outstanding.
worker({ type: 'frame-selected', requestId: request.requestId, startedAt: 0, capturedAt: 0 });
backend.stop(); // user finishes the session while the encode is in flight
assert.equal(backend.isActive(), false);
assert.equal(isDestroyed(), false, 'the worker stays alive to finish encoding');
// The payload arrives after stop(); it must still settle the pending click.
worker({
type: 'frame-response',
requestId: request.requestId,
ok: true,
png: Uint8Array.from([5]),
width: 1,
height: 1,
});
const frame = await pending;
assert.ok(frame, 'the in-flight frame must survive the stop');
assert.deepEqual([...frame.png], [5]);
assert.equal(isDestroyed(), true, 'the worker tears down once draining completes');
});
// Opposite of the drain case: an immediate stop settles the outstanding click
// with null and destroys the host right away.
test('stop({ immediate: true }) abandons in-flight requests at once', async () => {
  const { backend, isDestroyed } = makeBackend();
  await backend.start({ displays: oneDisplay, sources: oneSource });

  const pendingFrame = backend.frameForClick({ clickPos: { x: 10, y: 10 }, clickAt: 1 });
  backend.stop({ immediate: true });

  const settled = await pendingFrame;
  assert.equal(settled, null);
  assert.equal(backend.isActive(), false);
  assert.equal(isDestroyed(), true);
});
// Pairing rules: match on display_id regardless of source order and ignore
// non-matching sources; a lone display takes a lone screen source even when
// that source's display_id is empty.
test('displays pair to screen sources by display_id; single display pairs to a lone source', () => {
  const first = display(1, 0, 0, 100, 100);
  const second = display(2, 100, 0, 100, 100);
  const displays = [first, second];
  const sources = [
    { id: 'screen:b', display_id: '2' },
    { id: 'screen:a', display_id: '1' },
    { id: 'window:x', display_id: '' },
  ];

  const paired = pairDisplaysToSources(displays, sources);
  assert.deepEqual(paired, [
    { display: displays[0], sourceId: 'screen:a' },
    { display: displays[1], sourceId: 'screen:b' },
  ]);

  // WSLg and some portals leave display_id empty; one display and one screen
  // source are still matched to each other.
  const lonePair = pairDisplaysToSources([displays[0]], [{ id: 'screen:0', display_id: '' }]);
  assert.deepEqual(lonePair, [{ display: displays[0], sourceId: 'screen:0' }]);
});