Files
autodoc/app/capture.js
T
Iisyourdad 3d0b753205
Template tests / tests (push) Waiting to run
Template tests / tests (pull_request) Waiting to run
Add a 200ms click debounce with extensive behavioral tests
Per request: clicks of the same button closer together than
capture.clickDebounceMs (default 200ms) now collapse into a single step, so
accidental fast/double clicks don't each become a step. It is a leading-edge
debounce measured from the last *accepted* click, so a run of fast clicks
can't push the next deliberate click out — two clicks spaced beyond the
window (e.g. the reported 400-500ms apart) always register.

Replaces the prior 8ms duplicate-delivery suppression (subsumed by the
window). Configurable; 0 captures every click.

Tests (the point of this change is that it can't silently regress):
- 13 behavioral unit tests in capture.test.js that drive real onOsClick
  calls with controlled timestamps and assert which clicks survive — the
  reported 400/450/500ms cases, sub-window collapse, the 200ms boundary,
  per-button independence, configurability, debounce=0, last-accepted (not
  last-dropped) reference, session reset, and a full onOsClick -> queue ->
  store integration check. No keyword/comment assertions.
- A fourth end-to-end self-test scenario (burst of 40ms clicks collapses to
  1; three 300ms-apart clicks each register => 4 total). The marker/drain
  scenarios set debounce to 0 so they keep stressing the frame pipeline.

147 unit tests + all repo checks pass.

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-12 09:02:51 -05:00

1425 lines
57 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
'use strict';
const path = require('node:path');
const { spawn, execFileSync } = require('node:child_process');
const { desktopCapturer, screen, BrowserWindow, nativeImage, Tray, Menu, Notification } = require('electron');
const { expandPlaceholders } = require('../core/placeholders');
const raster = require('../core/raster');
const { encodePng } = require('../core/png');
const {
selectFrameForClick,
frameUsableForClick,
pointInBounds,
DEFAULT_MAX_AGE_MS,
DEFAULT_START_SLACK_MS,
} = require('./click-frames');
const { physicalToDip } = require('./coords');
/**
* Capture service: full-screen, active-window, and region capture, plus a
* click-marker annotation at the click position and a capture session
* (start/pause/resume/finish).
*
* A session captures continuously, with three triggers layered by what the
* platform supports:
* - click-capture via an OS adapter (xinput on X11, a low-level mouse hook
* on Windows),
* - a global hotkey (unreliable on some Wayland compositors),
* - interval auto-capture as the always-works fallback.
*
* Click captures are served from one of two frame recorders:
* - the stream backend (app/stream-backend.js): a hidden worker window
* samples a desktop media stream per display into a timestamped ring
* buffer, entirely off the main process. This is the preferred path —
* the main-process event loop stays free, so OS click events arrive on
* time, and the tight sampling cadence keeps a genuinely fresh pre-click
* frame available for every click;
* - the legacy in-process frame loop below, kept as the fallback when
* streams can't start (portal-less Wayland, exotic drivers).
*
* Either way the pairing rule is the same (click-frames.js): in strict mode
* a click only ever gets a frame captured at or before the click — never one
* whose grab started after it.
*
* Note: under Wayland/WSLg, screen capture may require portal support; all
* failures surface as { ok: false, reason } instead of crashing.
*/
// Leading-edge click debounce: the first click of a button is captured, and
// further clicks of that button within this window of the last *accepted*
// click are ignored. This collapses accidental fast / double clicks into one
// step, while any two deliberate clicks spaced more than the window apart
// each register. Tunable via capture.clickDebounceMs; this is only the
// default when the setting is absent.
const DEFAULT_CLICK_DEBOUNCE_MS = 200;
// How long a Linux raw button event waits for its regular twin (the
// representation that carries root coordinates) before firing without them.
const LINUX_CLICK_TWIN_MS = 25;
// Idle gap between legacy frame-loop grabs. Must stay well above zero:
// grabbing back-to-back starves the main-process event loop, which delays
// delivery of click events from the OS watcher by whole seconds. (The
// stream backend exists precisely because of this constraint.)
const FRAME_LOOP_IDLE_MS = 200;
// A buffered frame older than this is too stale to pass off as "the screen
// at the instant of the click". Shared with click-frames.js.
const CLICK_FRAME_MAX_AGE_MS = DEFAULT_MAX_AGE_MS;
// How long a click waits for the in-flight grab before falling back to a
// one-off fresh shot.
const CLICK_FRAME_WAIT_MS = 2000;
// Balanced (non-strict) mode only: a loop grab that started at most this
// long after the click is still accepted. Strict mode never does this.
const CLICK_FRAME_START_SLACK_MS = DEFAULT_START_SLACK_MS;
// Passed to shoot() as hideWindowDelayMs when a click capture falls back to
// a one-off fresh shot (every other trigger passes null there).
// NOTE(review): presumably the pause between hiding the app window and the
// grab — confirm against shoot(), which is not defined near this constant.
const CLICK_CAPTURE_HIDE_DELAY_MS = 25;
// Frames hold raw images (~20MB each at 2880x1800), so keep the history
// window wide enough to outlast any processing hiccup but the count low.
const RECENT_FRAME_RETENTION_MS = 4000;
const RECENT_FRAME_LIMIT = 4;
// The click that stops/pauses a session via the tray reaches the OS hook at
// almost the same instant the tray handler fires. We discard at most that
// one click — and only when it matches the recorded gesture in *both* time
// and position, so a fast workflow click that merely happens to land near
// the stop is never mistaken for the stop itself.
const SESSION_STOP_CLICK_WINDOW_MS = 700;
const SESSION_STOP_CLICK_RADIUS_PX = 8;
// Per-click diagnostics, enabled with STEPFORGE_CAPTURE_LOG=1. Cheap enough
// to leave in: one line per click/frame decision, nothing per frame-loop tick.
const CAPTURE_LOG = Boolean(process.env.STEPFORGE_CAPTURE_LOG);
/**
 * Diagnostic logger: forwards its arguments to console.log behind a
 * '[capture]' tag, and is a no-op unless CAPTURE_LOG was truthy at load.
 */
function clog(...args) {
  if (!CAPTURE_LOG) return;
  console.log('[capture]', ...args);
}
/**
 * Report whether `which` can locate an executable called `name`.
 * Any failure of the lookup (not found, `which` itself missing) counts as
 * "not available".
 *
 * @param {string} name Executable name to look up.
 * @returns {boolean} true when the lookup exits successfully.
 */
function hasBinary(name) {
  let found = true;
  try {
    execFileSync('which', [name], { stdio: 'pipe' });
  } catch {
    found = false;
  }
  return found;
}
class CaptureService {
constructor({ store, settings, getWindow, notify, screenApi = screen }) {
this.store = store;
this.settings = settings;
this.getWindow = getWindow;
this.notify = notify;
// Injectable for tests; the click/coordinate paths must never reach for
// the global `screen` directly so coordinate handling stays testable.
this.screen = screenApi;
this.session = null; // { guideId, paused, count, intervalSec }
this.intervalTimer = null;
this.clickWatcher = null;
this.frameLoopTimer = null;
this.frameLoopRunning = false;
this.frameWaiters = [];
this.latestFrame = null;
this.clickWatcherBuf = '';
this.clickWatcherErrTail = '';
this.linuxEvent = null; // event block currently being parsed
this.pendingRawClick = null; // raw press waiting for its coordinate twin
this.clickQueue = Promise.resolve();
this.frameLoopInFlight = false;
this.frameLoopGrabStartedAt = null;
this.recentFrames = [];
this.shooting = false;
this.lastAcceptedClickByButton = new Map();
this.streamBackend = null;
this.streamBackendStarting = false;
}
state() {
return this.session
? {
active: true,
paused: this.session.paused,
guideId: this.session.guideId,
count: this.session.count,
intervalSec: this.session.intervalSec || 0,
clickCapture: Boolean(this.clickWatcher),
clickCaptureAvailable: this.clickCaptureAvailable(),
clickFrameSource: this.streamBackend ? 'stream' : (this.frameLoopRunning ? 'loop' : 'idle'),
strictClickFrames: this.strictClickFrames(),
}
: { active: false, clickCaptureAvailable: this.clickCaptureAvailable() };
}
/**
* Strict is the default: a stored step must never show the screen *after*
* its click (a frame whose grab started post-click can already contain the
* click's effects). The setting exists as an explicit escape hatch for
* machines where capture is too slow to keep pre-click frames buffered —
* there, the legacy slack heuristics trade accuracy for fewer fresh-shot
* fallbacks.
*/
strictClickFrames() {
return this.settings.get('capture.strictClickFrames') !== false;
}
clickCaptureAvailable() {
if (this._clickAvail === undefined) {
this._clickAvail = process.platform === 'win32' || (process.platform === 'linux' && hasBinary('xinput'));
}
return this._clickAvail;
}
startSession(guideId, { intervalSec = null } = {}) {
this.finishSession();
// Default trigger: clicks when the platform supports it, otherwise an
// interval so a session always produces steps even if the global hotkey
// never fires (common under Wayland/WSLg).
let interval = intervalSec;
if (interval == null) {
interval = this.clickCaptureAvailable() ? 0 : (this.settings.get('capture.autoIntervalSec') || 5);
}
// Sessions start paused: nothing hides and no capturing happens until
// the user explicitly presses "Start recording" in the capture bar, so
// New Capture never makes the window vanish out from under them.
this.session = { guideId, paused: true, count: 0, intervalSec: interval };
if (this.settings.get('capture.captureOutsideClicks') !== false) this.startClickWatcher();
this.applyInterval();
this.notify('capture:state', this.state());
// (Skipped for the dev screenshot hook, which needs a visible page.)
if (!process.env.STEPFORGE_SCREENSHOT) {
this.createSessionTray();
const win = this.getWindow();
// Remember whether the window was visible when the session was set
// up — that's what `togglePause` uses to decide whether to tuck the
// app away once the user actually starts recording.
this.hiddenForSession = Boolean(win && !win.isDestroyed() && win.isVisible());
try {
new Notification({
title: 'StepForge is ready to capture',
body: 'Click "Start recording" in the red capture bar when youre ready. The window tucks away and the red tray icon takes over.',
}).show();
} catch { /* notifications unavailable on this desktop */ }
}
}
/** Red-dot tray icon with session controls, shown while recording. */
createSessionTray() {
this.destroySessionTray();
try {
const img = raster.createImage(16, 16, [0, 0, 0, 0]);
raster.fillOval(img, 2, 2, 12, 12, [229, 72, 77, 255]);
this.tray = new Tray(nativeImage.createFromBuffer(encodePng(img)));
this.tray.setToolTip('StepForge — capture session running');
const rebuild = () => {
if (!this.tray || this.tray.isDestroyed()) return;
this.tray.setContextMenu(Menu.buildFromTemplate([
{ label: `Captured ${this.session ? this.session.count : 0} steps`, enabled: false },
{ type: 'separator' },
{ label: 'Capture now', click: () => this.sessionCapture('manual').then(rebuild).catch(() => {}) },
{
label: this.session && this.session.paused ? 'Resume capturing' : 'Pause capturing',
click: () => { this.noteUiStopGesture(); this.togglePause(); rebuild(); },
},
{
label: 'Open StepForge (pauses capture)',
click: () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
},
},
{ type: 'separator' },
{ label: 'Finish session', click: () => { this.noteUiStopGesture(); this.finishSession(); } },
]));
};
rebuild();
this.rebuildTrayMenu = rebuild;
this.tray.on('click', () => {
this.noteUiStopGesture();
this.togglePause(true);
this.showWindow();
rebuild();
});
} catch {
this.tray = null; // no tray on this desktop; cursor-over skip still protects clicks
}
}
destroySessionTray() {
if (this.tray && !this.tray.isDestroyed()) this.tray.destroy();
this.tray = null;
this.rebuildTrayMenu = null;
}
/**
* Record that the user just stopped/paused capture from StepForge's own UI
* (tray icon or its menu). The physical click that did so is also seen by
* the OS hook and would otherwise queue as a workflow step; isStopGesture
* uses this to discard exactly that one click — matched by position, not
* just time, so a real fast click elsewhere is never lost.
*/
noteUiStopGesture() {
let pos = null;
try { pos = this.screen.getCursorScreenPoint(); } catch { pos = null; }
this.uiStopGesture = { at: Date.now(), pos };
}
/** True when a queued click is the tray gesture that stopped the session. */
isStopGesture(clickPos, clickAt) {
const g = this.uiStopGesture;
if (!g) return false;
if (Math.abs((clickAt || Date.now()) - g.at) > SESSION_STOP_CLICK_WINDOW_MS) return false;
// No position to compare (e.g. cursor read failed): fall back to the
// time window alone, but only consume the gesture once.
if (!g.pos || !clickPos) {
this.uiStopGesture = null;
return true;
}
const near = Math.abs(clickPos.x - g.pos.x) <= SESSION_STOP_CLICK_RADIUS_PX
&& Math.abs(clickPos.y - g.pos.y) <= SESSION_STOP_CLICK_RADIUS_PX;
if (near) this.uiStopGesture = null; // one stop click per gesture
return near;
}
showWindow() {
const win = this.getWindow();
if (win && !win.isDestroyed()) {
win.show();
win.focus();
}
}
setInterval(intervalSec) {
if (!this.session) return this.state();
this.session.intervalSec = Math.max(0, Number(intervalSec) || 0);
this.applyInterval();
this.notify('capture:state', this.state());
return this.state();
}
applyInterval() {
if (this.intervalTimer) {
clearInterval(this.intervalTimer);
this.intervalTimer = null;
}
const sec = this.session && this.session.intervalSec;
if (sec > 0) {
this.intervalTimer = setInterval(() => {
this.sessionCapture('interval').catch(() => {});
}, sec * 1000);
}
}
togglePause(force) {
if (!this.session) return;
const wasPaused = this.session.paused;
this.session.paused = typeof force === 'boolean' ? force : !this.session.paused;
// Starting/resuming tucks the window away again for clean shots (after
// a brief delay so the user sees it happen) and starts the frame
// recorder that serves click captures. Pausing stops it and discards
// buffered frames, so a resume can never serve a pre-pause screen.
if (wasPaused && !this.session.paused) {
this.armRecording();
} else if (!wasPaused && this.session.paused) {
this.stopFrameLoop();
this.stopClickFrameBackend();
}
if (this.rebuildTrayMenu) this.rebuildTrayMenu();
this.notify('capture:state', this.state());
}
/**
* Bring a session from paused to recording. The order matters for the
* first click: the frame recorder is warmed up *while the window is still
* visible*, then the window is hidden. Warming after the hide (the old
* order) left a ~1s gap where the worker had no buffered frame yet, so the
* first click fell back to a post-click fresh shot — "the first screenshot
* is late". By the time the window tucks away here, frames are already
* being buffered, so the first click is served a pre-click frame like
* every other.
*/
armRecording() {
const win = this.getWindow();
const wantHide = Boolean(this.hiddenForSession && win && !win.isDestroyed());
const recorderWanted = this.settings.get('capture.captureOutsideClicks') !== false
&& this.clickCaptureAvailable();
const run = async () => {
if (!this.session || this.session.paused) return;
const startedAt = Date.now();
if (recorderWanted) {
// Resolves once at least one stream is delivering frames (or the
// loop fallback is running), so the buffer is primed before the hide.
try { await this.startClickFrameBackend(); } catch { /* falls back internally */ }
if (!this.session || this.session.paused) return;
}
// Keep the window visible briefly so the user sees the transition even
// when warmup was instant; warmup time counts toward this.
const minVisibleMs = wantHide ? 400 : 0;
const elapsed = Date.now() - startedAt;
if (elapsed < minVisibleMs) {
await new Promise((r) => setTimeout(r, minVisibleMs - elapsed));
if (!this.session || this.session.paused) return;
}
if (wantHide && win && !win.isDestroyed() && win.isVisible()) {
win.hide();
// Let a couple of frames of the now-unobscured screen land before
// the user's first click, so that frame shows their work, not the
// app window that was just dismissed.
await new Promise((r) => setTimeout(r, this.settings.get('capture.postHideSettleMs') || 150));
}
};
run().catch(() => {});
}
finishSession() {
if (this.intervalTimer) {
clearInterval(this.intervalTimer);
this.intervalTimer = null;
}
this.stopClickWatcher();
this.stopFrameLoop();
this.stopClickFrameBackend();
this.destroySessionTray();
this.session = null;
if (this.hiddenForSession) {
this.hiddenForSession = false;
this.showWindow();
}
this.notify('capture:state', this.state());
}
/**
* True when the user is interacting with StepForge itself. Deliberately
* based on cursor position over the visible window, not isFocused():
* some compositors (WSLg) report focus as stuck-true, which would block
* every automatic capture forever.
*/
userIsInApp() {
const win = this.getWindow();
if (!win || win.isDestroyed() || !win.isVisible() || win.isMinimized()) return false;
const cur = this.screen.getCursorScreenPoint();
const b = win.getBounds();
return cur.x >= b.x && cur.x <= b.x + b.width && cur.y >= b.y && cur.y <= b.y + b.height;
}
/** One capture inside the active session (hotkey/click/interval/manual). */
async sessionCapture(trigger = 'hotkey', clickPos = null, clickMeta = null) {
// A click that was registered while recording carries its guide id
// (see enqueueClickCapture) and must become a step even if the session
// was paused or finished while it sat behind slower clicks in the
// queue. Dropping queued clicks at stop time is how "I clicked five
// times and only got two steps" happens on hosts with slow encodes.
const queuedClickGuide = trigger === 'click' && clickMeta && clickMeta.guideId
? clickMeta.guideId
: null;
if (!this.session || this.session.paused) {
if (!queuedClickGuide) return { ok: false, reason: 'no active capture session' };
} else if (trigger !== 'manual' && this.userIsInApp()) {
// Automatic triggers stand down while the user is in StepForge, so the
// app stays clickable mid-session and never screenshots itself.
return { ok: false, reason: 'skipped — StepForge is focused' };
}
// Clicks are served from the frame recorder: the chosen frame was
// captured at (or moments before) the click instant, so the background
// matches what the user clicked on. A click that lands while a grab is
// in flight waits for that frame instead of being dropped, so fast
// clicking still yields one step per click.
if (trigger === 'click') {
const clickAt = clickMeta && Number.isFinite(clickMeta.at) ? clickMeta.at : Date.now();
// Prefer the frame the click was paired with at event time (see
// enqueueClickCapture); ask now only when no eager pairing happened.
const frame = clickMeta && clickMeta.framePromise
? await clickMeta.framePromise
: await this.frameForClick(clickPos, clickAt);
const sessionLive = this.session && !this.session.paused;
const guideId = sessionLive ? this.session.guideId : queuedClickGuide;
if (!guideId) return { ok: false, reason: 'no active capture session' };
// The tray gesture that stopped the session is itself a hook click in
// the queue — storing it would append a junk step of the menu. Discard
// only that one click, matched by position so a fast workflow click is
// never collateral damage.
if (!sessionLive && this.isStopGesture(clickPos, clickAt)) {
clog('click@', clickAt, 'discarded — it triggered the session stop');
return { ok: false, reason: 'click stopped the session' };
}
if (frame) {
clog('click@', clickAt, 'frame', frame.source || 'loop',
'started', frame.startedAt - clickAt, 'ms, captured', frame.capturedAt - clickAt, 'ms rel. click');
const result = this.storeFrameAsStep(guideId, frame.mode, frame, clickPos);
if (result.ok) this.noteStepAdded(result.step, trigger, guideId);
return result;
}
// No usable frame: fall through to a one-off fresh shot — but only
// while still recording. After a stop, a fresh shot would show
// whatever replaced the user's workflow on screen.
clog('click@', clickAt, 'no frame qualified — falling back to a fresh (post-click) shot');
if (!sessionLive) return { ok: false, reason: 'session ended before the fallback shot' };
}
if (this.shooting) return { ok: false, reason: 'capture already in progress' };
this.shooting = true;
try {
const mode = this.settings.get('capture.mode') || 'fullscreen';
const grabMode = mode === 'region' ? 'fullscreen' : mode;
const finalResult = await this.shoot({
guideId: this.session.guideId,
mode: grabMode,
delayMs: 0,
hideWindowDelayMs: trigger === 'click' ? CLICK_CAPTURE_HIDE_DELAY_MS : null,
refocus: false, // don't steal focus from the app the user is documenting
clickPos,
});
if (finalResult.ok) this.noteStepAdded(finalResult.step, trigger);
return finalResult;
} finally {
this.shooting = false;
}
}
noteStepAdded(step, trigger, guideId = null) {
// Steps from queued clicks can land after the session object is gone.
if (this.session) this.session.count += 1;
this.notify('capture:added', {
guideId: guideId || (this.session && this.session.guideId),
step,
trigger,
});
this.notify('capture:state', this.state());
if (this.rebuildTrayMenu) this.rebuildTrayMenu(); // refresh step counter
}
hotkeyCapture() {
return this.sessionCapture('hotkey');
}
// ---- click-triggered capture --------------------------------------------
/**
* Fallback frame recorder: a continuous screen-grab loop in the main
* process, used only when the stream backend can't run. It keeps the most
* recent frames buffered so a click can be served from a frame grabbed at
* (or moments before) the instant of the click — a fresh grab started
* after the click would land hundreds of ms late and show the click's
* effects instead of what the user clicked on. Its cadence is capped at
* FRAME_LOOP_IDLE_MS because tighter grabbing here starves the event loop
* and delays the very click events it serves.
*/
startFrameLoop() {
if (this.frameLoopRunning) return;
this.frameLoopRunning = true;
const tick = async () => {
if (!this.frameLoopRunning) return;
if (!this.session || this.session.paused) {
this.frameLoopRunning = false;
this.frameLoopInFlight = false;
return;
}
try {
if (!this.shooting) {
this.frameLoopInFlight = true;
this.frameLoopGrabStartedAt = Date.now();
const mode = this.settings.get('capture.mode') || 'fullscreen';
const grabMode = mode === 'region' ? 'fullscreen' : mode;
const frame = await this.captureCurrentFrame(grabMode, null, this.frameLoopGrabStartedAt);
if (this.frameLoopRunning) this.acceptFrame(frame);
}
} catch {
// Grab failures are fine — clicks fall back to a one-off fresh shot.
} finally {
this.frameLoopInFlight = false;
this.frameLoopGrabStartedAt = null;
if (this.frameLoopRunning && this.session && !this.session.paused) {
this.frameLoopTimer = setTimeout(tick, FRAME_LOOP_IDLE_MS);
}
}
};
this.frameLoopTimer = setTimeout(tick, 0);
}
/** Store a grabbed frame and hand it to any clicks waiting on it. */
acceptFrame(frame) {
this.latestFrame = frame;
this.recentFrames.push(frame);
const cutoff = Date.now() - RECENT_FRAME_RETENTION_MS;
this.recentFrames = this.recentFrames
.filter((f) => f && f.capturedAt >= cutoff)
.slice(-RECENT_FRAME_LIMIT);
const waiters = this.frameWaiters;
this.frameWaiters = [];
for (const resolve of waiters) resolve(frame);
}
/** Resolves with the next frame the loop grabs (null on timeout/stop). */
nextFrame(timeoutMs) {
return new Promise((resolve) => {
const entry = (frame) => {
clearTimeout(timer);
resolve(frame);
};
const timer = setTimeout(() => {
this.frameWaiters = this.frameWaiters.filter((w) => w !== entry);
resolve(null);
}, timeoutMs);
this.frameWaiters.push(entry);
});
}
stopFrameLoop() {
if (this.frameLoopTimer) {
clearTimeout(this.frameLoopTimer);
this.frameLoopTimer = null;
}
this.frameLoopRunning = false;
this.frameLoopGrabStartedAt = null;
this.latestFrame = null;
this.recentFrames = [];
const waiters = this.frameWaiters;
this.frameWaiters = [];
for (const resolve of waiters) resolve(null);
}
/**
* Frame representing the screen at the instant of one click.
*
* Order of preference:
* 1. the stream backend's ring buffer (off-main-process, tight cadence);
* 2. the legacy loop's buffered frames;
* 3. waiting for the loop grab that was already in flight when the user
* clicked.
* Selection semantics live in click-frames.js. In strict mode every path
* obeys the same rule — never a frame whose grab started after the click —
* and when nothing qualifies this returns null so the caller takes the
* *explicit* fresh-shot fallback rather than silently passing a post-click
* frame off as the click-time screen.
*/
async frameForClick(clickPos = null, clickAt = Date.now()) {
// Parameters:
//   clickPos — click position handed to the frame selectors (may be null).
//   clickAt  — click timestamp in ms; a non-finite value is replaced with
//              the current time.
// Resolves with the chosen frame, or null when no frame qualifies.
const mode = this.settings.get('capture.mode') || 'fullscreen';
// Region mode grabs the full screen here.
const grabMode = mode === 'region' ? 'fullscreen' : mode;
const clickTime = Number.isFinite(clickAt) ? clickAt : Date.now();
// Click lead: prefer a frame captured a little *before* the hook
// timestamp. The hook fires on button-down, but the visible UI often
// starts reacting within a frame or two (hover→press states, the cursor
// settling) and capture-stream pixels lag the real screen slightly, so a
// frame timestamped right at the click can still show the click's onset.
// The lead is a *preference*: selection falls back to any pre-click
// frame when none is old enough, so it never forces a post-click fresh
// shot. Tunable via capture.clickLeadMs.
const leadMs = Math.max(0, Number(this.settings.get('capture.clickLeadMs')) || 0);
const strict = this.strictClickFrames();
// Shared options for the click-frames.js selectors used below.
const opts = {
clickAt: clickTime,
leadMs,
clickPos,
mode: grabMode,
strict,
maxAgeMs: CLICK_FRAME_MAX_AGE_MS,
startSlackMs: CLICK_FRAME_START_SLACK_MS,
};
// 1. Stream backend — consulted only for fullscreen grabs while active.
if (this.streamBackend && this.streamBackend.isActive() && grabMode === 'fullscreen') {
const frame = await this.streamBackend.frameForClick({ clickPos, clickAt: clickTime, strict, leadMs });
if (frame) return frame;
// No qualifying frame (or the backend just went unhealthy): fall
// through to the loop buffer / fresh-shot fallbacks below.
}
// 2. Loop buffer — recent history plus latestFrame, with empty entries
// and duplicates removed before selection.
const buffered = selectFrameForClick(
[...this.recentFrames, this.latestFrame].filter((f, i, arr) => f && arr.indexOf(f) === i),
opts,
);
if (buffered) return buffered;
// Without a running loop there is no upcoming frame to wait for.
if (!this.frameLoopRunning) return null;
if (strict) {
// Only a grab already in flight when the user clicked can still
// qualify: its pixels predate the click even though it completes
// after. Any grab starting later is post-click by definition, so
// don't wait around for one — return immediately and let the caller
// take the fresh-shot fallback.
const inFlightStartedBeforeClick = this.frameLoopInFlight
&& Number.isFinite(this.frameLoopGrabStartedAt)
&& this.frameLoopGrabStartedAt <= clickTime;
if (!inFlightStartedBeforeClick) return null;
// 3. Wait (bounded by CLICK_FRAME_WAIT_MS) for that in-flight grab.
const next = await this.nextFrame(CLICK_FRAME_WAIT_MS);
return frameUsableForClick(next, { ...opts, allowInFlight: true }) ? next : null;
}
// Balanced (legacy) mode: wait for the next loop frame and accept it if
// its grab started within the slack window after the click.
const deadline = Date.now() + CLICK_FRAME_WAIT_MS;
while (this.frameLoopRunning && Date.now() < deadline) {
// Never pass a zero/negative timeout, even right at the deadline.
const next = await this.nextFrame(Math.max(1, deadline - Date.now()));
if (frameUsableForClick(next, { ...opts, allowInFlight: true })) return next;
if (next && Number.isFinite(next.startedAt)
&& next.startedAt > clickTime + CLICK_FRAME_START_SLACK_MS) {
// Grabs only get later from here; let the fresh-shot path handle it.
return null;
}
}
return null;
}
// ---- click-frame backends -------------------------------------------------
/**
* Bring up the frame recorder for a recording run. The stream backend is
* the architecture path (capture entirely off the main process); the
* in-process frame loop is the fallback when streams can't start — and the
* automatic degradation target if the worker stops answering mid-session.
*/
async startClickFrameBackend() {
// Resolves with no value. The outcome is observable through
// this.streamBackend (stream path) or the running frame loop (fallback).
const mode = this.settings.get('capture.mode') || 'fullscreen';
// The worker streams screens; window-mode grabs need the loop's
// source-filtering logic.
if (this.settings.get('capture.streamCapture') === false || mode === 'window') {
this.startFrameLoop();
return;
}
// A backend is already up, or another call is mid-start: nothing to do.
// Note this returns immediately — it does not wait for the in-progress
// start to finish.
if (this.streamBackend || this.streamBackendStarting) return;
this.streamBackendStarting = true;
try {
// Loaded lazily, only when the stream path is actually taken.
// eslint-disable-next-line global-require
const { StreamCaptureBackend, createElectronHost } = require('./stream-backend');
const backend = new StreamCaptureBackend({
createHost: createElectronHost,
// Invoked by the backend when it turns unhealthy; switches the rest of
// the recording to the in-process loop.
onUnhealthy: () => this.degradeToFrameLoop(),
});
const displays = this.screen.getAllDisplays();
const sources = await desktopCapturer.getSources({
types: ['screen'],
thumbnailSize: { width: 1, height: 1 }, // ids only — skip thumbnail work
});
const ok = await backend.start({
displays,
// Only the source id and its display id are forwarded.
sources: sources.map((s) => ({ id: s.id, display_id: s.display_id })),
sampleMs: this.settings.get('capture.frameSampleMs') || 100,
});
// Start failed, or the session was paused/finished while starting:
// discard the backend, and fall back to the loop only if still recording.
if (!ok || !this.session || this.session.paused) {
backend.stop();
if (this.session && !this.session.paused) {
console.error('[stepforge] stream capture backend failed to start — using in-process frame loop');
this.startFrameLoop();
}
return;
}
this.streamBackend = backend;
clog('stream capture backend active');
this.notify('capture:state', this.state());
} catch (err) {
// NOTE(review): when getSources() or backend.start() throws, the backend
// constructed above is not stop()ped on this path — confirm that
// StreamCaptureBackend cleans up after a failed start.
if (this.session && !this.session.paused) {
console.error(`[stepforge] stream capture backend error (${err && err.message}) — using in-process frame loop`);
this.startFrameLoop();
}
} finally {
// Always release the "starting" latch so a later call can retry.
this.streamBackendStarting = false;
}
}
stopClickFrameBackend() {
if (!this.streamBackend) return;
const backend = this.streamBackend;
this.streamBackend = null;
backend.stop();
}
/**
* The worker stopped answering frame requests. Capture must not silently
* stop mid-session: drop the backend and run the in-process loop for the
* rest of the recording.
*/
degradeToFrameLoop() {
this.streamBackend = null;
console.error('[stepforge] stream capture backend unhealthy — falling back to in-process frame loop');
if (this.session && !this.session.paused) this.startFrameLoop();
this.notify('capture:state', this.state());
}
startClickWatcher() {
this.stopClickWatcher();
try {
this.clickWatcherBuf = '';
this.linuxEvent = null;
if (process.platform === 'linux' && hasBinary('xinput')) {
// Stream raw button events from the X server; one capture per press.
// xinput block-buffers stdout when piped, so a press event can sit
// in its buffer until later motion events flush it — by then the
// cursor read in onOsClick lands where the mouse moved *after* the
// click. stdbuf -oL forces line-buffering so events (and the cursor
// read) line up with the actual click instant.
const argv = hasBinary('stdbuf')
? ['stdbuf', '-oL', 'xinput', 'test-xi2', '--root']
: ['xinput', 'test-xi2', '--root'];
this.clickWatcher = spawn(argv[0], argv.slice(1), { stdio: ['ignore', 'pipe', 'ignore'] });
this.clickWatcher.stdout.on('data', (chunk) => {
this.ingestClickWatcherChunk(chunk.toString(), 'linux');
});
} else if (process.platform === 'win32') {
// Use a low-level Windows mouse hook instead of polling
// GetAsyncKeyState. The low bit from GetAsyncKeyState can be consumed
// by other processes and a polling loop can miss short clicks under
// load; WH_MOUSE_LL gives us one event for each button-down, with the
// hook-time cursor position and timestamp.
const ps = `
$ErrorActionPreference = 'Stop'
Add-Type -TypeDefinition @'
using System;
using System.Collections.Concurrent;
using System.Runtime.InteropServices;
using System.Threading;
public static class SFMouseHook {
private const int WH_MOUSE_LL = 14;
private const int WM_LBUTTONDOWN = 0x0201;
private const int WM_RBUTTONDOWN = 0x0204;
private const int WM_MBUTTONDOWN = 0x0207;
private const int WM_XBUTTONDOWN = 0x020B;
private const long UnixEpochMilliseconds = 62135596800000L;
private static IntPtr hook = IntPtr.Zero;
private static LowLevelMouseProc proc = HookCallback;
private static readonly ConcurrentQueue<string> queue = new ConcurrentQueue<string>();
private static readonly AutoResetEvent signal = new AutoResetEvent(false);
[StructLayout(LayoutKind.Sequential)]
private struct POINT {
public int x;
public int y;
}
[StructLayout(LayoutKind.Sequential)]
private struct MSLLHOOKSTRUCT {
public POINT pt;
public uint mouseData;
public uint flags;
public uint time;
public UIntPtr dwExtraInfo;
}
[StructLayout(LayoutKind.Sequential)]
private struct MSG {
public IntPtr hwnd;
public uint message;
public UIntPtr wParam;
public IntPtr lParam;
public uint time;
public POINT pt;
}
private delegate IntPtr LowLevelMouseProc(int nCode, IntPtr wParam, IntPtr lParam);
[DllImport("user32.dll", SetLastError = true)]
private static extern IntPtr SetWindowsHookEx(int idHook, LowLevelMouseProc lpfn, IntPtr hMod, uint dwThreadId);
[DllImport("user32.dll", SetLastError = true)]
private static extern bool UnhookWindowsHookEx(IntPtr hhk);
[DllImport("user32.dll")]
private static extern IntPtr CallNextHookEx(IntPtr hhk, int nCode, IntPtr wParam, IntPtr lParam);
[DllImport("kernel32.dll", CharSet = CharSet.Auto, SetLastError = true)]
private static extern IntPtr GetModuleHandle(string lpModuleName);
[DllImport("user32.dll")]
private static extern int GetMessage(out MSG lpMsg, IntPtr hWnd, uint wMsgFilterMin, uint wMsgFilterMax);
[DllImport("user32.dll")]
private static extern bool TranslateMessage(ref MSG lpMsg);
[DllImport("user32.dll")]
private static extern IntPtr DispatchMessage(ref MSG lpMsg);
[DllImport("user32.dll")]
private static extern bool SetProcessDpiAwarenessContext(IntPtr value);
public static void Run() {
try { SetProcessDpiAwarenessContext(new IntPtr(-4)); } catch { }
Thread writer = new Thread(WriterLoop);
writer.IsBackground = true;
writer.Start();
hook = SetWindowsHookEx(WH_MOUSE_LL, proc, GetModuleHandle(null), 0);
if (hook == IntPtr.Zero) {
throw new System.ComponentModel.Win32Exception(Marshal.GetLastWin32Error());
}
Console.Out.WriteLine("READY");
Console.Out.Flush();
MSG msg;
while (GetMessage(out msg, IntPtr.Zero, 0, 0) > 0) {
TranslateMessage(ref msg);
DispatchMessage(ref msg);
}
UnhookWindowsHookEx(hook);
}
private static void WriterLoop() {
while (true) {
signal.WaitOne();
string line;
while (queue.TryDequeue(out line)) {
Console.Out.WriteLine(line);
}
Console.Out.Flush();
}
}
private static IntPtr HookCallback(int nCode, IntPtr wParam, IntPtr lParam) {
if (nCode >= 0) {
int message = wParam.ToInt32();
string button = ButtonName(message, lParam);
if (button != null) {
MSLLHOOKSTRUCT data = (MSLLHOOKSTRUCT)Marshal.PtrToStructure(lParam, typeof(MSLLHOOKSTRUCT));
long unixMs = DateTime.UtcNow.Ticks / TimeSpan.TicksPerMillisecond - UnixEpochMilliseconds;
queue.Enqueue("CLICK " + data.pt.x + " " + data.pt.y + " " + button + " " + unixMs);
signal.Set();
}
}
return CallNextHookEx(hook, nCode, wParam, lParam);
}
private static string ButtonName(int message, IntPtr lParam) {
if (message == WM_LBUTTONDOWN) return "left";
if (message == WM_RBUTTONDOWN) return "right";
if (message == WM_MBUTTONDOWN) return "middle";
if (message == WM_XBUTTONDOWN) {
MSLLHOOKSTRUCT data = (MSLLHOOKSTRUCT)Marshal.PtrToStructure(lParam, typeof(MSLLHOOKSTRUCT));
uint xButton = (data.mouseData >> 16) & 0xffff;
return xButton == 1 ? "x1" : "x2";
}
return null;
}
}
'@
[SFMouseHook]::Run()
`;
this.clickWatcher = spawn('powershell.exe', ['-NoProfile', '-NonInteractive', '-ExecutionPolicy', 'Bypass', '-Command', ps], {
stdio: ['ignore', 'pipe', 'pipe'],
windowsHide: true,
});
this.clickWatcher.stdout.on('data', (chunk) => {
this.ingestClickWatcherChunk(chunk.toString(), 'win32');
});
}
if (this.clickWatcher) {
const child = this.clickWatcher;
this.clickWatcherErrTail = '';
if (child.stderr) {
child.stderr.on('data', (chunk) => {
this.clickWatcherErrTail = String(chunk).slice(-400);
});
}
const lost = (reason) => {
if (this.clickWatcher !== child) return; // stopped deliberately
this.clickWatcher = null;
this.handleClickWatcherLoss(reason);
};
child.on('error', (err) => lost(err && err.message));
child.on('exit', (code) => lost(`exited with code ${code}`));
}
} catch {
this.clickWatcher = null;
}
}
/**
* The watcher process died mid-session (crashed X server, PowerShell
* blocked by policy, …). Captures must not silently stop: log why, switch
* the session to interval captures, and tell the UI.
*/
handleClickWatcherLoss(reason) {
this.linuxEvent = null;
this.discardPendingRawClick();
const detail = [reason, this.clickWatcherErrTail].filter(Boolean).join(' — ');
console.error(`[stepforge] click watcher stopped${detail ? `: ${detail}` : ''}`);
if (!this.session) return;
if (!this.session.intervalSec) {
this.session.intervalSec = this.settings.get('capture.autoIntervalSec') || 5;
this.applyInterval();
}
this.notify('capture:state', this.state());
}
stopClickWatcher() {
if (this.clickWatcher) {
try { this.clickWatcher.kill(); } catch { /* already gone */ }
this.clickWatcher = null;
}
this.clickWatcherBuf = '';
this.linuxEvent = null;
this.discardPendingRawClick();
this.lastAcceptedClickByButton.clear();
}
/**
* Buffer stdout chunks and only parse complete lines: a chunk boundary
* can split an event line in half, which used to corrupt press/release
* parsing and swallow clicks.
*/
ingestClickWatcherChunk(chunk, platform = process.platform) {
this.clickWatcherBuf += String(chunk);
const cut = this.clickWatcherBuf.lastIndexOf('\n');
if (cut === -1) return;
const complete = this.clickWatcherBuf.slice(0, cut);
this.clickWatcherBuf = this.clickWatcherBuf.slice(cut + 1);
this.processClickWatcherData(complete, platform);
}
/**
 * Parse complete lines of watcher output and turn every button press found
 * in them into a click. The text is split on newlines (LF or CRLF) and
 * interpreted according to `platform`:
 *
 * - 'linux': `xinput test-xi2 --root` multi-line event blocks. Parsing is
 *   stateful across calls through `this.linuxEvent`, so a block may span
 *   several invocations. Presses are forwarded to fireLinuxClick.
 * - 'win32': one `CLICK …` line per press, forwarded to onOsClick.
 *
 * Any other platform value is ignored.
 *
 * @param {*} text Complete lines of watcher output (stringified first).
 * @param {string} [platform] Parser dialect; defaults to process.platform.
 */
processClickWatcherData(text, platform = process.platform) {
const lines = String(text).split(/\r?\n/);
if (platform === 'linux') {
// xinput test-xi2 --root prints each event as a multi-line block:
//
// EVENT type 4 (ButtonPress) EVENT type 15 (RawButtonPress)
// device: 11 (10) device: 11 (11)
// detail: 1 detail: 1
// root: 644.52/343.55 valuators: …
//
// Regular (non-raw) blocks carry the event-time root coordinates —
// exactly what the click marker needs, because a cursor read at parse
// time drifts whenever delivery is delayed or the pointer keeps
// moving after the click. Raw blocks have no coordinates, but on many
// servers they are the only representation delivered for the root
// window, so both kinds must fire. One physical press can produce
// *both* representations; that duplication is resolved structurally
// in fireLinuxClick (raw press briefly waits for its regular twin and
// they merge into one click), not by dropping events in this parser.
// The configurable same-button debounce is a separate, later stage: it
// is applied in onOsClick to whatever click ends up firing.
for (const line of lines) {
if (!line) continue;
// A header line starts a new block and implicitly ends the previous one.
const header = /EVENT type \d+ \(([A-Za-z]+)\)/.exec(line);
if (header) {
this.finishLinuxEvent();
const name = header[1];
// Only (Raw)ButtonPress blocks are tracked; every other event type
// leaves linuxEvent null so its body lines are skipped below.
this.linuxEvent = /ButtonPress$/.test(name)
? { name, raw: /^Raw/.test(name), button: null, at: Date.now(), fired: false }
: null;
continue;
}
const ev = this.linuxEvent;
if (!ev || ev.fired) continue;
// "detail:" carries the X11 button number of the press.
const detail = /detail:\s*(\d+)/.exec(line);
if (detail) {
ev.button = Number(detail[1]);
if (ev.button >= 4 && ev.button <= 7) {
// Scroll-wheel ticks (X11 buttons 4-7) are not clicks.
this.linuxEvent = null;
} else if (ev.raw) {
// Raw blocks never carry coordinates; this one is complete.
ev.fired = true;
this.linuxEvent = null;
this.fireLinuxClick(ev.at, null, ev.button, { raw: true });
}
continue;
}
// A regular block fires on its "root: x/y" line, once the button is
// known; the coordinates are rounded to whole pixels.
const root = /root:\s*(-?[\d.]+)\/(-?[\d.]+)/.exec(line);
if (root && !ev.raw && ev.button != null) {
ev.fired = true;
this.linuxEvent = null;
this.fireLinuxClick(ev.at, {
x: Math.round(parseFloat(root[1])),
y: Math.round(parseFloat(root[2])),
}, ev.button, { raw: false });
}
}
return;
}
if (platform === 'win32') {
// Expected line shape: "CLICK <x> <y> <button> <unixMs>". Everything
// after CLICK is optional: a bare "CLICK" fires with no coordinates, a
// missing button becomes 'mouse', and a missing or non-finite timestamp
// falls back to the parse-time clock.
// NOTE: the button group also accepts digits, so in a three-field line
// such as "CLICK 10 20 1700000000000" the number is captured as the
// button name, not as the timestamp.
for (const line of lines) {
const m = /^CLICK(?:\s+(-?\d+)\s+(-?\d+)(?:\s+([A-Za-z0-9_-]+))?(?:\s+(\d+))?)?\s*$/.exec(line.trim());
if (m) {
const osPoint = m[1] === undefined ? null : { x: Number(m[1]), y: Number(m[2]) };
const eventAt = m[4] === undefined ? Date.now() : Number(m[4]);
this.onOsClick(Number.isFinite(eventAt) ? eventAt : Date.now(), osPoint, m[3] || 'mouse');
}
}
}
}
/**
* A new event header arrived while a press block was still open: the block
* ended without the line we fire on. Old xinput builds sometimes omit
* detail lines entirely — treat such a press as a plain click rather than
* dropping it.
*/
finishLinuxEvent() {
const ev = this.linuxEvent;
this.linuxEvent = null;
if (!ev || ev.fired) return;
if (ev.button == null) {
this.onOsClick(ev.at, null, 'mouse');
} else if (!ev.raw) {
// Regular press whose root line never showed up — fire without
// coordinates; onOsClick falls back to a cursor read.
this.fireLinuxClick(ev.at, null, ev.button, { raw: false });
}
}
/**
* Funnel for parsed Linux button presses. Raw and regular blocks for the
* same physical press are merged here: a raw press (no coordinates) is
* held for LINUX_CLICK_TWIN_MS; if the regular twin (with root
* coordinates) arrives inside that window the pair fires once, with the
* raw block's earlier timestamp and the regular block's coordinates.
* Distinct presses always fire — there is no time-based dropping.
*/
fireLinuxClick(at, osPoint, button, { raw = false } = {}) {
const pending = this.pendingRawClick;
if (raw) {
// Two raw presses can't be one click — release the held one first.
this.flushPendingRawClick();
const entry = { button, at, timer: null };
entry.timer = setTimeout(() => {
if (this.pendingRawClick !== entry) return;
this.pendingRawClick = null;
this.onOsClick(entry.at, null, `button-${entry.button}`);
}, LINUX_CLICK_TWIN_MS);
if (entry.timer.unref) entry.timer.unref();
this.pendingRawClick = entry;
return;
}
if (pending && pending.button === button) {
// The regular twin of the held raw press: one physical click.
this.pendingRawClick = null;
clearTimeout(pending.timer);
this.onOsClick(Math.min(pending.at, at), osPoint, `button-${button}`);
return;
}
this.onOsClick(at, osPoint, `button-${button}`);
}
/** Fire the held raw press immediately (its twin is not coming). */
flushPendingRawClick() {
const pending = this.pendingRawClick;
if (!pending) return;
this.pendingRawClick = null;
clearTimeout(pending.timer);
this.onOsClick(pending.at, null, `button-${pending.button}`);
}
discardPendingRawClick() {
if (!this.pendingRawClick) return;
clearTimeout(this.pendingRawClick.timer);
this.pendingRawClick = null;
}
/** Debounce window in ms (capture.clickDebounceMs, default 200). */
clickDebounceMs() {
const raw = this.settings.get('capture.clickDebounceMs');
const v = Number(raw);
return raw != null && Number.isFinite(v) && v >= 0 ? v : DEFAULT_CLICK_DEBOUNCE_MS;
}
onOsClick(at = Date.now(), osPoint = null, button = 'mouse') {
if (!this.session || this.session.paused) return;
const clickAt = Number.isFinite(at) ? at : Date.now();
// Leading-edge debounce: ignore a click that lands within the debounce
// window of the last accepted click of the same button. This makes fast
// / accidental repeat clicks register once, while two deliberate clicks
// spaced more than the window apart each register (one step per click).
if (this.isDebouncedClick(clickAt, button)) {
clog('click@', clickAt, button, 'debounced (within', this.clickDebounceMs(), 'ms of last accepted)');
return;
}
// Prefer the position the watcher sampled with the button-down event
// (physical px -> DIP); otherwise read the cursor synchronously, right
// now, so the marker lands where the user clicked even if the shot
// itself takes a moment to grab. (Clicks on StepForge itself are
// filtered by the cursor-position check in sessionCapture, not by
// window focus — WSLg reports focus unreliably.)
let clickPos = osPoint ? this.osPointToDip(osPoint) : null;
if (!clickPos) clickPos = this.screen.getCursorScreenPoint();
clog('click@', clickAt, button, 'os', osPoint, '-> dip', clickPos);
this.enqueueClickCapture(clickPos, clickAt, button || 'mouse');
}
/**
* Whether this click should be dropped by the debounce. A click is dropped
* only when it follows the last *accepted* click of the same button by
* less than the debounce window — so the window is measured from accepted
* clicks, never from dropped ones, and a run of fast clicks can't push the
* next deliberate click out indefinitely. Accepting a click records it as
* the new reference point. Different buttons debounce independently.
*/
isDebouncedClick(at, button) {
const key = button || 'mouse';
const windowMs = this.clickDebounceMs();
const last = this.lastAcceptedClickByButton.get(key);
if (last != null && at >= last && at - last < windowMs) return true;
this.lastAcceptedClickByButton.set(key, at);
return false;
}
/**
* Physical (OS event) pixels -> DIP. Windows exposes the canonical
* conversion; on Linux/X11 it is reconstructed from display geometry (see
* app/coords.js). Without this, the click marker drifts on any display
* scaled away from 100% and on secondary monitors.
*/
osPointToDip(osPoint) {
if (this.screen && typeof this.screen.screenToDipPoint === 'function') {
try {
const dip = this.screen.screenToDipPoint(osPoint);
if (dip && Number.isFinite(dip.x) && Number.isFinite(dip.y)) return dip;
} catch { /* fall through to manual conversion */ }
}
try {
const displays = this.screen && typeof this.screen.getAllDisplays === 'function'
? this.screen.getAllDisplays()
: [];
const dip = physicalToDip(osPoint, displays);
if (dip) return dip;
} catch { /* no display geometry available */ }
return osPoint;
}
/**
* Serialize click captures: a click that lands while an earlier capture is
* still being stored queues behind it instead of being dropped by the
* "capture already in progress" guard. The marker position was already
* read at click time, so a queued step still circles the right spot.
*
* Crucially, only the *storing* is serialized. The click is paired with
* its frame right here, at event time: behind a slow store or PNG encode
* the queue can run seconds late, and a frame request issued that late
* could find the click-time frame already evicted from the ring buffer.
* Eager pairing keeps one-click-one-frame semantics intact no matter how
* fast the user clicks or how slow the encoder is.
*/
enqueueClickCapture(clickPos, clickAt = Date.now(), button = 'mouse') {
const clickMeta = { at: Number.isFinite(clickAt) ? clickAt : Date.now(), button: button || 'mouse' };
if (this.session && !this.session.paused && !this.userIsInApp()) {
// The guide id pins the click to its recording so it can still be
// stored if the session stops while this click waits in the queue.
clickMeta.guideId = this.session.guideId;
clickMeta.framePromise = this.frameForClick(clickPos, clickMeta.at)
.catch(() => null);
}
this.clickQueue = this.clickQueue
.then(() => this.sessionCapture('click', clickPos, clickMeta))
.catch(() => {});
return this.clickQueue;
}
async captureCurrentFrame(mode, capturePoint = null, startedAt = Date.now()) {
const grabbed = await this.grab(mode, capturePoint);
return {
mode,
// Keep the raw image and defer PNG encoding to storeFrameAsStep:
// toPNG() on a full-resolution frame blocks the main thread for
// hundreds of ms, and doing it every frame-loop tick starved the
// event loop so badly that click events arrived seconds late.
// Encoding once per *stored* step is cheap; encoding per grab is not.
image: grabbed.image,
size: grabbed.image.getSize(),
display: grabbed.display,
cursor: capturePoint || grabbed.cursor,
startedAt,
capturedAt: Date.now(),
};
}
storeFrameAsStep(guideId, mode, frame, clickPos = null) {
if (!frame) return { ok: false, reason: 'no capture frame available' };
const annotations = [];
// The click position (DIP, read at event time) wins over the frame's
// grab-time cursor; stream-backend frames carry no cursor at all.
const cursor = clickPos || frame.cursor || null;
if (cursor && mode !== 'window' && this.settings.get('capture.clickMarker')) {
const fx = (cursor.x - frame.display.bounds.x) / frame.display.bounds.width;
const fy = (cursor.y - frame.display.bounds.y) / frame.display.bounds.height;
if (fx >= 0 && fx <= 1 && fy >= 0 && fy <= 1) {
const d = 0.035;
annotations.push({
type: 'oval',
x: fx - d / 2, y: fy - (d * frame.size.width / frame.size.height) / 2,
w: d, h: d * frame.size.width / frame.size.height,
style: {
stroke: this.settings.get('capture.clickMarkerColor') || '#E5484D',
strokeWidth: 4, fill: 'transparent',
},
});
}
}
const step = this.store.addStep(guideId, {
title: this.autoTitle(mode),
annotations,
focusedView: {
enabled: Boolean(this.settings.get('editor.focusedViewDefaultForNewSteps')),
zoom: 1, panX: 0.5, panY: 0.5,
},
}, frame.png || frame.image.toPNG(), frame.size);
return { ok: true, step };
}
autoTitle(mode) {
const tplStr = this.settings.get('editor.autoTitleTemplate') || '[[Mode]] capture [[Time]]';
const now = new Date();
const pad = (n) => String(n).padStart(2, '0');
return expandPlaceholders(tplStr, {
Mode: { fullscreen: 'Screen', window: 'Window', region: 'Region' }[mode] || 'Screen',
Time: `${pad(now.getHours())}:${pad(now.getMinutes())}:${pad(now.getSeconds())}`,
Date: `${now.getFullYear()}-${pad(now.getMonth() + 1)}-${pad(now.getDate())}`,
});
}
/** Grab the screen/window image as { image, display } or throw. */
async grab(mode, cursorPoint = null) {
const cursor = cursorPoint || this.screen.getCursorScreenPoint();
const display = this.screen.getDisplayNearestPoint(cursor);
const { width, height } = display.size;
const scale = display.scaleFactor || 1;
// Ask for both kinds: some compositors (WSLg/Wayland portals) expose no
// individual window sources, so window mode falls back to the screen.
const sources = await desktopCapturer.getSources({
types: mode === 'window' ? ['window', 'screen'] : ['screen'],
thumbnailSize: { width: Math.round(width * scale), height: Math.round(height * scale) },
});
if (!sources.length) throw new Error('no capture sources available (portal/permissions?)');
let source = null;
if (mode === 'window') {
const win = this.getWindow();
const ownTitle = win ? win.getTitle() : '';
const windows = sources.filter((s) => s.id.startsWith('window:'));
source = windows.find((s) => s.name && s.name !== ownTitle && !/stepforge/i.test(s.name))
|| windows[0]
|| sources.find((s) => s.id.startsWith('screen:'));
} else {
const screens = sources.filter((s) => s.id.startsWith('screen:'));
source = screens.find((s) => String(s.display_id) === String(display.id)) || screens[0] || sources[0];
}
if (!source) throw new Error('no capture source matched');
const image = source.thumbnail;
if (!image || image.isEmpty()) throw new Error('capture returned an empty image');
return { image, display, cursor };
}
/**
* Hide the app window while `fn` runs so screenshots show the user's work,
* not StepForge itself. Restores visibility afterwards.
*/
async withWindowHidden(fn, { refocus = true, pauseMs = 350 } = {}) {
const win = this.getWindow();
const wasVisible = win && !win.isDestroyed() && win.isVisible() && !win.isMinimized();
if (wasVisible) {
win.hide();
if (pauseMs > 0) {
await new Promise((r) => setTimeout(r, pauseMs)); // let the compositor repaint
}
}
try {
return await fn();
} finally {
if (wasVisible && win && !win.isDestroyed()) {
if (refocus) {
win.show();
win.focus();
} else {
win.showInactive();
}
}
}
}
/**
* Take a screenshot and append it to the guide as a new image step.
* Adds a click-marker annotation at the cursor position when enabled.
*/
async shoot({
guideId,
mode = 'fullscreen',
delayMs = null,
hideWindow = true,
refocus = true,
hideWindowDelayMs = null,
clickPos = null,
}) {
const delay = delayMs == null ? this.settings.get('capture.delayMs') || 0 : delayMs;
if (delay > 0) await new Promise((resolve) => setTimeout(resolve, delay));
let frame;
try {
frame = hideWindow
? await this.withWindowHidden(() => this.captureCurrentFrame(mode, clickPos), {
refocus,
pauseMs: hideWindowDelayMs == null ? 350 : hideWindowDelayMs,
})
: await this.captureCurrentFrame(mode, clickPos);
} catch (err) {
return { ok: false, reason: err.message };
}
return this.storeFrameAsStep(guideId, mode, frame, clickPos);
}
/**
* Region capture: shoot the full screen, then let the user drag a
* rectangle in a fullscreen overlay; the crop becomes the step image.
*/
async regionCapture(guideId) {
let grabbed;
try {
grabbed = await this.withWindowHidden(() => this.grab('fullscreen'));
} catch (err) {
return { ok: false, reason: err.message };
}
const { image, display } = grabbed;
const rect = await this.pickRegion(display, image);
if (!rect) return { ok: false, reason: 'selection cancelled' };
const cropped = image.crop(rect);
const size = cropped.getSize();
if (!size.width || !size.height) return { ok: false, reason: 'empty selection' };
const step = this.store.addStep(guideId, { title: this.autoTitle('region') },
cropped.toPNG(), size);
return { ok: true, step };
}
/** Fullscreen overlay window that resolves with a crop rect (image px). */
pickRegion(display, image) {
return new Promise((resolve) => {
const overlay = new BrowserWindow({
x: display.bounds.x,
y: display.bounds.y,
width: display.bounds.width,
height: display.bounds.height,
frame: false,
transparent: true,
alwaysOnTop: true,
fullscreen: true,
skipTaskbar: true,
webPreferences: {
preload: path.join(__dirname, 'region-preload.js'),
contextIsolation: true,
},
});
let settled = false;
const finish = (rect) => {
if (settled) return;
settled = true;
if (!overlay.isDestroyed()) overlay.close();
resolve(rect);
};
const { ipcMain } = require('electron');
const onPick = (event, rect) => {
if (event.sender !== overlay.webContents) return;
ipcMain.removeListener('region:picked', onPick);
if (!rect) return finish(null);
const imgSize = image.getSize();
const sx = imgSize.width / display.bounds.width;
const sy = imgSize.height / display.bounds.height;
finish({
x: Math.round(rect.x * sx),
y: Math.round(rect.y * sy),
width: Math.round(rect.w * sx),
height: Math.round(rect.h * sy),
});
};
ipcMain.on('region:picked', onPick);
overlay.on('closed', () => finish(null));
overlay.loadFile(path.join(__dirname, 'renderer', 'region.html'));
});
}
}
module.exports = CaptureService;