Files
autodoc/app/click-frames.js
T
Iisyourdad a0b69f8cc7
Template tests / tests (push) Successful in 1m50s
Rearchitect click capture: strict click-time frames, off-main-process recorder, exact marker coordinates
Implements the architecture change from ai_prompts/prompt3.md:

- New app/click-frames.js: shared timestamped frame ring + strict
  click-to-frame pairing (never a frame whose grab started after the
  click); legacy slack behavior kept behind capture.strictClickFrames=false.
- New stream capture backend (app/stream-backend.js + hidden worker
  window): per-display desktop media streams sampled into ring buffers
  and PNG-encoded entirely off the main process, so click delivery is
  never starved by capture work. Auto-degrades to the legacy in-process
  frame loop when streams cannot start or the worker stops answering.
- Clicks are paired with their frame at event time (eager pairing in
  enqueueClickCapture); only the storing is serialized, so slow encodes
  cannot skew later clicks in a fast burst.
- Linux watcher: restored event-time root coordinates from
  xinput test-xi2 and merge raw/regular twin events structurally.
- Replaced the 40ms time debounce with source-aware duplicate
  suppression: fast legitimate clicks are never dropped.
- New app/coords.js: physical-to-DIP conversion with multi-monitor and
  scale-factor handling; Windows keeps screenToDipPoint.
- STEPFORGE_CLICK_SELFTEST end-to-end hook: 3/3 clicks become steps via
  the stream backend with 0.00% marker offset on this host.
- Tests rewritten/added: strict selection, coords, stream backend,
  Linux coordinate parsing, twin merge, burst clicking (126 passing).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
2026-06-11 21:33:31 -05:00

163 lines
5.5 KiB
JavaScript

'use strict';
/**
* Click ↔ frame correlation logic, shared by the main process and the
* capture-worker renderer (loaded there via a plain <script> tag, hence the
* UMD-style export at the bottom and the total absence of dependencies).
*
* The model: a recorder keeps a ring buffer of timestamped frames, each with
* { startedAt, capturedAt } — when the grab began and when it completed.
* A click carries its own hook-time timestamp. Pairing the two answers
* "what did the screen look like when the user clicked?".
*
* Strict mode encodes the product requirement (Folge-like recording): a step
* must show the screen *at or before* the click, never after it. A frame
* whose grab started after the click can already contain the click's effects
* (menus opened, pages navigated), so strict mode rejects it outright — the
* caller falls back to an explicit fresh shot instead of silently passing a
* post-click frame off as the click-time screen. Balanced mode keeps the old
* slack-window behavior for platforms where capture is too slow to keep a
* pre-click frame buffered.
*/
// Default maximum number of frames a FrameRing keeps (its `limit` option).
const DEFAULT_FRAME_LIMIT = 6;
// Default maximum age, in milliseconds, of the oldest frame a FrameRing
// keeps (its `retentionMs` option).
const DEFAULT_RETENTION_MS = 4000;
// A frame older than this is too stale to pass off as "the screen at the
// instant of the click".
const DEFAULT_MAX_AGE_MS = 600;
// Balanced mode only: a grab that began within this window after the click
// is accepted on the assumption that UI reactions render slower than this.
const DEFAULT_START_SLACK_MS = 300;
/**
 * Tells whether a point lies inside a rectangle. All four edges count as
 * inside, so a point exactly on the right or bottom edge is accepted.
 *
 * @param {{x: number, y: number}} point - Position to test.
 * @param {{x: number, y: number, width: number, height: number}} bounds -
 *   Rectangle described by its origin and size.
 * @returns {boolean} True when the point is within or on the rectangle;
 *   false otherwise, including when either argument is missing.
 */
function pointInBounds(point, bounds) {
  if (!point || !bounds) {
    return false;
  }
  const { x, y } = point;
  const insideHorizontally = x >= bounds.x && x <= bounds.x + bounds.width;
  const insideVertically = y >= bounds.y && y <= bounds.y + bounds.height;
  return insideHorizontally && insideVertically;
}
/**
 * Bounded buffer of the most recent frames. Two limits apply at once: a
 * maximum frame count and a maximum age measured on `capturedAt`. Because
 * frames hold raw images (potentially tens of MB each), anything over a
 * limit is dropped right away, and every dropped frame is handed to the
 * optional `onEvict` callback so the owner can free native resources
 * (e.g. ImageBitmap.close() in the capture worker).
 */
class FrameRing {
  /**
   * @param {object} [options]
   * @param {number} [options.limit] - Maximum number of frames kept.
   * @param {number} [options.retentionMs] - Maximum age, in milliseconds,
   *   of the oldest frame relative to `now()`.
   * @param {function(): number} [options.now] - Clock returning
   *   milliseconds; defaults to Date.now.
   * @param {?function(object): void} [options.onEvict] - Invoked once for
   *   each frame that leaves the buffer.
   */
  constructor({
    limit = DEFAULT_FRAME_LIMIT,
    retentionMs = DEFAULT_RETENTION_MS,
    now = Date.now,
    onEvict = null,
  } = {}) {
    this.limit = limit;
    this.retentionMs = retentionMs;
    this.now = now;
    this.onEvict = onEvict;
    this.items = [];
  }

  /**
   * Appends a frame as the newest entry, then prunes.
   *
   * @param {object} frame - Frame to store; falsy values are ignored.
   * @returns {?object} The frame that was passed in, or null when it was
   *   falsy.
   */
  push(frame) {
    if (!frame) {
      return null;
    }
    this.items.push(frame);
    this.prune();
    return frame;
  }

  /**
   * Drops frames from the oldest end while the buffer holds more than
   * `limit` frames or its oldest frame was captured before the retention
   * cutoff. Only the oldest frame is inspected for age, and a frame whose
   * `capturedAt` does not compare as >= the cutoff (missing, NaN) is
   * treated as expired.
   */
  prune() {
    const oldestAllowed = this.now() - this.retentionMs;
    for (;;) {
      const count = this.items.length;
      if (count === 0) {
        return;
      }
      const withinLimit = count <= this.limit;
      if (withinLimit && this.items[0].capturedAt >= oldestAllowed) {
        return;
      }
      const removed = this.items.shift();
      if (this.onEvict) {
        this.onEvict(removed);
      }
    }
  }

  /**
   * @returns {object[]} A shallow copy of the buffered frames, oldest
   *   first.
   */
  frames() {
    return Array.from(this.items);
  }

  /**
   * @returns {?object} The most recently pushed frame still buffered, or
   *   null when the buffer is empty.
   */
  latest() {
    const count = this.items.length;
    if (count === 0) {
      return null;
    }
    return this.items[count - 1];
  }

  /**
   * Empties the buffer and reports every removed frame to `onEvict`,
   * oldest first.
   */
  clear() {
    const released = this.items;
    this.items = [];
    if (!this.onEvict) {
      return;
    }
    for (const frame of released) {
      this.onEvict(frame);
    }
  }
}
/**
 * Decides whether a single frame is allowed to stand in for a single click.
 *
 * Checks run in this order:
 *  1. A missing frame is rejected.
 *  2. When `mode` is given, the frame's `mode` must match it.
 *  3. When both a click position and a frame display are known, the click
 *     must fall inside that display's bounds — fast clicks can land on
 *     another monitor before a buffered frame is consumed.
 *  4. A frame completed at or before the click is accepted if and only if
 *     it is no older than `maxAgeMs`.
 *  5. Any other frame counts as "in flight" and is rejected unless
 *     `allowInFlight` is set. In strict mode its grab must have *started*
 *     at or before the click, so its pixels predate the click's effects
 *     even though it finished later. A grab that started after the click
 *     is never accepted in strict mode, however close: that is the
 *     "screenshot shows the menu already open" failure. Balanced mode
 *     (`strict` false) also tolerates grabs that started up to
 *     `startSlackMs` after the click (the legacy heuristic).
 *
 * @param {object} frame - Candidate frame; `capturedAt`, `startedAt`,
 *   `mode` and `display.bounds` are read from it.
 * @param {object} [options]
 * @param {number} [options.clickAt] - Click timestamp; the current time is
 *   used when this is not a finite number.
 * @param {?{x: number, y: number}} [options.clickPos] - Click position.
 * @param {?*} [options.mode] - Required frame mode, if any.
 * @param {boolean} [options.strict] - Strict (true) or balanced pairing.
 * @param {boolean} [options.allowInFlight] - Whether a frame not completed
 *   by click time may be considered at all.
 * @param {number} [options.maxAgeMs] - Maximum age of a completed frame.
 * @param {number} [options.startSlackMs] - Balanced-mode start tolerance.
 * @returns {boolean} True when the frame may represent the click.
 */
function frameUsableForClick(frame, {
  clickAt,
  clickPos = null,
  mode = null,
  strict = true,
  allowInFlight = false,
  maxAgeMs = DEFAULT_MAX_AGE_MS,
  startSlackMs = DEFAULT_START_SLACK_MS,
} = {}) {
  if (!frame) {
    return false;
  }
  if (mode) {
    if (frame.mode !== mode) return false;
  }
  if (clickPos && frame.display) {
    if (!pointInBounds(clickPos, frame.display.bounds)) return false;
  }

  const clickedAt = Number.isFinite(clickAt) ? clickAt : Date.now();
  const { capturedAt } = frame;

  // Completed no later than the click: only staleness can disqualify it.
  if (Number.isFinite(capturedAt) && capturedAt <= clickedAt) {
    const ageMs = clickedAt - capturedAt;
    return ageMs <= maxAgeMs;
  }

  // Everything below concerns frames that were not complete at click time.
  if (!allowInFlight) {
    return false;
  }
  // Without a usable start time, fall back to the completion time.
  const grabStart = Number.isFinite(frame.startedAt) ? frame.startedAt : capturedAt;
  if (!Number.isFinite(grabStart)) {
    return false;
  }

  const latestAcceptableStart = strict ? clickedAt : clickedAt + startSlackMs;
  return grabStart <= latestAcceptableStart;
}
/**
 * Picks, from frames that are already buffered, the one that best
 * represents a click: the qualifying frame with the greatest `capturedAt`.
 * Qualification is delegated to frameUsableForClick with in-flight
 * acceptance forced off, since a buffered frame is by definition complete.
 *
 * @param {?Iterable<object>} frames - Candidate frames; a falsy value is
 *   treated as an empty collection.
 * @param {object} [opts] - Options forwarded to frameUsableForClick; any
 *   `allowInFlight` value in here is overridden with false.
 * @returns {?object} The chosen frame, or null when none qualifies and the
 *   caller must wait for the in-flight grab or take a fresh shot.
 */
function selectFrameForClick(frames, opts = {}) {
  const candidates = frames || [];
  let newest = null;
  for (const candidate of candidates) {
    const usable = frameUsableForClick(candidate, { ...opts, allowInFlight: false });
    if (usable && (newest === null || candidate.capturedAt > newest.capturedAt)) {
      newest = candidate;
    }
  }
  return newest;
}
// Everything this file exposes, gathered in one object so that each
// environment handled below receives the same set of names.
const api = {
FrameRing,
frameUsableForClick,
selectFrameForClick,
pointInBounds,
DEFAULT_FRAME_LIMIT,
DEFAULT_RETENTION_MS,
DEFAULT_MAX_AGE_MS,
DEFAULT_START_SLACK_MS,
};
/* eslint-disable no-undef */
// Publish `api` to whichever environment loaded this file. The first
// matching branch wins: CommonJS when a `module.exports` object exists,
// otherwise the `self` global, otherwise `window`. In the last two cases
// the API is attached under the name StepForgeClickFrames.
if (typeof module === 'object' && module.exports) {
module.exports = api;
} else if (typeof self !== 'undefined') {
self.StepForgeClickFrames = api;
} else if (typeof window !== 'undefined') {
window.StepForgeClickFrames = api;
}