Rearchitect click capture: strict click-time frames, off-main-process recorder, exact marker coordinates
Template tests / tests (push) Successful in 1m50s

Implements the architecture change from ai_prompts/prompt3.md:

- New app/click-frames.js: shared timestamped frame ring + strict
  click-to-frame pairing (never a frame whose grab started after the
  click); legacy slack behavior kept behind capture.strictClickFrames=false.
- New stream capture backend (app/stream-backend.js + hidden worker
  window): per-display desktop media streams sampled into ring buffers
  and PNG-encoded entirely off the main process, so click delivery is
  never starved by capture work. Auto-degrades to the legacy in-process
  frame loop when streams cannot start or the worker stops answering.
- Clicks are paired with their frame at event time (eager pairing in
  enqueueClickCapture); only the storing is serialized, so slow encodes
  cannot skew later clicks in a fast burst.
- Linux watcher: restored event-time root coordinates from
  xinput test-xi2 and merged raw/regular twin events structurally.
- Replaced the 40ms time debounce with source-aware duplicate
  suppression: fast legitimate clicks are never dropped.
- New app/coords.js: physical-to-DIP conversion with multi-monitor and
  scale-factor handling; Windows keeps screenToDipPoint.
- STEPFORGE_CLICK_SELFTEST end-to-end hook: 3/3 clicks become steps via
  the stream backend with 0.00% marker offset on this host.
- Tests rewritten/added: strict selection, coords, stream backend,
  Linux coordinate parsing, twin merge, burst clicking (126 passing).

Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
This commit is contained in:
Iisyourdad
2026-06-11 21:33:31 -05:00
parent c6d0e9e356
commit a0b69f8cc7
14 changed files with 2109 additions and 162 deletions
+11
View File
@@ -0,0 +1,11 @@
<!doctype html>
<html>
<head>
<meta charset="utf-8">
<title>StepForge capture worker</title>
<!--
  Shared click↔frame selection logic; sets window.StepForgeClickFrames.
  Both scripts are deferred, so they execute in document order: the worker
  script below can rely on StepForgeClickFrames already being defined.
-->
<script src="../click-frames.js" defer></script>
<script src="capture-worker.js" defer></script>
</head>
<body><!-- hidden window; frames live in JS, nothing renders here --></body>
</html>
+199
View File
@@ -0,0 +1,199 @@
'use strict';
/**
* Capture worker: runs in a hidden renderer window and owns all continuous
* screen capture during a recording session.
*
* Per display it opens a desktop media stream (the desktopCapturer source id
* comes from the main process) and samples it on a fixed cadence into a
* timestamped ring buffer of ImageBitmaps. Sampling and PNG encoding happen
* entirely in this process, so the main-process event loop — which must stay
* responsive to deliver OS click events on time — never blocks on capture
* work. ImageBitmaps are GPU-backed and cheap to create from a <video>
* element, which is what lets the cadence be much tighter than the old
* 200ms main-process desktopCapturer loop.
*
* On a frame request the worker applies the shared strict selection rule
* (newest frame captured at or before the click; never one whose grab
* started after it), encodes that single frame to PNG, and ships the bytes
* to the main process.
*/
/* global StepForgeClickFrames, captureWorkerBridge */
(() => {
// Sampling interval in milliseconds, used when a start-stream command
// supplies no (or a zero) sampleMs.
const FALLBACK_SAMPLE_MS = 100;
// Tight cadence means more frames per second; keep enough of them to
// bridge any encode/IPC hiccup without hoarding GPU memory.
// Both values are only defaults: a start-stream command may override them
// through its frameLimit / retentionMs fields.
const FALLBACK_FRAME_LIMIT = 8;
const FALLBACK_RETENTION_MS = 2000;
// Registry of live streams. Keys are String(displayId); values are the
// per-display state objects built by startStream.
const streams = new Map(); // displayId(string) -> stream state
/**
 * Hand a message to the main process through the preload bridge.
 *
 * Never throws: a bridge failure is logged and reported through the return
 * value so callers can decide whether a fallback reply is needed.
 *
 * @param {object} msg Message to deliver; its `type` is included in the log
 *   line when delivery fails.
 * @returns {boolean} true when the bridge accepted the message, false when
 *   the bridge call threw.
 */
function send(msg) {
  let delivered = false;
  try {
    captureWorkerBridge.send(msg);
    delivered = true;
  } catch (err) {
    // Either the main process is gone or the payload could not cross the
    // bridge. Log it: a silently dropped frame-response is indistinguishable
    // from a hung worker when seen from the main process.
    console.error('capture-worker send failed:', err && err.message, 'type:', msg && msg.type);
  }
  return delivered;
}
/**
 * Open a desktop media stream for one display and start sampling it into a
 * fresh frame ring.
 *
 * Any existing stream registered for the same display is stopped first.
 * On success a `stream-ready` message is sent; on failure the partially
 * built stream is torn down and a `stream-error` message is sent.
 *
 * Setup awaits several times (getUserMedia, video.play, first sample). If the
 * stream is stopped or restarted in the meantime, this invocation no longer
 * owns the registry slot: it releases whatever it acquired and stays silent,
 * so it can neither leak capture tracks/timers nor tear down or misreport
 * the newer stream registered under the same key.
 *
 * @param {object} cmd start-stream command. Reads `displayId`, `sourceId`,
 *   and the optional `display` ({ bounds, scaleFactor }), `frameLimit`,
 *   `retentionMs` and `sampleMs` fields.
 * @returns {Promise<void>} Settles once setup finished, failed, or was
 *   superseded; never rejects.
 */
async function startStream(cmd) {
  const key = String(cmd.displayId);
  let state = null;
  // True while the state built by THIS call is still the registered one.
  const ownsSlot = () => state !== null && streams.get(key) === state;
  try {
    stopStream(key);
    const display = cmd.display || {};
    const scale = display.scaleFactor || 1;
    const bounds = display.bounds || { width: 1280, height: 720 };
    // Request the stream at physical-pixel size (DIP bounds × scale factor).
    const physWidth = Math.round(bounds.width * scale);
    const physHeight = Math.round(bounds.height * scale);
    state = {
      displayId: cmd.displayId,
      media: null,
      video: null,
      timer: null,
      sampling: false,
      ring: new StepForgeClickFrames.FrameRing({
        limit: cmd.frameLimit || FALLBACK_FRAME_LIMIT,
        retentionMs: cmd.retentionMs || FALLBACK_RETENTION_MS,
        onEvict: (frame) => {
          if (frame && frame.bitmap && frame.bitmap.close) frame.bitmap.close();
        },
      }),
    };
    streams.set(key, state);
    // The chromeMediaSource constraint set is Electron's documented bridge
    // from a desktopCapturer source id to a live media stream.
    const media = await navigator.mediaDevices.getUserMedia({
      audio: false,
      video: {
        mandatory: {
          chromeMediaSource: 'desktop',
          chromeMediaSourceId: cmd.sourceId,
          minWidth: physWidth,
          maxWidth: physWidth,
          minHeight: physHeight,
          maxHeight: physHeight,
          maxFrameRate: 30,
        },
      },
    });
    if (!ownsSlot()) {
      // Stopped/restarted while waiting: nobody else holds this stream, so
      // release its tracks here or the capture would run forever.
      for (const track of media.getTracks()) {
        try { track.stop(); } catch { /* already stopped */ }
      }
      return;
    }
    state.media = media;
    const video = document.createElement('video');
    video.muted = true;
    video.srcObject = media;
    state.video = video;
    await video.play();
    if (!ownsSlot()) {
      // stopStream already stopped the tracks (state.media was set); just
      // detach the element and do not start a timer nobody would clear.
      video.srcObject = null;
      return;
    }
    const sampleMs = cmd.sampleMs || FALLBACK_SAMPLE_MS;
    state.timer = setInterval(() => sampleFrame(state), sampleMs);
    // Buffer a frame immediately so a click right after "Start recording"
    // already has something captured before it.
    await sampleFrame(state);
    if (!ownsSlot()) return; // stopped during the first sample; timer already cleared
    send({ type: 'stream-ready', displayId: cmd.displayId });
  } catch (err) {
    if (state !== null) {
      if (!ownsSlot()) {
        // Superseded: the slot now belongs to someone else (or to nobody).
        // Tearing it down or reporting an error would hit the wrong stream.
        if (state.video) state.video.srcObject = null;
        return;
      }
      stopStream(key);
    }
    send({ type: 'stream-error', displayId: cmd.displayId, reason: String(err && err.message || err) });
  }
}
/**
 * Grab one frame from the stream's <video> element into its ring buffer.
 *
 * Does nothing when a grab is already in flight, when no video element is
 * attached, or when the video has no current frame data yet (readyState < 2).
 * Failures are deliberately swallowed: a missed sample only means the best
 * available frame is slightly older.
 *
 * @param {object} state Per-display stream state (video, ring, sampling flag,
 *   displayId).
 * @returns {Promise<void>}
 */
async function sampleFrame(state) {
  if (state.sampling || !state.video || state.video.readyState < 2) return;
  state.sampling = true;
  // startedAt/capturedAt bracket the grab so strict selection can tell
  // pre-click frames from post-click ones.
  const startedAt = Date.now();
  try {
    const bitmap = await createImageBitmap(state.video);
    // The stream may have been stopped or replaced while the grab was in
    // flight. Its ring has been cleared and nobody will evict from it again,
    // so pushing now would strand the bitmap's memory: release it instead.
    if (streams.get(String(state.displayId)) !== state) {
      if (bitmap && typeof bitmap.close === 'function') bitmap.close();
      return;
    }
    state.ring.push({
      mode: 'fullscreen',
      bitmap,
      width: bitmap.width,
      height: bitmap.height,
      startedAt,
      capturedAt: Date.now(),
    });
  } catch {
    // A failed sample only means a slightly older best frame.
  } finally {
    state.sampling = false;
  }
}
/**
 * Stop and unregister the stream stored under `key`, releasing everything
 * it holds: the sampling timer, the capture tracks, the <video> element's
 * hold on the MediaStream, and the buffered frames. Unknown keys are a no-op,
 * so calling it twice is safe.
 *
 * @param {string} key Registry key, i.e. String(displayId).
 * @returns {void}
 */
function stopStream(key) {
  const state = streams.get(key);
  if (!state) return;
  // Unregister first so anything that consults the registry during teardown
  // already sees the stream as gone.
  streams.delete(key);
  if (state.timer) {
    clearInterval(state.timer);
    state.timer = null;
  }
  if (state.media) {
    for (const track of state.media.getTracks()) {
      try { track.stop(); } catch { /* already stopped */ }
    }
    state.media = null;
  }
  if (state.video) {
    // Detach the element from the MediaStream; otherwise the element keeps
    // the stream object reachable for as long as the state is referenced.
    try {
      state.video.pause();
      state.video.srcObject = null;
    } catch { /* element already torn down */ }
    // Nulling the reference also makes any late sampleFrame call bail out.
    state.video = null;
  }
  // NOTE(review): clear() is presumed to run the ring's onEvict hook so the
  // buffered bitmaps get closed — confirm in click-frames.js.
  state.ring.clear();
}
/**
 * Answer a frame-request: pick the frame that matches the click, encode it
 * to PNG and ship it to the main process.
 *
 * Every path sends a `frame-response` carrying the request's `requestId`.
 * If the success reply (which carries the PNG bytes) cannot be delivered, a
 * small `ok: false` reply is sent in its place so the main process does not
 * mistake a payload problem for a hung worker.
 *
 * @param {object} cmd frame-request command. Reads `displayId`, `requestId`,
 *   `clickAt` and `strict` (strict selection unless explicitly `false`).
 * @returns {Promise<boolean>} Whether the last reply attempted was accepted
 *   by the bridge.
 */
async function handleFrameRequest(cmd) {
  const reply = (extra) => send({ type: 'frame-response', requestId: cmd.requestId, ...extra });
  const state = streams.get(String(cmd.displayId));
  if (!state) return reply({ ok: false, reason: 'no stream for display' });
  // One last sample: if the compositor delivered a newer video frame since
  // the previous tick, a sub-millisecond grab here can only improve (never
  // worsen) the match — its startedAt is still checked against the click.
  await sampleFrame(state);
  const frame = StepForgeClickFrames.selectFrameForClick(state.ring.frames(), {
    clickAt: cmd.clickAt,
    mode: 'fullscreen',
    strict: cmd.strict !== false,
  });
  if (!frame) return reply({ ok: false, reason: 'no frame at or before the click' });
  let response;
  try {
    const canvas = new OffscreenCanvas(frame.width, frame.height);
    const ctx = canvas.getContext('2d');
    // getContext returns null when no 2D context can be created; fail with a
    // readable reason instead of an incidental TypeError.
    if (!ctx) throw new Error('2d canvas context unavailable');
    ctx.drawImage(frame.bitmap, 0, 0);
    const blob = await canvas.convertToBlob({ type: 'image/png' });
    const png = await blob.arrayBuffer();
    response = {
      ok: true,
      png: new Uint8Array(png),
      width: frame.width,
      height: frame.height,
      startedAt: frame.startedAt,
      capturedAt: frame.capturedAt,
    };
  } catch (err) {
    return reply({ ok: false, reason: String(err && err.message || err) });
  }
  if (reply(response)) return true;
  // send() swallows bridge errors and only returns false, so the request is
  // still unanswered at this point. Retry with a payload-free failure reply.
  return reply({ ok: false, reason: 'frame-response payload could not be sent' });
}
/**
 * Send a `stats` message describing every registered stream: buffered frame
 * count, capture time of the newest frame, the video element's readiness and
 * size, and whether a grab is currently in flight.
 *
 * @param {object} [cmd] stats-request command; its `requestId`, when present,
 *   is echoed back in the reply.
 * @returns {void}
 */
function reportStats(cmd) {
  const summarize = (entry) => {
    const frameCount = entry.ring.frames().length;
    const newest = entry.ring.latest();
    const element = entry.video;
    return {
      frames: frameCount,
      latestCapturedAt: newest ? newest.capturedAt : null,
      videoReadyState: element ? element.readyState : null,
      videoSize: element ? `${element.videoWidth}x${element.videoHeight}` : null,
      sampling: entry.sampling,
    };
  };
  const stats = Object.fromEntries(
    [...streams].map(([id, entry]) => [id, summarize(entry)]),
  );
  send({ type: 'stats', requestId: cmd && cmd.requestId, stats });
}
// Command dispatcher: routes each message from the main process to its
// handler by `type`. Non-object messages and unknown types are ignored.
captureWorkerBridge.onCommand((msg) => {
  if (!msg || typeof msg !== 'object') return;
  switch (msg.type) {
    case 'start-stream':
      startStream(msg);
      break;
    case 'stop-stream':
      stopStream(String(msg.displayId));
      break;
    case 'frame-request':
      // A request must always produce a response — an unanswered click
      // counts toward backend unhealthiness in the main process.
      handleFrameRequest(msg).catch((err) => {
        console.error('capture-worker frame-request failed:', err && err.message);
        send({ type: 'frame-response', requestId: msg.requestId, ok: false, reason: String(err && err.message || err) });
      });
      break;
    case 'stats-request':
      reportStats(msg);
      break;
    default:
      break;
  }
});
})();