import { hashStringOf } from "@commonfabric/data-model/value-hash"; import { cloneIfNecessary } from "@commonfabric/data-model/value-clone"; import type { SchemaPathSelector } from "@commonfabric/api"; import type { FabricValue } from "@commonfabric/data-model/fabric-value"; import { getLogger } from "../../utils/src/logger.ts"; import type { NormalizedFullLink } from "./link-utils.ts"; import type { IExtendedStorageTransaction, IMemorySpaceAddress, IReadOptions, } from "./storage/interface.ts"; import { ignoreReadForScheduling } from "./storage/reactivity-log.ts"; const logger = getLogger("traverse-recorder", { enabled: true, level: "info" }); /** * Capture/replay fixture format for traverse workloads. * * A fixture is captured from a real run (pattern test, integration test, or * server query) via `CF_TRAVERSE_CAPTURE=` and replayed by * `test/traverse-replay/replay.ts` against a corpus-backed read-only * transaction. The corpus stores each visited doc's full fact value (the * value at path `[]`), keyed by `space|scope|id|type`. Invocations record * every `SchemaObjectTraverser.traverse()` call in order, with selectors and * links interned into side tables (they repeat heavily). * * Known fidelity caveats (acceptable for benchmarking/regression use): * - Doc values are captured first-wins: if a doc is written mid-run, replay * sees the earliest captured value for all invocations. Replay is still * deterministic; prefer capturing steady-state phases. * - Client captures replay with `StandardObjectCreator` instead of the * runtime's `TransformObjectCreator`, so cell/proxy construction cost is * excluded; traversal control flow (branches taken, docs visited, reads * issued) is preserved. */ export type TraverseFixtureAddress = { space: string; id: string; type: string; path: readonly string[]; scope?: string; }; export type TraverseFixtureInvocation = { /** Address passed to `SchemaObjectTraverser.traverse()`. */ address: TraverseFixtureAddress; /** Index into the fixture's `selectors` table. */ selector: number; /** Index into the fixture's `links` table, when traverse() got a link. */ link?: number; /** `TraversalContext.includeMeta` (true on the query path). */ includeMeta: boolean; /** Invocations sharing this id shared one `TraversalContext`. */ context: number; /** Invocations sharing this id shared one `SchemaMemo`. */ memo?: number; }; export type TraverseFixture = { version: 1; meta: { name: string; source: string; capturedAt?: string; description?: string; }; selectors: SchemaPathSelector[]; links: NormalizedFullLink[]; /** Full fact values keyed by `space|scope|id|type`. */ docs: Record; invocations: TraverseFixtureInvocation[]; }; export function fixtureDocKey( address: { space: string; id: string; type?: string; scope?: string }, ): string { return `${address.space}|${address.scope ?? "space"}|${address.id}|` + `${address.type ?? "application/json"}`; } const DEFAULT_MAX_INVOCATIONS = 20_000; class TraverseCaptureRecorder { private docs = new Map(); private invocations: TraverseFixtureInvocation[] = []; private selectors: SchemaPathSelector[] = []; private selectorIndex = new Map(); private links: NormalizedFullLink[] = []; private linkIndex = new Map(); private contextIds = new WeakMap(); private nextContextId = 0; private memoIds = new WeakMap(); private nextMemoId = 0; private capHit = false; constructor(private maxInvocations = DEFAULT_MAX_INVOCATIONS) {} private idFor(map: WeakMap, obj: object, next: () => number) { let id = map.get(obj); if (id === undefined) { id = next(); map.set(obj, id); } return id; } private internSelector(selector: SchemaPathSelector): number { const key = hashStringOf(selector); let index = this.selectorIndex.get(key); if (index === undefined) { index = this.selectors.length; // Snapshot via deep-frozen clone (identity-passes already-frozen // selectors): callers may mutate or intern them later. this.selectors.push( cloneIfNecessary(selector as FabricValue) as SchemaPathSelector, ); this.selectorIndex.set(key, index); } return index; } private internLink(link: NormalizedFullLink): number { const key = hashStringOf(link); let index = this.linkIndex.get(key); if (index === undefined) { index = this.links.length; this.links.push( cloneIfNecessary( link as unknown as FabricValue, ) as unknown as NormalizedFullLink, ); this.linkIndex.set(key, index); } return index; } recordInvocation( doc: { address: IMemorySpaceAddress }, selector: SchemaPathSelector, link: NormalizedFullLink | undefined, context: { includeMeta: boolean }, memo: object | undefined, ): void { if (this.invocations.length >= this.maxInvocations) { if (!this.capHit) { this.capHit = true; logger.warn("capture", () => [ `invocation cap (${this.maxInvocations}) hit; later traversals ` + "are not recorded (docs still are)", ]); } return; } const { space, id, type, path, scope } = doc.address; this.invocations.push({ address: { space, id, type: type ?? "application/json", path: [...path], scope, }, selector: this.internSelector(selector), ...(link !== undefined && { link: this.internLink(link) }), includeMeta: context.includeMeta, context: this.idFor( this.contextIds, context, () => this.nextContextId++, ), ...(memo !== undefined && { memo: this.idFor(this.memoIds, memo, () => this.nextMemoId++), }), }); } /** * Capture the full doc behind `address` (value at path `[]`) once per doc. * Reads through the *unwrapped* tx with a scheduling-ignored meta so the * extra read does not perturb reactivity logs. */ private captureDoc( tx: IExtendedStorageTransaction, address: IMemorySpaceAddress, ): void { // data: URIs carry their value in the id; replay decodes them directly. if (address.id.startsWith("data:")) return; const key = fixtureDocKey(address); if (this.docs.has(key)) return; const { ok } = tx.read( { space: address.space, id: address.id, type: address.type, ...(address.scope !== undefined && { scope: address.scope }), path: [], }, { meta: ignoreReadForScheduling }, ); if (ok !== undefined && ok.value !== undefined) { this.docs.set( key, cloneIfNecessary(ok.value as FabricValue) as FabricValue, ); } } /** * Wrap a transaction so every read/readOrThrow first snapshots the target * doc into the corpus. All other members delegate to the original tx. */ wrapTx(tx: IExtendedStorageTransaction): IExtendedStorageTransaction { // deno-lint-ignore no-this-alias const recorder = this; return new Proxy(tx, { get(target, prop) { if (prop === "read" || prop === "readOrThrow") { return (address: IMemorySpaceAddress, options?: IReadOptions) => { recorder.captureDoc(target, address); // deno-lint-ignore no-explicit-any return (target as any)[prop](address, options); }; } // deno-lint-ignore no-explicit-any const value = (target as any)[prop]; return typeof value === "function" ? value.bind(target) : value; }, }); } toFixture(name: string, source: string): TraverseFixture { return { version: 1, meta: { name, source, capturedAt: new Date().toISOString(), }, selectors: this.selectors, links: this.links, docs: Object.fromEntries( [...this.docs.entries()].sort(([a], [b]) => a < b ? -1 : 1), ), invocations: this.invocations, }; } // Writes plain JSON (this module is bundled for browsers, so no node:zlib // here); gzip the output afterwards to check it in as a fixture. flush(path: string): void { const name = path.split("/").pop()?.replace(/\.json(\.gz)?$/, "") ?? "capture"; const fixture = this.toFixture(name, `CF_TRAVERSE_CAPTURE=${path}`); Deno.writeTextFileSync(path, JSON.stringify(fixture)); logger.info("capture", () => [ `wrote ${path}: ${this.docs.size} docs, ` + `${this.invocations.length} invocations, ` + `${this.selectors.length} selectors, ${this.links.length} links`, ]); } } let active: TraverseCaptureRecorder | undefined; function capturePathFromEnv(): string | undefined { try { // Not available (or not permitted) in browser/sandboxed builds. return typeof Deno !== "undefined" && typeof Deno.env?.get === "function" ? Deno.env.get("CF_TRAVERSE_CAPTURE") || undefined : undefined; } catch { return undefined; } } const capturePath = capturePathFromEnv(); if (capturePath !== undefined) { const max = Number( (() => { try { return Deno.env.get("CF_TRAVERSE_CAPTURE_MAX"); } catch { return undefined; } })() ?? DEFAULT_MAX_INVOCATIONS, ); active = new TraverseCaptureRecorder( Number.isFinite(max) && max > 0 ? max : DEFAULT_MAX_INVOCATIONS, ); logger.info("capture", () => [ `traverse capture enabled, writing to ${capturePath} on unload`, ]); globalThis.addEventListener("unload", () => { active!.flush(capturePath); }); // Long-running processes (e.g. a local toolshed) rarely exit cleanly, so // "unload" alone would lose the capture. Flush periodically too; the timer // is unref'd so it never keeps a process alive. const flushTimer = setInterval(() => { try { active!.flush(capturePath); } catch (error) { logger.warn("capture", () => ["periodic flush failed", error]); } }, 30_000); Deno.unrefTimer(flushTimer); } /** * No-op passthrough unless `CF_TRAVERSE_CAPTURE` is set, in which case the * returned tx snapshots every doc it reads into the capture corpus. */ export function wrapTxForTraverseCapture( tx: IExtendedStorageTransaction, ): IExtendedStorageTransaction { return active === undefined ? tx : active.wrapTx(tx); } /** * Record one `SchemaObjectTraverser.traverse()` invocation. No-op unless * capture is enabled. */ export function recordTraverseInvocation( doc: { address: IMemorySpaceAddress }, selector: SchemaPathSelector, link: NormalizedFullLink | undefined, context: { includeMeta: boolean }, memo: object | undefined, ): void { active?.recordInvocation(doc, selector, link, context, memo); }