import { isRecord } from "@commontools/utils/types"; import { getLogger } from "@commontools/utils/logger"; import { ID, ID_FIELD, type JSONSchema } from "./builder/types.ts"; import { createRef } from "./create-ref.ts"; import { CellImpl, isCell } from "./cell.ts"; import { resolveLink } from "./link-resolution.ts"; import { areLinksSame, areMaybeLinkAndNormalizedLinkSame, areNormalizedLinksSame, createSigilLinkFromParsedLink, findAndInlineDataURILinks, isCellLink, isPrimitiveCellLink, isWriteRedirectLink, type NormalizedFullLink, parseLink, } from "./link-utils.ts"; import { isCellResultForDereferencing } from "./query-result-proxy.ts"; import { type IExtendedStorageTransaction, type IReadOptions, type JSONValue, } from "./storage/interface.ts"; import { type Runtime } from "./runtime.ts"; import { toURI } from "./uri-utils.ts"; import { markReadAsPotentialWrite } from "./scheduler.ts"; const diffLogger = getLogger("normalizeAndDiff", { enabled: false, level: "debug", }); // Sentinel value to distinguish "no precomputed value" from "precomputed value is undefined" const NO_PRECOMPUTED = Symbol("no-precomputed"); /** * Traverses newValue and updates `current` and any relevant linked documents. * * Returns true if any changes were made. * * When encountering an object with a `[ID]` property, it'll be used to compute * an entity id based on it's relative location and the passed context, and the * changes will be written to that entity. * * @param current - A doc link to the current value to compare against. * @param newValue - The new value to traverse. * @param log - The log to write to. * @param context - The context of the change. * @returns Whether any changes were made. */ export function diffAndUpdate( runtime: Runtime, tx: IExtendedStorageTransaction, link: NormalizedFullLink, newValue: unknown, context?: unknown, options?: IReadOptions, ): boolean { const readOptions: IReadOptions = { ...options, meta: { ...options?.meta, ...markReadAsPotentialWrite }, }; const changes = normalizeAndDiff( runtime, tx, link, newValue, context, readOptions, ); diffLogger.debug( "diff", () => `[diffAndUpdate] changes: ${JSON.stringify(changes)}`, ); applyChangeSet(tx, changes); return changes.length > 0; } export type ChangeSet = { location: NormalizedFullLink; value: JSONValue | undefined; }[]; /** * Traverses objects and returns an array of changes that should be written. An * empty array means no changes. * * When encountering an object with a `[ID]` property, it'll be used to compute * an entity id based on it's relative location and the passed context, and the * changes will be queued to be written to that entity. * * Otherwise, when traversing and if the new value is a regular JSON value, but * the old value is an alias, follow the alias before writing. However document * references get overwritten (except as per above, the object gets converted to * a document itself). * * Any proxy is unwrapped, and docs and cells mapped to doc links. * * @param current - A doc link to the current value to compare against. * @param newValue - The new value to traverse. * @param log - The log to write to. * @param context - The context of the change. * @returns An array of changes that should be written. */ export function normalizeAndDiff( runtime: Runtime, tx: IExtendedStorageTransaction, link: NormalizedFullLink, newValue: unknown, context?: unknown, options?: IReadOptions, seen: Map = new Map(), precomputedCurrent: unknown = NO_PRECOMPUTED, ): ChangeSet { const changes: ChangeSet = []; // Log entry with value type and symbol presence const valueType = Array.isArray(newValue) ? "array" : typeof newValue; const pathStr = link.path.join("."); diffLogger.debug( "diff", () => `[DIFF_ENTER] path=${pathStr} type=${valueType} newValue=${ JSON.stringify(newValue as any) }`, ); // When detecting a circular reference on JS objects, turn it into a cell, // which below will be turned into a relative link. if (seen.has(newValue)) { diffLogger.debug( "diff", () => `[SEEN_CHECK] Already seen object at path=${pathStr}, converting to cell`, ); newValue = new CellImpl(runtime, tx, seen.get(newValue)!); } // ID_FIELD redirects to an existing field and we do something like DOM // diffing with it: We look at sibling entries and their value for that field, // and if we find a match, we reuse that document. Otherwise we create a new // one, but with a random id. It's random as opposed to causal like ID below, // because we don't want to recycle a document that was removed and added // back, we want to assume removing and adding with the same id is // semantically a new item (in fact we otherwise run into compare-and-swap // transaction errors). if ( isRecord(newValue) && newValue[ID_FIELD] !== undefined ) { diffLogger.debug( "diff", () => `[BRANCH_ID_FIELD] Processing ID_FIELD redirect at path=${pathStr}`, ); const { [ID_FIELD]: fieldName, ...rest } = newValue as & { [ID_FIELD]: string } & Record; const id = newValue[fieldName as PropertyKey]; if (link.path.length > 1) { const parent = tx.readValueOrThrow({ ...link, path: link.path.slice(0, -1), }, options); if (Array.isArray(parent)) { const base = runtime.getCellFromLink(link, undefined, tx); for (const v of parent) { if (isCellLink(v)) { const sibling = parseLink(v, base); const siblingId = tx.readValueOrThrow({ ...sibling, path: [...sibling.path, fieldName as string], }, options); if (siblingId === id) { // We found a sibling with the same id, so ... return [ // ... reuse the existing document ...normalizeAndDiff( runtime, tx, link, v, context, options, seen, ), // ... and update it to the new value ...normalizeAndDiff( runtime, tx, sibling, rest, context, options, seen, ), ]; } } } } } // Fallback: A random id. Below this will create a new entity. newValue = { [ID]: crypto.randomUUID(), ...rest }; } // Unwrap proxies and handle special types if (isCellResultForDereferencing(newValue)) { const parsedLink = parseLink(newValue); const sigilLink = createSigilLinkFromParsedLink(parsedLink); diffLogger.debug( "diff", () => `[BRANCH_QUERY_RESULT] Converted query result to sigil link at path=${pathStr} link=${sigilLink} parsedLink=${parsedLink}`, ); newValue = sigilLink; } // Track whether this link originates from a Cell value (either a cycle we // wrapped into a CellImpl above, or a user-supplied Cell). For Cell-origin // links we preserve the link (do NOT collapse). For links created via // query-result dereferencing (non-Cell), we may collapse immediate-parent // self-links. let linkOriginFromCell = false; if (isCell(newValue)) { diffLogger.debug( "diff", () => `[BRANCH_CELL] Converting cell to link at path=${pathStr}`, ); linkOriginFromCell = true; newValue = newValue.getAsLink(); } // Check for links that are data: URIs and inline them, by calling // normalizeAndDiff on the contents of the link. if ( isCellLink(newValue) && parseLink(newValue, link).id?.startsWith("data:") ) { return normalizeAndDiff( runtime, tx, link, findAndInlineDataURILinks(newValue), context, options, seen, ); } // If we're about to create a reference to ourselves, no-op if (areMaybeLinkAndNormalizedLinkSame(newValue, link)) { diffLogger.debug( "diff", () => `[BRANCH_SELF_REF] Self-reference detected, no-op at path=${pathStr}`, ); return []; } // Get current value to compare against (use precomputed if available) let currentValue = precomputedCurrent === NO_PRECOMPUTED ? tx.readValueOrThrow(link, options) : precomputedCurrent; // A new alias can overwrite a previous alias. No-op if the same. if (isWriteRedirectLink(newValue)) { if ( isWriteRedirectLink(currentValue) && areNormalizedLinksSame(parseLink(currentValue, link), link) ) { diffLogger.debug( "diff", () => `[BRANCH_WRITE_REDIRECT] Same redirect, no-op at path=${pathStr}`, ); return []; } else { diffLogger.debug( "diff", () => `[BRANCH_WRITE_REDIRECT] Different redirect, updating at path=${pathStr}`, ); changes.push({ location: link, value: newValue as JSONValue }); return changes; } } // Handle alias in current value (at this point: if newValue is not an alias) if (isWriteRedirectLink(currentValue)) { diffLogger.debug( "diff", () => `[BRANCH_CURRENT_ALIAS] Following current value alias at path=${pathStr}`, ); // Log reads of the alias, so that changing aliases cause refreshes const redirectLink = resolveLink( runtime, tx, parseLink(currentValue, link), "writeRedirect", ); return normalizeAndDiff( runtime, tx, redirectLink, newValue, context, options, seen, ); } if (isPrimitiveCellLink(newValue)) { diffLogger.debug( "diff", () => `[BRANCH_CELL_LINK] Processing cell link at path=${pathStr} link=${ JSON.stringify(newValue as any) }`, ); const parsedLink = parseLink(newValue, link); // Collapse same-document self/parent links created by query-result dereferencing. // Example: "internal.__#1.next" -> "internal.__#1". Writing that link would // create a tight self-loop, so we instead embed the target's current value // (a plain JSON snapshot). Do not collapse when the link came from converting // a seen cycle to a Cell, and only collapse when the target is the immediate // parent path. if (!linkOriginFromCell && isImmediateParent(parsedLink, link)) { diffLogger.debug( "diff", () => `[CELL_LINK_COLLAPSE] Same-doc ancestor/self link detected at path=${pathStr} -> embedding snapshot from ${ parsedLink.path.join(".") }`, ); const snapshot = tx.readValueOrThrow( parsedLink, options, ) as unknown; return normalizeAndDiff( runtime, tx, link, snapshot, context, options, seen, ); } if ( isPrimitiveCellLink(currentValue) && areLinksSame(newValue, currentValue, link) ) { diffLogger.debug( "diff", () => `[BRANCH_CELL_LINK] Same cell link, no-op at path=${pathStr}`, ); return []; } else { diffLogger.debug( "diff", () => `[BRANCH_CELL_LINK] Different cell link, updating at path=${pathStr}`, ); return [ // TODO(seefeld): Normalize the link to a sigil link? { location: link, value: newValue as JSONValue }, ]; } } // Handle ID-based object (convert to entity) if (isRecord(newValue) && newValue[ID] !== undefined) { diffLogger.debug( "diff", () => `[BRANCH_ID_OBJECT] Processing ID-based object at path=${pathStr}`, ); const { [ID]: id, ...rest } = newValue as & { [ID]: string } & Record; let path = link.path; // If we're setting an array element, make the array the context for the // derived id, not the array index. If it's a nested array, take the parent // array as context, recursively. while ( path.length > 0 && Array.isArray( tx.readValueOrThrow({ ...link, path: path.slice(0, -1) }, options), ) ) { path = path.slice(0, -1); } const entityId = createRef({ id }, { parent: { id: link.id, space: link.space }, path, context, }); const newEntryLink: NormalizedFullLink = { id: toURI(entityId), space: link.space, path: [], type: link.type, }; seen.set(newValue, newEntryLink); return [ // If it wasn't already, set the current value to be a doc link to this doc ...normalizeAndDiff( runtime, tx, link, createSigilLinkFromParsedLink(newEntryLink, { base: link }), context, options, seen, ), // And see whether the value of the document itself changed ...normalizeAndDiff( runtime, tx, newEntryLink, rest, context, options, seen, ), ]; } // Handle arrays if (Array.isArray(newValue)) { diffLogger.debug( "diff", () => `[BRANCH_ARRAY] Processing array at path=${pathStr} length=${newValue.length}`, ); // If the current value is not an array, set it to an empty array if (!Array.isArray(currentValue)) { changes.push({ location: link, value: [] }); } // Have to set this before recursing! seen.set(newValue, link); // Get current array for precomputing child values (if it was an array) const currentArray = Array.isArray(currentValue) ? currentValue : undefined; for (let i = 0; i < newValue.length; i++) { const childSchema = runtime.cfc.getSchemaAtPath( link.schema, [i.toString()], link.rootSchema, ); const nestedChanges = normalizeAndDiff( runtime, tx, { ...link, path: [...link.path, i.toString()], schema: childSchema, rootSchema: link.rootSchema, }, newValue[i], context, options, seen, currentArray?.[i], ); changes.push(...nestedChanges); } // Handle array length changes if (Array.isArray(currentValue) && currentValue.length != newValue.length) { // We need to add the schema here, since the array may be secret, so the length should be too const lub = (link.schema !== undefined) ? runtime.cfc.lubSchema(link.schema, link.rootSchema) : undefined; // We have to cast these, since the type could be changed to another value const childSchema = (lub !== undefined) ? { type: "number", ifc: { classification: [lub] } } as JSONSchema : { type: "number" } as JSONSchema; changes.push({ location: { ...link, path: [...link.path, "length"], schema: childSchema, }, value: newValue.length, }); } return changes; } // Handle objects if (isRecord(newValue)) { diffLogger.debug( "diff", () => `[BRANCH_OBJECT] Processing object at path=${pathStr}`, ); // If the current value is not a (regular) object, set it to an empty object // Note that the alias case is handled above if (!isRecord(currentValue) || isPrimitiveCellLink(currentValue)) { diffLogger.debug( "diff", () => `[BRANCH_OBJECT] Current value is not a record or cell link, setting to empty object at path=${pathStr}`, ); changes.push({ location: link, value: {} }); currentValue = {}; } // Have to set this before recursing! seen.set(newValue, link); // At this point currentValue is guaranteed to be a record const currentRecord = currentValue as Record; for (const key in newValue) { diffLogger.debug("diff", () => { const childPath = [...link.path, key].join("."); return `[DIFF_RECURSE] Recursing into key='${key}' childPath=${childPath}`; }); const childSchema = runtime.cfc.getSchemaAtPath( link.schema, [key], link.rootSchema, ); const nestedChanges = normalizeAndDiff( runtime, tx, { ...link, path: [...link.path, key], schema: childSchema }, newValue[key], context, options, seen, currentRecord[key], ); changes.push(...nestedChanges); } // Handle removed keys for (const key in currentRecord) { if (!(key in newValue)) { changes.push({ location: { ...link, path: [...link.path, key] }, value: undefined, }); } } return changes; } // When setting array length, also update the removed/added elements. if ( link.path.length > 0 && link.path[link.path.length - 1] === "length" ) { const maybeCurrentArray = tx.readValueOrThrow({ ...link, path: link.path.slice(0, -1), }, options); if (Array.isArray(maybeCurrentArray)) { const currentLength = maybeCurrentArray.length; const newLength = newValue as number; if (currentLength !== newLength) { changes.push({ location: link, value: newLength }); for ( let i = Math.min(currentLength, newLength); i < Math.max(currentLength, newLength); i++ ) { changes.push({ location: { ...link, path: [...link.path.slice(0, -1), i.toString()], }, value: undefined, }); } return changes; } } // else, i.e. parent is not an array: fall through to the primitive case } // Handle primitive values and other cases (Object.is handles NaN and -0) if (!Object.is(currentValue, newValue)) { changes.push({ location: link, value: newValue as JSONValue }); } return changes; } /** * Checks if a value contains data at a given path. * Returns true if the path exists in the value (even if the value at that path is undefined). */ function hasPath(value: unknown, path: readonly string[]): boolean { if (path.length === 0) return true; if (value === null || value === undefined || typeof value !== "object") { return false; } const [first, ...rest] = path; if (Array.isArray(value)) { const index = parseInt(first, 10); if (isNaN(index) || index < 0 || index >= value.length) { // Special case: "length" is always present on arrays if (first === "length" && rest.length === 0) return true; return false; } // Check if index actually exists (handles sparse arrays with holes) if (!(index in value)) return false; return hasPath(value[index], rest); } const obj = value as Record; if (!(first in obj)) return false; return hasPath(obj[first], rest); } /** * Compacts a ChangeSet by removing redundant child path changes when a * parent path change already includes that data. * * This optimization reduces the number of writes when setting nested structures. * For example, if we set `foo = {a: 1, b: 2}` and also set `foo/a = 1`, * the child write is redundant since the parent already contains it. * * Key rules: * - Empty objects `{}` or arrays `[]` do NOT subsume children (children populate them) * - Parent deletions (`value: undefined`) DO subsume child changes * - Parent must actually CONTAIN the child's path for subsumption to occur * * @param changes - The original change set * @returns A compacted change set with redundant child paths removed */ export function compactChangeSet(changes: ChangeSet): ChangeSet { if (changes.length <= 1) return changes; // Group by document using safe separator (JSON.stringify avoids key collisions) const byDocument = new Map(); for (const change of changes) { const key = JSON.stringify([ change.location.space, change.location.id, change.location.type, ]); if (!byDocument.has(key)) byDocument.set(key, []); byDocument.get(key)!.push(change); } const result: ChangeSet = []; for (const docChanges of byDocument.values()) { // Sort by path length (shortest first - parents before children) const sorted = docChanges.toSorted( (a, b) => a.location.path.length - b.location.path.length, ); // Track parent paths that can subsume children // Empty {} or [] don't subsume - children populate them! const subsumingPaths: Array<{ path: readonly string[]; value: unknown }> = []; for (const change of sorted) { const path = change.location.path; // Check if subsumed by a parent with actual content const isSubsumed = subsumingPaths.some((parent) => { if (parent.path.length >= path.length) return false; if (!parent.path.every((seg, i) => seg === path[i])) return false; // Parent path is prefix - check if parent VALUE contains this child's path const parentVal = parent.value; if (parentVal === null || parentVal === undefined) return false; if (typeof parentVal !== "object") return false; // Calculate the relative path from parent to child const relativePath = path.slice(parent.path.length); // Only subsume if parent's value actually contains data at the child's relative path return hasPath(parentVal, relativePath); }); // Also check: is this child subsumed by a DELETION of parent? const isDeletedByParent = subsumingPaths.some((parent) => { if (parent.path.length >= path.length) return false; if (!parent.path.every((seg, i) => seg === path[i])) return false; return parent.value === undefined; // Parent deletion subsumes child }); if (!isSubsumed && !isDeletedByParent) { result.push(change); // Track this path for potential child subsumption subsumingPaths.push({ path, value: change.value }); } } } diffLogger.debug( "compact", () => `[compactChangeSet] Compacted ${changes.length} changes to ${result.length}`, ); return result; } /** * Apply a change set to all mentioned documents. * * @param changes - The change set to apply. * @param log - The log to write to. */ export function applyChangeSet( tx: IExtendedStorageTransaction, changes: ChangeSet, ) { // CT-1123: Removed compactChangeSet - structural sharing makes redundant writes // cheap (O(path_depth) with noop detection), while compaction added O(N²) overhead. // Benchmarks showed 2.5-4.4x slowdown with compactChangeSet enabled. for (const change of changes) { tx.writeValueOrThrow(change.location, change.value); } } /** * Translates `id` that React likes to create to our `ID` property, making sure * in any given object it is never used twice. * * This mostly makes sense in a context where we ship entire JSON documents back * and forth and can't express graphs, i.e. two places referring to the same * underlying entity. * * We'll want to revisit once iframes become more sophisticated in what they can * express, e.g. we could have the inner shim do some of this work instead. */ export function addCommonIDfromObjectID( obj: unknown, fieldName: string = "id", ): void { function traverse(obj: unknown): void { if (isRecord(obj) && fieldName in obj) { obj[ID_FIELD] = fieldName; } if (isRecord(obj) && !isCell(obj) && !isPrimitiveCellLink(obj)) { Object.values(obj).forEach((v) => traverse(v)); } } traverse(obj); } /** * Returns true if `target` is the immediate parent of `base` in the same document. * * Example: * - base.path = ["internal", "__#1", "next"] * - target.path = ["internal", "__#1"] * * This is used to decide when to collapse a self/parent link that would create * a tight self-loop (e.g., obj.next -> obj) while allowing references to * higher ancestors (like an item's `items` pointing to its containing array). */ function isImmediateParent( target: NormalizedFullLink, base: NormalizedFullLink, ): boolean { return ( target.id === base.id && target.space === base.space && target.path.length === base.path.length - 1 && target.path.every((seg, i) => seg === base.path[i]) ); }