// CFC Phase 3: the per-row label rule — builder helpers, the serialized AST,
// authoring/wire validation, and the shared evaluator.
// (Spec: docs/specs/sqlite-builtin/06-cfc.md, "Per-row labels".)
//
// A rule is a PURE declarative projection over (stored columns, fixed db
// properties): the builders below return plain-JSON AST nodes, `table()`
// validates and attaches the result to the table schema as `rowLabel`, and
// `evaluateRowLabel` interprets it identically at the write gate, the server
// commit, and read re-derivation. There is deliberately NO acting-principal
// term (a read-time `currentUser()` would resolve to the *reader* — in an
// OR-clause that self-grants access; the acting user belongs in the result
// ceiling). `any(...)` (one authored OR-clause) serializes but is REJECTED by
// validation until the runtime ships the clause-aware label profile (CFC spec
// §18.5.3 rule 3) — authored OR semantics is never silently lowered to the
// conjunctive form, and a smuggled `anyOf` fails closed at evaluation too.
//
// Pure module: no FFI, no engine imports — safe for client-side import.

/** A reference to a declared column, handed to the rule as `f.<column>`. */
export interface FieldRef {
  field: string;
}

export interface MatchOpts {
  /** Capture group to extract instead of the whole match. */
  group?: number;
  /** Minimum number of matches; fewer fails closed (required anchor). */
  min?: number;
}

/** Serialized rule, attached to the table schema as `rowLabel`. */
export interface RowLabelSpec {
  version: 1;
  confidentiality?: unknown;
  integrity?: unknown;
}

// NOTE(review): the type arguments in this file were reconstructed; the
// constraint below is the widest plausible one — confirm against callers.

/** Field handles passed to the rule: one accessor per declared column. */
export type RowFieldHandles<C extends Record<string, unknown>> =
  & { [K in keyof C]: FieldRef }
  & Record<string, FieldRef>;

export type RowLabelRule<C extends Record<string, unknown>> = (
  f: RowFieldHandles<C>,
) => { confidentiality?: unknown; integrity?: unknown };

/** Plain (non-null, non-array) object guard used on every untrusted node. */
const isRecord = (x: unknown): x is Record<string, unknown> =>
  typeof x === "object" && x !== null && !Array.isArray(x);

/** A field handle is exactly `{ field: string }` — no extra keys. */
const isFieldRef = (x: unknown): x is FieldRef =>
  isRecord(x) && typeof x.field === "string" && Object.keys(x).length === 1;

/** Shape of a DID method name accepted by `principal()` and the validators. */
const DID_PROTOCOL = /^[a-z][a-z0-9.+-]*$/;

// ---------------------------------------------------------------------------
// Builders — each returns its serialized AST node.
// ---------------------------------------------------------------------------

/**
 * Run `re` (forced global) over a column's text ⟹ the ordered list of matches
 * (or capture `group`). The universal field extractor: splits a dirty
 * `Name <addr>, addr` recipient line for free. Strict-if-present: a non-empty
 * value yielding zero matches fails closed at evaluation (never under-label);
 * `min` makes the field a required anchor.
 *
 * @throws TypeError when `field` is not a field handle or `re` is not a RegExp.
 */
export function match(
  field: FieldRef,
  re: RegExp,
  opts: MatchOpts = {},
): { match: Record<string, unknown> } {
  assertField(field, "match()");
  assertRegExp(re, "match()");
  const flags = re.flags.includes("g") ? re.flags : re.flags + "g";
  const node: Record<string, unknown> = {
    field: field.field,
    source: re.source,
    flags,
  };
  if (opts.group !== undefined) node.group = opts.group;
  if (opts.min !== undefined) node.min = opts.min;
  return { match: node };
}

/**
 * Include `then` only when the regex TESTS true against the column (gate
 * trust, or a data-dependent conjunct, on extracted metadata). One fused
 * helper — a bare `when(matches(…))` pair would collide with the builder's
 * control-flow `when`, whose transformer lowering matches by NAME and mangles
 * any local so named.
 *
 * @throws TypeError when `field` is not a field handle or `re` is not a RegExp.
 */
export function whenMatches(
  field: FieldRef,
  re: RegExp,
  then: unknown,
): { when: unknown; then: unknown } {
  assertField(field, "whenMatches()");
  assertRegExp(re, "whenMatches()");
  return {
    when: {
      match: {
        field: field.field,
        source: re.source,
        // A gate is a single test, so the global flag is dropped.
        flags: re.flags.replace("g", ""),
      },
    },
    then,
  };
}

/** `did:<protocol>:<v>` for each extracted `v` (distributes over the match
 * list). Normalization is protocol-implied: mailto/web lowercase+trim,
 * did:key untouched (base58 is case-sensitive), unknown protocols identity. */
export function principal(
  protocol: string,
  of: { match: Record<string, unknown> },
): { principal: Record<string, unknown> } {
  if (typeof protocol !== "string" || !DID_PROTOCOL.test(protocol)) {
    throw new TypeError(
      `principal(): invalid DID protocol ${JSON.stringify(protocol)}`,
    );
  }
  if (!isRecord(of) || !isRecord(of.match)) {
    throw new TypeError("principal() takes a match(...) term");
  }
  return { principal: { protocol, of } };
}

/** The db's owner — the principal that created the SqliteDb cell, resolved
 * from the db ref. A FIXED db property, so the rule stays pure. */
export function dbOwner(): { dbOwner: true } {
  return { dbOwner: true };
}

/** A literal atom (escape hatch). (Named `constant` — `const` is reserved.) */
export function constant(atom: unknown): { constant: unknown } {
  return { constant: atom };
}

/** Separate conjunctive clauses, one per atom — today's only confidentiality
 * combinator (every principal an independent requirement). */
export function all(...terms: unknown[]): { allOf: unknown[] } {
  return { allOf: terms };
}

/** ONE authored OR-clause: any alternative satisfies it (CFC spec §3.1.8).
 * Serializes, but `table()` REJECTS it until the runtime ships the
 * clause-aware label profile — never silently lowered to all-of. */
export function any(...terms: unknown[]): { anyOf: unknown[] } {
  return { anyOf: terms };
}

/** Set-intersection over integrity atom sets (the trust-floor meet).
 * Integrity only — confidentiality combines by all()/any(). */
export function intersect(...terms: unknown[]): { intersect: unknown[] } {
  return { intersect: terms };
}

/** Integrity claim: the row was authored by the extracted principal. Minted as
 * a self-describing `claimed-authored-by` atom — content-derived provenance
 * is forgeable by the row's writer, so it never lowers to the trusted
 * `AuthoredBy` family directly (see 06-cfc.md; upgrade via provider trust). */
export function authoredBy(
  p: { principal: Record<string, unknown> },
): { authoredBy: unknown } {
  assertPrincipal(p, "authoredBy()");
  return { authoredBy: p };
}

/** Integrity claim: endorsed by the extracted principal (same downgrade rule
 * as `authoredBy`). */
export function endorsedBy(
  p: { principal: Record<string, unknown> },
): { endorsedBy: unknown } {
  assertPrincipal(p, "endorsedBy()");
  return { endorsedBy: p };
}

function assertField(x: unknown, who: string): asserts x is FieldRef {
  if (!isFieldRef(x)) {
    throw new TypeError(`${who} takes a field handle (f.<column>)`);
  }
}

function assertRegExp(x: unknown, who: string): asserts x is RegExp {
  if (!(x instanceof RegExp)) throw new TypeError(`${who} takes a RegExp`);
}

function assertPrincipal(x: unknown, who: string): void {
  if (!isRecord(x) || !isRecord(x.principal)) {
    throw new TypeError(`${who} takes a principal(...) term`);
  }
}

// ---------------------------------------------------------------------------
// Validation — fail closed at authoring AND on wire-supplied specs.
// ---------------------------------------------------------------------------

const MAX_REGEX_SOURCE = 512;

/** Characters the lint counts as quantifiers when they follow an atom. */
const QUANTIFIER_CHARS: ReadonlySet<string> = new Set(["*", "+", "{", "?"]);

/** Reject ReDoS-shaped patterns: a quantifier applied to a group that itself
 * contains a quantifier (star height ≥ 2), e.g. `(a+)+`. Linear scan honoring
 * escapes and character classes. Conservative lint, not a parser; the
 * per-eval input is additionally produced by the row itself.
 *
 * @returns the rejection reason, or undefined when the source passes. */
function regexLintReason(source: string): string | undefined {
  if (source.length > MAX_REGEX_SOURCE) {
    return `regex too long (${source.length} > ${MAX_REGEX_SOURCE})`;
  }
  // `current` is the innermost open group (initially the top level);
  // `parents` holds the enclosing frames. hasQuant = a quantifier occurred
  // directly inside this frame (or a nested one).
  let current = { hasQuant: false };
  const parents: { hasQuant: boolean }[] = [];
  let inClass = false;
  for (let i = 0; i < source.length; i++) {
    const c = source.charAt(i);
    if (c === "\\") {
      i++; // skip escaped char
      continue;
    }
    if (inClass) {
      if (c === "]") inClass = false;
      continue;
    }
    if (c === "[") {
      inClass = true;
      continue;
    }
    if (c === "(") {
      parents.push(current);
      current = { hasQuant: false };
      // `(?:`, `(?=`, `(?!`, `(?<=`, `(?<!`, `(?<name>`: this `?` is group
      // syntax, not a quantifier — counting it would reject every
      // non-capturing group that is itself quantified, e.g. `(?:a|b)+`.
      if (source.charAt(i + 1) === "?") i++;
      continue;
    }
    if (c === ")") {
      const parent = parents.pop();
      if (parent === undefined) {
        // Unmatched ")" — invalid regex; report a reason rather than crash
        // (the lint runs on hostile wire specs and must stay fail-closed).
        return "regex fails the safety lint (unbalanced parenthesis)";
      }
      if (current.hasQuant && QUANTIFIER_CHARS.has(source.charAt(i + 1))) {
        return "regex fails the safety lint (nested quantifier — ReDoS risk)";
      }
      if (current.hasQuant) parent.hasQuant = true;
      current = parent;
      continue;
    }
    if (QUANTIFIER_CHARS.has(c)) current.hasQuant = true;
  }
  return undefined;
}

const ANY_REJECTION =
  "any() requires the clause-aware label profile (CFC spec §18.5.3 rule 3) " +
  "— an authored OR-clause is not enforceable on this runtime and is never " +
  "silently lowered to the conjunctive form; use all() for now";

/** Validate one serialized `match` node: known column, lint-clean compilable
 * regex, sane flags, and non-negative integer `group` / `min`. */
function validateMatchNode(
  node: unknown,
  columns: ReadonlySet<string>,
  who: string,
): string | undefined {
  if (!isRecord(node)) return `${who}: malformed match node`;
  const { field, source, flags, group, min } = node;
  if (typeof field !== "string") return `${who}: match without a field`;
  if (!columns.has(field)) {
    return `rule references unknown column "${field}"`;
  }
  if (typeof source !== "string") return `${who}: match without a source`;
  const lint = regexLintReason(source);
  if (lint) return lint;
  if (typeof flags !== "string" || !/^[dgimsuvy]*$/.test(flags)) {
    return `${who}: invalid regex flags`;
  }
  try {
    new RegExp(source, flags);
  } catch {
    return `${who}: invalid regex ${JSON.stringify(source)}`;
  }
  if (
    group !== undefined &&
    (!Number.isInteger(group) || (group as number) < 0)
  ) {
    return `${who}: invalid capture group`;
  }
  if (min !== undefined && (!Number.isInteger(min) || (min as number) < 0)) {
    return `${who}: invalid min`;
  }
  return undefined;
}

/** Validate the payload of a `principal` node (protocol + inner match). */
function validatePrincipalNode(
  node: unknown,
  columns: ReadonlySet<string>,
): string | undefined {
  if (!isRecord(node)) return "malformed principal node";
  const protocol = node.protocol;
  if (typeof protocol !== "string") return "malformed principal node";
  if (!DID_PROTOCOL.test(protocol)) {
    return `invalid DID protocol ${JSON.stringify(protocol)}`;
  }
  const of = node.of;
  if (!isRecord(of) || !("match" in of)) {
    return "principal() takes a match(...) term";
  }
  return validateMatchNode(of.match, columns, "principal");
}

function unknownOp(node: Record<string, unknown>, position: string): string {
  return `unknown rowLabel op in ${position} position: ` +
    `{${Object.keys(node).join(", ")}}`;
}

/** Validate the `when` half of a gate node (shared by both label halves). */
function validateWhenGate(
  node: Record<string, unknown>,
  columns: ReadonlySet<string>,
): string | undefined {
  const test = node.when;
  return isRecord(test) && "match" in test
    ? validateMatchNode(test.match, columns, "when")
    : "malformed when gate (use whenMatches())";
}

function validateConfTerm(
  node: unknown,
  columns: ReadonlySet<string>,
): string | undefined {
  if (!isRecord(node)) return "malformed confidentiality term";
  if ("anyOf" in node) return ANY_REJECTION;
  if ("intersect" in node) {
    return "intersect() is integrity-only (the trust-floor meet); " +
      "confidentiality combines by all() — and any() once OR-clauses land";
  }
  if ("authoredBy" in node || "endorsedBy" in node) {
    return "authoredBy()/endorsedBy() are integrity terms, not confidentiality";
  }
  if ("allOf" in node) return validateConfExpr(node, columns);
  if ("principal" in node) {
    return validatePrincipalNode(node.principal, columns);
  }
  if ("dbOwner" in node) {
    return node.dbOwner === true ? undefined : "malformed dbOwner node";
  }
  if ("constant" in node) return undefined;
  if ("when" in node) {
    const gate = validateWhenGate(node, columns);
    if (gate) return gate;
    return validateConfTerm(node.then, columns);
  }
  return unknownOp(node, "confidentiality");
}

function validateConfExpr(
  node: unknown,
  columns: ReadonlySet<string>,
): string | undefined {
  if (!isRecord(node)) return "malformed confidentiality expression";
  if ("anyOf" in node) return ANY_REJECTION;
  if ("allOf" in node) {
    const terms = node.allOf;
    if (!Array.isArray(terms) || terms.length === 0) {
      return "all() needs at least one term";
    }
    for (const t of terms) {
      const r = validateConfTerm(t, columns);
      if (r) return r;
    }
    return undefined;
  }
  return validateConfTerm(node, columns);
}

function validateIntegTerm(
  node: unknown,
  columns: ReadonlySet<string>,
): string | undefined {
  if (!isRecord(node)) return "malformed integrity term";
  if ("anyOf" in node) {
    return "disjunctive integrity does not exist (CFC spec §3.1.8): " +
      "integrity is a conjunction combined by meet";
  }
  if ("authoredBy" in node || "endorsedBy" in node) {
    // Pick the payload by key presence — the same way the evaluator picks
    // the atom kind — so what is validated is exactly what gets evaluated.
    const inner = "authoredBy" in node ? node.authoredBy : node.endorsedBy;
    if (!isRecord(inner) || !("principal" in inner)) {
      return "authoredBy()/endorsedBy() take a principal(...) term";
    }
    return validatePrincipalNode(inner.principal, columns);
  }
  if ("intersect" in node || "allOf" in node) {
    // Same precedence as the evaluator: `intersect` wins when both exist.
    const terms = "intersect" in node ? node.intersect : node.allOf;
    if (!Array.isArray(terms) || terms.length === 0) {
      return "intersect()/all() need at least one term";
    }
    for (const t of terms) {
      const r = validateIntegTerm(t, columns);
      if (r) return r;
    }
    return undefined;
  }
  if ("when" in node) {
    const gate = validateWhenGate(node, columns);
    if (gate) return gate;
    return validateIntegTerm(node.then, columns);
  }
  if ("constant" in node) return undefined;
  return unknownOp(node, "integrity");
}

/**
 * Validate a rowLabel spec against the declared column names. Returns the
 * failure reason, or undefined when valid. Used by `table()` at authoring
 * (throws) and MUST be re-run on wire-supplied specs before evaluation —
 * "couldn't validate" is never "no label".
 */
export function validateRowLabelSpec(
  spec: unknown,
  columns: readonly string[],
): string | undefined {
  if (!isRecord(spec)) return "rowLabel spec must be an object";
  if (spec.version !== 1) {
    return `unsupported rowLabel version ${JSON.stringify(spec.version)}`;
  }
  const cols = new Set<string>(columns);
  if (spec.confidentiality === undefined && spec.integrity === undefined) {
    return "rowLabel rule must declare confidentiality and/or integrity";
  }
  if (spec.confidentiality !== undefined) {
    const r = validateConfExpr(spec.confidentiality, cols);
    if (r) return r;
  }
  if (spec.integrity !== undefined) {
    const r = validateIntegTerm(spec.integrity, cols);
    if (r) return r;
  }
  return undefined;
}

/** Build + validate the serialized spec from an authored rule. Throws on any
 * violation (fail closed at definition time). Called by `table()`.
 *
 * @throws TypeError when the rule returns a non-object or fails validation. */
export function buildRowLabelSpec<C extends Record<string, unknown>>(
  columns: readonly string[],
  rule: RowLabelRule<C>,
): RowLabelSpec {
  const handles = Object.fromEntries(
    columns.map((name): [string, FieldRef] => [name, { field: name }]),
  ) as RowFieldHandles<C>;
  const out = rule(handles);
  if (!isRecord(out)) {
    throw new TypeError(
      "table(): a rowLabel rule must return { confidentiality?, integrity? }",
    );
  }
  const spec: RowLabelSpec = { version: 1 };
  if (out.confidentiality !== undefined) {
    spec.confidentiality = out.confidentiality;
  }
  if (out.integrity !== undefined) spec.integrity = out.integrity;
  const reason = validateRowLabelSpec(spec, columns);
  if (reason) {
    throw new TypeError(`table(): invalid rowLabel rule — ${reason}`);
  }
  return spec;
}

// ---------------------------------------------------------------------------
// Evaluation — one pure function, shared by write gate, server, and read.
// ---------------------------------------------------------------------------

/** Internal signal for "fail closed"; converted to `{error}` at the entry. */
class RowLabelEvalError extends Error {}

const fail = (msg: string): never => {
  throw new RowLabelEvalError(msg);
};

/** Stable structural key for dedup (atoms are small plain JSON). */
function atomKey(v: unknown): string {
  if (typeof v === "string") return `s:${v}`;
  // Sort object keys so structurally equal atoms share one key.
  const canonical = (_k: string, val: unknown): unknown =>
    isRecord(val)
      ? Object.fromEntries(
        Object.keys(val).sort().map((k): [string, unknown] => [k, val[k]]),
      )
      : val;
  return `j:${JSON.stringify(v, canonical)}`;
}

/** Drop structurally duplicate atoms, keeping first-occurrence order. */
function dedup(atoms: unknown[]): unknown[] {
  const seen = new Set<string>();
  const out: unknown[] = [];
  for (const a of atoms) {
    const k = atomKey(a);
    if (seen.has(k)) continue;
    seen.add(k);
    out.push(a);
  }
  return out;
}

function normalizeForProtocol(protocol: string, v: string): string {
  switch (protocol) {
    case "mailto":
    case "web":
      return v.trim().toLowerCase();
    default:
      // did:key is base58 (case-sensitive); unknown protocols: do nothing.
      return v;
  }
}

/** Compile a node's regex, converting every failure (non-string source or
 * flags, syntax error) into a fail-closed evaluation error so a hostile wire
 * spec can never surface as an uncaught SyntaxError/TypeError. */
function compileRegex(
  source: unknown,
  flags: unknown,
  forceGlobal: boolean,
): RegExp {
  if (typeof source !== "string") {
    return fail("malformed match node (no regex source)");
  }
  if (flags !== undefined && typeof flags !== "string") {
    return fail("malformed match node (invalid regex flags)");
  }
  const raw = flags ?? "";
  const effective = forceGlobal && !raw.includes("g") ? raw + "g" : raw;
  try {
    return new RegExp(source, effective);
  } catch {
    return fail(`invalid regex ${JSON.stringify(source)}`);
  }
}

/** Extract the match list for a field per the strict-if-present contract. */
function evalMatch(
  node: Record<string, unknown>,
  row: Record<string, unknown>,
): string[] {
  const { field, group, min } = node;
  if (typeof field !== "string") {
    return fail("malformed match node (no field)");
  }
  if (
    group !== undefined &&
    (typeof group !== "number" || !Number.isInteger(group) || group < 0)
  ) {
    return fail("malformed match node (invalid capture group)");
  }
  if (
    min !== undefined &&
    (typeof min !== "number" || !Number.isInteger(min) || min < 0)
  ) {
    return fail("malformed match node (invalid min)");
  }
  if (!(field in row)) {
    return fail(`rule input field "${field}" is absent from the row`);
  }
  const value = row[field];
  const values: string[] = [];
  if (value !== null && value !== undefined && value !== "") {
    if (typeof value !== "string") {
      return fail(
        `field "${field}" is ${typeof value}, not a string — regex input`,
      );
    }
    // Force the global flag like match() does at authoring: matchAll throws
    // on non-global regexes, and a hostile/legacy wire spec must degrade to
    // the documented split semantics, not an uncaught exception.
    const re = compileRegex(node.source, node.flags, true);
    for (const m of value.matchAll(re)) {
      const picked = group !== undefined ? m[group] : m[0];
      if (typeof picked === "string") values.push(picked);
    }
    if (values.length === 0) {
      // Strict-if-present: a populated field that yields nothing would
      // silently drop real principals (under-labeling). Fail closed.
      return fail(
        `field "${field}" is non-empty but matched nothing — refusing to ` +
          "under-label (strict-if-present)",
      );
    }
  }
  if (min !== undefined && values.length < min) {
    return fail(
      `field "${field}" yielded ${values.length} match(es); rule requires ` +
        `at least ${min}`,
    );
  }
  return values;
}

/** Evaluate a `when` gate: true iff the regex tests true on the column.
 * Null / undefined / empty values are simply "no match" (gate closed). */
function evalTest(
  test: unknown,
  row: Record<string, unknown>,
): boolean {
  if (!isRecord(test) || !isRecord(test.match)) {
    return fail("malformed when gate (use whenMatches())");
  }
  const { field, source, flags } = test.match;
  if (typeof field !== "string" || !(field in row)) {
    return fail(`rule input field "${String(field)}" is absent from the row`);
  }
  const value = row[field];
  if (value === null || value === undefined || value === "") return false;
  if (typeof value !== "string") {
    return fail(
      `field "${field}" is ${typeof value}, not a string — regex input`,
    );
  }
  return compileRegex(source, flags, false).test(value);
}

/** Expand a `principal` payload into one DID string per extracted match.
 * Every structural assumption is checked so malformed nodes fail closed. */
function evalPrincipal(
  node: unknown,
  row: Record<string, unknown>,
): string[] {
  if (!isRecord(node)) return fail("malformed principal node");
  const protocol = node.protocol;
  if (typeof protocol !== "string" || !DID_PROTOCOL.test(protocol)) {
    return fail("malformed principal node");
  }
  const of = node.of;
  if (!isRecord(of) || !isRecord(of.match)) {
    return fail("principal() takes a match(...) term");
  }
  return evalMatch(of.match, row).map(
    (v) => `did:${protocol}:${normalizeForProtocol(protocol, v)}`,
  );
}

function evalConf(
  node: unknown,
  row: Record<string, unknown>,
  ctx: { dbOwner?: string },
): unknown[] {
  if (!isRecord(node)) return fail("malformed confidentiality term");
  if ("anyOf" in node) {
    return fail(
      "anyOf (authored OR-clause) is not evaluable on this runtime — " +
        "fail closed, never flattened (CFC spec §18.5.3 rule 4)",
    );
  }
  if ("allOf" in node) {
    const terms = node.allOf;
    if (!Array.isArray(terms)) return fail("malformed all()");
    return terms.flatMap((t: unknown) => evalConf(t, row, ctx));
  }
  if ("principal" in node) {
    return evalPrincipal(node.principal, row);
  }
  if ("dbOwner" in node) {
    return ctx.dbOwner !== undefined ? [ctx.dbOwner] : fail(
      "dbOwner() has no owner in the evaluation context",
    );
  }
  if ("constant" in node) return [node.constant];
  if ("when" in node) {
    return evalTest(node.when, row) ? evalConf(node.then, row, ctx) : [];
  }
  return fail(unknownOp(node, "confidentiality"));
}

function evalInteg(
  node: unknown,
  row: Record<string, unknown>,
  ctx: { dbOwner?: string },
): unknown[] {
  if (!isRecord(node)) return fail("malformed integrity term");
  if ("anyOf" in node) return fail("disjunctive integrity does not exist");
  if ("authoredBy" in node || "endorsedBy" in node) {
    // Kind and payload are both chosen by key presence, so a node can never
    // mint one kind of claim from the other key's principal.
    const authored = "authoredBy" in node;
    const kind = authored ? "claimed-authored-by" : "claimed-endorsed-by";
    const inner = authored ? node.authoredBy : node.endorsedBy;
    if (!isRecord(inner)) {
      return fail("authoredBy()/endorsedBy() take a principal(...) term");
    }
    const subjects = evalPrincipal(inner.principal, row);
    if (subjects.length > 1) {
      return fail(
        `${subjects.length} matches in an integrity-bearing position — a ` +
          "provenance subject must be unique (display-name bait); fail closed",
      );
    }
    // Zero matches: the claim simply is not made (distinct from >1 = error).
    return subjects.map((subject) => ({ kind, subject }));
  }
  if ("intersect" in node) {
    const terms = node.intersect;
    if (!Array.isArray(terms) || terms.length === 0) {
      return fail("malformed intersect()");
    }
    const lists = terms.map((t: unknown) => evalInteg(t, row, ctx));
    // Meet: keep only atoms present (structurally) in every operand.
    return lists.reduce((acc, list) => {
      const keys = new Set(list.map(atomKey));
      return acc.filter((a) => keys.has(atomKey(a)));
    });
  }
  if ("allOf" in node) {
    const terms = node.allOf;
    if (!Array.isArray(terms)) return fail("malformed all()");
    return terms.flatMap((t: unknown) => evalInteg(t, row, ctx));
  }
  if ("when" in node) {
    return evalTest(node.when, row) ? evalInteg(node.then, row, ctx) : [];
  }
  if ("constant" in node) return [node.constant];
  return fail(unknownOp(node, "integrity"));
}

/**
 * Evaluate a rowLabel spec against a row's column values. Fail-closed: any
 * unresolvable input, malformed node, or policy violation returns `{error}` —
 * never a partial label. Callers turn `{error}` into a refused query /
 * rejected write.
 *
 * @param spec the serialized rule (validate wire-supplied specs first).
 * @param row column name → stored value for the row being labelled.
 * @param ctx fixed db properties; `dbOwner` is required by `dbOwner()` terms.
 * @returns deduplicated confidentiality / integrity atom lists, or `{error}`.
 */
export function evaluateRowLabel(
  spec: RowLabelSpec,
  row: Record<string, unknown>,
  ctx: { dbOwner?: string },
):
  | { confidentiality: unknown[]; integrity: unknown[] }
  | { error: string } {
  // Re-check the shape at runtime: the spec may have arrived over the wire.
  const raw: unknown = spec;
  if (!isRecord(raw) || raw.version !== 1) {
    return {
      error: `unsupported rowLabel version ${
        JSON.stringify(isRecord(raw) ? raw.version : raw)
      }`,
    };
  }
  try {
    const confidentiality = raw.confidentiality !== undefined
      ? dedup(evalConf(raw.confidentiality, row, ctx))
      : [];
    const integrity = raw.integrity !== undefined
      ? dedup(evalInteg(raw.integrity, row, ctx))
      : [];
    return { confidentiality, integrity };
  } catch (error) {
    // Only the evaluator's own fail-closed signal becomes `{error}`;
    // anything else is a genuine bug and keeps propagating.
    if (error instanceof RowLabelEvalError) return { error: error.message };
    throw error;
  }
}

/** The rule attached to a (possibly wire-supplied) table schema, or undefined.
 * Presence gates all Phase 3 work, so rule-less tables pay nothing. */
export function rowLabelSpecOf(tableSchema: unknown): RowLabelSpec | undefined {
  if (!isRecord(tableSchema)) return undefined;
  const spec = tableSchema.rowLabel;
  return isRecord(spec) ? spec as unknown as RowLabelSpec : undefined;
}

/**
 * The column names a rule reads (its input columns), in walk order, deduped.
 * The read side locates each of these in the projection by TRUE origin
 * `(table, column)` — never by output name — and refuses when one is missing
 * or ambiguous.
 *
 * `constant` atoms are literal data the evaluator never interprets, so their
 * contents are not walked (a literal shaped like `{ match: { field } }` is
 * not an input column).
 */
export function ruleInputFields(spec: RowLabelSpec): string[] {
  const seen = new Set<string>();
  const out: string[] = [];
  const walk = (n: unknown): void => {
    if (Array.isArray(n)) {
      for (const x of n) walk(x);
      return;
    }
    if (!isRecord(n)) return;
    const m = n.match;
    if (isRecord(m) && typeof m.field === "string" && !seen.has(m.field)) {
      seen.add(m.field);
      out.push(m.field);
    }
    for (const [key, v] of Object.entries(n)) {
      if (key !== "constant") walk(v);
    }
  };
  walk(spec.confidentiality);
  walk(spec.integrity);
  return out;
}