/**
 * Canonical hash of an arbitrary value, producing a deterministic digest
 * based on the value's logical structure.
 *
 * Replaces merkle-reference's CID-based hashing. Traverses the value tree
 * directly (no intermediate serialization) and feeds type-tagged data into
 * a single SHA-256 context. See Section 6 of the formal spec and the
 * byte-level spec for the full algorithm.
 *
 * Gated behind `ExperimentalOptions.canonicalHashing`.
 */
import { createHasher, type IncrementalHasher } from "./hash-impl.ts";
import { StorableContentId } from "./storable-content-id.ts";
import { StorableUint8Array } from "./storable-native-instances.ts";
import { DECONSTRUCT, isStorableInstance } from "./storable-protocol.ts";
import { NATIVE_TAGS, tagFromNativeValue } from "./type-tags.ts";
import { encodeULEB128 } from "@commontools/leb128";
import { bigintToMinimalTwosComplement } from "./bigint-encoding.ts";

// ---------------------------------------------------------------------------
// Type tag bytes (Section 2 of the byte-level spec)
// ---------------------------------------------------------------------------

// Meta (0x0N)
const TAG_END = 0x00;
const TAG_HOLE = 0x01;

// Compound (0x1N)
const TAG_ARRAY = 0x10;
const TAG_OBJECT = 0x11;
const TAG_INSTANCE = 0x12;

// Primitive (0x2N) -- ordered by conceptual size
const TAG_NULL = 0x20;
const TAG_UNDEFINED = 0x21;
const TAG_BOOLEAN = 0x22;
const TAG_NUMBER = 0x23;
const TAG_STRING = 0x24;
const TAG_BYTES = 0x25;
const TAG_BIGINT = 0x26;
const TAG_EPOCH_NSEC = 0x27;
const TAG_EPOCH_DAYS = 0x28;
const TAG_CONTENT_ID = 0x29;

// ---------------------------------------------------------------------------
// Pre-allocated tag byte arrays (avoids per-call allocation)
// ---------------------------------------------------------------------------

const TAG_END_BYTES = new Uint8Array([TAG_END]);
const TAG_HOLE_BYTES = new Uint8Array([TAG_HOLE]);
const TAG_ARRAY_BYTES = new Uint8Array([TAG_ARRAY]);
const TAG_OBJECT_BYTES = new Uint8Array([TAG_OBJECT]);
const TAG_INSTANCE_BYTES = new Uint8Array([TAG_INSTANCE]);
const TAG_NULL_BYTES = new Uint8Array([TAG_NULL]);
const TAG_UNDEFINED_BYTES = new Uint8Array([TAG_UNDEFINED]);
const TAG_BOOLEAN_TRUE_BYTES = new Uint8Array([TAG_BOOLEAN, 0x01]);
const TAG_BOOLEAN_FALSE_BYTES = new Uint8Array([TAG_BOOLEAN, 0x00]);
const TAG_NUMBER_BYTES = new Uint8Array([TAG_NUMBER]);
const TAG_STRING_BYTES = new Uint8Array([TAG_STRING]);
const TAG_BYTES_BYTES = new Uint8Array([TAG_BYTES]);
const TAG_BIGINT_BYTES = new Uint8Array([TAG_BIGINT]);
const TAG_EPOCH_NSEC_BYTES = new Uint8Array([TAG_EPOCH_NSEC]);
const TAG_EPOCH_DAYS_BYTES = new Uint8Array([TAG_EPOCH_DAYS]);
const TAG_CONTENT_ID_BYTES = new Uint8Array([TAG_CONTENT_ID]);

// ---------------------------------------------------------------------------
// Shared scratch buffer (safe in single-threaded synchronous JS -- see
// async safety analysis in PR #2856 review round 2)
// ---------------------------------------------------------------------------

/** Reusable 8-byte buffer for float64 encoding. */
const f64Buf = new ArrayBuffer(8);
const f64View = new DataView(f64Buf);
const f64Bytes = new Uint8Array(f64Buf);

/** Shared TextEncoder for UTF-8 string encoding. */
const encoder = new TextEncoder();

// ---------------------------------------------------------------------------
// Helper: feed an unsigned LEB128 length prefix
// ---------------------------------------------------------------------------

function feedLength(hasher: IncrementalHasher, value: number): void {
  hasher.update(encodeULEB128(value));
}

// ---------------------------------------------------------------------------
// Core: recursive value feeding
// ---------------------------------------------------------------------------

/**
 * Feed a single `StorableValue` into the hasher, using the type-tagged
 * byte format from the byte-level spec.
 */
function feedValue(hasher: IncrementalHasher, value: unknown): void {
  switch (typeof value) {
    case "boolean":
      hasher.update(value ? TAG_BOOLEAN_TRUE_BYTES : TAG_BOOLEAN_FALSE_BYTES);
      break;

    case "number":
      if (!Number.isFinite(value)) {
        throw new Error(
          `canonicalHash: non-finite number not allowed: ${value}`,
        );
      }
      hasher.update(TAG_NUMBER_BYTES);
      // Normalize -0 to +0.
      f64View.setFloat64(0, value === 0 ? 0 : value, false); // big-endian
      hasher.update(f64Bytes);
      break;

    case "string": {
      hasher.update(TAG_STRING_BYTES);
      const utf8 = encoder.encode(value);
      feedLength(hasher, utf8.length);
      hasher.update(utf8);
      break;
    }

    case "bigint": {
      hasher.update(TAG_BIGINT_BYTES);
      const bytes = bigintToMinimalTwosComplement(value);
      feedLength(hasher, bytes.length);
      hasher.update(bytes);
      break;
    }

    case "undefined":
      hasher.update(TAG_UNDEFINED_BYTES);
      break;

    case "object":
      if (value === null) {
        hasher.update(TAG_NULL_BYTES);
      } else {
        feedObjectValue(hasher, value);
      }
      break;

    default:
      throw new Error(
        `canonicalHash: unsupported type: ${typeof value}`,
      );
  }
}

/**
 * Feed an object-typed value (special primitives, StorableInstance, Array,
 * or plain object) into the hasher. Dispatches via `tagFromNativeValue()` /
 * `NATIVE_TAGS` for recognized types. The `null` case is handled by the
 * caller (`feedValue()`).
 */
function feedObjectValue(
  hasher: IncrementalHasher,
  value: object,
): void {
  // StorableUint8Array has a dedicated hash encoding (TAG_BYTES) but is a
  // StorableInstance wrapper, not a native Uint8Array. Handle before the
  // tagFromNativeValue switch.
  if (value instanceof StorableUint8Array) {
    hasher.update(TAG_BYTES_BYTES);
    const bytes = value.bytes;
    feedLength(hasher, bytes.length);
    hasher.update(bytes);
    return;
  }

  const nativeTag = tagFromNativeValue(value);

  switch (nativeTag) {
    case NATIVE_TAGS.EpochNsec: {
      hasher.update(TAG_EPOCH_NSEC_BYTES);
      const bytes = bigintToMinimalTwosComplement(
        (value as { value: bigint }).value,
      );
      feedLength(hasher, bytes.length);
      hasher.update(bytes);
      return;
    }

    case NATIVE_TAGS.EpochDays: {
      hasher.update(TAG_EPOCH_DAYS_BYTES);
      const bytes = bigintToMinimalTwosComplement(
        (value as { value: bigint }).value,
      );
      feedLength(hasher, bytes.length);
      hasher.update(bytes);
      return;
    }

    case NATIVE_TAGS.ContentId: {
      const cid = value as StorableContentId;
      hasher.update(TAG_CONTENT_ID_BYTES);
      const algTagUtf8 = encoder.encode(cid.algorithmTag);
      feedLength(hasher, algTagUtf8.length);
      hasher.update(algTagUtf8);
      feedLength(hasher, cid.hash.length);
      hasher.update(cid.hash);
      return;
    }

    case NATIVE_TAGS.Array:
      feedArray(hasher, value as unknown[]);
      return;

    case NATIVE_TAGS.Object:
      feedPlainObject(hasher, value as Record<string, unknown>);
      return;

      // Error, Map, Set, Date, HasToJSON: not valid StorableValue types for
      // hashing -- they should have been converted before reaching here.
      // Fall through to the StorableInstance / error path below.
  }

  // StorableInstance (generic protocol path via DECONSTRUCT).
  if (isStorableInstance(value)) {
    hasher.update(TAG_INSTANCE_BYTES);
    const typeTag = (value as { typeTag?: unknown }).typeTag;
    if (typeof typeTag !== "string") {
      throw new Error(
        `canonicalHash: StorableInstance missing typeTag property`,
      );
    }
    const typeTagUtf8 = encoder.encode(typeTag);
    feedLength(hasher, typeTagUtf8.length);
    hasher.update(typeTagUtf8);
    const state = value[DECONSTRUCT]();
    feedValue(hasher, state);
    return;
  }

  throw new Error(
    `canonicalHash: unsupported object type: ${
      value?.constructor?.name ?? typeof value
    }`,
  );
}

/**
 * Feed an array value with sparse hole handling, terminated by TAG_END.
 */
function feedArray(hasher: IncrementalHasher, value: unknown[]): void {
  hasher.update(TAG_ARRAY_BYTES);
  let i = 0;
  while (i < value.length) {
    if (!(i in value)) {
      // Start of a hole run -- coalesce consecutive holes.
      let runLen = 0;
      while (i < value.length && !(i in value)) {
        runLen++;
        i++;
      }
      hasher.update(TAG_HOLE_BYTES);
      feedLength(hasher, runLen);
    } else {
      feedValue(hasher, value[i]);
      i++;
    }
  }
  hasher.update(TAG_END_BYTES);
}

/**
 * Feed a plain object value, keys sorted by UTF-8 byte order, terminated
 * by TAG_END.
 */
function feedPlainObject(
  hasher: IncrementalHasher,
  value: Record<string, unknown>,
): void {
  const keys = Object.keys(value);
  // Sort keys by UTF-8 byte comparison.
  keys.sort((a, b) => {
    const aBytes = encoder.encode(a);
    const bBytes = encoder.encode(b);
    const minLen = Math.min(aBytes.length, bBytes.length);
    for (let j = 0; j < minLen; j++) {
      if (aBytes[j] !== bBytes[j]) return aBytes[j] - bBytes[j];
    }
    return aBytes.length - bBytes.length;
  });

  hasher.update(TAG_OBJECT_BYTES);
  for (const key of keys) {
    // Keys are encoded as TAG_STRING-style values (same format as strings).
    hasher.update(TAG_STRING_BYTES);
    const keyUtf8 = encoder.encode(key);
    feedLength(hasher, keyUtf8.length);
    hasher.update(keyUtf8);
    // Value is hashed recursively.
    feedValue(hasher, value[key]);
  }
  hasher.update(TAG_END_BYTES);
}

// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------

/**
 * Compute the canonical SHA-256 hash of a `StorableValue`. Returns a
 * `StorableContentId` with algorithm tag `fid1` (fabric ID, v1).
 * The caller (`refer()`) extracts the raw digest via `.hash` for
 * `Reference.fromDigest()`.
 */
export function canonicalHash(value: unknown): StorableContentId {
  const hasher = createHasher();
  feedValue(hasher, value);
  return new StorableContentId(hasher.digest(), "fid1");
}