# Source code for srdatalog.ir.mir.print

'''S-expression emitter for Python MIR, matching the output of
src/srdatalog/mir/printer.nim byte-for-byte.

The canonical text form is what Racket will ingest as the final metalang,
so S-expr is the target rather than JSON. Indentation uses two spaces per
level, matching the Nim printer's `repeat("  ", indent)` convention.

Every registered Nim MIR op kind now has a Python emitter case. For
ops where Nim's printer silently returns empty (no `of` branch —
CreateFlatView, ProbeJoin, GatherColumn), the Python formats are new
conventions; if byte-diff on those becomes necessary, add matching
cases to src/srdatalog/mir/printer.nim first.
'''

from __future__ import annotations

from collections.abc import Sequence

import srdatalog.ir.mir.types as m
from srdatalog.ir.hir.types import Version

# -----------------------------------------------------------------------------
# Small helpers (mirror Nim printVarTuple / printIndex / printVer)
# -----------------------------------------------------------------------------


def _var_tuple(vars: list[str]) -> str:
  '''Render variable names as a parenthesised, space-separated tuple.

  Example: ["x", "y", "z"] -> "(x y z)"; an empty list -> "()".
  No commas are emitted (the Nim printer separates with single spaces).
  '''
  inner = " ".join(vars)
  return f"({inner})"


def _index(rel_name: str, cols: list[int]) -> str:
  '''Render a relation name followed by its column numbers: "(Rel 0 1 ...)".

  The separator after the relation name is always emitted, so an empty
  `cols` yields "(Rel )" with a trailing space before the paren.
  '''
  col_text = " ".join(map(str, cols))
  return "".join(("(", rel_name, " ", col_text, ")"))


def _ver(v: Version) -> str:
  '''Return the printed form of a version, which is simply `v.value`.'''
  text = v.value
  return text


# -----------------------------------------------------------------------------
# index-spec helpers (used by ExecutePipeline's #:sources / #:dests)
# -----------------------------------------------------------------------------


def _index_spec(node: m.MirNode) -> str:
  '''Render `node` as an index-spec string (cf. printIndexSpec in printer.nim).

  - ColumnJoin / CartesianJoin: the flattened specs of every source,
    joined by single spaces.
  - ColumnSource / Negation / Scan: one spec using the node's own version.
  - InsertInto: one spec whose version is forced to Version.FULL.
  - Any other node: the literal string "void".
  '''
  # Joins carry no spec of their own; they expand into their sources' specs.
  if isinstance(node, (m.ColumnJoin, m.CartesianJoin)):
    return " ".join([_flatten_specs(child) for child in node.sources])

  # Pick the version to print; unsupported node kinds bail out with "void".
  if isinstance(node, (m.ColumnSource, m.Negation)):
    version = node.version
  elif isinstance(node, m.InsertInto):
    # Dest always uses the FULL index for dedup logic (matches Nim).
    version = Version.FULL
  elif isinstance(node, m.Scan):
    version = node.version
  else:
    return "void"

  columns = " ".join(map(str, node.index))
  return "".join((
    "(index-spec #:schema ",
    node.rel_name,
    " #:index (",
    columns,
    ")",
    " #:ver ",
    _ver(version),
    ")",
  ))


def _flatten_specs(node: m.MirNode) -> str:
  '''Expand `node` into the space-separated index-specs of its leaves.

  ColumnJoin and CartesianJoin nodes are descended into recursively;
  any other node is rendered directly via `_index_spec`.
  '''
  # Leaf case first: anything that is not a join prints as a single spec.
  if not isinstance(node, (m.ColumnJoin, m.CartesianJoin)):
    return _index_spec(node)
  pieces = [_flatten_specs(child) for child in node.sources]
  return " ".join(pieces)


def _index_specs_tuple(nodes: Sequence[m.MirNode], indent: int = 0) -> str:
  '''Render `nodes` as a "(tuple ...)" of index-specs (cf. printIndexSpecsTuple).

  An empty sequence gives "(tuple)". Otherwise each spec goes on its own
  line after "(tuple", prefixed by two spaces per `indent` level, and the
  closing paren follows the last spec directly. A top-level ColumnJoin /
  CartesianJoin contributes one entry per direct source instead of one
  entry for itself.
  '''
  pad = "  " * indent
  if not nodes:
    return "(tuple)"

  specs: list[str] = []
  for node in nodes:
    is_join = isinstance(node, (m.ColumnJoin, m.CartesianJoin))
    # Joins are unpacked one level: each direct source becomes its own entry.
    members = node.sources if is_join else (node,)
    specs.extend(_index_spec(member) for member in members)

  line_break = "\n" + pad
  return f"(tuple\n{pad}{line_break.join(specs)})"


# -----------------------------------------------------------------------------
# Main dispatcher
# -----------------------------------------------------------------------------