# Source code for srdatalog.ir.mir.print

'''S-expression emitter for Python MIR, matching the output of
src/srdatalog/mir/printer.nim byte-for-byte.

The canonical text form is what Racket will ingest as the final metalang,
so S-expr is the target rather than JSON. Indentation uses two spaces per
level, matching the Nim printer's `repeat("  ", indent)` convention.

Every registered Nim MIR op kind now has a Python emitter case. For
ops where Nim's printer silently returns empty (no `of` branch —
CreateFlatView, ProbeJoin, GatherColumn), the Python formats are new
conventions; if byte-diff on those becomes necessary, add matching
cases to src/srdatalog/mir/printer.nim first.
'''

from __future__ import annotations

from collections.abc import Sequence

import srdatalog.ir.mir.types as m
from srdatalog.ir.hir.types import Version

# -----------------------------------------------------------------------------
# Small helpers (mirror Nim printVarTuple / printIndex / printVer)
# -----------------------------------------------------------------------------


def _var_tuple(vars: list[str]) -> str:
  '''Render variable names as a parenthesised, space-separated tuple.

  ["x", "y", "z"] becomes "(x y z)" and an empty list becomes "()".
  No commas are emitted, mirroring the Nim printer.
  '''
  joined = " ".join(vars)
  return f"({joined})"


def _index(rel_name: str, cols: list[int]) -> str:
  '''Render an index reference as "(Rel c0 c1 ...)".

  The relation name is always followed by a single space, so an empty
  column list yields "(Rel )".
  '''
  col_text = " ".join(map(str, cols))
  return f"({rel_name} {col_text})"


def _ver(v: Version) -> str:
  '''Return the printed form of a version, which is its `.value`.'''
  text = v.value
  return text


# -----------------------------------------------------------------------------
# index-spec helpers (used by ExecutePipeline's #:sources / #:dests)
# -----------------------------------------------------------------------------


def _index_spec(node: m.MirNode) -> str:
  '''Render the `(index-spec ...)` form for one node (cf. printIndexSpec
  in printer.nim).

  ColumnJoin / CartesianJoin nodes are flattened: the result is the
  space-separated concatenation of the specs of all their sources.
  ColumnSource, Negation and Scan nodes report their own version, while
  InsertInto always reports Version.FULL. Any other node kind renders as
  the bare token "void".
  '''
  if isinstance(node, (m.ColumnJoin, m.CartesianJoin)):
    return " ".join(map(_flatten_specs, node.sources))

  # Only the version differs between the leaf kinds; relation name and
  # index columns are read the same way for all of them.
  if isinstance(node, (m.ColumnSource, m.Negation)):
    version = node.version
  elif isinstance(node, m.InsertInto):
    # Dest always uses FULL index for dedup logic (matches Nim).
    version = Version.FULL
  elif isinstance(node, m.Scan):
    version = node.version
  else:
    return "void"

  columns = " ".join(map(str, node.index))
  return f"(index-spec #:schema {node.rel_name} #:index ({columns}) #:ver {_ver(version)})"


def _flatten_specs(node: m.MirNode) -> str:
  '''Return the space-separated leaf index-specs beneath `node`.

  Joins (ColumnJoin / CartesianJoin) are expanded recursively through
  their sources; any other node is handed to `_index_spec` as-is.
  '''
  is_join = isinstance(node, (m.ColumnJoin, m.CartesianJoin))
  if not is_join:
    return _index_spec(node)
  return " ".join(_flatten_specs(child) for child in node.sources)


def _index_specs_tuple(nodes: Sequence[m.MirNode], indent: int = 0) -> str:
  '''Render index-specs as a `(tuple ...)` form (cf. printIndexSpecsTuple).

  An empty sequence renders as "(tuple)". Otherwise each spec is placed
  on its own line after "(tuple", prefixed with two spaces per `indent`
  level. For a top-level ColumnJoin / CartesianJoin the direct sources
  are listed as separate entries instead of the join itself.
  '''
  if not nodes:
    return "(tuple)"

  pad = "  " * indent
  specs: list[str] = []
  for entry in nodes:
    if isinstance(entry, (m.ColumnJoin, m.CartesianJoin)):
      specs.extend(_index_spec(leaf) for leaf in entry.sources)
    else:
      specs.append(_index_spec(entry))

  separator = "\n" + pad
  return f"(tuple\n{pad}{separator.join(specs)})"


# -----------------------------------------------------------------------------
# Main dispatcher
# -----------------------------------------------------------------------------



# [docs]
def _column_list(cols: Sequence[int]) -> str:
  '''"(0 1 ...)" — a bare column list with no relation name.'''
  return "(" + " ".join(str(c) for c in cols) + ")"


def _child_lines(children: Sequence[m.MirNode], indent: int) -> str:
  '''Render each child at `indent`, every one terminated by a newline.'''
  return "".join(print_mir_sexpr(child, indent) + "\n" for child in children)


def print_mir_sexpr(node: m.MirNode, indent: int = 0) -> str:
  '''Return the S-expression text for `node`.

  The opening line is prefixed with two spaces per `indent` level.
  Container nodes render their children recursively at `indent + 2`
  (Program renders each step's plan at `indent + 4`), one child per line.
  BalancedScan and PositionedExtract render their sources inline, at
  indent 0.

  Raises:
    TypeError: if `node` is not one of the MIR node kinds handled here.
  '''
  pad = "  " * indent
  nested = indent + 2

  # --- Leaves ---

  if isinstance(node, m.ColumnSource):
    return (
      f"{pad}(column-source"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:ver {_ver(node.version)}"
      f" #:prefix {_var_tuple(node.prefix_vars)})"
    )

  if isinstance(node, m.Scan):
    return (
      f"{pad}(scan"
      f" #:vars {_var_tuple(node.vars)}"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:ver {_ver(node.version)}"
      f" #:prefix {_var_tuple(node.prefix_vars)})"
    )

  if isinstance(node, m.ColumnJoin):
    return (
      f"{pad}(column-join #:var {node.var_name}\n"
      f"{pad}  #:sources (\n"
      f"{_child_lines(node.sources, nested)}"
      f"{pad}  ))"
    )

  if isinstance(node, m.CartesianJoin):
    # The #:var-from-source clause is only emitted when the list is non-empty.
    origin = ""
    if node.var_from_source:
      origin = " #:var-from-source (" + " ".join(map(_var_tuple, node.var_from_source)) + ")"
    return (
      f"{pad}(cartesian-join #:vars {_var_tuple(node.vars)}{origin}\n"
      f"{pad}  #:sources (\n"
      f"{_child_lines(node.sources, nested)}"
      f"{pad}  ))"
    )

  if isinstance(node, m.Filter):
    # The code string is emitted verbatim between double quotes (no escaping).
    return f'{pad}(filter #:vars {_var_tuple(node.vars)} #:code "{node.code}")'

  if isinstance(node, m.ConstantBind):
    return (
      f"{pad}(constant-bind"
      f" #:var {node.var_name}"
      f' #:code "{node.code}"'
      f" #:deps {_var_tuple(node.deps)})"
    )

  if isinstance(node, m.Aggregate):
    return (
      f"{pad}(aggregate"
      f" #:var {node.result_var}"
      f" #:func {node.agg_func}"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:ver {_ver(node.version)}"
      f" #:prefix {_var_tuple(node.prefix_vars)})"
    )

  if isinstance(node, m.CreateFlatView):
    # (create-flat-view #:index (Rel cols) #:ver V) — aligned with the
    # rebuild-index style and matches the Nim printer case added
    # alongside the Python split-rule lowering.
    # NOTE(review): the module docstring lists CreateFlatView among the ops
    # with no Nim printer case — confirm which statement is current.
    return (
      f"{pad}(create-flat-view"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:ver {_ver(node.version)})"
    )

  if isinstance(node, m.InnerPipeline):
    rule = f" #:rule {node.rule_name}" if node.rule_name else ""
    return (
      f"{pad}(inner-pipeline{rule} #:bound-vars {_var_tuple(node.bound_vars)}\n"
      f"{pad}  #:handles (\n"
      f"{_child_lines(node.input_handles, nested)}"
      f"{pad}  )\n"
      f"{pad}  #:ops (\n"
      f"{_child_lines(node.inner_ops, nested)}"
      f"{pad}  ))"
    )

  if isinstance(node, m.ProbeJoin):
    # Nim printer has no case for moProbeJoin — Python convention.
    buffer_in = f" #:input-buffer {node.input_buffer}" if node.input_buffer else ""
    return (
      f"{pad}(probe-join{buffer_in}"
      f" #:output-buffer {node.output_buffer}"
      f" #:probe {_index(node.probe_rel, node.probe_index)}"
      f" #:ver {_ver(node.probe_version)}"
      f" #:key {node.join_key})"
    )

  if isinstance(node, m.GatherColumn):
    # Nim printer has no case for moGatherColumn — Python convention.
    buffer_in = f" #:input-buffer {node.input_buffer}" if node.input_buffer else ""
    return (
      f"{pad}(gather-column{buffer_in}"
      f" #:schema {node.rel_name}"
      f" #:ver {_ver(node.rel_version)}"
      f" #:col {node.column!s}"
      f" #:out {node.output_var})"
    )

  if isinstance(node, m.Negation):
    return (
      f"{pad}(negation"
      f" #:schema {node.rel_name}"
      f" #:ver {_ver(node.version)}"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:prefix {_var_tuple(node.prefix_vars)})"
    )

  if isinstance(node, m.InsertInto):
    return (
      f"{pad}(insert-into"
      f" #:schema {node.rel_name}"
      f" #:ver {_ver(node.version)}"
      f" #:dedup-index {_column_list(node.index)}"
      f" #:terms {_var_tuple(node.vars)})"
    )

  # --- Fixpoint maintenance ---

  if isinstance(node, m.RebuildIndex):
    return (
      f"{pad}(rebuild-index"
      f" #:index {_index(node.rel_name, node.index)}"
      f" #:ver {_ver(node.version)})"
    )

  if isinstance(node, m.ClearRelation):
    return f"{pad}(clear-relation #:schema {node.rel_name} #:ver {_ver(node.version)})"

  if isinstance(node, m.CheckSize):
    return f"{pad}(check-size #:schema {node.rel_name} #:ver {_ver(node.version)})"

  if isinstance(node, m.ComputeDelta):
    return f"{pad}(compute-delta #:schema {node.rel_name})"

  if isinstance(node, m.ComputeDeltaIndex):
    return (
      f"{pad}(compute-delta-index"
      f" #:schema {node.rel_name}"
      f" #:canonical-index {_column_list(node.canonical_index)})"
    )

  if isinstance(node, m.MergeIndex):
    return f"{pad}(merge-index #:index {_index(node.rel_name, node.index)})"

  if isinstance(node, m.MergeRelation):
    return f"{pad}(merge-relation #:schema {node.rel_name})"

  if isinstance(node, m.RebuildIndexFromIndex):
    return (
      f"{pad}(rebuild-index-from-index"
      f" #:source {_index(node.rel_name, node.source_index)}"
      f" #:target {_index(node.rel_name, node.target_index)}"
      f" #:ver {_ver(node.version)})"
    )

  # --- Structural ---

  if isinstance(node, m.ExecutePipeline):
    rule = f" #:rule {node.rule_name}" if node.rule_name else ""
    sources = _index_specs_tuple(node.source_specs, nested)
    dests = _index_specs_tuple(node.dest_specs, nested)
    return (
      f"{pad}(execute-pipeline{rule}\n"
      f"{pad}  #:sources {sources}\n"
      f"{pad}  #:dests {dests}\n"
      f"{_child_lines(node.pipeline, nested)}"
      f"{pad})"
    )

  if isinstance(node, m.FixpointPlan):
    return f"{pad}(fixpoint-plan\n{_child_lines(node.instructions, nested)}{pad})"

  if isinstance(node, m.Block):
    return f"{pad}(block\n{_child_lines(node.instructions, nested)}{pad})"

  if isinstance(node, m.BalancedScan):
    # (balanced-scan #:group-var v #:source1 (...) #:source2 (...)
    #                #:vars1 (...) #:vars2 (...))
    # The optional #:vars clauses trail the #:source2 line.
    tail = ""
    if node.vars1:
      tail += f" #:vars1 {_var_tuple(node.vars1)}"
    if node.vars2:
      tail += f" #:vars2 {_var_tuple(node.vars2)}"
    return (
      f"{pad}(balanced-scan #:group-var {node.group_var}\n"
      f"{pad}  #:source1 {print_mir_sexpr(node.source1, 0)}\n"
      f"{pad}  #:source2 {print_mir_sexpr(node.source2, 0)}{tail})"
    )

  if isinstance(node, m.PositionedExtract):
    # Sources are rendered inline, separated by single spaces.
    inline_sources = " ".join(print_mir_sexpr(src, 0) for src in node.sources)
    return (
      f"{pad}(positioned-extract"
      f" #:var {node.var_name}"
      f" #:sources ({inline_sources})"
      f" #:bind {_var_tuple(node.bind_vars)})"
    )

  if isinstance(node, m.ParallelGroup):
    return (
      f"{pad}(parallel-group  ;; {len(node.ops)!s} independent ops\n"
      f"{_child_lines(node.ops, nested)}"
      f"{pad})"
    )

  if isinstance(node, m.InjectCppHook):
    rule = f" #:rule {node.rule_name}" if node.rule_name else ""
    # Nim always writes #:code "..." (literal ellipsis) rather than dumping
    # the raw body — keeps the S-expr readable.
    return f'{pad}(inject-cpp-hook{rule} #:code "...")'

  if isinstance(node, m.PostStratumReconstructInternCols):
    return (
      f"{pad}(post-stratum-reconstruct-intern-cols"
      f" #:rel {node.rel_name}"
      f" #:canonical-index {_column_list(node.canonical_index)})"
    )

  if isinstance(node, m.Program):
    # Nim uses indent+4 for the nested plan (unlike Block/FixpointPlan which use +2),
    # and bool printing in Nim is lowercase "true"/"false" — both affect byte-diff.
    chunks = [f"{pad}(program\n"]
    for plan, is_rec in node.steps:
      flag = "true" if is_rec else "false"
      chunks.append(f"{pad}  (step #:recursive {flag}\n")
      chunks.append(print_mir_sexpr(plan, indent + 4) + "\n")
      chunks.append(f"{pad}  )\n")
    chunks.append(f"{pad})")
    return "".join(chunks)

  raise TypeError(f"Unsupported MIR node: {type(node).__name__}")