Source code for srdatalog.ir.pipeline

'''Pure compile pipeline — Program → compiled artifacts, no disk I/O.

`build_project` (build.py) and `srdatalog.viz` both want the same
intermediate artifacts: HIR, MIR, per-rule runner code, schema defs,
DB alias. The difference is what they do NEXT — `build_project`
writes a .cpp tree to disk; viz renders in a webview.

Splitting the pipeline at the compile/write boundary lets both
callers share the work. This module is that shared core.
'''

from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

from srdatalog.ir.codegen.cuda.batchfile import _collect_pipelines
from srdatalog.ir.codegen.cuda.complete_runner import gen_complete_runner
from srdatalog.ir.codegen.cuda.main_file import (
  gen_db_type_alias_for_batch,
  gen_schema_definitions_for_batch,
)
from srdatalog.ir.codegen.cuda.orchestrator import gen_step_body
from srdatalog.ir.hir import compile_to_hir, compile_to_mir

if TYPE_CHECKING:
  from srdatalog.dsl import Program
  from srdatalog.ir.hir.types import HirProgram
  from srdatalog.ir.mir.types import Program as MirProgram


# Per-relation index type → header file. Registered at emit time
# because it mirrors Nim's "plugin" registry (an emit-time concern,
# not a runtime one).
#
# Keys are fully-qualified C++ index type names, matched against the
# `index_type` of each relation declaration; values are the header
# paths that `compile_program` collects into
# `CompileResult.extra_headers`. An index type that is not listed here
# contributes no extra header.
_INDEX_HEADER: dict[str, str] = {
  "SRDatalog::GPU::Device2LevelIndex": "gpu/device_2level_index.h",
  "SRDatalog::GPU::DeviceTvjoinIndex": "gpu/device_tvjoin_index.h",
}


@dataclass(frozen=True)
class CompileResult:
  '''Everything a downstream consumer (build_project, viz) needs from
  the compile pipeline, in memory.

  The dataclass is frozen, so fields cannot be rebound after
  construction. The list/dict values are still ordinary mutable
  containers; freezing does not make their contents immutable.
  '''

  # High-level and mid-level IR produced by the compile pipeline.
  hir: HirProgram
  mir: MirProgram

  # The <project>_DB and <project>_DB_DeviceDB C++ type names the
  # codegen uses. Stored here so file-emitting callers don't have to
  # rebuild the string.
  ext_db: str
  device_db: str

  # Per-orchestrator-step body (one per MIR step, in step order).
  step_bodies: list[str]

  # Per-rule complete runner: (rule_name, full_cpp_code). This is the
  # struct + kernels + phase methods + execute() for one rule. Goes
  # into a jit_batch_N.cpp when writing to disk.
  per_rule_runners: list[tuple[str, str]]

  # Per-rule runner forward declarations, keyed by rule name. main.cpp
  # references these; jit_batch files provide the definitions.
  runner_decls: dict[str, str]

  # Schema definitions string — inlined into every batch file so the
  # batch compiles in isolation.
  schema_defs: str

  # `using <project>_DB_Blueprint = Database<...>;` — inlined into
  # every batch file.
  db_alias: str

  # Per-relation canonical index, merged across strata (later strata
  # override earlier). Used by the print_size emit block so the
  # readback query hits the actual planned index.
  canonical_indices: dict[str, list[int]] = field(default_factory=dict)

  # Extra #include headers needed by non-default index plugins
  # (Device2LevelIndex, etc).
  extra_headers: list[str] = field(default_factory=list)

  # Per-relation index type string (C++ template name), for relations
  # that override the default. Empty-string entries are dropped.
  rel_index_types: dict[str, str] = field(default_factory=dict)
def compile_program(program: Program, project_name: str) -> CompileResult:
  '''Run the full compile pipeline — HIR → MIR → all emitted strings.

  Stops before any file I/O. The resulting `CompileResult` is the point
  both `build_project` (writes it to disk) and the viz module (renders
  it in a webview) branch from.

  `program` is the DSL program to compile. `project_name` prefixes the
  generated C++ database type names (`<project_name>_DB` and
  `<project_name>_DB_DeviceDB`).
  '''
  hir = compile_to_hir(program)
  mir = compile_to_mir(program, hir=hir)

  ext_db = f"{project_name}_DB"
  device_db = f"{ext_db}_DeviceDB"

  # Only relations with a non-empty index_type are recorded.
  rel_index_types = {d.rel_name: d.index_type for d in hir.relation_decls if d.index_type}

  # Dedupe index types in first-seen (declaration) order. Iterating a
  # set of strings here would make the header order depend on per-process
  # hash randomization, so the emitted #include list could differ from
  # run to run.
  seen_idx_types = dict.fromkeys(rel_index_types.values())
  extra_headers = [_INDEX_HEADER[t] for t in seen_idx_types if t in _INDEX_HEADER]

  # One generated body per MIR step, in step order; the step's position
  # is passed through to the generator.
  step_bodies = [
    gen_step_body(step, device_db, is_rec, i) for i, (step, is_rec) in enumerate(mir.steps)
  ]

  # Each collected pipeline yields a forward declaration and the full
  # runner source; both are recorded under the pipeline's rule name.
  per_rule_runners: list[tuple[str, str]] = []
  runner_decls: dict[str, str] = {}
  for ep in _collect_pipelines(mir):
    decl, full = gen_complete_runner(ep, device_db, rel_index_types=rel_index_types)
    per_rule_runners.append((ep.rule_name, full))
    runner_decls[ep.rule_name] = decl

  schema_defs = gen_schema_definitions_for_batch(hir.relation_decls)
  db_alias = gen_db_type_alias_for_batch(ext_db, hir.relation_decls)

  # Merge canonical indices across strata; later strata override earlier
  # entries. Mirrors Nim's compileToMirWithDecls canonicalIndices merge
  # (hir.nim:310-314).
  canonical_indices: dict[str, list[int]] = {}
  for s in hir.strata:
    for rel, cols in s.canonical_index.items():
      # Copy so the result does not alias the stratum's own column list.
      canonical_indices[rel] = list(cols)

  return CompileResult(
    hir=hir,
    mir=mir,
    ext_db=ext_db,
    device_db=device_db,
    step_bodies=step_bodies,
    per_rule_runners=per_rule_runners,
    runner_decls=runner_decls,
    schema_defs=schema_defs,
    db_alias=db_alias,
    canonical_indices=canonical_indices,
    extra_headers=extra_headers,
    rel_index_types=rel_index_types,
  )