Source code for srdatalog.build

'''High-level project builder.

`build_project(program, project_name, ...)` runs the full pipeline in
one call:

  Program → HIR → MIR
         → emit per-rule complete runner structs
         → compute schema definitions + DB blueprint alias
         → write the .cpp tree to ~/.cache/srdatalog/jit/<project>_<hash>/

Returns the same dict as `cache.write_jit_project`.

The resulting `jit_batch_N.cpp` files are byte-identical to what Nim's
codegen writes to its own JIT cache — they self-contain the schema +
DB type alias, so the same compile flags / external deps that work
for Nim's output work here too.

This module is a thin wrapper over `srdatalog.ir.pipeline.compile_program`
plus the file-emitting layer (`cache.write_jit_project` + optional
shard/main file emission). The compile phase lives in `pipeline.py`
so viz / other consumers can share it without touching disk.
'''

from __future__ import annotations

from typing import TYPE_CHECKING

from srdatalog.ir.codegen.cuda.build.cache import JitProjectLayout, write_jit_project
from srdatalog.ir.codegen.cuda.main_file import (
  gen_extern_c_shim,
  gen_main_file_content,
  gen_run_dispatcher_file,
  gen_step_shard_file,
  gen_unity_main_file_content,
)
from srdatalog.ir.pipeline import compile_program

if TYPE_CHECKING:
  from srdatalog.dsl import Program



[docs]
def build_project(
  program: Program,
  project_name: str,
  *,
  cache_base: str | None = None,
  emit_main_file: bool = True,
  shard_step_bodies: bool = False,
  unity: bool = False,
) -> JitProjectLayout:
  '''Compile `program` end-to-end and write the .cpp tree.

  Args:
    program: a `srdatalog.Program` (DSL output).
    project_name: human-readable name used in C++ identifiers + cache
      dir. The C++ side derives `<project>_DB`, `<project>_DB_Blueprint`,
      `<project>_DB_DeviceDB`, and the cache lives at
      `~/.cache/srdatalog/jit/<project>_DB_<hash>/`.
    cache_base: override `~/.cache/srdatalog` (e.g., a tmpdir for
      tests, or `./build` for an in-tree project layout).
    emit_main_file: if True, also write a `main.cpp` containing the
      `_Runner` struct + relation typedefs. Set False if you only want
      the batch files (e.g., when integrating with an existing host
      binary that defines its own `<Project>_Runner`).
    shard_step_bodies: if True (default), emit each `step_N` and the
      `run()` dispatcher as its own compilable .cpp shard. This moves
      the heavy template instantiation out of main.cpp so the shards
      compile in parallel with the batch files. Set False to get the
      old layout (step bodies inline in main.cpp as template methods)
      — useful for byte-match tests against the Nim fragment fixture.
    unity: if True (default), emit ONE big .cpp containing the
      preamble + all JitRunner structs + _Runner + extern "C" shim.
      Parses `srdatalog.h` once per build instead of N times — the
      dominant cost when PCH isn't available. On doop this cuts cold
      compile from ~100s to ~20s. Set False for the traditional
      main + batch layout (better for byte-match testing against the
      Nim reference or for partial recompiles once PCH works).

  Returns the dict from `cache.write_jit_project`:
    { "dir", "main", "batches": [...], "schema_header", "kernel_header" }
  '''
  cr = compile_program(program, project_name)

  main_cpp = ""
  if emit_main_file:
    if unity:
      main_cpp = gen_unity_main_file_content(
        project_name,
        cr.hir.relation_decls,
        cr.mir,
        cr.step_bodies,
        cr.runner_decls,
        cr.per_rule_runners,
        extra_index_headers=cr.extra_headers,
        canonical_indices=cr.canonical_indices,
      )
    else:
      main_cpp = gen_main_file_content(
        project_name,
        cr.hir.relation_decls,
        cr.mir,
        cr.step_bodies,
        cr.runner_decls,
        cache_dir_hint="<cache>",
        jit_batch_count=1,
        emit_preamble=True,  # standalone TU — add #include "srdatalog.h" + namespaces
        extra_index_headers=cr.extra_headers,
        decl_only_runner=shard_step_bodies,
        canonical_indices=cr.canonical_indices,
      )
    main_cpp += "\n" + gen_extern_c_shim(project_name, cr.hir.relation_decls)

  # In unity mode we want no jit_batch_*.cpp files — they'd be
  # redundant (every JitRunner is already inlined in main.cpp).
  result = write_jit_project(
    cr.ext_db,
    main_file_content=main_cpp,
    per_rule_runners=[] if unity else cr.per_rule_runners,
    schema_definitions=cr.schema_defs,
    db_type_alias=cr.db_alias,
    extra_headers=cr.extra_headers,
    cache_base=cache_base,
  )

  # Sharded mode: emit one .cpp per step_N + one for run(), writing them
  # into the cache dir and appending their paths to the batch list so
  # compile_jit_project picks them up on its parallel compile queue.
  if shard_step_bodies and emit_main_file:
    import os

    for i in range(len(cr.mir.steps)):
      shard = gen_step_shard_file(
        project_name,
        cr.hir.relation_decls,
        cr.runner_decls,
        cr.mir,
        cr.step_bodies,
        i,
        extra_index_headers=cr.extra_headers,
      )
      path = os.path.join(str(result["dir"]), f"step_body_{i}.cpp")
      with open(path, "w") as f:
        f.write(shard)
      result["batches"].append(path)

    run_cpp = gen_run_dispatcher_file(
      project_name,
      cr.hir.relation_decls,
      cr.runner_decls,
      cr.mir,
      extra_index_headers=cr.extra_headers,
    )
    path = os.path.join(str(result["dir"]), "runner_dispatcher.cpp")
    with open(path, "w") as f:
      f.write(run_cpp)
    result["batches"].append(path)

  return result