Source code for srdatalog.runtime

'''Bundled C++ runtime headers + vendored deps.

The Python codegen emits `jit_batch_<rule>.cpp` files that `#include`
`srdatalog.h` (the bundled SRDatalog runtime). The runtime in turn
includes boost / highway / RMM / spdlog (also bundled, under
`vendor/`) plus CUDA SDK headers (NOT bundled — must come from a
system NVIDIA install; auto-detected via `find_cuda_root()`).

`runtime_include_paths()` returns every -I path needed to compile
batch files, EXCEPT the CUDA toolkit. Combine with
`cuda_include_paths()` and `cuda_compile_flags()` for a full
CompilerConfig:

    from srdatalog import CompilerConfig
    from srdatalog.runtime import (
      runtime_include_paths, cuda_include_paths,
      cuda_compile_flags, runtime_defines, runtime_undefines,
    )

    cfg = CompilerConfig(
      cxx="acpp",
      include_paths=runtime_include_paths() + cuda_include_paths(),
      defines=runtime_defines(),
      cxx_flags=cuda_compile_flags(),
    )
'''

from __future__ import annotations

import glob
import os
from pathlib import Path

_HERE = Path(__file__).resolve().parent


# ---------------------------------------------------------------------------
# Bundled runtime + vendor paths
# ---------------------------------------------------------------------------



[docs]
def runtime_include_path() -> str:
  '''Absolute path to the bundled `generalized_datalog/` headers.
  Kept for back-compat — prefer `runtime_include_paths()` (plural).'''
  return str(_HERE / "generalized_datalog")




[docs]
def runtime_include_paths() -> list[str]:
  '''All bundled C++ -I paths (runtime + vendored deps). CUDA paths
  are NOT included here — see `cuda_include_paths()`.

  Order matters: more-specific subdirs first so `#include "../mir.h"`
  resolves correctly via path arithmetic (e.g. `build/../mir.h` =
  `mir.h`).
  '''
  rt = _HERE / "generalized_datalog"
  vendor = _HERE / "vendor"
  return [
    str(rt),
    str(rt / "build"),
    str(rt / "gpu" / "runtime"),
    str(rt / "gpu" / "runtime" / "instructions"),
    str(rt / "gpu" / "runtime" / "executor_impl"),
    str(vendor / "boost" / "include"),
    str(vendor / "highway" / "include"),
    str(vendor / "rmm" / "include"),
    str(vendor / "spdlog" / "include"),
  ]




[docs]
def runtime_defines() -> list[str]:
  '''Preprocessor `-D` flags the runtime expects. Append to
  `CompilerConfig.defines`.

  `ENABLE_LOGGING` is intentionally NOT here — it pulls in boost::log
  and its phoenix/proto/spirit transitive deps (~50 MB of headers we
  don't ship in the wheel). Opt in by appending `"ENABLE_LOGGING"` to
  your own defines AND supplying system boost::log headers via an
  additional `-I` path, e.g.::

      cfg.defines.append("ENABLE_LOGGING")
      cfg.include_paths.append("/usr/include")  # system boost
  '''
  return [
    "LIBCUDACXX_ENABLE_EXPERIMENTAL_MEMORY_RESOURCE",
    "USE_CUDA",
    "BOOST_ATOMIC_NO_CMPXCHG16B",
    "SPDLOG_USE_STD_FORMAT",
    "NDEBUG",
    "_GLIBCXX_USE_CXX11_ABI=1",
  ]




[docs]
def runtime_undefines() -> list[str]:
  '''Preprocessor `-U` undefines (paired with `defines`).'''
  return ["__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16"]




[docs]
def has_vendored_deps() -> bool:
  '''True if boost/highway/rmm/spdlog headers are present under
  `runtime/vendor/`. False on a fresh clone — call
  `srdatalog populate-vendor` (or use the install hook) to fetch
  them.'''
  vendor = _HERE / "vendor"
  return all((vendor / dep / "include").is_dir() for dep in ("boost", "highway", "rmm", "spdlog"))



# ---------------------------------------------------------------------------
# CUDA toolkit auto-detection
# ---------------------------------------------------------------------------



[docs]
def find_cuda_root() -> str | None:
  '''Locate a usable CUDA toolkit. Tries (in order):
    - `$CUDA_HOME` / `$CUDA_PATH`
    - NVIDIA HPC SDK: `/opt/nvidia/hpc_sdk/.../cuda/<version>`
    - Standard install: `/usr/local/cuda`, `/opt/cuda`

  Among multiple HPC SDK CUDA versions, prefer one where
  `thrust/optional.h` exists — that header was removed in CUDA 13.0+
  but RMM (bundled in vendor/) still needs it. So 12.9 is preferred
  over 13.0+ on machines that have both.
  '''
  for var in ("CUDA_HOME", "CUDA_PATH"):
    if os.environ.get(var) and os.path.isdir(os.environ[var]):
      return os.environ[var]

  hpc_sdk = sorted(glob.glob("/opt/nvidia/hpc_sdk/Linux_x86_64/*/cuda/*"))
  standard = ["/usr/local/cuda", "/opt/cuda"]

  def _score(path: str) -> tuple[int, str]:
    if not os.path.isfile(os.path.join(path, "include", "cuda_runtime.h")):
      return (-1, path)
    has_thrust_opt = os.path.isfile(
      os.path.join(path, "targets", "x86_64-linux", "include", "thrust", "optional.h")
    ) or os.path.isfile(os.path.join(path, "include", "thrust", "optional.h"))
    return (1 if has_thrust_opt else 0, path)

  candidates = [(_score(p), p) for p in hpc_sdk + standard]
  candidates = [(s, p) for s, p in candidates if s[0] >= 0]
  if not candidates:
    return None
  candidates.sort(reverse=True)
  return candidates[0][1]




[docs]
def cuda_include_paths(cuda_root: str | None = None) -> list[str]:
  '''All -I paths a CUDA-enabled compile needs. Auto-detects via
  `find_cuda_root()` when `cuda_root` is None; raises RuntimeError
  if no toolkit is found.'''
  root = cuda_root or find_cuda_root()
  if root is None:
    raise RuntimeError(
      "No CUDA toolkit found. Install one (NVIDIA HPC SDK or CUDA Toolkit) "
      "and set $CUDA_HOME, or pass `cuda_root=` explicitly."
    )
  paths = [
    os.path.join(root, "include"),
    os.path.join(root, "targets", "x86_64-linux", "include"),
    os.path.join(root, "targets", "x86_64-linux", "include", "crt"),
    # CUDA 13.0+ relocates libcudacxx (cuda/std/*, cuda/stream_ref, etc.)
    # under a `cccl/` subdir; older versions had it directly.
    os.path.join(root, "targets", "x86_64-linux", "include", "cccl"),
  ]
  # math_libs (curand, cublas) lives one level above `cuda/<ver>` in
  # NVIDIA HPC SDK layouts.
  parent = os.path.dirname(root)
  ver = os.path.basename(root)
  math_libs = os.path.join(parent, "..", "math_libs", ver, "targets", "x86_64-linux", "include")
  if os.path.isdir(math_libs):
    paths.append(math_libs)
  return [p for p in paths if os.path.isdir(p)]




[docs]
def cuda_compile_flags(
  cuda_root: str | None = None,
  gpu_arch: str = "sm_89",
) -> list[str]:
  '''CUDA-specific clang flags (`-x cuda`, `--cuda-gpu-arch=`,
  `--cuda-path=`). Combine with `runtime_include_paths()` +
  `cuda_include_paths()` for a full CompilerConfig.'''
  root = cuda_root or find_cuda_root()
  if root is None:
    raise RuntimeError("CUDA toolkit not found")
  return [
    "-Qunused-arguments",
    "-x",
    "cuda",
    f"--cuda-gpu-arch={gpu_arch}",
    f"--cuda-path={root}",
    # Match Nim's docker/config.nims passC set so our compiled `.so`
    # matches the Nim executable's codegen. `-O3` changes GPU codegen
    # (loop unrolling, register allocation); `-m64` locks ABI bit-width;
    # the atomic flags keep boost::atomic from switching cmpxchg16b
    # variants under our feet.
    "-O3",
    "-g",
    "-m64",
    "-DSRDATALOG_GPU_AVAILABLE=1",
    "-DENABLE_LOGGING",
    "-DBOOST_ATOMIC_NO_CMPXCHG16B",
    "-U__GCC_HAVE_SYNC_COMPARE_AND_SWAP_16",
    "-Wno-unknown-cuda-version",
  ]



def _vendor_lib_dir() -> str | None:
  '''Locate the vendor boost-static libs.

  The emitted `.so` includes vendor boost HEADERS (version 1.90 by
  `BOOST_VERSION` in `vendor/include/boost/version.hpp`). Linking the
  system's `libboost_container.so.1.74` would be an ABI mismatch (16
  minor versions apart) and surfaces as downstream corruption — in
  practice, `cuKernelGetFunction: INVALID_HANDLE` on kernel launches.

  Search order:
    1. `SRDATALOG_VENDOR_LIB` env var — explicit override.
    2. The in-tree sibling at `<project>/src/srdatalog/runtime/vendor/lib`.
    3. The upstream Nim repo at `/home/stargazermiao/workspace/SRDatalog/src/srdatalog/vendor/lib`
       — fallback for dev machines where the wheel's populate_vendor hook
       hasn't run.
  '''
  env = os.environ.get("SRDATALOG_VENDOR_LIB")
  if env and os.path.isdir(env):
    return env
  here = os.path.dirname(os.path.abspath(__file__))
  local = os.path.join(here, "vendor", "lib")
  if os.path.isdir(local):
    return local
  nim = "/home/stargazermiao/workspace/SRDatalog/src/srdatalog/vendor/lib"
  if os.path.isdir(nim):
    return nim
  return None



[docs]
def cuda_link_flags(cuda_root: str | None = None) -> list[str]:
  '''`-L` paths the link step needs. Pair with `cuda_libs()`.'''
  root = cuda_root or find_cuda_root()
  if root is None:
    raise RuntimeError("CUDA toolkit not found")
  candidates = [
    os.path.join(root, "lib64"),
    os.path.join(root, "targets", "x86_64-linux", "lib"),
    os.path.join(root, "lib"),
  ]
  flags = [f"-L{p}" for p in candidates if os.path.isdir(p)] + [
    # rpath embedding — .so stays loadable even when LD_LIBRARY_PATH
    # doesn't include the CUDA lib dir (common on NVIDIA HPC SDK
    # installs that rely on modulefile-set envs).
    f"-Wl,-rpath,{p}"
    for p in candidates
    if os.path.isdir(p)
  ]
  vendor = _vendor_lib_dir()
  if vendor is not None:
    flags.insert(0, f"-L{vendor}")
  return flags




[docs]
def cuda_libs() -> list[str]:
  '''`-l<libname>` entries needed to satisfy the runtime's CUDA symbol
  references (cudaMemcpyAsync, cuLaunchKernel, ...) and the vendored
  boost libs whose ABI must match the vendored boost headers.

  Boost is listed BEFORE cudart so that the static `.a` in the vendor
  dir wins over any system `libboost_*.so` on the `-L` search path.
  '''
  return [
    "boost_container",
    "boost_log",
    "boost_log_setup",
    "boost_thread",
    "boost_filesystem",
    "boost_atomic",
    "cudart",
    "cuda",
  ]