# Source code for srdatalog.ir.hir.types

'''Python mirror of src/srdatalog/hir/hir_types.nim.

These are the high-level IR types that HIR passes (stratification, planning,
index selection, lowering) read and write. The Python pipeline must produce
HIR that serializes byte-identically to the Nim golden for differential testing.

Keep field names in sync with the Nim side; the canonical emitter (hir_emit.py, TBD)
will translate between snake_case Python and the camelCase keys used in
json_printer.nim's output.
'''

from __future__ import annotations

from dataclasses import dataclass, field
from enum import Enum

from srdatalog.dsl import Rule


class Version(Enum):
    '''Mirrors Nim hir_types.Version.

    `.value` stays the plain string (`"FULL"`, `"DELTA"`, `"NEW"`) so the MIR
    S-expr emitter's byte-match against Nim continues to work. For C++ codegen
    the extra properties below surface the forms the backend needs:

    - `.method` — method-suffix used in C++ helpers (`full`, `delta`, `newt`)
    - `.number` — numeric index Nim's template backend uses (`"0"`, `"1"`, `"2"`)
    - `.code` — `*_VER` macro the non-template codegen emits into types
    '''

    FULL = "FULL"
    DELTA = "DELTA"
    NEW = "NEW"

    @property
    def method(self) -> str:
        '''Return this member's entry in the `_CPP_METHOD` table.'''
        return _CPP_METHOD[self]

    @property
    def number(self) -> str:
        '''Return this member's entry in the `_CPP_NUMBER` table (a string).'''
        return _CPP_NUMBER[self]

    @property
    def code(self) -> str:
        '''Return this member's entry in the `_CPP_CODE` table.'''
        return _CPP_CODE[self]


# Lookup tables backing the Version properties above. They are keyed by the
# Version members themselves, so they have to be defined after the class; the
# properties only resolve these module-level names when they are called.
_CPP_METHOD = {
    Version.FULL: "full",
    Version.DELTA: "delta",
    Version.NEW: "newt",
}

_CPP_NUMBER = {
    Version.FULL: "0",
    Version.DELTA: "1",
    Version.NEW: "2",
}

_CPP_CODE = {
    Version.FULL: "FULL_VER",
    Version.DELTA: "DELTA_VER",
    Version.NEW: "NEW_VER",
}
@dataclass
class AccessPattern:
    '''Access pattern for a single body clause.

    Mirrors hir_types.nim AccessPattern. All fields default-initialized for ease
    of construction inside passes; the planner fills them in.
    '''

    rel_name: str = ""
    version: Version = Version.FULL
    # Variables in access order.
    access_order: list[str] = field(default_factory=list)
    # Column indices in access order.
    index_cols: list[int] = field(default_factory=list)
    # Number of bound vars at start of access_order.
    prefix_len: int = 0
    # Original body clause index.
    clause_idx: int = -1
    # (col, const) pairs.
    const_args: list[tuple[int, int]] = field(default_factory=list)
@dataclass
class HirRuleVariant:
    '''One delta variant of a rule (mirrors HirRuleVariant).

    Non-recursive rules have delta_idx == -1 and a single base variant.
    Recursive rules fan out into N variants (one delta per recursive body
    clause).

    Only `original_rule` is required; every other field has a default, and
    each container default is a fresh object per instance.
    '''

    original_rule: Rule
    delta_idx: int = -1
    clause_versions: list[Version] = field(default_factory=list)
    clause_order: list[int] = field(default_factory=list)
    var_order: list[str] = field(default_factory=list)
    join_vars: set[str] = field(default_factory=set)
    split_at: int = -1
    temp_vars: list[str] = field(default_factory=list)
    temp_rel_name: str = ""
    access_patterns: list[AccessPattern] = field(default_factory=list)
    negation_patterns: list[AccessPattern] = field(default_factory=list)
    balanced_root: list[str] = field(default_factory=list)
    balanced_sources: list[str] = field(default_factory=list)

    # Codegen hints (pragma-driven)
    fanout: bool = False
    work_stealing: bool = False
    block_group: bool = False
    dedup_hash: bool = False
    count: bool = False
@dataclass
class HirStratum:
    '''An SCC + its rule variants. Mirrors HirStratum.

    Every field has a default, so `HirStratum()` yields an empty stratum;
    each container default is a fresh object per instance.
    '''

    scc_members: set[str] = field(default_factory=set)
    is_recursive: bool = False
    is_generated: bool = False
    stratum_rules: list[Rule] = field(default_factory=list)
    base_variants: list[HirRuleVariant] = field(default_factory=list)
    recursive_variants: list[HirRuleVariant] = field(default_factory=list)

    # Filled by Pass 4 (index selection):
    required_indices: dict[str, list[list[int]]] = field(default_factory=dict)
    canonical_index: dict[str, list[int]] = field(default_factory=dict)

    # Hooks (C++ injection points):
    before_hook: str = ""
    after_hook: str = ""
@dataclass
class RelationDecl:
    '''Relation declaration; subset of syntax.nim RelationDecl.

    NimNode-typed fields in Nim (relname, types, semiring) are reduced to
    strings here. Python emitter must reproduce the same repr() the Nim emitter
    writes.

    `rel_name` and `types` are required; all remaining fields have defaults.
    '''

    rel_name: str
    # e.g. ["int", "int"]; matches NimNode.repr() output
    types: list[str]
    semiring: str = "BooleanSR"
    input_file: str = ""
    print_size: bool = False
    count_only: bool = False
    output_file: str = ""
    index_type: str = ""
    is_generated: bool = False
    is_temp: bool = False
@dataclass
class HirProgram:
    '''The complete HIR program. Mirrors HirProgram.

    All three fields default to empty containers, freshly created per
    instance.
    '''

    strata: list[HirStratum] = field(default_factory=list)
    relation_decls: list[RelationDecl] = field(default_factory=list)
    global_index_map: dict[str, list[list[int]]] = field(default_factory=dict)