import ast
import re
from collections import deque
import polars as pl
from iguanas.metrics import compute_metrics
from iguanas.rule_evaluation import apply_rules
def _to_py(expr: str) -> str:
    """Translate rule-style ``&`` / ``|`` operators into ``and`` / ``or``.

    The output is intended to be parsed with :func:`ast.parse`, which groups
    ``and`` / ``or`` operands into ``ast.BoolOp`` nodes.

    Operators that appear inside a single- or double-quoted string literal
    (for example a column name such as ``X["a|b"]``) are copied through
    unchanged, so quoted text is never rewritten.

    Parameters
    ----------
    expr : str
        Rule expression using ``&`` (AND) and ``|`` (OR).

    Returns
    -------
    str
        ``expr`` with every operator outside a string literal, together with
        the whitespace around it, replaced by ``" and "`` or ``" or "``.
    """

    def _swap(match):
        # Group 1 is a complete quoted literal: emit it verbatim.
        literal = match.group(1)
        if literal is not None:
            return literal
        return " and " if match.group(2) == "&" else " or "

    # Alternative 1 consumes whole string literals (honouring backslash
    # escapes) so the operator alternative can never match inside one.
    return re.sub(
        r"""("(?:\\.|[^"\\])*"|'(?:\\.|[^'\\])*')|\s*([&|])\s*""",
        _swap,
        expr,
    )
def _node_to_str(node: ast.AST) -> str:
    """Render an AST node as a rule string using ``&`` / ``|``.

    Parameters
    ----------
    node : ast.AST
        Node taken from an expression parsed with ``and`` / ``or``.

    Returns
    -------
    str
        * ``ast.Compare`` -> the unparsed comparison wrapped in parentheses.
        * ``ast.BoolOp`` -> its operands joined by ``" & "`` or ``" | "``.
          The result itself is not parenthesised, but any operand that is
          itself a ``BoolOp`` is, so the grouping of the tree is preserved.
        * anything else -> ``ast.unparse`` output with whitespace-delimited
          ``and`` / ``or`` swapped back to ``&`` / ``|``.
    """
    if isinstance(node, ast.Compare):
        return f"({ast.unparse(node)})"
    if isinstance(node, ast.BoolOp):
        op = " & " if isinstance(node.op, ast.And) else " | "
        # Without the parentheses, ``a & (b | c)`` would be rendered as
        # ``(a) & (b) | (c)``, which regroups as ``((a) & (b)) | (c)``.
        return op.join(
            f"({_node_to_str(v)})" if isinstance(v, ast.BoolOp) else _node_to_str(v)
            for v in node.values
        )
    s = ast.unparse(node)
    return re.sub(r"\sand\s", " & ", re.sub(r"\sor\s", " | ", s))
[docs]
def parse_conditions(expr: str) -> dict:
    """Turn a boolean rule expression into a nested binary tree of dicts.

    Parameters
    ----------
    expr : str
        Boolean rule expression using ``&`` (AND) and ``|`` (OR) operators,
        e.g. ``'(X["a"] > 1) & (X["b"] < 5)'``.

    Returns
    -------
    dict
        Nested dict with keys ``"op"`` (``"&"`` or ``"|"``), ``"left"``,
        and ``"right"``. Leaf nodes are plain strings. When the expression
        has no boolean operator at its top level, the conversion yields a
        plain string instead of a dict.
    """
    # ``&`` / ``|`` are rewritten to ``and`` / ``or`` first so that the
    # parser produces ``ast.BoolOp`` nodes for the conversion step.
    python_source = _to_py(expr)
    root = ast.parse(python_source, mode="eval").body
    return _convert(root)
def _convert(node):
    """Recursively convert an AST node into the nested-dict representation.

    A ``BoolOp`` with operands ``v0, v1, v2, ...`` is folded to the left into
    binary nodes: ``{"op": op, "left": {"op": op, "left": v0, "right": v1},
    "right": v2}`` and so on. Every other node is a leaf and is returned as a
    string (the identifier for a bare name, the unparsed source otherwise).
    """
    # Guard clause: anything that is not a boolean operation is a leaf.
    if not isinstance(node, ast.BoolOp):
        return node.id if isinstance(node, ast.Name) else ast.unparse(node)

    symbol = "&" if isinstance(node.op, ast.And) else "|"
    operands = list(map(_convert, node.values))

    # Left fold: the accumulated tree always becomes the new "left" branch.
    tree = operands[0]
    for right in operands[1:]:
        tree = {"op": symbol, "left": tree, "right": right}
    return tree
[docs]
def parse_levels(expr: str) -> list[dict]:
    """Break a boolean expression into per-depth operator groups (BFS).

    Every operand of every boolean group receives a dot-notation index that
    encodes its path from the root, which lets the expression be reassembled
    bottom-up afterwards.

    Parameters
    ----------
    expr : str
        Boolean rule expression using ``&`` (AND) and ``|`` (OR) operators.

    Returns
    -------
    list[dict]
        One entry per tree depth, in BFS order. An entry is a dict with a
        single key (the operator ``"&"`` or ``"|"``) whose value is a list of
        ``(index, sub_expr)`` tuples. Indices use dot notation reflecting
        position in the tree (e.g. ``"1.0"`` = first child of the item
        indexed ``"1"`` in the parent level). When a depth holds more than
        one boolean group, its entry is a list of such dicts rather than a
        single dict. An expression without any boolean operator yields an
        empty list.

    Examples
    --------
    >>> parse_levels('(A > 1) | ((B <= 5) & (C < 3)) | (D >= 0)')
    [
        {'|': [('0', '(A > 1)'), ('1', '(B <= 5) & (C < 3)'), ('2', '(D >= 0)')]},
        {'&': [('1.0', '(B <= 5)'), ('1.1', '(C < 3)')]},
    ]
    """
    root = ast.parse(_to_py(expr), mode="eval").body
    result = []
    # Each frontier item pairs a node with the index its parent assigned it.
    frontier = deque([(root, "")])
    while frontier:
        upcoming = deque()
        groups = []
        for current, prefix in frontier:
            # Only boolean operations form a group; other nodes are leaves.
            if not isinstance(current, ast.BoolOp):
                continue
            symbol = "&" if isinstance(current.op, ast.And) else "|"
            members = []
            for position, operand in enumerate(current.values):
                label = f"{prefix}.{position}" if prefix else str(position)
                members.append((label, _node_to_str(operand)))
                # Compound operands are expanded at the next depth.
                if isinstance(operand, ast.BoolOp):
                    upcoming.append((operand, label))
            groups.append({symbol: members})
        if groups:
            # A lone group is stored bare; several groups are kept as a list.
            result.append(groups[0] if len(groups) == 1 else groups)
        frontier = upcoming
    return result
[docs]
def rebuild_from_levels(levels: list[dict]) -> str:
    """Reassemble a boolean expression from ``parse_levels`` output.

    Levels are walked from the deepest to the shallowest. Each group is
    joined with its operator, every operand being wrapped in parentheses,
    and the joined text replaces the placeholder stored under the group's
    parent index. The first group whose indices contain no dot is the root,
    and its joined text is the result.

    Parameters
    ----------
    levels : list[dict]
        Output of :func:`parse_levels`. An entry may be a single group dict
        or a list of group dicts.

    Returns
    -------
    str
        Reconstructed boolean expression string, or ``""`` when no root
        group is found (for instance when ``levels`` is empty).
    """

    def _groups(level):
        # Normalise a level entry to an iterable of group dicts.
        return [level] if isinstance(level, dict) else level

    # First pass: record the text of every indexed operand (first one wins).
    resolved: dict[str, str] = {}
    for level in levels:
        for group in _groups(level):
            _, members = next(iter(group.items()))
            for label, text in members:
                if label not in resolved:
                    resolved[label] = text

    # Second pass: fold groups into their parents, deepest level first.
    for level in reversed(levels):
        for group in _groups(level):
            operator, members = next(iter(group.items()))
            head_label = members[0][0]
            is_root = "." not in head_label
            joined = f" {operator} ".join(
                f"({resolved[label]})" for label, _ in members
            )
            if is_root:
                return joined
            resolved[head_label.rsplit(".", 1)[0]] = joined
    return ""