Source code for iguanas.metrics

import polars as pl


def compute_single_metric(
    y_pred: pl.Series,
    y: pl.Series,
    metric: str,
    weights: pl.Series | None = None,
) -> float:
    """Compute a single performance metric for one boolean prediction series.

    Only the confusion-matrix cells needed for the requested metric are
    computed, which makes this cheaper than ``compute_metrics`` when a single
    scalar is all that is required.

    Parameters
    ----------
    y_pred : pl.Series
        Prediction series; cast to Boolean before use.
    y : pl.Series
        Target series; cast to Boolean before use.
    metric : str
        Metric name: "precision", "recall", "accuracy", or an F-beta score
        written as ``f<number>`` (e.g. ``"f1"``, ``"f0.5"``).
    weights : pl.Series | None, default=None
        Optional sample weights. When provided, every confusion-matrix cell is
        the sum of the weights of the matching rows instead of a row count.

    Returns
    -------
    float
        The requested metric value. Any ratio whose denominator is not
        positive is reported as 0.0.

    Raises
    ------
    ValueError
        If *metric* is not one of the supported names, including an
        ``f``-prefixed name whose suffix is not a number.
    """
    # Resolve the metric name before touching the data: an unsupported name
    # fails fast, and a malformed F-beta suffix (e.g. "foo") gets the same
    # descriptive error instead of a bare float-conversion message.
    beta = None
    if metric not in ("precision", "recall", "accuracy"):
        beta_text = metric[1:] if metric.startswith("f") else ""
        try:
            beta = float(beta_text)
        except ValueError:
            raise ValueError(
                f"Unsupported metric '{metric}'. Must be 'precision', 'recall', "
                f"'accuracy', or an F-beta score (f<number>)."
            ) from None

    y_bool = y.cast(pl.Boolean)
    y_pred_bool = y_pred.cast(pl.Boolean)

    def cell_total(mask: pl.Series) -> float:
        """Return the row count, or the summed weight, of the rows in *mask*."""
        if weights is not None:
            return float(weights.filter(mask).sum())
        return float(mask.sum())

    def safe_ratio(numerator: float, denominator: float) -> float:
        """Divide, falling back to 0.0 when the denominator is not positive."""
        return numerator / denominator if denominator > 0 else 0.0

    tp = cell_total(y_bool & y_pred_bool)
    fp = cell_total(~y_bool & y_pred_bool)
    fn = cell_total(y_bool & ~y_pred_bool)

    if metric == "precision":
        return safe_ratio(tp, tp + fp)
    if metric == "recall":
        return safe_ratio(tp, tp + fn)
    if metric == "accuracy":
        # True negatives are only needed for accuracy, so compute them lazily.
        tn = cell_total(~y_bool & ~y_pred_bool)
        return safe_ratio(tp + tn, tp + tn + fp + fn)

    # Remaining case: F-beta = (1 + b^2) * P * R / (b^2 * P + R).
    precision = safe_ratio(tp, tp + fp)
    recall = safe_ratio(tp, tp + fn)
    return safe_ratio(
        (1 + beta**2) * precision * recall,
        beta**2 * precision + recall,
    )
def compute_metrics(
    R: pl.Series | pl.DataFrame,
    y: pl.Series,
    weights: pl.Series | None = None,
    betas: list[float] | None = None,
) -> pl.DataFrame:
    """Compute confusion-matrix based performance metrics for every rule column.

    For each column of *R* the confusion matrix against *y* is computed,
    followed by precision, recall, accuracy, flag rates and F-beta scores.
    When *weights* is given, weighted versions are added alongside the
    count-based ones.

    Parameters
    ----------
    R : pl.Series | pl.DataFrame
        Rule predictions. Each column is one rule evaluated per observation;
        a single Series is treated as a one-column frame. Columns that are
        not already Boolean are cast to Boolean.
    y : pl.Series
        Target series (True for the positive class). Cast to Boolean if it is
        not already.
    weights : pl.Series | None, default=None
        Optional numeric series. If provided, each weighted confusion-matrix
        cell is the sum of the weights of the matching rows.
    betas : list[float] | None, default=None
        F-beta values to compute; ``None`` means ``[0.25, 0.5, 1, 1.5, 2]``.
        Each value ``b`` produces a column named ``f{b:g}`` (e.g. ``f0.25``,
        ``f1``) and, when *weights* is provided, ``f{b:g}_weight``.

    Returns
    -------
    pl.DataFrame
        One row per rule with, in order:

        - rule: rule name (column name from R)
        - TP, FP, TN, FN: confusion-matrix counts
        - TP_weight, FP_weight, TN_weight, FN_weight (only with *weights*)
        - precision, recall, accuracy
        - total_weight, precision_weight, recall_weight, accuracy_weight
          (only with *weights*)
        - flagged(%): percentage of all observations flagged by the rule
        - good_flagged(%): percentage of negatives flagged by the rule
        - f{b:g} for each b in *betas*
        - num_rules: one plus the number of ``") | ("`` separators found in
          the rule name
        - f{b:g}_weight for each b in *betas* (only with *weights*)

    Notes
    -----
    Ratios are computed with plain column division; a zero denominator is not
    replaced by 0.0 here, unlike in ``compute_single_metric``.

    Examples
    --------
    >>> import polars as pl
    >>> R = pl.DataFrame({"rule_a": [True, False, True], "rule_b": [False, False, True]})
    >>> y = pl.Series([True, False, False])
    >>> # Count-based metrics only
    >>> metrics_df = compute_metrics(R, y)
    >>> # Both count and weighted metrics
    >>> amounts = pl.Series([10.0, 5.0, 2.5])
    >>> metrics_df = compute_metrics(R, y, weights=amounts)
    >>> # Rank rules by F1
    >>> top_rules = metrics_df.sort("f1", descending=True).head(10)
    """
    if betas is None:
        betas = [0.25, 0.5, 1, 1.5, 2]
    if y.dtype != pl.Boolean:
        y = y.cast(pl.Boolean)
    if isinstance(R, pl.Series):
        R = R.to_frame()

    cells = ("TP", "FP", "TN", "FN")
    counts = {cell: [] for cell in cells}
    weighted = {f"{cell}_weight": [] for cell in cells}

    not_y = ~y  # loop-invariant, so negate the target only once
    for col in R.columns:
        pred = R[col]
        # Mirror the handling of ``y``: 0/1 style columns must be Boolean
        # before ``~`` and ``&`` are applied to them.
        if pred.dtype != pl.Boolean:
            pred = pred.cast(pl.Boolean)
        not_pred = ~pred
        masks = {
            "TP": y & pred,
            "FP": not_y & pred,
            "TN": not_y & not_pred,
            "FN": y & not_pred,
        }
        for cell, mask in masks.items():
            counts[cell].append(mask.sum())
            if weights is not None:
                weighted[f"{cell}_weight"].append(weights.filter(mask).sum())

    # Column order: rule, count cells, then (optionally) weighted cells.
    data = {"rule": R.columns, **counts}
    if weights is not None:
        data.update(weighted)
    metrics_df = pl.DataFrame(data)

    def fbeta_expr(beta: float, suffix: str = "") -> pl.Expr:
        """Build the F-beta expression from the precision/recall columns."""
        precision = pl.col(f"precision{suffix}")
        recall = pl.col(f"recall{suffix}")
        return (
            (1 + beta**2) * precision * recall / (beta**2 * precision + recall)
        ).alias(f"f{beta:g}{suffix}")

    tp, fp, tn, fn = (pl.col(cell) for cell in cells)

    # Step 1: basic metrics, materialised first because the F-beta
    # expressions below read the precision/recall columns.
    metrics_df = metrics_df.with_columns(
        [
            (tp / (tp + fp)).alias("precision"),
            (tp / (tp + fn)).alias("recall"),
            ((tp + tn) / (tp + fp + tn + fn)).alias("accuracy"),
        ]
    )

    # Step 2: derived metrics, applied in one pass at the end so that the
    # weighted base columns (if any) are inserted before them.
    expressions = [
        ((tp + fp) / (tp + fp + tn + fn) * 100).alias("flagged(%)"),
        (fp / (tn + fp) * 100).alias("good_flagged(%)"),
        *[fbeta_expr(b) for b in betas],
        # Combined rules are joined with ") | ("; separators + 1 = rule count
        (pl.col("rule").str.count_matches(r"\) \| \(") + 1).alias("num_rules"),
    ]

    if weights is not None:
        tp_w, fp_w, tn_w, fn_w = (pl.col(f"{cell}_weight") for cell in cells)

        # total_weight must exist as a column before accuracy_weight reads it.
        metrics_df = metrics_df.with_columns(
            [(tp_w + fp_w + tn_w + fn_w).alias("total_weight")]
        )
        metrics_df = metrics_df.with_columns(
            [
                (tp_w / (tp_w + fp_w)).alias("precision_weight"),
                (tp_w / (tp_w + fn_w)).alias("recall_weight"),
                ((tp_w + tn_w) / pl.col("total_weight")).alias("accuracy_weight"),
            ]
        )
        expressions.extend(fbeta_expr(b, "_weight") for b in betas)

    return metrics_df.with_columns(expressions)