Source code for iguanas.monotone_constraints
import polars as pl
from xgboost import XGBClassifier
[docs]
def infer_monotone_constraints_from_correlations(X: pl.DataFrame, y: pl.Series) -> pl.DataFrame:
"""Compute monotone constraint signs for XGBoost based on feature-target correlations.
Parameters
----------
X : pl.DataFrame
DataFrame containing features.
y : pl.Series
Target series for computing correlations.
Returns
-------
pl.DataFrame
DataFrame with columns:
- feature: Feature name
- pearson_corr: Pearson correlation with target
- constraint: Constraint value (1 for positive constraint, -1 for negative constraint, 0 for no constraint)
"""
# Add y as temporary column for efficient correlation computation
X_temp = X.with_columns(y.alias("_target"))
corr_result = X_temp.select([pl.corr(col, "_target").alias(col) for col in X.columns])
result = pl.DataFrame(
{
"feature": X.columns,
"pearson_corr": [corr_result[col][0] for col in X.columns],
}
).with_columns(
# Vectorized constraint computation
pl.when(pl.col("pearson_corr") > 0)
.then(pl.lit(1))
.when(pl.col("pearson_corr") < 0)
.then(pl.lit(-1))
.otherwise(pl.lit(0))
.alias("constraint")
)
return result
[docs]
def infer_monotone_constraints_from_stumps(
stump: XGBClassifier, X: pl.DataFrame, y: pl.Series
) -> pl.DataFrame:
"""Determine monotone constraints by training decision stumps for each feature.
Trains a single-split tree (max_depth=1) for each feature and examines how
predictions change from min to max value to determine monotonic relationship.
Parameters
----------
stump : XGBClassifier
XGBoost classifier configured as a stump (max_depth=1).
X : pl.DataFrame
Features DataFrame.
y : pl.Series
Target series for training.
Returns
-------
pl.DataFrame
DataFrame with columns:
- feature: Feature name
- constraint: Constraint value (1 for positive constraint, -1 for negative constraint, 0 for no constraint)
- pred_at_min: Predicted probability at minimum feature value
- pred_at_max: Predicted probability at maximum feature value
- delta: Change in probability (pred_at_max - pred_at_min)
"""
# Get feature columns and target
results = []
for col in X.columns:
stump.fit(X.select(col), y)
# Get predictions at min and max values in a single call
min_val = X.select(pl.col(col).min()).item()
max_val = X.select(pl.col(col).max()).item()
preds = stump.predict_proba([[min_val], [max_val]])
pred_at_min = preds[0][1]
pred_at_max = preds[1][1]
# Determine constraint based on prediction direction
if pred_at_max > pred_at_min:
constraint = 1
elif pred_at_max < pred_at_min:
constraint = -1
else:
constraint = 0
results.append(
{
"feature": col,
"constraint": constraint,
"pred_at_min": pred_at_min,
"pred_at_max": pred_at_max,
"delta": pred_at_max - pred_at_min,
}
)
return pl.DataFrame(results)