From a0a551ab60b3aad255fe0fb35659d7a64f49b96b Mon Sep 17 00:00:00 2001
From: Sudipta Basak
Date: Mon, 8 Jul 2024 10:43:19 +1000
Subject: [PATCH] update quantile computation in quantilegbm and quantilelgbm

When the requested prediction interval equals (upper_alpha - lower_alpha),
return the trained lower/upper quantile predictions directly instead of
deriving them from a normal approximation via norm.interval. Any other
interval still falls back to the normal approximation and now logs a
warning. The reference configs are updated so that quantiles matches the
trained band (0.9 for upper_alpha=0.95, lower_alpha=0.05).
---
 configs/ref_gradientboost_quantiles.yaml |  2 +-
 configs/ref_quantile_lgbm.yaml           |  4 +++-
 uncoverml/optimise/models.py             | 34 ++++++++++++++++++------
 uncoverml/predict.py                     | 10 ++++++++--
 4 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/configs/ref_gradientboost_quantiles.yaml b/configs/ref_gradientboost_quantiles.yaml
index 0a5f3343..a9390476 100644
--- a/configs/ref_gradientboost_quantiles.yaml
+++ b/configs/ref_gradientboost_quantiles.yaml
@@ -56,7 +56,7 @@ learning:
       'min_weight_fraction_leaf': Real(0.0, 0.5, prior='uniform')
 
 prediction:
-  quantiles: 0.95
+  quantiles: 0.9
   outbands: 4
 
 
diff --git a/configs/ref_quantile_lgbm.yaml b/configs/ref_quantile_lgbm.yaml
index 542c0974..7653c2df 100644
--- a/configs/ref_quantile_lgbm.yaml
+++ b/configs/ref_quantile_lgbm.yaml
@@ -38,11 +38,13 @@ learning:
     target_transform: identity
     random_state: 1
     max_depth: 20
+    upper_alpha: 0.95
+    lower_alpha: 0.05
 
 
 prediction:
   prediction_template: configs/data/sirsam/dem_foc2.tif
-  quantiles: 0.95
+  quantiles: 0.9
   outbands: 4
 
 
diff --git a/uncoverml/optimise/models.py b/uncoverml/optimise/models.py
index a29cebf8..d20dac10 100644
--- a/uncoverml/optimise/models.py
+++ b/uncoverml/optimise/models.py
@@ -641,6 +641,7 @@ def __init__(self, target_transform='identity',
         self.alpha = alpha
         self.upper_alpha = upper_alpha
         self.lower_alpha = lower_alpha
+        self.interval = upper_alpha - lower_alpha
 
     @staticmethod
     def collect_prediction(regressor, X_test):
@@ -658,7 +659,7 @@ def fit(self, X, y, *args, **kwargs):
     def predict(self, X, *args, **kwargs):
         return self.predict_dist(X, *args, **kwargs)[0]
 
-    def predict_dist(self, X, interval=0.95, *args, ** kwargs):
+    def predict_dist(self, X, interval=0.9, *args, **kwargs):
         Ey = self.gb.predict(X)
 
         ql_ = self.collect_prediction(self.gb_quantile_lower, X)
@@ -667,9 +668,17 @@ def predict_dist(self, X, interval=0.95, *args, ** kwargs):
         qu_ = self.collect_prediction(self.gb_quantile_upper, X)
         Vy = ((qu_ - ql_) / (norm.ppf(self.upper_alpha) - norm.ppf(self.lower_alpha))) ** 2
         # to make gbm quantile model consistent with other quantile based models
-        ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy))
-
-        return Ey, Vy, ql, qu
+        # if the requested interval matches (upper_alpha - lower_alpha), the
+        # trained quantiles are returned directly and no normal-distribution
+        # assumption is needed to compute ql and qu
+        if np.isclose(interval, self.interval):
+            return Ey, Vy, ql_, qu_
+        else:
+            log.warning("Used a normal-distribution assumption to compute quantiles. "
+ " Using quantiles=(upper_alpha-lower_alpha) will remove this requirement!") + log.warn("===============================================") + ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy)) + return Ey, Vy, ql, qu class GBMReg(GradientBoostingRegressor, TagsMixin): @@ -758,6 +767,7 @@ def __init__(self, target_transform='identity', loss='quantile', self.n_iter_no_change = n_iter_no_change self.tol = tol self.ccp_alpha = ccp_alpha + self.interval = upper_alpha - lower_alpha self.gb = GradientBoostingRegressor( learning_rate=learning_rate, n_estimators=n_estimators, @@ -820,7 +830,7 @@ def fit(self, X, y, *args, **kwargs): def predict(self, X, *args, **kwargs): return self.predict_dist(X, *args, **kwargs)[0] - def predict_dist(self, X, interval=0.95, *args, ** kwargs): + def predict_dist(self, X, interval=0.9, *args, ** kwargs): Ey = self.gb.predict(X) ql_ = self.collect_prediction(self.gb_quantile_lower, X) @@ -829,9 +839,17 @@ def predict_dist(self, X, interval=0.95, *args, ** kwargs): Vy = ((qu_ - ql_) / (norm.ppf(self.upper_alpha) - norm.ppf(self.lower_alpha))) ** 2 # to make gbm quantile model consistent with other quantile based models - ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy)) - - return Ey, Vy, ql, qu + if np.isclose(interval, self.interval): + return Ey, Vy, ql_, qu_ + else: + # if the interval matches (upper_alpha-lower_alpha), we don't need to compute ql, qu + # and also don't need to make assumpition of normal distribution to compute ql and qu + log.warn("===============================================") + log.warn("Used normal distribution assumption to compute quantiles." + " Using quantiles=(upper_alpha-lower_alpha) will remove this requirement!") + log.warn("===============================================") + ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy)) + return Ey, V, ql, qu class CatBoostWrapper(CatBoostRegressor, TagsMixin): diff --git a/uncoverml/predict.py b/uncoverml/predict.py index 8ca4fe13..640534bf 100644 --- a/uncoverml/predict.py +++ b/uncoverml/predict.py @@ -21,7 +21,7 @@ modelmaps.update(krig_dict) -def predict(data, model, interval=0.95, **kwargs): +def predict(data, model, interval=0.9, **kwargs): # Classification if hasattr(model, 'predict_proba'): @@ -34,6 +34,8 @@ def pred(X): else: def pred(X): if hasattr(model, 'predict_dist'): + if hasattr(model, 'upper_alpha') and hasattr(model, 'lower_alpha'): + interval = model.upper_alpha - model.lower_alpha Ey, Vy, ql, qu = model.predict_dist(X, interval, **kwargs) predres = np.hstack((Ey[:, np.newaxis], Vy[:, np.newaxis], ql[:, np.newaxis], qu[:, np.newaxis]))