From 5381681b86ec3f83c2aa57a5f7677b463a644abc Mon Sep 17 00:00:00 2001
From: Sudipta Basak
Date: Mon, 8 Jul 2024 10:43:19 +1000
Subject: [PATCH] update quantile computation in quantilegbm and quantilelgbm

When the requested interval equals (upper_alpha - lower_alpha),
predict_dist returns the fitted lower/upper quantile predictions
directly. Otherwise it falls back to a normal distribution assumption
for the requested interval and logs a warning.

---
 configs/ref_quantile_lgbm.yaml |  4 +++-
 uncoverml/optimise/models.py   | 33 +++++++++++++++++++++++++--------
 2 files changed, 28 insertions(+), 9 deletions(-)

diff --git a/configs/ref_quantile_lgbm.yaml b/configs/ref_quantile_lgbm.yaml
index 542c0974..c58c8c56 100644
--- a/configs/ref_quantile_lgbm.yaml
+++ b/configs/ref_quantile_lgbm.yaml
@@ -38,11 +38,13 @@ learning:
     target_transform: identity
     random_state: 1
     max_depth: 20
+    upper_alpha: 0.9
+    lower_alpha: 0.1
 
 
 prediction:
   prediction_template: configs/data/sirsam/dem_foc2.tif
-  quantiles: 0.95
+  quantiles: 0.90
   outbands: 4
 
 
diff --git a/uncoverml/optimise/models.py b/uncoverml/optimise/models.py
index a29cebf8..2c4a3d4d 100644
--- a/uncoverml/optimise/models.py
+++ b/uncoverml/optimise/models.py
@@ -641,6 +641,7 @@ def __init__(self, target_transform='identity',
         self.alpha = alpha
         self.upper_alpha = upper_alpha
         self.lower_alpha = lower_alpha
+        self.interval = upper_alpha - lower_alpha
 
     @staticmethod
     def collect_prediction(regressor, X_test):
@@ -658,7 +659,7 @@ def fit(self, X, y, *args, **kwargs):
     def predict(self, X, *args, **kwargs):
         return self.predict_dist(X, *args, **kwargs)[0]
 
-    def predict_dist(self, X, interval=0.95, *args, ** kwargs):
+    def predict_dist(self, X, interval=0.90, *args, ** kwargs):
         Ey = self.gb.predict(X)
 
         ql_ = self.collect_prediction(self.gb_quantile_lower, X)
@@ -667,9 +668,17 @@ def predict_dist(self, X, interval=0.95, *args, ** kwargs):
         Vy = ((qu_ - ql_) / (norm.ppf(self.upper_alpha) - norm.ppf(self.lower_alpha))) ** 2
 
         # to make gbm quantile model consistent with other quantile based models
-        ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy))
-
-        return Ey, Vy, ql, qu
+        if np.isclose(interval, self.upper_alpha - self.lower_alpha):
+            return Ey, Vy, ql_, qu_
+        else:
+            # the requested interval differs from (upper_alpha-lower_alpha), so the fitted
+            # quantiles cannot be reused; assume a normal distribution to compute ql and qu
+            log.warning("===============================================")
+            log.warning("Used normal distribution assumption to compute quantiles."
+                        " Using quantiles=(upper_alpha-lower_alpha) will remove this requirement!")
+            log.warning("===============================================")
+            ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy))
+            return Ey, Vy, ql, qu
 
 
 class GBMReg(GradientBoostingRegressor, TagsMixin):
@@ -820,7 +829,7 @@ def fit(self, X, y, *args, **kwargs):
     def predict(self, X, *args, **kwargs):
         return self.predict_dist(X, *args, **kwargs)[0]
 
-    def predict_dist(self, X, interval=0.95, *args, ** kwargs):
+    def predict_dist(self, X, interval=0.90, *args, ** kwargs):
         Ey = self.gb.predict(X)
 
         ql_ = self.collect_prediction(self.gb_quantile_lower, X)
@@ -829,9 +838,17 @@ def predict_dist(self, X, interval=0.95, *args, ** kwargs):
         Vy = ((qu_ - ql_) / (norm.ppf(self.upper_alpha) - norm.ppf(self.lower_alpha))) ** 2
 
         # to make gbm quantile model consistent with other quantile based models
-        ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy))
-
-        return Ey, Vy, ql, qu
+        if np.isclose(interval, self.upper_alpha - self.lower_alpha):
+            return Ey, Vy, ql_, qu_
+        else:
+            # the requested interval differs from (upper_alpha-lower_alpha), so the fitted
+            # quantiles cannot be reused; assume a normal distribution to compute ql and qu
+            log.warning("===============================================")
+            log.warning("Used normal distribution assumption to compute quantiles."
+                        " Using quantiles=(upper_alpha-lower_alpha) will remove this requirement!")
+            log.warning("===============================================")
+            ql, qu = norm.interval(interval, loc=Ey, scale=np.sqrt(Vy))
+            return Ey, Vy, ql, qu
 
 
 class CatBoostWrapper(CatBoostRegressor, TagsMixin):