Merge pull request #230 from winedarksea/dev

0.6.10
winedarksea · Jan 30, 2024 · 38cf328 · 38cf328
2 parents 3e6baff + 3b3d0b0
commit 38cf328
Show file tree

Hide file tree

Showing 37 changed files with 336 additions and 372 deletions.
diff --git a/TODO.md b/TODO.md
@@ -11,10 +11,12 @@
 * Series will largely be consistent in period, or at least up-sampled to regular intervals
 * The most recent data will generally be the most important
 * Forecasts are desired for the future immediately following the most recent data.
+* trimmed_mean to AverageValueNaive
 
-# 0.6.9 🇺🇦 🇺🇦 🇺🇦
-* expanded regressor options for MultivariateRegression, NeuralForecast (currently only available directly, not from AutoTS class)
-* matse bug fix on all 0 history
+# 0.6.10 🇺🇦 🇺🇦 🇺🇦
+* assorted minor bug fixes
+* bug in mosaic model selection fixed
+* added crosshair_lite mosaic
 
 ### Unstable Upstream Packages (those that are frequently broken by maintainers)
 * Pytorch-Forecasting

diff --git a/autots/__init__.py b/autots/__init__.py
@@ -26,7 +26,7 @@
 from autots.models.cassandra import Cassandra
 
 
-__version__ = '0.6.9'
+__version__ = '0.6.10'
 
 TransformTS = GeneralTransformer
 

diff --git a/autots/evaluator/auto_model.py b/autots/evaluator/auto_model.py
@@ -1470,6 +1470,7 @@ def TemplateWizard(
     current_model_file: str = None,
     mosaic_used=None,
     force_gc: bool = False,
+    additional_msg: str = "",
 ):
     """
     Take Template, returns Results.
@@ -1548,13 +1549,12 @@ def virtual_memory():
             template_result.model_count += 1
             if verbose > 0:
                 if validation_round >= 1:
-                    base_print = (
-                        "Model Number: {} of {} with model {} for Validation {}".format(
-                            str(template_result.model_count),
-                            template.shape[0],
-                            model_str,
-                            str(validation_round),
-                        )
+                    base_print = "Model Number: {} of {} with model {} for Validation {}{}".format(
+                        str(template_result.model_count),
+                        template.shape[0],
+                        model_str,
+                        str(validation_round),
+                        str(additional_msg),
                     )
                 else:
                     base_print = (
@@ -1568,9 +1568,10 @@ def virtual_memory():
                 if verbose > 1:
                     print(
                         base_print
-                        + " with params {} and transformations {}".format(
+                        + " with params {} and transformations {}{}".format(
                             json.dumps(parameter_dict),
                             json.dumps(transformation_dict),
+                            str(additional_msg),
                         )
                     )
                 else:

diff --git a/autots/evaluator/auto_ts.py b/autots/evaluator/auto_ts.py
diff --git a/autots/models/basics.py b/autots/models/basics.py
@@ -19,7 +19,7 @@
     sliding_window_view,
     chunk_reshape,
 )
-from autots.tools.percentile import nan_quantile
+from autots.tools.percentile import nan_quantile, trimmed_mean
 from autots.tools.fast_kalman import KalmanFilter, new_kalman_params
 from autots.tools.transform import (
     GeneralTransformer,
@@ -309,6 +309,12 @@ def fit(self, df, future_regressor=None):
             self.average_values = np.average(
                 df_used.to_numpy(), axis=0, weights=weights
             )
+        elif method == "trimmed_mean_20":
+            self.average_values = trimmed_mean(df_used, percent=0.2, axis=0)
+        elif method == "trimmed_mean_40":
+            self.average_values = trimmed_mean(df_used, percent=0.4, axis=0)
+        else:
+            raise ValueError(f"method {method} not recognized")
         self.fit_runtime = datetime.datetime.now() - self.startTime
         self.lower, self.upper = historic_quantile(
             df_used, prediction_interval=self.prediction_interval
@@ -366,8 +372,10 @@ def get_new_params(self, method: str = 'random'):
                 "Midhinge",
                 "Weighted_Mean",
                 "Exp_Weighted_Mean",
+                "trimmed_mean_20",
+                "trimmed_mean_40",
             ],
-            [0.3, 0.3, 0.01, 0.1, 0.4, 0.1],
+            [0.3, 0.3, 0.01, 0.1, 0.4, 0.1, 0.05, 0.05],
         )[0]
 
         return {

diff --git a/autots/models/cassandra.py b/autots/models/cassandra.py
@@ -2011,8 +2011,9 @@ def get_new_params(self, method='fast'):
                     'lstsq',
                     'linalg_solve',
                     'bayesian_linear',
+                    'l1_positive',
                 ],
-                [0.8, 0.15, 0.05],
+                [0.8, 0.15, 0.05, 0.01],
             )[0]
         recency_weighting = random.choices(
             [None, 0.05, 0.1, 0.25, 0.5], [0.7, 0.1, 0.1, 0.1, 0.05]
@@ -2537,8 +2538,11 @@ def lstsq_minimize(X, y, maxiter=15000, cost_function="l1", method=None):
     elif cost_function == "quantile":
         cost_func = cost_function_quantile
     elif cost_function == "l1_positive":
-        bounds = [(0, 14) for x in x0]
+        max_bound = 14
+        bounds = [(0, max_bound) for x in x0]
         cost_func = cost_function_l1
+        x0[x0 <= 0] = 0.000001
+        x0[x0 > max_bound] = max_bound - 0.0001
     else:
         cost_func = cost_function_l1
     return minimize(

diff --git a/autots/models/ensemble.py b/autots/models/ensemble.py
@@ -117,10 +117,16 @@ def parse_mosaic(ensemble):
         # zero is considered None here
         if swindow == 0:
             swindow = None
+        if "crosshair_lite" in ensemble:
+            crosshair = 'crosshair_lite'
+        elif 'crosshair' in ensemble:
+            crosshair = True
+        else:
+            crosshair = False
         return {
             'metric': metric,
             'smoothing_window': swindow,
-            'crosshair': "crosshair" in ensemble,
+            'crosshair': crosshair,
             'n_models': n_models,
         }
 
@@ -481,7 +487,7 @@ def mosaic_classifier(df_train, known, classifier_params=None):
             "model_params": {
                 'n_estimators': 62,
                 'max_features': 0.181116,
-                'max_leaves': 261,
+                'max_leaf_nodes': 261,
                 'criterion': 'entropy',
             },
         }
@@ -1564,17 +1570,21 @@ def HorizontalTemplateGenerator(
     return ensemble_templates
 
 
-def generate_crosshair_score(error_matrix):
-    arr_size = error_matrix.size
-    base_weight = 0.001 / arr_size
-    sum_error = np.sum(error_matrix) * base_weight
+def generate_crosshair_score(error_matrix, method=None):
+    # 'lite' only takes the weighted axis down a series not from other series
+    if method == 'crosshair_lite':
+        return error_matrix + (np.median(error_matrix, axis=0) / 3)
+    else:
+        arr_size = error_matrix.size
+        base_weight = 0.001 / arr_size
+        sum_error = np.sum(error_matrix) * base_weight
 
-    cross_base = error_matrix * (base_weight * 50)
-    row_sums = cross_base.sum(axis=1)
-    col_sums = cross_base.sum(axis=0)
-    outer_sum = np.add.outer(row_sums, col_sums)
+        cross_base = error_matrix * (base_weight * 50)
+        row_sums = cross_base.sum(axis=1)
+        col_sums = cross_base.sum(axis=0)
+        outer_sum = np.add.outer(row_sums, col_sums)
 
-    return error_matrix + sum_error + outer_sum
+        return error_matrix + sum_error + outer_sum
 
 
 def generate_crosshair_score_list(error_list):

diff --git a/autots/models/neural_forecast.py b/autots/models/neural_forecast.py
@@ -44,6 +44,7 @@ def __init__(
         activation='ReLU',
         scaler_type='robust',
         model_args={},
+        point_quantile=None,
         **kwargs,
     ):
         ModelObject.__init__(
@@ -66,11 +67,18 @@ def __init__(
         self.activation = activation
         self.scaler_type = scaler_type
         self.model_args = model_args
+        self.point_quantile = point_quantile
         self.forecast_length = forecast_length
         self.df_train = None
         self.static_regressor = None
 
-    def fit(self, df, future_regressor=None, static_regressor=None):
+    def fit(
+        self,
+        df,
+        future_regressor=None,
+        static_regressor=None,
+        regressor_per_series=None,
+    ):
         """Train algorithm given data supplied.
 
         Args:
@@ -86,6 +94,9 @@ def fit(self, df, future_regressor=None, static_regressor=None):
             self.static_regressor = static_regressor
             if isinstance(self.static_regressor, pd.DataFrame):
                 static_cols = static_regressor.columns.tolist()
+            if regressor_per_series is not None:
+                if not isinstance(regressor_per_series, dict):
+                    raise ValueError("regressor_per_series in incorrect format")
 
         from neuralforecast import NeuralForecast
         from neuralforecast.losses.pytorch import (
@@ -121,7 +132,12 @@ def fit(self, df, future_regressor=None, static_regressor=None):
         logging.getLogger("pytorch_lightning").setLevel(logging.CRITICAL)
         loss = self.loss
         if loss == "MQLoss":
-            loss = MQLoss(level=levels)
+            if self.point_quantile is None:
+                loss = MQLoss(level=levels)
+            else:
+                div = (1 - self.prediction_interval) / 2
+                quantiles = [div, 1 - div, self.point_quantile]
+                loss = MQLoss(quantiles=quantiles)
         elif loss == "Poisson":
             loss = DistributionLoss(
                 distribution='Poisson', level=levels, return_params=False
@@ -187,7 +203,10 @@ def fit(self, df, future_regressor=None, static_regressor=None):
         models = self.model
         model_args = self.model_args
         if self.regression_type in ['User', 'user', True]:
-            self.base_args["futr_exog_list"] = future_regressor.columns.tolist()
+            regr_cols = future_regressor.columns.tolist()
+            if regressor_per_series is not None:
+                regr_cols += next(iter(regressor_per_series.values())).columns.tolist()
+            self.base_args["futr_exog_list"] = regr_cols
             self.base_args['stat_exog_list'] = static_cols
 
         if isinstance(models, list):
@@ -221,6 +240,17 @@ def fit(self, df, future_regressor=None, static_regressor=None):
             silly_format = silly_format.merge(
                 future_regressor, left_on='ds', right_index=True
             )
+            if regressor_per_series is not None:
+                full_df = []
+                for key, value in regressor_per_series.items():
+                    local_copy = value.copy().reindex(df.index)
+                    local_copy.index.name = 'ds'
+                    local_copy = local_copy.reset_index()
+                    local_copy['unique_id'] = str(key)
+                    full_df.append(local_copy)
+                silly_format = silly_format.merge(
+                    pd.concat(full_df), on=['unique_id', 'ds'], how='left'
+                ).fillna(0)
         self.nf = NeuralForecast(models=models, freq=freq)
         if self.static_regressor is None:
             self.nf.fit(df=silly_format)
@@ -234,7 +264,11 @@ def fit(self, df, future_regressor=None, static_regressor=None):
         return self
 
     def predict(
-        self, forecast_length=None, future_regressor=None, just_point_forecast=False
+        self,
+        forecast_length=None,
+        future_regressor=None,
+        just_point_forecast=False,
+        regressor_per_series=None,
     ):
         predictStartTime = datetime.datetime.now()
         if self.regression_type in ['User', 'user', True]:
@@ -249,6 +283,17 @@ def predict(
                 future_regressor, left_index=True, right_index=True
             )
             futr_df = futr_df.reset_index(names='ds')
+            if regressor_per_series is not None:
+                full_df = []
+                for key, value in regressor_per_series.items():
+                    local_copy = value.copy().reindex(index)
+                    local_copy.index.name = 'ds'
+                    local_copy = local_copy.reset_index()
+                    local_copy['unique_id'] = str(key)
+                    full_df.append(local_copy)
+                futr_df = futr_df.merge(
+                    pd.concat(full_df), on=['unique_id', 'ds'], how='left'
+                ).fillna(0)
             self.futr_df = futr_df
             long_forecast = self.nf.predict(futr_df=futr_df)
         else:
@@ -259,6 +304,9 @@ def predict(
             target_col = long_forecast.columns[-1]
         else:
             target_col = target_col[0]
+        if self.point_quantile is not None:
+            # print(long_forecast.columns)
+            target_col = long_forecast.columns[-1]
         forecast = long_forecast.reset_index().pivot_table(
             index='ds', columns='unique_id', values=target_col
         )[self.column_names]
@@ -274,10 +322,12 @@ def predict(
             )
         else:
             target_col = [x for x in long_forecast.columns if "hi-" in x][0]
+            # print(f"upper target col: {target_col}")
             upper_forecast = long_forecast.reset_index().pivot_table(
                 index='ds', columns='unique_id', values=target_col
             )[self.column_names]
             target_col = [x for x in long_forecast.columns if "lo-" in x][0]
+            # print(f"lower target col {target_col}")
             lower_forecast = long_forecast.reset_index().pivot_table(
                 index='ds', columns='unique_id', values=target_col
             )[self.column_names]
@@ -311,6 +361,16 @@ def get_new_params(self, method: str = 'random'):
             regression_type_choice = random.choices([None, "User"], weights=[0.8, 0.2])[
                 0
             ]
+        if "deep" in method:
+            max_steps = random.choices(
+                [40, 80, 100, 1000, 5000, 10000, 50000],
+                [0.2, 0.2, 0.2, 0.1, 0.05, 0.05, 0.01],
+            )[0]
+        else:
+            max_steps = random.choices(
+                [40, 80, 100, 1000, 5000],
+                [0.2, 0.2, 0.2, 0.05, 0.03],
+            )[0]
         activation = random.choices(
             ['ReLU', 'Softplus', 'Tanh', 'SELU', 'LeakyReLU', 'PReLU', 'Sigmoid'],
             [0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
@@ -328,8 +388,13 @@ def get_new_params(self, method: str = 'random'):
                 "SMAPE",
                 "StudentT",
             ],
-            [0.3, 0.1, 0.01, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.01],
+            [0.5, 0.1, 0.01, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.01],
         )[0]
+        point_quantile = None
+        if loss == "MQLoss":
+            point_quantile = random.choices(
+                [None, 0.35, 0.45, 0.55, 0.65, 0.7], [0.5, 0.1, 0.1, 0.1, 0.1, 0.1]
+            )[0]
         if models == "TFT":
             model_args = {
                 "n_head": random.choice([2, 4]),
@@ -368,14 +433,12 @@ def get_new_params(self, method: str = 'random'):
             'learning_rate': random.choices(
                 [0.001, 0.1, 0.01, 0.0003, 0.00001], [0.4, 0.1, 0.1, 0.1, 0.1]
             )[0],
-            "max_steps": random.choices(
-                [40, 80, 100, 1000],
-                [0.2, 0.2, 0.2, 0.05],
-            )[0],
+            "max_steps": max_steps,
             'input_size': random.choices(
                 [10, 28, "2ForecastLength", "3ForecastLength"], [0.2, 0.2, 0.2, 0.2]
             )[0],
             # "early_stop_patience_steps": random.choice([1, 3, 5]),
+            "point_quantile": point_quantile,
             "model_args": model_args,
             'regression_type': regression_type_choice,
         }
@@ -390,13 +453,14 @@ def get_params(self):
             'learning_rate': self.learning_rate,
             "max_steps": self.max_steps,
             'input_size': self.input_size,
+            'point_quantile': self.point_quantile,
             "model_args": self.model_args,
             'regression_type': self.regression_type,
         }
 
 
 if False:
-    from autots.models.neural_forecast import NeuralForecast
+    # from autots.models.neural_forecast import NeuralForecast
     from autots import load_daily, create_regressor, infer_frequency
 
     df = load_daily(long=False)