0.6.10 #230

Merged (12 commits, Jan 30, 2024)
8 changes: 5 additions & 3 deletions TODO.md
@@ -11,10 +11,12 @@
* Series will largely be consistent in period, or at least up-sampled to regular intervals
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.
+ * trimmed_mean to AverageValueNaive

- # 0.6.9 🇺🇦 🇺🇦 🇺🇦
- * expanded regressor options for MultivariateRegression, NeuralForecast (currently only available directly, not from AutoTS class)
- * matse bug fix on all 0 history
+ # 0.6.10 🇺🇦 🇺🇦 🇺🇦
+ * assorted minor bug fixes
+ * bug in mosaic model selection fixed
+ * added crosshair_lite mosaic

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
2 changes: 1 addition & 1 deletion autots/__init__.py
@@ -26,7 +26,7 @@
from autots.models.cassandra import Cassandra


-__version__ = '0.6.9'
+__version__ = '0.6.10'

TransformTS = GeneralTransformer

17 changes: 9 additions & 8 deletions autots/evaluator/auto_model.py
@@ -1470,6 +1470,7 @@ def TemplateWizard(
current_model_file: str = None,
mosaic_used=None,
force_gc: bool = False,
+    additional_msg: str = "",
):
"""
Take Template, returns Results.
@@ -1548,13 +1549,12 @@ def virtual_memory():
template_result.model_count += 1
if verbose > 0:
if validation_round >= 1:
-                base_print = (
-                    "Model Number: {} of {} with model {} for Validation {}".format(
-                        str(template_result.model_count),
-                        template.shape[0],
-                        model_str,
-                        str(validation_round),
-                    )
-                )
+                base_print = "Model Number: {} of {} with model {} for Validation {}{}".format(
+                    str(template_result.model_count),
+                    template.shape[0],
+                    model_str,
+                    str(validation_round),
+                    str(additional_msg),
+                )
else:
base_print = (
@@ -1568,9 +1568,10 @@
if verbose > 1:
print(
base_print
+ " with params {} and transformations {}".format(
+ " with params {} and transformations {}{}".format(
json.dumps(parameter_dict),
json.dumps(transformation_dict),
str(additional_msg),
)
)
else:
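The new `additional_msg` parameter simply rides along on the existing progress line. A quick illustration with hypothetical values (only the format string comes from the diff above):

```python
# Hypothetical values; only the format string is from TemplateWizard above.
base_print = "Model Number: {} of {} with model {} for Validation {}{}".format(
    str(3), 50, "AverageValueNaive", str(1), str(" (in horizontal generalization)")
)
print(base_print)
# Model Number: 3 of 50 with model AverageValueNaive for Validation 1 (in horizontal generalization)
```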
408 changes: 110 additions & 298 deletions autots/evaluator/auto_ts.py

Large diffs are not rendered by default.

12 changes: 10 additions & 2 deletions autots/models/basics.py
@@ -19,7 +19,7 @@
sliding_window_view,
chunk_reshape,
)
-from autots.tools.percentile import nan_quantile
+from autots.tools.percentile import nan_quantile, trimmed_mean
from autots.tools.fast_kalman import KalmanFilter, new_kalman_params
from autots.tools.transform import (
GeneralTransformer,
@@ -309,6 +309,12 @@ def fit(self, df, future_regressor=None):
self.average_values = np.average(
df_used.to_numpy(), axis=0, weights=weights
)
elif method == "trimmed_mean_20":
self.average_values = trimmed_mean(df_used, percent=0.2, axis=0)
elif method == "trimmed_mean_40":
self.average_values = trimmed_mean(df_used, percent=0.4, axis=0)
else:
raise ValueError(f"method {method} not recognized")
self.fit_runtime = datetime.datetime.now() - self.startTime
self.lower, self.upper = historic_quantile(
df_used, prediction_interval=self.prediction_interval
@@ -366,8 +372,10 @@ def get_new_params(self, method: str = 'random'):
"Midhinge",
"Weighted_Mean",
"Exp_Weighted_Mean",
"trimmed_mean_20",
"trimmed_mean_40",
],
-            [0.3, 0.3, 0.01, 0.1, 0.4, 0.1],
+            [0.3, 0.3, 0.01, 0.1, 0.4, 0.1, 0.05, 0.05],
)[0]

return {
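For context, a trimmed mean averages after discarding a fraction of the most extreme values, which makes `AverageValueNaive` more robust to outliers. A minimal NaN-aware sketch of the idea; the real `autots.tools.percentile.trimmed_mean` may differ, and the assumption that `percent` is split evenly across both tails is mine:

```python
import numpy as np

def trimmed_mean_sketch(arr, percent=0.4, axis=0):
    # Sort each column so extremes sit at both ends (NaNs sort last).
    sorted_arr = np.sort(np.asarray(arr, dtype=float), axis=axis)
    n = sorted_arr.shape[axis]
    cut = int(n * percent / 2)  # assumed: percent/2 trimmed from each tail
    kept = np.take(sorted_arr, np.arange(cut, n - cut), axis=axis)
    return np.nanmean(kept, axis=axis)  # ignore any NaNs that survive the trim

# "trimmed_mean_40" keeps roughly the middle 60% of each column's history:
history = np.array([[1.0, 10.0], [2.0, 11.0], [3.0, 12.0], [4.0, 13.0], [100.0, 14.0]])
print(trimmed_mean_sketch(history, percent=0.4, axis=0))  # [ 3. 12.] -- the 100.0 outlier is dropped
```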
8 changes: 6 additions & 2 deletions autots/models/cassandra.py
@@ -2011,8 +2011,9 @@ def get_new_params(self, method='fast'):
'lstsq',
'linalg_solve',
'bayesian_linear',
+                'l1_positive',
],
-            [0.8, 0.15, 0.05],
+            [0.8, 0.15, 0.05, 0.01],
)[0]
recency_weighting = random.choices(
[None, 0.05, 0.1, 0.25, 0.5], [0.7, 0.1, 0.1, 0.1, 0.05]
@@ -2537,8 +2538,11 @@ def lstsq_minimize(X, y, maxiter=15000, cost_function="l1", method=None):
elif cost_function == "quantile":
cost_func = cost_function_quantile
elif cost_function == "l1_positive":
-        bounds = [(0, 14) for x in x0]
+        max_bound = 14
+        bounds = [(0, max_bound) for x in x0]
         cost_func = cost_function_l1
+        x0[x0 <= 0] = 0.000001
+        x0[x0 > max_bound] = max_bound - 0.0001
else:
cost_func = cost_function_l1
return minimize(
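The new clipping lines matter because SciPy's bounded minimizers want the starting point `x0` strictly inside the feasible box. A rough, self-contained sketch of the `l1_positive` pattern (not the exact Cassandra code; the least-squares warm start is an assumption):

```python
import numpy as np
from scipy.optimize import minimize

def l1_positive_fit(X, y, max_bound=14):
    # Unconstrained warm start, then clipped into bounds as in lstsq_minimize.
    x0 = np.linalg.lstsq(X, y, rcond=None)[0]
    x0[x0 <= 0] = 0.000001
    x0[x0 > max_bound] = max_bound - 0.0001
    bounds = [(0, max_bound) for _ in x0]
    cost_function_l1 = lambda w: np.sum(np.abs(y - X @ w))
    # minimize() selects L-BFGS-B automatically when bounds are given.
    return minimize(cost_function_l1, x0, bounds=bounds).x

rng = np.random.default_rng(0)
X = rng.random((50, 3))
y = X @ np.array([1.0, 0.5, 2.0]) + rng.normal(0, 0.01, 50)
print(l1_positive_fit(X, y))  # approximately [1.0, 0.5, 2.0]
```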
32 changes: 21 additions & 11 deletions autots/models/ensemble.py
@@ -117,10 +117,16 @@ def parse_mosaic(ensemble):
# zero is considered None here
if swindow == 0:
swindow = None
if "crosshair_lite" in ensemble:
crosshair = 'crosshair_lite'
elif 'crosshair' in ensemble:
crosshair = True
else:
crosshair = False
return {
'metric': metric,
'smoothing_window': swindow,
'crosshair': "crosshair" in ensemble,
'crosshair': crosshair,
'n_models': n_models,
}
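Note the branch order above is load-bearing: `"crosshair"` is a substring of `"crosshair_lite"`, so the lite check must come first. Isolated (the ensemble string below is hypothetical):

```python
def parse_crosshair(ensemble: str):
    # Order matters: a plain "crosshair" test would also match "crosshair_lite".
    if "crosshair_lite" in ensemble:
        return "crosshair_lite"
    elif "crosshair" in ensemble:
        return True
    return False

print(parse_crosshair("mosaic-mae-crosshair_lite"))  # 'crosshair_lite', not True
```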

@@ -481,7 +487,7 @@ def mosaic_classifier(df_train, known, classifier_params=None):
"model_params": {
'n_estimators': 62,
'max_features': 0.181116,
-            'max_leaves': 261,
+            'max_leaf_nodes': 261,
'criterion': 'entropy',
},
}
@@ -1564,17 +1570,21 @@ def HorizontalTemplateGenerator(
return ensemble_templates


-def generate_crosshair_score(error_matrix):
-    arr_size = error_matrix.size
-    base_weight = 0.001 / arr_size
-    sum_error = np.sum(error_matrix) * base_weight
+def generate_crosshair_score(error_matrix, method=None):
+    # 'lite' only takes the weighted axis down a series not from other series
+    if method == 'crosshair_lite':
+        return error_matrix + (np.median(error_matrix, axis=0) / 3)
+    else:
+        arr_size = error_matrix.size
+        base_weight = 0.001 / arr_size
+        sum_error = np.sum(error_matrix) * base_weight

-    cross_base = error_matrix * (base_weight * 50)
-    row_sums = cross_base.sum(axis=1)
-    col_sums = cross_base.sum(axis=0)
-    outer_sum = np.add.outer(row_sums, col_sums)
+        cross_base = error_matrix * (base_weight * 50)
+        row_sums = cross_base.sum(axis=1)
+        col_sums = cross_base.sum(axis=0)
+        outer_sum = np.add.outer(row_sums, col_sums)

-    return error_matrix + sum_error + outer_sum
+        return error_matrix + sum_error + outer_sum


def generate_crosshair_score_list(error_list):
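Numerically, the lite variant penalizes each cell with only one third of its own series' median error, while the full crosshair also leaks error across rows and columns through the outer sum. A toy comparison, assuming rows are forecast steps and columns are series (the shape convention is my assumption):

```python
import numpy as np

errors = np.array([[1.0, 4.0],
                   [2.0, 5.0],
                   [3.0, 6.0]])  # 3 forecast steps x 2 series

# crosshair_lite: the penalty never crosses series boundaries.
lite = errors + (np.median(errors, axis=0) / 3)
print(lite)  # column medians [2, 5] / 3, added down each column

# full crosshair: every cell absorbs a little error from all rows and columns.
base_weight = 0.001 / errors.size
sum_error = np.sum(errors) * base_weight
cross_base = errors * (base_weight * 50)
outer_sum = np.add.outer(cross_base.sum(axis=1), cross_base.sum(axis=0))
print(errors + sum_error + outer_sum)
```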
84 changes: 74 additions & 10 deletions autots/models/neural_forecast.py
@@ -44,6 +44,7 @@ def __init__(
activation='ReLU',
scaler_type='robust',
model_args={},
+        point_quantile=None,
**kwargs,
):
ModelObject.__init__(
@@ -66,11 +67,18 @@
self.activation = activation
self.scaler_type = scaler_type
self.model_args = model_args
+        self.point_quantile = point_quantile
self.forecast_length = forecast_length
self.df_train = None
self.static_regressor = None

-    def fit(self, df, future_regressor=None, static_regressor=None):
+    def fit(
+        self,
+        df,
+        future_regressor=None,
+        static_regressor=None,
+        regressor_per_series=None,
+    ):
"""Train algorithm given data supplied.

Args:
@@ -86,6 +94,9 @@ def fit(self, df, future_regressor=None, static_regressor=None):
self.static_regressor = static_regressor
if isinstance(self.static_regressor, pd.DataFrame):
static_cols = static_regressor.columns.tolist()
+        if regressor_per_series is not None:
+            if not isinstance(regressor_per_series, dict):
+                raise ValueError("regressor_per_series in incorrect format")

from neuralforecast import NeuralForecast
from neuralforecast.losses.pytorch import (
@@ -121,7 +132,12 @@
logging.getLogger("pytorch_lightning").setLevel(logging.CRITICAL)
loss = self.loss
if loss == "MQLoss":
-            loss = MQLoss(level=levels)
+            if self.point_quantile is None:
+                loss = MQLoss(level=levels)
+            else:
+                div = (1 - self.prediction_interval) / 2
+                quantiles = [div, 1 - div, self.point_quantile]
+                loss = MQLoss(quantiles=quantiles)
elif loss == "Poisson":
loss = DistributionLoss(
distribution='Poisson', level=levels, return_params=False
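The effect of `point_quantile` is just to hand `MQLoss` an explicit quantile triple instead of a symmetric level; with hypothetical settings:

```python
# Hypothetical settings: prediction_interval=0.9, point_quantile=0.45.
prediction_interval, point_quantile = 0.9, 0.45
div = (1 - prediction_interval) / 2
quantiles = [div, 1 - div, point_quantile]
print(quantiles)  # ~[0.05, 0.95, 0.45]: lower bound, upper bound, point forecast
```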
@@ -187,7 +203,10 @@
models = self.model
model_args = self.model_args
if self.regression_type in ['User', 'user', True]:
self.base_args["futr_exog_list"] = future_regressor.columns.tolist()
regr_cols = future_regressor.columns.tolist()
if regressor_per_series is not None:
regr_cols + next(iter(regressor_per_series.values())).columns.tolist()
self.base_args["futr_exog_list"] = regr_cols
self.base_args['stat_exog_list'] = static_cols

if isinstance(models, list):
@@ -221,6 +240,17 @@
silly_format = silly_format.merge(
future_regressor, left_on='ds', right_index=True
)
+            if regressor_per_series is not None:
+                full_df = []
+                for key, value in regressor_per_series.items():
+                    local_copy = value.copy().reindex(df.index)
+                    local_copy.index.name = 'ds'
+                    local_copy = local_copy.reset_index()
+                    local_copy['unique_id'] = str(key)
+                    full_df.append(local_copy)
+                silly_format = silly_format.merge(
+                    pd.concat(full_df), on=['unique_id', 'ds'], how='left'
+                ).fillna(0)
self.nf = NeuralForecast(models=models, freq=freq)
if self.static_regressor is None:
self.nf.fit(df=silly_format)
Expand All @@ -234,7 +264,11 @@ def fit(self, df, future_regressor=None, static_regressor=None):
return self
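The per-series regressors arrive as a dict of datetime-indexed DataFrames and are reshaped into NeuralForecast's long format before merging on `['unique_id', 'ds']`. A toy run of the same loop (sample data hypothetical):

```python
import pandas as pd

idx = pd.date_range("2024-01-01", periods=3, freq="D")
regressor_per_series = {
    "seriesA": pd.DataFrame({"promo": [0, 1, 0]}, index=idx),
    "seriesB": pd.DataFrame({"promo": [1, 1, 0]}, index=idx),
}

full_df = []
for key, value in regressor_per_series.items():
    local_copy = value.copy().reindex(idx)  # align to the training index
    local_copy.index.name = 'ds'
    local_copy = local_copy.reset_index()
    local_copy['unique_id'] = str(key)  # NeuralForecast's series identifier
    full_df.append(local_copy)
long_regr = pd.concat(full_df)
print(long_regr)  # columns: ds, promo, unique_id -- ready to merge on ['unique_id', 'ds']
```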

def predict(
-        self, forecast_length=None, future_regressor=None, just_point_forecast=False
+        self,
+        forecast_length=None,
+        future_regressor=None,
+        just_point_forecast=False,
+        regressor_per_series=None,
):
predictStartTime = datetime.datetime.now()
if self.regression_type in ['User', 'user', True]:
@@ -249,6 +283,17 @@ def predict(
future_regressor, left_index=True, right_index=True
)
futr_df = futr_df.reset_index(names='ds')
+            if regressor_per_series is not None:
+                full_df = []
+                for key, value in regressor_per_series.items():
+                    local_copy = value.copy().reindex(index)
+                    local_copy.index.name = 'ds'
+                    local_copy = local_copy.reset_index()
+                    local_copy['unique_id'] = str(key)
+                    full_df.append(local_copy)
+                futr_df = futr_df.merge(
+                    pd.concat(full_df), on=['unique_id', 'ds'], how='left'
+                ).fillna(0)
self.futr_df = futr_df
long_forecast = self.nf.predict(futr_df=futr_df)
else:
@@ -259,6 +304,9 @@
target_col = long_forecast.columns[-1]
else:
target_col = target_col[0]
+        if self.point_quantile is not None:
+            # print(long_forecast.columns)
+            target_col = long_forecast.columns[-1]
forecast = long_forecast.reset_index().pivot_table(
index='ds', columns='unique_id', values=target_col
)[self.column_names]
@@ -274,10 +322,12 @@
)
else:
target_col = [x for x in long_forecast.columns if "hi-" in x][0]
# print(f"upper target col: {target_col}")
upper_forecast = long_forecast.reset_index().pivot_table(
index='ds', columns='unique_id', values=target_col
)[self.column_names]
target_col = [x for x in long_forecast.columns if "lo-" in x][0]
# print(f"lower target col {target_col}")
lower_forecast = long_forecast.reset_index().pivot_table(
index='ds', columns='unique_id', values=target_col
)[self.column_names]
@@ -311,6 +361,16 @@ def get_new_params(self, method: str = 'random'):
regression_type_choice = random.choices([None, "User"], weights=[0.8, 0.2])[
0
]
if "deep" in method:
max_steps = random.choices(
[40, 80, 100, 1000, 5000, 10000, 50000],
[0.2, 0.2, 0.2, 0.1, 0.05, 0.05, 0.01],
)[0]
else:
max_steps = random.choices(
[40, 80, 100, 1000, 5000],
[0.2, 0.2, 0.2, 0.05, 0.03],
)[0]
activation = random.choices(
['ReLU', 'Softplus', 'Tanh', 'SELU', 'LeakyReLU', 'PReLU', 'Sigmoid'],
[0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1],
@@ -328,8 +388,13 @@
"SMAPE",
"StudentT",
],
-            [0.3, 0.1, 0.01, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.01],
+            [0.5, 0.1, 0.01, 0.1, 0.1, 0.01, 0.1, 0.1, 0.1, 0.01],
)[0]
+        point_quantile = None
+        if loss == "MQLoss":
+            point_quantile = random.choices(
+                [None, 0.35, 0.45, 0.55, 0.65, 0.7], [0.5, 0.1, 0.1, 0.1, 0.1, 0.1]
+            )[0]
if models == "TFT":
model_args = {
"n_head": random.choice([2, 4]),
@@ -368,14 +433,12 @@
'learning_rate': random.choices(
[0.001, 0.1, 0.01, 0.0003, 0.00001], [0.4, 0.1, 0.1, 0.1, 0.1]
)[0],
"max_steps": random.choices(
[40, 80, 100, 1000],
[0.2, 0.2, 0.2, 0.05],
)[0],
"max_steps": max_steps,
'input_size': random.choices(
[10, 28, "2ForecastLength", "3ForecastLength"], [0.2, 0.2, 0.2, 0.2]
)[0],
# "early_stop_patience_steps": random.choice([1, 3, 5]),
"point_quantile": point_quantile,
"model_args": model_args,
'regression_type': regression_type_choice,
}
@@ -390,13 +453,14 @@ def get_params(self):
'learning_rate': self.learning_rate,
"max_steps": self.max_steps,
'input_size': self.input_size,
+            'point_quantile': self.point_quantile,
"model_args": self.model_args,
'regression_type': self.regression_type,
}


if False:
-    from autots.models.neural_forecast import NeuralForecast
+    # from autots.models.neural_forecast import NeuralForecast
from autots import load_daily, create_regressor, infer_frequency

df = load_daily(long=False)