Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

0.6.9 #227

Merged
merged 8 commits into from
Jan 22, 2024
Merged

0.6.9 #227

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2022 Colin Catlin
Copyright (c) 2024 Colin Catlin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
7 changes: 3 additions & 4 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.6.8 🇺🇦 🇺🇦 🇺🇦
* bug fixes, robust for OpenBLAS nan handling kernel failures
* added BKBandpassFilter
* added expand_horizontal for scaling mosaics
# 0.6.9 🇺🇦 🇺🇦 🇺🇦
* expanded regressor options for MultivariateRegression, NeuralForecast (currently only available directly, not from AutoTS class)
* matse bug fix on all 0 history

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.8'
__version__ = '0.6.9'

TransformTS = GeneralTransformer

Expand Down
3 changes: 2 additions & 1 deletion autots/evaluator/anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def detect(self, df):
model = GeneralTransformer(
verbose=2, **self.transform_dict
) # DATEPART, LOG, SMOOTHING, DIFF, CLIP OUTLIERS with high z score
self.df_anomaly = model.fit_transform(self.df_anomaly)
# the post selecting by columns is for CenterSplit and any similar renames or expansions
self.df_anomaly = model.fit_transform(self.df_anomaly)[self.df.columns]

if self.forecast_params is not None:
backcast = back_forecast(
Expand Down
119 changes: 18 additions & 101 deletions autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ def __init__(
per_series_uwmse=None,
per_series_smoothness=None,
per_series_mate=None,
per_series_matse=None,
per_series_wasserstein=None,
per_series_dwd=None,
model_count: int = 0,
Expand All @@ -1005,6 +1006,7 @@ def __init__(
self.per_series_uwmse = per_series_uwmse
self.per_series_smoothness = per_series_smoothness
self.per_series_mate = per_series_mate
self.per_series_matse = per_series_matse
self.per_series_wasserstein = per_series_wasserstein
self.per_series_dwd = per_series_dwd
self.full_mae_ids = []
Expand Down Expand Up @@ -1083,6 +1085,9 @@ def concat(self, another_eval):
self.per_series_mate = pd.concat(
[self.per_series_mate, another_eval.per_series_mate], axis=0, sort=False
)
self.per_series_matse = pd.concat(
[self.per_series_matse, another_eval.per_series_matse], axis=0, sort=False
)
self.per_series_wasserstein = pd.concat(
[self.per_series_wasserstein, another_eval.per_series_wasserstein],
axis=0,
Expand Down Expand Up @@ -1667,57 +1672,6 @@ def virtual_memory():
ps_metric.index = [model_id] * ps_metric.shape[0]
ps_metric.index.name = "ID"
template_result.per_series_metrics.append(ps_metric)

"""
template_result.per_series_mae.append(
_ps_metric(ps_metric, 'mae', model_id)
)
template_result.per_series_made.append(
_ps_metric(ps_metric, 'made', model_id)
)
template_result.per_series_contour.append(
_ps_metric(ps_metric, 'contour', model_id)
)
template_result.per_series_rmse.append(
_ps_metric(ps_metric, 'rmse', model_id)
)
template_result.per_series_spl.append(
_ps_metric(ps_metric, 'spl', model_id)
)
template_result.per_series_mle.append(
_ps_metric(ps_metric, 'mle', model_id)
)
template_result.per_series_imle.append(
_ps_metric(ps_metric, 'imle', model_id)
)
template_result.per_series_maxe.append(
_ps_metric(ps_metric, 'maxe', model_id)
)
template_result.per_series_oda.append(
_ps_metric(ps_metric, 'oda', model_id)
)
template_result.per_series_mqae.append(
_ps_metric(ps_metric, 'mqae', model_id)
)
template_result.per_series_dwae.append(
_ps_metric(ps_metric, 'dwae', model_id)
)
template_result.per_series_ewmae.append(
_ps_metric(ps_metric, 'ewmae', model_id)
)
template_result.per_series_uwmse.append(
_ps_metric(ps_metric, 'uwmse', model_id)
)
template_result.per_series_smoothness.append(
_ps_metric(ps_metric, 'smoothness', model_id)
)
template_result.per_series_mate.append(
_ps_metric(ps_metric, 'mate', model_id)
)
template_result.per_series_wasserstein.append(
_ps_metric(ps_metric, 'wasserstein', model_id)
)
"""
if 'distance' in ensemble:
cur_smape = model_error.per_timestamp.loc['weighted_smape']
cur_smape = pd.DataFrame(cur_smape).transpose()
Expand Down Expand Up @@ -1864,62 +1818,15 @@ def virtual_memory():
template_result.per_series_mate = ps[ps['autots_eval_metric'] == 'mate'].drop(
columns='autots_eval_metric'
)
template_result.per_series_matse = ps[ps['autots_eval_metric'] == 'matse'].drop(
columns='autots_eval_metric'
)
template_result.per_series_wasserstein = ps[
ps['autots_eval_metric'] == 'wasserstein'
].drop(columns='autots_eval_metric')
template_result.per_series_dwd = ps[ps['autots_eval_metric'] == 'dwd'].drop(
columns='autots_eval_metric'
)
"""
template_result.per_series_mae = pd.concat(
template_result.per_series_mae, axis=0
)
template_result.per_series_made = pd.concat(
template_result.per_series_made, axis=0
)
template_result.per_series_contour = pd.concat(
template_result.per_series_contour, axis=0
)
template_result.per_series_rmse = pd.concat(
template_result.per_series_rmse, axis=0
)
template_result.per_series_spl = pd.concat(
template_result.per_series_spl, axis=0
)
template_result.per_series_mle = pd.concat(
template_result.per_series_mle, axis=0
)
template_result.per_series_imle = pd.concat(
template_result.per_series_imle, axis=0
)
template_result.per_series_maxe = pd.concat(
template_result.per_series_maxe, axis=0
)
template_result.per_series_oda = pd.concat(
template_result.per_series_oda, axis=0
)
template_result.per_series_mqae = pd.concat(
template_result.per_series_mqae, axis=0
)
template_result.per_series_dwae = pd.concat(
template_result.per_series_dwae, axis=0
)
template_result.per_series_ewmae = pd.concat(
template_result.per_series_ewmae, axis=0
)
template_result.per_series_uwmse = pd.concat(
template_result.per_series_uwmse, axis=0
)
template_result.per_series_smoothness = pd.concat(
template_result.per_series_smoothness, axis=0
)
template_result.per_series_mate = pd.concat(
template_result.per_series_mate, axis=0
)
template_result.per_series_wasserstein = pd.concat(
template_result.per_series_wasserstein, axis=0
)
"""
else:
template_result.per_series_metrics = pd.DataFrame()
template_result.per_series_mae = pd.DataFrame()
Expand All @@ -1937,6 +1844,7 @@ def virtual_memory():
template_result.per_series_uwmse = pd.DataFrame()
template_result.per_series_smoothness = pd.DataFrame()
template_result.per_series_mate = pd.DataFrame()
template_result.per_series_matse = pd.DataFrame()
template_result.per_series_wasserstein = pd.DataFrame()
template_result.per_series_dwd = pd.DataFrame()
if verbose > 0 and not template.empty:
Expand Down Expand Up @@ -2731,6 +2639,7 @@ def generate_score_per_series(
uwmse_weighting = metric_weighting.get('uwmse_weighting', 0)
smoothness_weighting = metric_weighting.get('smoothness_weighting', 0)
mate_weighting = metric_weighting.get('mate_weighting', 0)
matse_weighting = metric_weighting.get('matse_weighting', 0)
wasserstein_weighting = metric_weighting.get('wasserstein_weighting', 0)
dwd_weighting = metric_weighting.get('dwd_weighting', 0)

Expand Down Expand Up @@ -2823,6 +2732,14 @@ def generate_score_per_series(
)
mate_score = results_object.per_series_mate / mate_scaler
overall_score = overall_score + (mate_score * mate_weighting)
if matse_weighting != 0:
matse_scaler = (
results_object.per_series_matse[results_object.per_series_matse != 0]
.min()
.fillna(1)
)
matse_score = results_object.per_series_matse / matse_scaler
overall_score = overall_score + (matse_score * matse_weighting)
if wasserstein_weighting != 0:
wasserstein_scaler = (
results_object.per_series_wasserstein[
Expand Down
19 changes: 13 additions & 6 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ def fit(
validation_template = validation_template.drop_duplicates(
subset=['Model', 'ModelParameters', 'TransformationParameters']
)
self.validation_template = validation_template[self.template_cols]
self.validation_template = validation_template[self.template_cols_id]
if self.validate_import is not None:
self.validation_template = pd.concat(
[self.validation_template, self.validate_import]
Expand Down Expand Up @@ -1813,7 +1813,14 @@ def validation_agg(self):
)
return self

def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
def _set_best_model(self, metric_weighting=None, allow_horizontal=True, n=1):
"""Sets best model based on validation results.

Args:
            metric_weighting (dict): if not None, overrides the input metric weighting with this metric weighting
allow_horizontal (bool): if False, force no horizontal, if True, allows if ensemble param and runs occurred
            n (int): default 1 means choose the best model, 2 = use 2nd best, and so on
"""
if metric_weighting is None:
metric_weighting = self.metric_weighting
hens_model_results = self.initial_results.model_results[
Expand All @@ -1828,7 +1835,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
# horizontal ensembles can't be compared directly to others because they don't get run through all validations
# they are built themselves from cross validation so a full rerun of validations is unnecessary
self.best_model_non_horizontal = self._best_non_horizontal(
metric_weighting=metric_weighting
metric_weighting=metric_weighting, n=n
)
if not hens_model_results.empty and requested_H_ens:
hens_model_results.loc['Score'] = generate_score(
Expand All @@ -1838,7 +1845,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
)
self.best_model = hens_model_results.sort_values(
by="Score", ascending=True, na_position='last'
).head(1)[self.template_cols_id]
).iloc[(n - 1) : n][self.template_cols_id]
self.ensemble_check = 1
# print a warning if requested but unable to produce a horz ensemble
elif requested_H_ens:
Expand All @@ -1859,7 +1866,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
self.parse_best_model()
return self

def _best_non_horizontal(self, metric_weighting=None, series=None):
def _best_non_horizontal(self, metric_weighting=None, series=None, n=1):
if self.validation_results is None:
if not self.initial_results.model_results.empty:
self = self.validation_agg()
Expand Down Expand Up @@ -1908,7 +1915,7 @@ def _best_non_horizontal(self, metric_weighting=None, series=None):
by="Score", ascending=True, na_position='last'
)
.drop_duplicates(subset=self.template_cols)
.head(1)[self.template_cols_id]
.iloc[(n - 1) : n][self.template_cols_id]
)
except IndexError:
raise ValueError(
Expand Down
4 changes: 3 additions & 1 deletion autots/evaluator/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,9 @@ def full_metric_evaluation(
else:
mate = np.abs(np.sum(full_errors, axis=0))
# possibly temporary
matse = mate / np.sum(A, axis=0)
matse_scale = np.sum(A, axis=0)
matse_scale[matse_scale == 0] = 1
matse = mate / matse_scale

direc_sign = np.sign(F - last_of_array) == np.sign(A - last_of_array)
weights = np.geomspace(1, 10, full_mae_errors.shape[0])[:, np.newaxis]
Expand Down
4 changes: 2 additions & 2 deletions autots/models/cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ def process_components(self, to_origin_space=True):
pd.DataFrame(
self.components[:, comp, :],
index=t_indx,
columns=self.column_names,
columns=self.df.columns,
),
components=True,
bounds=True,
Expand Down Expand Up @@ -2537,7 +2537,7 @@ def lstsq_minimize(X, y, maxiter=15000, cost_function="l1", method=None):
elif cost_function == "quantile":
cost_func = cost_function_quantile
elif cost_function == "l1_positive":
bounds = [(0, 10) for x in x0]
bounds = [(0, 14) for x in x0]
cost_func = cost_function_l1
else:
cost_func = cost_function_l1
Expand Down
12 changes: 12 additions & 0 deletions autots/models/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,8 @@ def horizontal_classifier(
if classifier_params is None:
# found using FLAML
classifier_params = {"model": 'KNN', "model_params": {'n_neighbors': 5}}
# newer, but don't like as much
# RandomForest {'n_estimators': 69, 'max_features': 0.5418860350847585, 'max_leaves': 439, 'criterion': 'gini'}

# known = {'EXUSEU': 'xx1', 'MCOILWTICO': 'xx2', 'CSUSHPISA': 'xx3'}
Xt, Y, Xf = horizontal_xy(df_train, known)
Expand Down Expand Up @@ -473,6 +475,16 @@ def mosaic_classifier(df_train, known, classifier_params=None):
'criterion': 'gini',
},
}
# slightly newer, on a mosaic-weighted-0-40
classifier_params = {
"model": 'ExtraTrees',
"model_params": {
'n_estimators': 62,
'max_features': 0.181116,
'max_leaves': 261,
'criterion': 'entropy',
},
}

X, Xf, Y, to_predict = mosaic_xy(df_train, known)

Expand Down
Loading
Loading