Skip to content

Commit

Permalink
Merge pull request #227 from winedarksea/dev
Browse files Browse the repository at this point in the history
0.6.9
  • Loading branch information
winedarksea committed Jan 22, 2024
2 parents c23c244 + c40093a commit 3e6baff
Show file tree
Hide file tree
Showing 22 changed files with 190 additions and 159 deletions.
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
MIT License

Copyright (c) 2022 Colin Catlin
Copyright (c) 2024 Colin Catlin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
Expand Down
7 changes: 3 additions & 4 deletions TODO.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,9 @@
* The most recent data will generally be the most important
* Forecasts are desired for the future immediately following the most recent data.

# 0.6.8 🇺🇦 🇺🇦 🇺🇦
* bug fixes, robust for OpenBLAS nan handling kernel failures
* added BKBandpassFilter
* added expand_horizontal for scaling mosaics
# 0.6.9 🇺🇦 🇺🇦 🇺🇦
* expanded regressor options for MultivariateRegression, NeuralForecast (currently only available directly, not from AutoTS class)
* matse bug fix on all-zero history

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
Expand Down
2 changes: 1 addition & 1 deletion autots/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from autots.models.cassandra import Cassandra


__version__ = '0.6.8'
__version__ = '0.6.9'

TransformTS = GeneralTransformer

Expand Down
3 changes: 2 additions & 1 deletion autots/evaluator/anomaly_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ def detect(self, df):
model = GeneralTransformer(
verbose=2, **self.transform_dict
) # DATEPART, LOG, SMOOTHING, DIFF, CLIP OUTLIERS with high z score
self.df_anomaly = model.fit_transform(self.df_anomaly)
# the post selecting by columns is for CenterSplit and any similar renames or expansions
self.df_anomaly = model.fit_transform(self.df_anomaly)[self.df.columns]

if self.forecast_params is not None:
backcast = back_forecast(
Expand Down
119 changes: 18 additions & 101 deletions autots/evaluator/auto_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -982,6 +982,7 @@ def __init__(
per_series_uwmse=None,
per_series_smoothness=None,
per_series_mate=None,
per_series_matse=None,
per_series_wasserstein=None,
per_series_dwd=None,
model_count: int = 0,
Expand All @@ -1005,6 +1006,7 @@ def __init__(
self.per_series_uwmse = per_series_uwmse
self.per_series_smoothness = per_series_smoothness
self.per_series_mate = per_series_mate
self.per_series_matse = per_series_matse
self.per_series_wasserstein = per_series_wasserstein
self.per_series_dwd = per_series_dwd
self.full_mae_ids = []
Expand Down Expand Up @@ -1083,6 +1085,9 @@ def concat(self, another_eval):
self.per_series_mate = pd.concat(
[self.per_series_mate, another_eval.per_series_mate], axis=0, sort=False
)
self.per_series_matse = pd.concat(
[self.per_series_matse, another_eval.per_series_matse], axis=0, sort=False
)
self.per_series_wasserstein = pd.concat(
[self.per_series_wasserstein, another_eval.per_series_wasserstein],
axis=0,
Expand Down Expand Up @@ -1667,57 +1672,6 @@ def virtual_memory():
ps_metric.index = [model_id] * ps_metric.shape[0]
ps_metric.index.name = "ID"
template_result.per_series_metrics.append(ps_metric)

"""
template_result.per_series_mae.append(
_ps_metric(ps_metric, 'mae', model_id)
)
template_result.per_series_made.append(
_ps_metric(ps_metric, 'made', model_id)
)
template_result.per_series_contour.append(
_ps_metric(ps_metric, 'contour', model_id)
)
template_result.per_series_rmse.append(
_ps_metric(ps_metric, 'rmse', model_id)
)
template_result.per_series_spl.append(
_ps_metric(ps_metric, 'spl', model_id)
)
template_result.per_series_mle.append(
_ps_metric(ps_metric, 'mle', model_id)
)
template_result.per_series_imle.append(
_ps_metric(ps_metric, 'imle', model_id)
)
template_result.per_series_maxe.append(
_ps_metric(ps_metric, 'maxe', model_id)
)
template_result.per_series_oda.append(
_ps_metric(ps_metric, 'oda', model_id)
)
template_result.per_series_mqae.append(
_ps_metric(ps_metric, 'mqae', model_id)
)
template_result.per_series_dwae.append(
_ps_metric(ps_metric, 'dwae', model_id)
)
template_result.per_series_ewmae.append(
_ps_metric(ps_metric, 'ewmae', model_id)
)
template_result.per_series_uwmse.append(
_ps_metric(ps_metric, 'uwmse', model_id)
)
template_result.per_series_smoothness.append(
_ps_metric(ps_metric, 'smoothness', model_id)
)
template_result.per_series_mate.append(
_ps_metric(ps_metric, 'mate', model_id)
)
template_result.per_series_wasserstein.append(
_ps_metric(ps_metric, 'wasserstein', model_id)
)
"""
if 'distance' in ensemble:
cur_smape = model_error.per_timestamp.loc['weighted_smape']
cur_smape = pd.DataFrame(cur_smape).transpose()
Expand Down Expand Up @@ -1864,62 +1818,15 @@ def virtual_memory():
template_result.per_series_mate = ps[ps['autots_eval_metric'] == 'mate'].drop(
columns='autots_eval_metric'
)
template_result.per_series_matse = ps[ps['autots_eval_metric'] == 'matse'].drop(
columns='autots_eval_metric'
)
template_result.per_series_wasserstein = ps[
ps['autots_eval_metric'] == 'wasserstein'
].drop(columns='autots_eval_metric')
template_result.per_series_dwd = ps[ps['autots_eval_metric'] == 'dwd'].drop(
columns='autots_eval_metric'
)
"""
template_result.per_series_mae = pd.concat(
template_result.per_series_mae, axis=0
)
template_result.per_series_made = pd.concat(
template_result.per_series_made, axis=0
)
template_result.per_series_contour = pd.concat(
template_result.per_series_contour, axis=0
)
template_result.per_series_rmse = pd.concat(
template_result.per_series_rmse, axis=0
)
template_result.per_series_spl = pd.concat(
template_result.per_series_spl, axis=0
)
template_result.per_series_mle = pd.concat(
template_result.per_series_mle, axis=0
)
template_result.per_series_imle = pd.concat(
template_result.per_series_imle, axis=0
)
template_result.per_series_maxe = pd.concat(
template_result.per_series_maxe, axis=0
)
template_result.per_series_oda = pd.concat(
template_result.per_series_oda, axis=0
)
template_result.per_series_mqae = pd.concat(
template_result.per_series_mqae, axis=0
)
template_result.per_series_dwae = pd.concat(
template_result.per_series_dwae, axis=0
)
template_result.per_series_ewmae = pd.concat(
template_result.per_series_ewmae, axis=0
)
template_result.per_series_uwmse = pd.concat(
template_result.per_series_uwmse, axis=0
)
template_result.per_series_smoothness = pd.concat(
template_result.per_series_smoothness, axis=0
)
template_result.per_series_mate = pd.concat(
template_result.per_series_mate, axis=0
)
template_result.per_series_wasserstein = pd.concat(
template_result.per_series_wasserstein, axis=0
)
"""
else:
template_result.per_series_metrics = pd.DataFrame()
template_result.per_series_mae = pd.DataFrame()
Expand All @@ -1937,6 +1844,7 @@ def virtual_memory():
template_result.per_series_uwmse = pd.DataFrame()
template_result.per_series_smoothness = pd.DataFrame()
template_result.per_series_mate = pd.DataFrame()
template_result.per_series_matse = pd.DataFrame()
template_result.per_series_wasserstein = pd.DataFrame()
template_result.per_series_dwd = pd.DataFrame()
if verbose > 0 and not template.empty:
Expand Down Expand Up @@ -2731,6 +2639,7 @@ def generate_score_per_series(
uwmse_weighting = metric_weighting.get('uwmse_weighting', 0)
smoothness_weighting = metric_weighting.get('smoothness_weighting', 0)
mate_weighting = metric_weighting.get('mate_weighting', 0)
matse_weighting = metric_weighting.get('matse_weighting', 0)
wasserstein_weighting = metric_weighting.get('wasserstein_weighting', 0)
dwd_weighting = metric_weighting.get('dwd_weighting', 0)

Expand Down Expand Up @@ -2823,6 +2732,14 @@ def generate_score_per_series(
)
mate_score = results_object.per_series_mate / mate_scaler
overall_score = overall_score + (mate_score * mate_weighting)
if matse_weighting != 0:
matse_scaler = (
results_object.per_series_matse[results_object.per_series_matse != 0]
.min()
.fillna(1)
)
matse_score = results_object.per_series_matse / matse_scaler
overall_score = overall_score + (matse_score * matse_weighting)
if wasserstein_weighting != 0:
wasserstein_scaler = (
results_object.per_series_wasserstein[
Expand Down
19 changes: 13 additions & 6 deletions autots/evaluator/auto_ts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1393,7 +1393,7 @@ def fit(
validation_template = validation_template.drop_duplicates(
subset=['Model', 'ModelParameters', 'TransformationParameters']
)
self.validation_template = validation_template[self.template_cols]
self.validation_template = validation_template[self.template_cols_id]
if self.validate_import is not None:
self.validation_template = pd.concat(
[self.validation_template, self.validate_import]
Expand Down Expand Up @@ -1813,7 +1813,14 @@ def validation_agg(self):
)
return self

def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
def _set_best_model(self, metric_weighting=None, allow_horizontal=True, n=1):
"""Sets best model based on validation results.
Args:
            metric_weighting (dict): if not None, overrides input metric weighting with this metric weighting
allow_horizontal (bool): if False, force no horizontal, if True, allows if ensemble param and runs occurred
            n (int): default 1 means choose best model, 2 = use 2nd best, and so on
"""
if metric_weighting is None:
metric_weighting = self.metric_weighting
hens_model_results = self.initial_results.model_results[
Expand All @@ -1828,7 +1835,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
# horizontal ensembles can't be compared directly to others because they don't get run through all validations
# they are built themselves from cross validation so a full rerun of validations is unnecessary
self.best_model_non_horizontal = self._best_non_horizontal(
metric_weighting=metric_weighting
metric_weighting=metric_weighting, n=n
)
if not hens_model_results.empty and requested_H_ens:
hens_model_results.loc['Score'] = generate_score(
Expand All @@ -1838,7 +1845,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
)
self.best_model = hens_model_results.sort_values(
by="Score", ascending=True, na_position='last'
).head(1)[self.template_cols_id]
).iloc[(n - 1) : n][self.template_cols_id]
self.ensemble_check = 1
# print a warning if requested but unable to produce a horz ensemble
elif requested_H_ens:
Expand All @@ -1859,7 +1866,7 @@ def _set_best_model(self, metric_weighting=None, allow_horizontal=True):
self.parse_best_model()
return self

def _best_non_horizontal(self, metric_weighting=None, series=None):
def _best_non_horizontal(self, metric_weighting=None, series=None, n=1):
if self.validation_results is None:
if not self.initial_results.model_results.empty:
self = self.validation_agg()
Expand Down Expand Up @@ -1908,7 +1915,7 @@ def _best_non_horizontal(self, metric_weighting=None, series=None):
by="Score", ascending=True, na_position='last'
)
.drop_duplicates(subset=self.template_cols)
.head(1)[self.template_cols_id]
.iloc[(n - 1) : n][self.template_cols_id]
)
except IndexError:
raise ValueError(
Expand Down
4 changes: 3 additions & 1 deletion autots/evaluator/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -682,7 +682,9 @@ def full_metric_evaluation(
else:
mate = np.abs(np.sum(full_errors, axis=0))
# possibly temporary
matse = mate / np.sum(A, axis=0)
matse_scale = np.sum(A, axis=0)
matse_scale[matse_scale == 0] = 1
matse = mate / matse_scale

direc_sign = np.sign(F - last_of_array) == np.sign(A - last_of_array)
weights = np.geomspace(1, 10, full_mae_errors.shape[0])[:, np.newaxis]
Expand Down
4 changes: 2 additions & 2 deletions autots/models/cassandra.py
Original file line number Diff line number Diff line change
Expand Up @@ -1138,7 +1138,7 @@ def process_components(self, to_origin_space=True):
pd.DataFrame(
self.components[:, comp, :],
index=t_indx,
columns=self.column_names,
columns=self.df.columns,
),
components=True,
bounds=True,
Expand Down Expand Up @@ -2537,7 +2537,7 @@ def lstsq_minimize(X, y, maxiter=15000, cost_function="l1", method=None):
elif cost_function == "quantile":
cost_func = cost_function_quantile
elif cost_function == "l1_positive":
bounds = [(0, 10) for x in x0]
bounds = [(0, 14) for x in x0]
cost_func = cost_function_l1
else:
cost_func = cost_function_l1
Expand Down
12 changes: 12 additions & 0 deletions autots/models/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,8 @@ def horizontal_classifier(
if classifier_params is None:
# found using FLAML
classifier_params = {"model": 'KNN', "model_params": {'n_neighbors': 5}}
# newer, but don't like as much
# RandomForest {'n_estimators': 69, 'max_features': 0.5418860350847585, 'max_leaves': 439, 'criterion': 'gini'}

# known = {'EXUSEU': 'xx1', 'MCOILWTICO': 'xx2', 'CSUSHPISA': 'xx3'}
Xt, Y, Xf = horizontal_xy(df_train, known)
Expand Down Expand Up @@ -473,6 +475,16 @@ def mosaic_classifier(df_train, known, classifier_params=None):
'criterion': 'gini',
},
}
# slightly newer, on a mosaic-weighted-0-40
classifier_params = {
"model": 'ExtraTrees',
"model_params": {
'n_estimators': 62,
'max_features': 0.181116,
'max_leaves': 261,
'criterion': 'entropy',
},
}

X, Xf, Y, to_predict = mosaic_xy(df_train, known)

Expand Down
Loading

0 comments on commit 3e6baff

Please sign in to comment.