Merge pull request #245 from winedarksea/dev
0.6.14
winedarksea committed May 17, 2024
2 parents bd26a75 + 38f42a7 commit 995c748
Showing 26 changed files with 420 additions and 97 deletions.
9 changes: 5 additions & 4 deletions TODO.md
@@ -13,10 +13,11 @@
* Forecasts are desired for the future immediately following the most recent data.
* trimmed_mean to AverageValueNaive

-# 0.6.13 🇺🇦 🇺🇦 🇺🇦
-* trend_phi directly into Prophet
-* subset arg to make KalmanStateSpace more scalable to memory
-* bug fixes
+# 0.6.14 🇺🇦 🇺🇦 🇺🇦
+* prevent excessive use of 'CenterSplit' and other macro_micro style transformers
+* added ElasticNetwork as subsidiary regression model option
+* KalmanSmoothing, BKBandpassFilter added on_inverse option
+* add threshold arg to AlignLastValue

### Unstable Upstream Packages (those that are frequently broken by maintainers)
* Pytorch-Forecasting
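
A usage sketch of the new AlignLastValue threshold arg from the changelog above; rows, lag, method, and strength are existing AlignLastValue options, while the threshold value and its exact semantics here are assumptions based only on the changelog:

from autots import GeneralTransformer

transformer = GeneralTransformer(
    transformations={"0": "AlignLastValue"},
    transformation_params={
        # threshold is the newly added arg; the other keys are pre-existing options
        "0": {"rows": 1, "lag": 1, "method": "additive", "strength": 1.0, "threshold": 10}
    },
)
# aligned = transformer.fit_transform(df)  # df: wide DataFrame of series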
2 changes: 1 addition & 1 deletion autots/__init__.py
@@ -27,7 +27,7 @@
from autots.models.cassandra import Cassandra


-__version__ = '0.6.13'
+__version__ = '0.6.14'

TransformTS = GeneralTransformer

32 changes: 16 additions & 16 deletions autots/evaluator/auto_ts.py
Expand Up @@ -418,9 +418,9 @@ def __init__(

full_params['transformations'] = transformations
full_params['transformation_params'] = transformation_params
self.initial_template.loc[
index, 'TransformationParameters'
] = json.dumps(full_params)
self.initial_template.loc[index, 'TransformationParameters'] = (
json.dumps(full_params)
)

self.regressor_used = False
self.grouping_ids = None
@@ -1093,9 +1093,9 @@ def fit(
Args:
    df (pandas.DataFrame): Datetime Indexed dataframe of series, or dataframe of three columns as below.
-    date_col (str): name of datetime column
-    value_col (str): name of column containing the data of series.
-    id_col (str): name of column identifying different series.
+    date_col (str): name of datetime column if long style data
+    value_col (str): name of column containing the data of series if using long style data. NOT for pointing out the most important column if several; use `weights` for that.
+    id_col (str): name of column identifying different series if long style data.
    future_regressor (numpy.Array): single external regressor matching train.index
    weights (dict): {'colname1': 2, 'colname2': 5} - increase importance of a series in metric evaluation. Any left blank assumed to have weight of 1.
        pass the alias 'mean' as a str ie `weights='mean'` to automatically use the mean value of a series as its weight
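
A quick usage sketch of long style input per the docstring above; the column names, DataFrame name, and forecast_length are illustrative, not from the diff:

from autots import AutoTS

model = AutoTS(forecast_length=14)
model = model.fit(
    df_long,              # long style DataFrame of three columns
    date_col='datetime',  # datetime of each observation
    value_col='value',    # observed values
    id_col='series_id',   # which series each row belongs to
)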
@@ -1827,10 +1827,10 @@ def _run_template(
self.model_count = template_result.model_count
# capture results from lower-level template run
if "TotalRuntime" in template_result.model_results.columns:
-    template_result.model_results[
-        'TotalRuntime'
-    ] = template_result.model_results['TotalRuntime'].fillna(
-        pd.Timedelta(seconds=60)
+    template_result.model_results['TotalRuntime'] = (
+        template_result.model_results['TotalRuntime'].fillna(
+            pd.Timedelta(seconds=60)
+        )
    )
else:
    # trying to catch a rare and sneaky bug (perhaps some variety of beetle?)
@@ -1930,9 +1930,9 @@ def _run_validations(
    frac=0.8, random_state=self.random_seed
).reindex(idx)
nan_frac = val_df_train.shape[1] / num_validations
-val_df_train.iloc[
-    -2:, int(nan_frac * y) : int(nan_frac * (y + 1))
-] = np.nan
+val_df_train.iloc[-2:, int(nan_frac * y) : int(nan_frac * (y + 1))] = (
+    np.nan
+)

# run validation template on current slice
result = self._run_template(
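
To make the masking above concrete: with 100 series and num_validations=5, nan_frac works out to 20, so each validation pass y sets a distinct block of columns 20*y through 20*(y + 1) in the last two training rows to NaN.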
@@ -3851,9 +3851,9 @@ def diagnose_params(self, target='runtime', waterfall_plots=True):
)
y = pd.json_normalize(json.loads(row["ModelParameters"]))
y.index = [row['ID']]
-y[
-    'Model'
-] = x  # might need to remove this and do analysis independently for each
+y['Model'] = (
+    x  # might need to remove this and do analysis independently for each
+)
res.append(
    pd.DataFrame(
        {
12 changes: 6 additions & 6 deletions autots/models/base.py
@@ -690,18 +690,18 @@ def long_form_results(
    value_name=value_name,
    id_vars="datetime",
).set_index("datetime")
-upload_upper[
-    interval_name
-] = f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_upper[interval_name] = (
+    f"{round(100 - ((1- self.prediction_interval)/2) * 100, 0)}%"
+)
upload_lower = pd.melt(
    self.lower_forecast.rename_axis(index='datetime').reset_index(),
    var_name=id_name,
    value_name=value_name,
    id_vars="datetime",
).set_index("datetime")
-upload_lower[
-    interval_name
-] = f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+upload_lower[interval_name] = (
+    f"{round(((1- self.prediction_interval)/2) * 100, 0)}%"
+)

upload = pd.concat([upload, upload_upper, upload_lower], axis=0)
if datetime_column is not None:
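
As a worked check of the label expressions above, with prediction_interval=0.9:

round(100 - ((1 - 0.9) / 2) * 100, 0)  # upper label: 95.0, rendered "95.0%"
round(((1 - 0.9) / 2) * 100, 0)        # lower label: 5.0, rendered "5.0%"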
123 changes: 123 additions & 0 deletions autots/models/dnn.py
@@ -1,5 +1,6 @@
"""Neural Nets."""

+import random
import pandas as pd
from autots.tools.shaping import wide_to_3d

@@ -421,3 +422,125 @@ def predict(self, X):
        """Predict on dataframe of X."""
        test = pd.DataFrame(X).to_numpy().reshape((X.shape[0], X.shape[1], 1))
        return pd.DataFrame(self.model.predict(test))


class ElasticNetwork(object):
    def __init__(
        self,
        size: int = 256,
        l1: float = 0.01,
        l2: float = 0.02,
        feature_subsample_rate: float = None,
        optimizer: str = 'adam',
        loss: str = 'mse',
        epochs: int = 20,
        batch_size: int = 32,
        activation: str = "relu",
        verbose: int = 1,
        random_seed: int = 2024,
    ):
        self.name = 'ElasticNetwork'
        self.verbose = verbose
        self.random_seed = random_seed
        self.size = size
        self.l1 = l1
        self.l2 = l2
        self.feature_subsample_rate = feature_subsample_rate
        self.epochs = epochs
        self.batch_size = batch_size
        self.optimizer = optimizer
        self.loss = loss
        self.activation = activation

    def fit(self, X, y):
        import tensorflow as tf  # full namespace needed for tf.gather/tf.stack below
        from tensorflow.keras.models import Sequential
        from tensorflow.keras.layers import Dense, Layer
        from tensorflow.keras.regularizers import L1L2

        # hiding this here as TF is an optional import
        class SubsetDense(Layer):
            def __init__(self, units, input_dim, feature_subsample_rate=0.5, **kwargs):
                super(SubsetDense, self).__init__(**kwargs)
                self.units = units
                self.input_dim = input_dim
                self.feature_subsample_rate = feature_subsample_rate
                self.selected_features_per_unit = []
                self.kernels = []
                self.biases = None

            def build(self, input_shape):
                # Select a subset of the input features for each unit
                num_features = int(self.input_dim * self.feature_subsample_rate)
                for _ in range(self.units):
                    selected_features = random.sample(
                        range(self.input_dim), num_features
                    )
                    self.selected_features_per_unit.append(selected_features)
                    kernel = self.add_weight(
                        shape=(num_features,),
                        initializer='glorot_uniform',
                        name=f'kernel_{len(self.kernels)}',
                    )
                    self.kernels.append(kernel)

                self.biases = self.add_weight(
                    shape=(self.units,), initializer='zeros', name='biases'
                )

            def call(self, inputs):
                outputs = []
                for i in range(self.units):
                    selected_inputs = tf.gather(
                        inputs, self.selected_features_per_unit[i], axis=1
                    )
                    output = (
                        tf.reduce_sum(selected_inputs * self.kernels[i], axis=1)
                        + self.biases[i]
                    )
                    outputs.append(output)
                return tf.stack(outputs, axis=1)

        # Model configuration
        input_dim = X.shape[1]  # Number of input features
        output_dim = y.shape[1]  # Number of outputs

        # Build the model
        if self.feature_subsample_rate is None:
            self.model = Sequential(
                [
                    Dense(
                        self.size,
                        input_dim=input_dim,
                        activation=self.activation,
                        kernel_regularizer=L1L2(l1=self.l1, l2=self.l2),
                    ),  # hidden layer with elastic net (L1 + L2) regularization
                    Dense(output_dim),  # Output layer
                ]
            )
        else:
            self.model = Sequential(
                [
                    SubsetDense(
                        self.size,
                        input_dim=input_dim,
                        feature_subsample_rate=self.feature_subsample_rate,
                    ),
                    tf.keras.layers.Activation(self.activation),
                    SubsetDense(
                        self.size // 2,
                        input_dim=input_dim,
                        feature_subsample_rate=self.feature_subsample_rate,
                    ),
                    tf.keras.layers.Activation(self.activation),
                    Dense(output_dim),  # Output layer
                ]
            )

        # Compile and train the model
        self.model.compile(optimizer=self.optimizer, loss=self.loss)
        self.model.fit(
            X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose
        )

        return self

    def predict(self, X):
        return self.model.predict(X)
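
A minimal usage sketch for the new class (TensorFlow required); the array shapes and hyperparameters below are illustrative assumptions, not part of the commit:

import numpy as np
from autots.models.dnn import ElasticNetwork

X = np.random.rand(200, 20)  # 200 samples, 20 features
y = np.random.rand(200, 3)   # 3 output targets

# default path: single Dense hidden layer with L1L2 elastic net regularization
model = ElasticNetwork(size=64, epochs=5, verbose=0).fit(X, y)
preds = model.predict(X)  # ndarray of shape (200, 3)

# setting feature_subsample_rate instead routes through the SubsetDense layers,
# where each unit sees only a random subset of its input features
sub_model = ElasticNetwork(feature_subsample_rate=0.5, epochs=5, verbose=0).fit(X, y)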
18 changes: 9 additions & 9 deletions autots/models/ensemble.py
@@ -1838,15 +1838,15 @@ def MosaicEnsemble(
    f"Mosaic Ensemble failed on model {row[3]} series {row[2]} and period {row[1]} due to missing model: {e} "
    + mi
)
-melted[
-    'forecast'
-] = fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'upper_forecast'
-] = u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
-melted[
-    'lower_forecast'
-] = l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+melted['forecast'] = (
+    fore  # [forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['upper_forecast'] = (
+    u_fore  # [upper_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)
+melted['lower_forecast'] = (
+    l_fore  # [lower_forecasts[row[3]][row[2]].iloc[row[1]] for row in melted.itertuples()]
+)

forecast_df = melted.pivot(
    values="forecast", columns="series_id", index="forecast_period"
2 changes: 1 addition & 1 deletion autots/models/matrix_var.py
@@ -237,7 +237,7 @@ def get_new_params(self, method: str = 'random'):
    """Return dict of new parameters for parameter tuning."""
    return {
        'method': random.choices(['als', 'dmd'], [0.7, 0.3])[0],
-        'rank': random.choice([2, 4, 6, 0.1, 0.2, 0.5]),
+        'rank': random.choice([2, 4, 8, 16, 32, 0.1, 0.2, 0.5]),
        'maxiter': 200,
    }

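
For illustration, one draw from the widened search space might be {'method': 'als', 'rank': 16, 'maxiter': 200}. The float options presumably select rank as a fraction of the number of series, though that interpretation is an inference, not something shown in this diff.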