def train_models(train, models, forecast_len, full_df=None, seasonality="infer_from_data", in_sample=None):
    seasons = select_seasonality(train, seasonality)
    models_dict = {}
    for m in models:
        if m == "ARIMA":
            models_dict["ARIMA"] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            model = Prophet()
            models_dict["Prophet"] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            models_dict["HWAAS"] = ExponentialSmoothing(
                train, seasonal_periods=seasons, trend='add', seasonal='add',
                damped=True).fit(use_boxcox=True)
        if m == "HWAMS":
            models_dict["HWAMS"] = ExponentialSmoothing(
                train, seasonal_periods=seasons, trend='add', seasonal='mul',
                damped=True).fit(use_boxcox=True)
        # if m == "HOLT":
        #     models_dict["HOLT"] = Holt(train, exponential=True).fit()
        if m == "PYAF":
            model = autof.cForecastEngine()
            model.train(iInputDS=train.reset_index(), iTime='Date', iSignal='Target',
                        iHorizon=len(train))  # bad coding to have the horizon here
            models_dict["PYAF"] = model.forecast(iInputDS=train.reset_index(),
                                                 iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            freq = "M" if freqed == "MS" else freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=2))  # use_feat_dynamic_real=True
            print(train)
            print(type(train))
            print(gluonts_dataframe(train))
            models_dict["Gluonts"] = estimator.train(training_data=gluonts_dataframe(train))
        if m == "NBEATS":
            device = torch.device('cpu')
            seasons = select_seasonality(train, seasonality)
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 5
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                # test_losses = []
                for r in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict["NBEATS"] = {"model": net, "x_test": x_test,
                                         "y_test": y_test, "constant": norm_constant}
            else:  # out of sample: train is a df
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False)
                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                # test_losses = []
                for r in range(stepped):
                    _, forecast = net(torch.tensor(x_train, dtype=torch.float))  ### Not Used
                    p = forecast.detach().numpy()  ### Not Used
                    train_100_grad_steps(data, device, net, optimiser)  # test_losses
                models_dict["NBEATS"] = {"model": net,
                                         "tuple": (x_train, y_train, net, norm_constant)}
        # if m == "ProphetGluonts":
        #     freqed = pd.infer_freq(train.index)
        #     freq = "M" if freqed == "MS" else freqed
        #     models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len)  # use_feat_dynamic_real=True
        #     models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    # create a forecast engine. This is the main object handling all the operations
    # We use the test-dataset as the last step of our training to generate the
    # evaluation-metrics and do not use the test-dataset during prediction.
    # get the best time series model for predicting one week
    return models_dict
def prep_estimators(
    pred_length: int,
    dataset_name: str,
    num_series: int,
    cardinalities: List[int],
    epochs: int,
) -> List[Model]:
    trainer = Trainer(epochs=epochs)
    models = [
        # DeepAREstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer,
        #                 use_feat_static_cat=True, cardinality=cardinalities,
        #                 distr_output=NegativeBinomialOutput()),
        # DeepFactorEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer,
        #                     cardinality=[num_series], distr_output=NegativeBinomialOutput()),
        # DeepStateEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer,
        #                    cardinality=cardinalities),
        # NBEATSEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer,
        #                 # TODO is the loss function/evaluation metric we want to use?
        #                 loss_function='MAPE'),
        NBEATSEnsembleEstimator(
            freq=freqs[dataset_name],
            prediction_length=pred_length,
            trainer=trainer,
            meta_bagging_size=1,
        ),
        NBEATSEnsembleEstimator(
            freq=freqs[dataset_name],
            prediction_length=pred_length,
            trainer=trainer,
            num_stacks=2,
            num_blocks=[3],
            widths=[256, 2048],
            sharing=[True],
            expansion_coefficient_lengths=[3],
            stack_types=["T", "S"],
            meta_bagging_size=1,
        ),
        # MQCNNEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer),
        # MQRNNEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer),
        # WaveNetEstimator(freq=freqs[dataset_name], prediction_length=pred_length, trainer=trainer,
        #                  cardinality=cardinalities),
    ]
    return models
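# Rough usage sketch for prep_estimators above. It assumes the surrounding module defines the
# `freqs` dict that the function reads from; the dataset name and epoch count here are
# illustrative only, not taken from the original code.
from gluonts.dataset.repository.datasets import get_dataset

demo_dataset = get_dataset("m4_hourly")
demo_models = prep_estimators(
    pred_length=demo_dataset.metadata.prediction_length,
    dataset_name="m4_hourly",
    num_series=len(demo_dataset.train),
    cardinalities=[len(demo_dataset.train)],
    epochs=1,
)
demo_predictors = [model.train(demo_dataset.train) for model in demo_models]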
from dataset import dataset
from estimator import estimator, net
from gluonts.dataset.loader import TrainDataLoader
from gluonts.trainer import Trainer
from gluonts.gluonts_tqdm import tqdm
import numpy as np

training_data = dataset.train
transformation = estimator.create_transformation()

dtype = np.float32
num_workers = None
num_prefetch = None
shuffle_buffer_length = None

trainer = Trainer(ctx="cpu", epochs=1, learning_rate=0.01, num_batches_per_epoch=100)

training_data_loader = TrainDataLoader(
    dataset=training_data,
    transform=transformation,
    batch_size=trainer.batch_size,
    num_batches_per_epoch=trainer.num_batches_per_epoch,
    ctx=trainer.ctx,
    dtype=dtype,
    num_workers=num_workers,
    num_prefetch=num_prefetch,
)

input_names = ['past_target', 'future_target']

with tqdm(training_data_loader) as it:
    for batch_no, data_entry in enumerate(it, start=1):
        # collect the arrays the network expects for this batch
        inputs = [data_entry[k] for k in input_names]
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    num_stacks: int = 30,
    widths: Optional[List[int]] = None,
    num_blocks: Optional[List[int]] = None,
    num_block_layers: Optional[List[int]] = None,
    expansion_coefficient_lengths: Optional[List[int]] = None,
    sharing: Optional[List[bool]] = None,
    stack_types: Optional[List[str]] = None,
    loss_function: Optional[str] = "MAPE",
    **kwargs,
) -> None:
    """
    Defines an estimator. All parameters should be serializable.
    """
    super().__init__(trainer=trainer, **kwargs)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert context_length is None or context_length > 0, "The value of `context_length` should be > 0"
    assert num_stacks is None or num_stacks > 0, "The value of `num_stacks` should be > 0"
    assert loss_function is None or loss_function in VALID_LOSS_FUNCTIONS, \
        f"The loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length if context_length is not None else 2 * prediction_length
    # num_stacks has to be handled separately because other arguments have to match its length
    self.num_stacks = num_stacks
    self.loss_function = loss_function

    self.widths = self._validate_nbeats_argument(
        argument_value=widths,
        argument_name="widths",
        default_value=[512],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'widths' should be > 0",
    )
    self.num_blocks = self._validate_nbeats_argument(
        argument_value=num_blocks,
        argument_name="num_blocks",
        default_value=[1],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'num_blocks' should be > 0",
    )
    self.num_block_layers = self._validate_nbeats_argument(
        argument_value=num_block_layers,
        argument_name="num_block_layers",
        default_value=[4],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'block_layers' should be > 0",
    )
    self.sharing = self._validate_nbeats_argument(
        argument_value=sharing,
        argument_name="sharing",
        default_value=[False],
        validation_condition=lambda val: True,
        invalidation_message="",
    )
    self.expansion_coefficient_lengths = self._validate_nbeats_argument(
        argument_value=expansion_coefficient_lengths,
        argument_name="expansion_coefficient_lengths",
        default_value=[2],
        validation_condition=lambda val: val > 0,
        invalidation_message="Values of 'expansion_coefficient_lengths' should be > 0",
    )
    self.stack_types = self._validate_nbeats_argument(
        argument_value=stack_types,
        argument_name="stack_types",
        default_value=["G"],
        validation_condition=lambda val: val in VALID_N_BEATS_STACK_TYPES,
        invalidation_message=f"Values of 'stack_types' should be one of {VALID_N_BEATS_STACK_TYPES}",
    )
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path

from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.trainer import Trainer

csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df.set_index(['timestamp'], inplace=True)
# print(df.value[:"2015-04-22 20:47:53"])  # the last timestamp in the data is 2015-04-22 20:47:53
# print(df.value[:"2015-04-23 20:47:53"])  # slicing past the end of the data returns whatever exists
# print("start timestamp", df.index[0])

# 'start' is the first timestamp, 'target' is the series of values; this dict layout is fixed
data = common.ListDataset(
    [{'start': df.index[0], 'target': df.value[:"2015-04-22 21:00:00"]}],
    freq='H')
# here df.index holds the timestamps and df.value the corresponding values

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50))
predictor = estimator.train(training_data=data)
predictor.serialize(Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"))

for train_entry, predict_result in zip(data, predictor.predict(data)):
    to_pandas(train_entry)[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

# print the forecast
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')
def create_predictor(self, transformation: Transformation, trained_network: HybridBlock) -> Predictor:
    prediction_network = MyPredNetwork1(
        prediction_length=self.prediction_length,
        num_cells=self.num_cells)

    copy_parameters(trained_network, prediction_network)

    return RepresentableBlockPredictor(
        input_transform=transformation,
        prediction_net=prediction_network,
        batch_size=self.trainer.batch_size,
        freq=self.freq,
        prediction_length=self.prediction_length,
        ctx=self.trainer.ctx)


import mxnet as mx

pred_length = 6
estimator1 = MyEstimator1(
    prediction_length=pred_length,
    context_length=2 * pred_length,
    freq="1H",
    num_cells=10,
    trainer=Trainer(ctx="cpu",
                    epochs=5,
                    learning_rate=1e-3,
                    hybridize=False,
                    num_batches_per_epoch=20))

predictor1 = estimator1.train(morningtrain)
def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
    """Fits NBEATS model using training data from set_training_data and hyperparameters.

    Keyword Arguments:
        timeout {float} -- timeout, considered (default: {None})
        iterations {int} -- iterations, considered (default: {None})

    Returns:
        CallResult[None]
    """
    if iterations is None:
        iterations = self.hyperparams["epochs"]
        has_finished = True
    else:
        has_finished = False

    if self.hyperparams['interpretable']:
        num_stacks = 2
        num_blocks = [1]
        widths = [256, 2048]
        sharing = [True]
        expansion_coefficient_lengths = [3]
        stack_types = ["T", "S"]
        estimator_class = NBEATSEnsembleEstimatorHook
    else:
        num_stacks = 30
        num_blocks = [3]
        widths = [512]
        sharing = [False]
        expansion_coefficient_lengths = [32]
        stack_types = ["G"]
        estimator_class = NBEATSEnsembleEstimator

    estimator = estimator_class(
        freq=self._freq,
        prediction_length=self.hyperparams['prediction_length'],
        meta_context_length=[i for i in range(2, self.hyperparams['num_context_lengths'] + 2)],
        meta_loss_function=['sMAPE', 'MASE', 'MAPE'],
        meta_bagging_size=self.hyperparams['num_estimators'],
        num_stacks=num_stacks,
        num_blocks=num_blocks,
        widths=widths,
        sharing=sharing,
        expansion_coefficient_lengths=expansion_coefficient_lengths,
        stack_types=stack_types,
        trainer=Trainer(
            epochs=iterations,
            learning_rate=self.hyperparams['learning_rate'],
            batch_size=self.hyperparams['training_batch_size'],
            num_batches_per_epoch=self.hyperparams['steps_per_epoch']),
    )

    logger.info(f"Fitting for {iterations} iterations")
    start_time = time.time()
    predictor = estimator.train(self._train_data)
    predictor.batch_size = self.hyperparams['inference_batch_size']
    predictor.set_aggregation_method('none')
    self._is_fit = True
    logger.info(f"Fit for {iterations} epochs, took {time.time() - start_time}s")

    if not os.path.isdir(self.hyperparams['weights_dir']):
        os.mkdir(self.hyperparams['weights_dir'])
    predictor.serialize(Path(self.hyperparams['weights_dir']))

    return CallResult(None, has_finished=has_finished)
def __init__(
    self,
    freq: str,
    prediction_length: int,
    meta_context_length: Optional[List[int]] = None,
    meta_loss_function: Optional[List[str]] = None,
    meta_bagging_size: int = 10,
    trainer: Trainer = Trainer(),
    num_stacks: int = 30,
    widths: Optional[List[int]] = None,
    num_blocks: Optional[List[int]] = None,
    num_block_layers: Optional[List[int]] = None,
    expansion_coefficient_lengths: Optional[List[int]] = None,
    sharing: Optional[List[bool]] = None,
    stack_types: Optional[List[str]] = None,
    **kwargs,
) -> None:
    super().__init__()

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"

    self.freq = freq
    self.prediction_length = prediction_length

    assert meta_loss_function is None or all(
        loss_function in VALID_LOSS_FUNCTIONS for loss_function in meta_loss_function
    ), f"Each loss function has to be one of the following: {VALID_LOSS_FUNCTIONS}."
    assert meta_context_length is None or all(
        context_length > 0 for context_length in meta_context_length
    ), "The value of each `context_length` should be > 0"
    assert meta_bagging_size is None or meta_bagging_size > 0, \
        "The value of `meta_bagging_size` should be > 0"

    self.meta_context_length = (
        meta_context_length if meta_context_length is not None
        else [multiplier * prediction_length for multiplier in range(2, 8)]
    )
    self.meta_loss_function = (
        meta_loss_function if meta_loss_function is not None else VALID_LOSS_FUNCTIONS
    )
    self.meta_bagging_size = meta_bagging_size

    # The following arguments are validated in the NBEATSEstimator:
    self.trainer = trainer
    print(f"TRAINER:{str(trainer)}")
    self.num_stacks = num_stacks
    self.widths = widths
    self.num_blocks = num_blocks
    self.num_block_layers = num_block_layers
    self.expansion_coefficient_lengths = expansion_coefficient_lengths
    self.sharing = sharing
    self.stack_types = stack_types

    # Actually instantiate the different models
    self.estimators = self._estimator_factory(**kwargs)
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    trainer: Trainer = Trainer(),
    dropout_rate: float = 0.1,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: int = 20,
    distr_output: DistributionOutput = StudentTOutput(),
    model_dim: int = 32,
    inner_ff_dim_scale: int = 4,
    pre_seq: str = "dn",
    post_seq: str = "drn",
    act_type: str = "softrelu",
    num_heads: int = 8,
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    num_parallel_samples: int = 100,
) -> None:
    super().__init__(trainer=trainer)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert context_length is None or context_length > 0, "The value of `context_length` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert cardinality is not None or not use_feat_static_cat, \
        "You must set `cardinality` if `use_feat_static_cat=True`"
    assert cardinality is None or all(c > 0 for c in cardinality), \
        "Elements of `cardinality` should be > 0"
    assert embedding_dimension > 0, "The value of `embedding_dimension` should be > 0"
    assert num_parallel_samples > 0, "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.prediction_length = prediction_length
    self.context_length = context_length if context_length is not None else prediction_length
    self.distr_output = distr_output
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.cardinality = cardinality if use_feat_static_cat else [1]
    self.embedding_dimension = embedding_dimension
    self.num_parallel_samples = num_parallel_samples
    self.lags_seq = lags_seq if lags_seq is not None else get_lags_for_frequency(freq_str=freq)
    self.time_features = (
        time_features if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.history_length = self.context_length + max(self.lags_seq)
    self.scaling = scaling

    self.config = {
        "model_dim": model_dim,
        "pre_seq": pre_seq,
        "post_seq": post_seq,
        "dropout_rate": dropout_rate,
        "inner_ff_dim_scale": inner_ff_dim_scale,
        "act_type": act_type,
        "num_heads": num_heads,
    }

    self.encoder = TransformerEncoder(self.context_length, self.config, prefix="enc_")
    self.decoder = TransformerDecoder(self.prediction_length, self.config, prefix="dec_")
def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: int = 20,
    distr_output: DistributionOutput = StudentTOutput(),
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
) -> None:
    super().__init__(trainer=trainer)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert context_length is None or context_length > 0, "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (cardinality is not None and use_feat_static_cat) or (
        cardinality is None and not use_feat_static_cat
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(c > 0 for c in cardinality), \
        "Elements of `cardinality` should be > 0"
    assert embedding_dimension > 0, "The value of `embedding_dimension` should be > 0"
    assert num_parallel_samples > 0, "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.context_length = context_length if context_length is not None else prediction_length
    self.prediction_length = prediction_length
    self.distr_output = distr_output
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real
    self.cardinality = cardinality if use_feat_static_cat else [1]
    self.embedding_dimension = embedding_dimension
    self.scaling = scaling
    self.lags_seq = lags_seq if lags_seq is not None else get_lags_for_frequency(freq_str=freq)
    self.time_features = (
        time_features if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.history_length = self.context_length + max(self.lags_seq)
    self.num_parallel_samples = num_parallel_samples
def __init__(
    self,
    freq: str,
    prediction_length: int,
    target_dim: int,
    trainer: Trainer = Trainer(),
    # number of dimensions to sample at training time
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_parallel_samples: int = 100,
    dropout_rate: float = 0.1,
    target_dim_sample: Optional[int] = None,
    distr_output: Optional[DistributionOutput] = None,
    rank: Optional[int] = 2,
    scaling: bool = True,
    pick_incomplete: bool = False,
    lags_seq: Optional[List[int]] = None,
    shuffle_target_dim: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
    conditioning_length: int = 100,
    use_marginal_transformation: bool = False,
) -> None:
    super().__init__(trainer=trainer)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert context_length is None or context_length > 0, "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert num_parallel_samples > 0, "The value of `num_parallel_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"

    if distr_output is not None:
        self.distr_output = distr_output
    else:
        self.distr_output = LowrankGPOutput(rank=rank)
    self.freq = freq
    self.context_length = context_length if context_length is not None else prediction_length
    self.prediction_length = prediction_length
    self.target_dim = target_dim
    self.target_dim_sample = (
        target_dim if target_dim_sample is None else min(target_dim_sample, target_dim)
    )
    self.shuffle_target_dim = shuffle_target_dim
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_parallel_samples = num_parallel_samples
    self.dropout_rate = dropout_rate
    self.lags_seq = lags_seq if lags_seq is not None else get_lags_for_frequency(freq_str=freq)
    self.time_features = (
        time_features if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.history_length = self.context_length + max(self.lags_seq)
    self.pick_incomplete = pick_incomplete
    self.scaling = scaling
    self.conditioning_length = conditioning_length
    self.use_marginal_transformation = use_marginal_transformation
    if self.use_marginal_transformation:
        self.output_transform = cdf_to_gaussian_forward_transform
    else:
        self.output_transform = None
dataset_name = "m4_hourly" dataset = get_dataset(dataset_name) results = [] # If you want to use GPU, please set ctx="gpu(0)" estimators = [ # partial( # DeepAREstimator, # trainer=Trainer( # ctx="cpu", # epochs=epochs, # num_batches_per_epoch=num_batches_per_epoch # ) # ), partial(MQCNNEstimator, trainer=Trainer(ctx="cpu", epochs=epochs, num_batches_per_epoch=num_batches_per_epoch)), ] for estimator in estimators: estimator = estimator(prediction_length=dataset.metadata.prediction_length, freq=dataset.metadata.freq) predictor = estimator.train(dataset.train) forecast_it, ts_it = make_evaluation_predictions(dataset.test, predictor=predictor, num_eval_samples=100) agg_metrics, item_metrics = Evaluator()(ts_it, forecast_it, num_series=len(dataset.test))
def train_predictor(region_df_dict, end_train_date, regions_list, max_epochs, learning_rate,
                    target_col, feat_dynamic_cols=None, fixed_seeds=False):
    if fixed_seeds:
        # Seeds setting taken from
        # https://gluon-ts.mxnet.io/examples/extended_forecasting_tutorial/extended_tutorial.html
        mx.random.seed(0)
        np.random.seed(0)
    estimator = DeepAREstimator(
        freq=md.FREQ,
        prediction_length=md.NB_HOURS_PRED,
        trainer=Trainer(epochs=max_epochs,
                        learning_rate=learning_rate,
                        learning_rate_decay_factor=md.LR_DECAY_FACTOR),
        use_feat_dynamic_real=feat_dynamic_cols is not None)

    if feat_dynamic_cols is not None:
        training_data = ListDataset(
            [{
                "item_id": region,
                "start": region_df_dict[region].index[0],
                "target": region_df_dict[region][target_col][:end_train_date],
                "feat_dynamic_real": [
                    region_df_dict[region][feat_dynamic_col][:end_train_date]
                    for feat_dynamic_col in feat_dynamic_cols
                ],
            } for region in regions_list],
            freq=md.FREQ)
    else:
        training_data = ListDataset(
            [{
                "item_id": region,
                "start": region_df_dict[region].index[0],
                "target": region_df_dict[region][target_col][:end_train_date],
            } for region in regions_list],
            freq=md.FREQ)

    model_path = predictor_path(region_df_dict, regions_list, max_epochs, learning_rate,
                                feat_dynamic_cols, fixed_seeds=fixed_seeds)
    model_dir, model_name = os.path.split(model_path)
    logging.info("Training deepar model {}".format(model_name))
    logging.getLogger().setLevel(logging.WARNING)
    predictor = estimator.train(training_data=training_data)
    logging.getLogger().setLevel(logging.INFO)
    logging.info("Saving model with {} epochs and learning rate of {}".format(
        max_epochs, learning_rate))
    with open(model_path, "wb") as file:
        pickle.dump(predictor, file)
    return predictor
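# Companion sketch to train_predictor above: reloading the pickled predictor. The path is assumed
# to be the same value produced by predictor_path; this helper is illustrative, not from the
# original module.
def load_predictor(model_path):
    with open(model_path, "rb") as file:
        return pickle.load(file)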
def fit(self, df, future_regressor=[]):
    """Train algorithm given data supplied.

    Args:
        df (pandas.DataFrame): Datetime Indexed
    """
    df = self.basic_profile(df)
    try:
        from mxnet.random import seed as mxnet_seed
        mxnet_seed(self.random_seed)
    except Exception:
        pass

    gluon_train = df.transpose()
    self.train_index = gluon_train.index

    gluon_freq = str(self.frequency).split('-')[0]
    if gluon_freq in ["MS", "1MS"]:
        gluon_freq = "M"
    if int(self.verbose) > 1:
        print(f"Gluon Frequency is {gluon_freq}")

    if str(self.context_length).replace('.', '').isdigit():
        self.gluon_context_length = int(float(self.context_length))
    elif 'forecastlength' in str(self.context_length).lower():
        len_int = int([x for x in str(self.context_length) if x.isdigit()][0])
        self.gluon_context_length = int(len_int * self.forecast_length)
    else:
        self.gluon_context_length = 2 * self.forecast_length
        self.context_length = '2ForecastLength'

    ts_metadata = {
        'num_series': len(gluon_train.index),
        'freq': gluon_freq,
        'gluon_start': [gluon_train.columns[0] for _ in range(len(gluon_train.index))],
        'context_length': self.gluon_context_length,
        'forecast_length': self.forecast_length,
    }
    self.test_ds = ListDataset(
        [{FieldName.TARGET: target, FieldName.START: start}
         for (target, start) in zip(gluon_train.values, ts_metadata['gluon_start'])],
        freq=ts_metadata['freq'])

    if self.gluon_model == 'DeepAR':
        from gluonts.model.deepar import DeepAREstimator
        estimator = DeepAREstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate))
    elif self.gluon_model == 'NPTS':
        from gluonts.model.npts import NPTSEstimator
        estimator = NPTSEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'])
    elif self.gluon_model == 'MQCNN':
        from gluonts.model.seq2seq import MQCNNEstimator
        estimator = MQCNNEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate))
    elif self.gluon_model == 'SFF':
        from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
        estimator = SimpleFeedForwardEstimator(
            prediction_length=ts_metadata['forecast_length'],
            context_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            trainer=Trainer(epochs=self.epochs,
                            learning_rate=self.learning_rate,
                            hybridize=False,
                            num_batches_per_epoch=100))
    elif self.gluon_model == 'Transformer':
        from gluonts.model.transformer import TransformerEstimator
        estimator = TransformerEstimator(
            prediction_length=ts_metadata['forecast_length'],
            context_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate))
    elif self.gluon_model == 'DeepState':
        from gluonts.model.deepstate import DeepStateEstimator
        estimator = DeepStateEstimator(
            prediction_length=ts_metadata['forecast_length'],
            past_length=ts_metadata['context_length'],
            freq=ts_metadata['freq'],
            use_feat_static_cat=False,
            cardinality=[1],
            trainer=Trainer(ctx='cpu', epochs=self.epochs, learning_rate=self.learning_rate))
    elif self.gluon_model == 'DeepFactor':
        from gluonts.model.deep_factor import DeepFactorEstimator
        estimator = DeepFactorEstimator(
            freq=ts_metadata['freq'],
            context_length=ts_metadata['context_length'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate))
    elif self.gluon_model == 'WaveNet':
        # Usually needs more epochs/training iterations than other models do
        from gluonts.model.wavenet import WaveNetEstimator
        estimator = WaveNetEstimator(
            freq=ts_metadata['freq'],
            prediction_length=ts_metadata['forecast_length'],
            trainer=Trainer(epochs=self.epochs, learning_rate=self.learning_rate))
    else:
        raise ValueError("'gluon_model' not recognized.")

    self.GluonPredictor = estimator.train(self.test_ds)
    self.ts_metadata = ts_metadata
    self.fit_runtime = datetime.datetime.now() - self.startTime
    return self
# npts predictor
npts_predictor = NPTSPredictor(freq=freq,
                               prediction_length=prediction_length,
                               context_length=300,
                               kernel_type='uniform',
                               use_seasonal_model=False)
npts_forecast = list(npts_predictor.predict(train_ds))

# deep ar
distr = PiecewiseLinearOutput(7)
deep_ar_trainer = Trainer(
    ctx=mx.context.gpu() if is_gpu & args.use_cuda else mx.context.cpu(),
    batch_size=128,
    learning_rate=1e-2,
    epochs=20,
    num_batches_per_epoch=args.number_of_batches_per_epoch,
    clip_gradient=5.48481845049343,
    weight_decay=0.001,
    hybridize=False)
deep_ar_estimator = DeepAREstimator(
    prediction_length=prediction_length,
    context_length=prediction_length * 2,
    num_layers=2,
    num_cells=128,
    cell_type='gru',
    dropout_rate=0.1,
    scaling=True,
    lags_seq=np.arange(1, 1 + 1).tolist(),
    freq=freq,
    trainer=deep_ar_trainer,
    distr_output=distr)
def train_models(train, models, forecast_len, full_df=None, seasonality="infer_from_data",
                 in_sample=None, freq=None, GPU=None):
    seasons = select_seasonality(train, seasonality)
    periods = select_seasonality(train, 'periodocity')

    models_dict = {}
    for m in models:
        if in_sample:
            print("Model {} is being trained for in sample prediction".format(m))
        else:
            print("Model {} is being trained for out of sample prediction".format(m))

        if m == "ARIMA":
            models_dict[m] = pm.auto_arima(train, seasonal=True, m=seasons)
        if m == "Prophet":
            if freq == "D":
                model = Prophet(daily_seasonality=True)
            else:
                model = Prophet()
            models_dict[m] = model.fit(prophet_dataframe(train))
        if m == "HWAAS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train, seasonal_periods=seasons, trend='add', seasonal='add',
                    damped=True).fit(use_boxcox=True)
            except:
                models_dict[m] = ExponentialSmoothing(
                    train, seasonal_periods=seasons, trend='add', seasonal='add',
                    damped=True).fit(use_boxcox=False)
        if m == "HWAMS":
            try:
                models_dict[m] = ExponentialSmoothing(
                    train, seasonal_periods=seasons, trend='add', seasonal='mul',
                    damped=True).fit(use_boxcox=True)
            except:
                try:
                    models_dict[m] = ExponentialSmoothing(
                        train, seasonal_periods=seasons, trend='add', seasonal='mul',
                        damped=True).fit(use_boxcox=False)
                except:
                    models_dict[m] = ExponentialSmoothing(
                        train, seasonal_periods=seasons, trend=None,
                        seasonal='add').fit(use_boxcox=False)
        # if m == "HOLT":
        #     models_dict["HOLT"] = Holt(train, exponential=True).fit()
        if m == "PYAF":
            model = autof()
            model.train(iInputDS=train.reset_index(), iTime='Date', iSignal='Target',
                        iHorizon=len(train))  # bad coding to have the horizon here
            models_dict[m] = model.forecast(iInputDS=train.reset_index(), iHorizon=forecast_len)
        if m == "Gluonts":
            freqed = pd.infer_freq(train.index)
            freq = "M" if freqed == "MS" else freqed
            estimator = DeepAREstimator(
                freq=freq,
                prediction_length=forecast_len,
                trainer=Trainer(epochs=6, ctx='gpu' if GPU else 'cpu'))  # use_feat_dynamic_real=True
            models_dict[m] = estimator.train(training_data=gluonts_dataframe(train))
        if m == "NBEATS":
            device = torch.device('cuda') if GPU else torch.device('cpu')
            if os.path.isfile(CHECKPOINT_NAME):
                os.remove(CHECKPOINT_NAME)
            stepped = 35
            batch_size = 10
            if in_sample:
                x_train, y_train, x_test, y_test, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=True, device=device)
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                for r in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {"model": net, "x_test": x_test, "y_test": y_test,
                                  "constant": norm_constant}
            else:  # out of sample: train is a df
                x_train, y_train, net, norm_constant = nbeats_dataframe(
                    full_df, forecast_len, in_sample=False, device=device)
                batch_size = 10  # greater than 4 for viz
                optimiser = optim.Adam(net.parameters())
                data = data_generator(x_train, y_train, batch_size)
                stepped = 5
                for r in range(stepped):
                    train_100_grad_steps(data, device, net, optimiser)
                models_dict[m] = {"model": net, "tuple": (x_train, y_train, net, norm_constant)}
        # if m == "TBA":
        #     bat = TBATS(use_arma_errors=False, use_box_cox=True)
        #     models_dict[m] = bat.fit(train)
        if m == "TATS":
            bat = TBATS(seasonal_periods=list(get_unique_N(season_list(train), 1)),
                        use_arma_errors=False, use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBAT":
            bat = TBATS(use_arma_errors=False, use_box_cox=True, use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS1":
            bat = TBATS(seasonal_periods=[seasons], use_arma_errors=False,
                        use_box_cox=True, use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATP1":
            bat = TBATS(seasonal_periods=[periods], use_arma_errors=False,
                        use_box_cox=True, use_trend=True)
            models_dict[m] = bat.fit(train)
        if m == "TBATS2":
            bat = TBATS(seasonal_periods=list(get_unique_N(season_list(train), 2)),
                        use_arma_errors=False, use_box_cox=True, use_trend=True)
            models_dict[m] = bat.fit(train)
        # if m == "ProphetGluonts":
        #     freqed = pd.infer_freq(train.index)
        #     freq = "M" if freqed == "MS" else freqed
        #     models_dict["ProphetGluonts"] = ProphetPredictor(freq=freq, prediction_length=forecast_len)  # use_feat_dynamic_real=True
        #     models_dict["ProphetGluonts"] = list(models_dict["ProphetGluonts"])

    return models_dict, seasons
import pyximport; pyximport.install()

import sys
import pandas as pd
from pathlib import Path

from GTS_new_data import load_dataset
from gluonts.model.predictor import Predictor
from gluonts.model.deepar import DeepAREstimator
from gluonts.trainer import Trainer
from gluonts.dataset.common import ListDataset

if __name__ == "__main__":
    filename = sys.argv[1]
    df = load_dataset(filename)
    training_data = ListDataset(
        [{"start": df.index[1], "target": df.iloc[:-12].values[:, 1]}],
        freq="1min")

    estimator = DeepAREstimator(freq="1min",
                                prediction_length=12,
                                trainer=Trainer(epochs=100))
    predictor = estimator.train(training_data=training_data)
    predictor.serialize(Path("."))
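# Illustrative follow-up to the script above: Predictor.deserialize is the standard GluonTS
# counterpart to serialize. Reusing `training_data` for prediction here is only for demonstration.
loaded_predictor = Predictor.deserialize(Path("."))
forecast = next(loaded_predictor.predict(training_data))
print(forecast.mean)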
def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(),
    context_length: Optional[int] = None,
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    use_feat_static_real: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    distr_output: DistributionOutput = StudentTOutput(),
    scaling: bool = True,
    lags_seq: Optional[List[int]] = None,
    time_features: Optional[List[TimeFeature]] = None,
    num_parallel_samples: int = 100,
    imputation_method: Optional[MissingValueImputation] = None,
    dtype: DType = np.float32,
) -> None:
    super().__init__(trainer=trainer, dtype=dtype)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert context_length is None or context_length > 0, "The value of `context_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (cardinality and use_feat_static_cat) or (
        not (cardinality or use_feat_static_cat)
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(
        [c > 0 for c in cardinality]
    ), "Elements of `cardinality` should be > 0"
    assert embedding_dimension is None or all(
        [e > 0 for e in embedding_dimension]
    ), "Elements of `embedding_dimension` should be > 0"
    assert num_parallel_samples > 0, "The value of `num_parallel_samples` should be > 0"

    self.freq = freq
    self.context_length = context_length if context_length is not None else prediction_length
    self.prediction_length = prediction_length
    self.distr_output = distr_output
    self.distr_output.dtype = dtype
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.use_feat_static_real = use_feat_static_real
    self.cardinality = cardinality if cardinality and use_feat_static_cat else [1]
    self.embedding_dimension = (
        embedding_dimension if embedding_dimension is not None
        else [min(50, (cat + 1) // 2) for cat in self.cardinality]
    )
    self.scaling = scaling
    self.lags_seq = lags_seq if lags_seq is not None else get_lags_for_frequency(freq_str=freq)
    self.time_features = (
        time_features if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
    self.history_length = self.context_length + max(self.lags_seq)
    self.num_parallel_samples = num_parallel_samples
    self.imputation_method = (
        imputation_method if imputation_method is not None
        else DummyValueImputation(self.distr_output.value_in_support)
    )
# Standard library imports
from functools import partial

# Third-party imports
import pytest

# First-party imports
from gluonts.model.deepstate import DeepStateEstimator
from gluonts.testutil.dummy_datasets import make_dummy_datasets_with_features
from gluonts.trainer import Trainer

common_estimator_hps = dict(
    freq="D",
    prediction_length=3,
    trainer=Trainer(epochs=3, num_batches_per_epoch=2, batch_size=1),
    past_length=10,
)


@pytest.mark.parametrize(
    "estimator, datasets",
    [
        # No features
        (
            partial(
                DeepStateEstimator,
                **common_estimator_hps,
                cardinality=[1],
                use_feat_static_cat=False,
            ),
def __init__(
    self,
    freq: str,
    prediction_length: int,
    context_length: Optional[int] = None,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    cardinality: List[int] = None,
    embedding_dimension: List[int] = None,
    add_time_feature: bool = False,
    add_age_feature: bool = False,
    enable_decoder_dynamic_feature: bool = False,
    seed: Optional[int] = None,
    decoder_mlp_dim_seq: Optional[List[int]] = None,
    channels_seq: Optional[List[int]] = None,
    dilation_seq: Optional[List[int]] = None,
    kernel_size_seq: Optional[List[int]] = None,
    use_residual: bool = True,
    quantiles: Optional[List[float]] = None,
    trainer: Trainer = Trainer(),
    scaling: bool = False,
) -> None:
    assert prediction_length > 0, f"Invalid prediction length: {prediction_length}."
    assert decoder_mlp_dim_seq is None or all(
        d > 0 for d in decoder_mlp_dim_seq
    ), "Elements of `mlp_hidden_dimension_seq` should be > 0"
    assert channels_seq is None or all(
        d > 0 for d in channels_seq
    ), "Elements of `channels_seq` should be > 0"
    assert dilation_seq is None or all(
        d > 0 for d in dilation_seq
    ), "Elements of `dilation_seq` should be > 0"
    # TODO: add support for kernel size=1
    assert kernel_size_seq is None or all(
        d > 1 for d in kernel_size_seq
    ), "Elements of `kernel_size_seq` should be > 0"
    assert quantiles is None or all(
        0 <= d <= 1 for d in quantiles
    ), "Elements of `quantiles` should be >= 0 and <= 1"

    self.decoder_mlp_dim_seq = decoder_mlp_dim_seq if decoder_mlp_dim_seq is not None else [30]
    self.channels_seq = channels_seq if channels_seq is not None else [30, 30, 30]
    self.dilation_seq = dilation_seq if dilation_seq is not None else [1, 3, 5]
    self.kernel_size_seq = kernel_size_seq if kernel_size_seq is not None else [7, 3, 3]
    self.quantiles = (quantiles if quantiles is not None
                      else [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

    assert (
        len(self.channels_seq) == len(self.dilation_seq) == len(self.kernel_size_seq)
    ), (
        f"mismatch CNN configurations: {len(self.channels_seq)} vs. "
        f"{len(self.dilation_seq)} vs. {len(self.kernel_size_seq)}"
    )

    if seed:
        np.random.seed(seed)
        mx.random.seed(seed)

    # `use_static_feat` and `use_dynamic_feat` always True because network
    # always receives input; either from the input data or constants
    encoder = HierarchicalCausalConv1DEncoder(
        dilation_seq=self.dilation_seq,
        kernel_size_seq=self.kernel_size_seq,
        channels_seq=self.channels_seq,
        use_residual=use_residual,
        use_static_feat=True,
        use_dynamic_feat=True,
        prefix="encoder_",
    )

    decoder = ForkingMLPDecoder(
        dec_len=prediction_length,
        final_dim=self.decoder_mlp_dim_seq[-1],
        hidden_dimension_sequence=self.decoder_mlp_dim_seq[:-1],
        prefix="decoder_",
    )

    quantile_output = QuantileOutput(self.quantiles)

    super().__init__(
        encoder=encoder,
        decoder=decoder,
        quantile_output=quantile_output,
        freq=freq,
        prediction_length=prediction_length,
        context_length=context_length,
        use_feat_dynamic_real=use_feat_dynamic_real,
        use_feat_static_cat=use_feat_static_cat,
        enable_decoder_dynamic_feature=enable_decoder_dynamic_feature,
        cardinality=cardinality,
        embedding_dimension=embedding_dimension,
        add_time_feature=add_time_feature,
        add_age_feature=add_age_feature,
        trainer=trainer,
        scaling=scaling,
    )
def forecast_dataset(dataset,
                     epochs=100,
                     learning_rate=1e-3,
                     num_samples=100,
                     model="SimpleFeedForward",
                     r_method="ets",
                     alpha=0,
                     distrib="Gaussian"):
    if distrib == "Gaussian":
        distr_output = GaussianOutput()
    elif distrib == "Laplace":
        distr_output = LaplaceOutput()
    elif distrib == "PiecewiseLinear":
        distr_output = PiecewiseLinearOutput(num_pieces=2)
    elif distrib == "Uniform":
        distr_output = UniformOutput()
    elif distrib == "Student":
        distr_output = StudentTOutput()
    else:
        distr_output = None

    if model != "GaussianProcess":
        ctx = mx.Context("gpu")
    else:
        ctx = mx.Context("cpu")

    # Trainer
    trainer = Trainer(epochs=epochs,
                      learning_rate=learning_rate,
                      num_batches_per_epoch=100,
                      ctx=ctx,
                      hybridize=True if model[0] != "c" else False)

    # Estimator (if machine learning model)
    if model == "SimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = SimpleFeedForwardEstimator(
            num_hidden_dimensions=[10],
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "cSimpleFeedForward":  # 10s / epochs for context 60*24
        estimator = CustomSimpleFeedForwardEstimator(
            prediction_length=dataset.prediction_length,
            context_length=dataset.context_length,
            freq=dataset.freq,
            trainer=trainer,
            num_cells=40,
            alpha=alpha,
            distr_output=distr_output,
            distr_output_type=distrib)
    elif model == "CanonicalRNN":  # 80s / epochs for context 60*24, idem for 60*1
        estimator = canonical.CanonicalRNNEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "DeepAr":
        estimator = deepar.DeepAREstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "DeepFactor":  # 120 s/epochs if one big time serie, 1.5s if 183 time series
        estimator = deep_factor.DeepFactorEstimator(
            freq=dataset.freq,
            context_length=dataset.context_length,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            distr_output=distr_output)
    elif model == "DeepState":  # Very slow on cpu
        estimator = deepstate.DeepStateEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=list([1]),
            use_feat_static_cat=False)
    elif model == "GaussianProcess":  # CPU / GPU problem
        estimator = gp_forecaster.GaussianProcessEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            trainer=trainer,
            cardinality=1)
    elif model == "NPTS":
        estimator = npts.NPTSEstimator(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length)
    elif model == "MQCNN":
        estimator = seq2seq.MQCNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=list([0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995]))
    elif model == "MQRNN":
        estimator = seq2seq.MQRNNEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            quantiles=list([0.005, 0.05, 0.25, 0.5, 0.75, 0.95, 0.995]))
    elif model == "RNN2QR":  # Must be investigated
        estimator = seq2seq.RNN2QRForecaster(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=dataset.cardinality,
            embedding_dimension=1,
            encoder_rnn_layer=1,
            encoder_rnn_num_hidden=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1)
    elif model == "SeqToSeq":  # Must be investigated
        estimator = seq2seq.Seq2SeqEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer,
            cardinality=[1],
            embedding_dimension=1,
            decoder_mlp_layer=[1],
            decoder_mlp_static_dim=1,
            encoder=Seq2SeqEncoder())
    elif model == "Transformer":  # Make the computer lag the first time
        estimator = transformer.TransformerEstimator(
            prediction_length=dataset.prediction_length,
            freq=dataset.freq,
            context_length=dataset.context_length,
            trainer=trainer)
    else:
        estimator = None

    # Predictor (directly if non machine learning model and from estimator if machine learning)
    if model == "Prophet":
        predictor = prophet.ProphetPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length)
    elif model == "R":
        predictor = r_forecast.RForecastPredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            method_name=r_method)
    elif model == "SeasonalNaive":
        predictor = seasonal_naive.SeasonalNaivePredictor(
            freq=dataset.freq,
            prediction_length=dataset.prediction_length,
            season_length=24)
    else:
        predictor = estimator.train(dataset.train_ds)
        if model[0] != "c":
            predictor.serialize(Path("temp"))
            predictor = Predictor.deserialize(Path("temp"), ctx=mx.cpu(0))  # fix for deepstate

    # Evaluate
    forecast_it, ts_it = make_evaluation_predictions(
        dataset=dataset.test_ds,   # test dataset
        predictor=predictor,       # predictor
        num_samples=num_samples,   # num of sample paths we want for evaluation
    )

    return list(forecast_it), list(ts_it)
model2 = module.load(load_pars={'path': out_pars['path'] + "/model/"})

from gluonts.model.deepar import DeepAREstimator
from gluonts.distribution.neg_binomial import NegativeBinomialOutput
from gluonts.trainer import Trainer

estimator = DeepAREstimator(
    prediction_length=12,
    freq="D",
    distr_output=NegativeBinomialOutput(),
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    cardinality=[3049, 7, 3, 10, 3],
    trainer=Trainer(
        learning_rate=1e-3,
        epochs=1,
        num_batches_per_epoch=10,
        batch_size=10,
    )
)
predictor = estimator.train(TD.train)

from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator

forecast_it, ts_it = make_evaluation_predictions(
    dataset=TD.test,
    predictor=predictor,
    num_samples=100,
)
def assert_invalid_param(param_name: str, param_values: List[Any], exp_msg: str) -> None:
    for x in param_values:
        with pytest.raises(AssertionError) as excinfo:
            Trainer(**{param_name: x})
        assert exp_msg in str(excinfo.value)
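# Hedged usage sketch for assert_invalid_param: the parameter value and the expected message
# fragment below are illustrative and depend on the exact assertions inside Trainer.__init__.
assert_invalid_param(param_name="learning_rate", param_values=[-1.0], exp_msg="learning_rate")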
estimator = NBEATSEnsembleEstimator(
    prediction_length=prediction_length,
    # context_length=7 * prediction_length,
    meta_bagging_size=3,  # Change back to 10 after testing??
    meta_context_length=[prediction_length * mlp for mlp in [3, 5, 7]],  # Change back to (2,7) // 3,5,7
    meta_loss_function=['sMAPE'],  # Change back to all three MAPE, MASE ...
    num_stacks=30,
    widths=[512],
    freq="D",
    trainer=Trainer(
        learning_rate=6e-4,
        # clip_gradient=1.0,
        epochs=12,  # 10
        num_batches_per_epoch=1000,
        batch_size=16,
        # ctx=mx.context.gpu()
    ))

if SUBMISSION:
    predictor = estimator.train(train_ds)
else:
    predictor = estimator.train(train_ds, test_ds)

# Analyze forecasts - Errors and Visual inspection
import matplotlib.pyplot as plt

from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model import deepar
from gluonts.trainer import Trainer

train_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/train", freq="H")
test_data = common.FileDataset(
    "/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-2/data/val", freq="H")

estimator = deepar.DeepAREstimator(
    prediction_length=24,
    context_length=100,
    use_feat_static_cat=True,
    use_feat_dynamic_real=True,
    num_parallel_samples=100,
    cardinality=[2, 1],
    freq="H",
    trainer=Trainer(ctx="cpu", epochs=200, learning_rate=1e-3))
predictor = estimator.train(training_data=train_data)

for test_entry, forecast in zip(test_data, predictor.predict(test_data)):
    to_pandas(test_entry)[-100:].plot(figsize=(12, 5), linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.legend(["past observations", "median prediction",
            "90% prediction interval", "50% prediction interval"])
plt.show()

prediction = next(predictor.predict(test_data))
print(prediction.mean)
def __init__(
    self,
    freq: str,
    prediction_length: int,
    trainer: Trainer = Trainer(
        learning_rate=0.01,
        epochs=200,
        num_batches_per_epoch=50,
        hybridize=False,
    ),
    cardinality: List[int] = [1],
    seasonality: Optional[int] = None,
    embedding_dimension: int = 5,
    num_bins: int = 1024,
    hybridize_prediction_net: bool = False,
    n_residue=24,
    n_skip=32,
    dilation_depth: Optional[int] = None,
    n_stacks: int = 1,
    train_window_length: Optional[int] = None,
    temperature: float = 1.0,
    act_type: str = "elu",
    num_parallel_samples: int = 200,
) -> None:
    """
    Model with Wavenet architecture and quantized target.

    :param freq:
    :param prediction_length:
    :param trainer:
    :param cardinality:
    :param embedding_dimension:
    :param num_bins: Number of bins used for quantization of the signal
    :param hybridize_prediction_net:
    :param n_residue: Number of residual channels in wavenet architecture
    :param n_skip: Number of skip channels in wavenet architecture
    :param dilation_depth: Number of dilation layers in wavenet architecture.
        If set to None, dilation_depth is set such that the receptive length is
        at least as long as 2 * seasonality for the frequency and at least
        2 * prediction_length.
    :param n_stacks: Number of dilation stacks in wavenet architecture
    :param train_window_length: Length of windows used for training. This should be
        longer than the prediction length. Larger values result in more efficient
        reuse of computations for convolutions.
    :param temperature: Temperature used for sampling from the softmax distribution.
        For temperature = 1.0, sampling is according to the estimated probability.
    :param act_type: Activation type used before the output layer.
        Can be any of 'elu', 'relu', 'sigmoid', 'tanh', 'softrelu', 'softsign'
    :param num_parallel_samples:
    """
    super().__init__(trainer=trainer)

    self.freq = freq
    self.prediction_length = prediction_length
    self.cardinality = cardinality
    self.embedding_dimension = embedding_dimension
    self.num_bins = num_bins
    self.hybridize_prediction_net = hybridize_prediction_net
    self.n_residue = n_residue
    self.n_skip = n_skip
    self.n_stacks = n_stacks
    self.train_window_length = (
        train_window_length if train_window_length is not None else prediction_length
    )
    self.temperature = temperature
    self.act_type = act_type
    self.num_parallel_samples = num_parallel_samples

    seasonality = (
        _get_seasonality(
            self.freq,
            {"H": 7 * 24, "D": 7, "W": 52, "M": 12, "B": 7 * 5, "min": 24 * 60},
        )
        if seasonality is None
        else seasonality
    )

    goal_receptive_length = max(2 * seasonality, 2 * self.prediction_length)
    if dilation_depth is None:
        d = 1
        while (WaveNet.get_receptive_field(dilation_depth=d, n_stacks=n_stacks)
               < goal_receptive_length):
            d += 1
        self.dilation_depth = d
    else:
        self.dilation_depth = dilation_depth
    self.context_length = WaveNet.get_receptive_field(
        dilation_depth=self.dilation_depth, n_stacks=n_stacks)
    self.logger = logging.getLogger(__name__)
    self.logger.info(
        f"Using dilation depth {self.dilation_depth} and receptive field length {self.context_length}"
    )
def __init__(
    self,
    freq: str,
    prediction_length: int,
    add_trend: bool = False,
    past_length: Optional[int] = None,
    num_periods_to_train: int = 4,
    trainer: Trainer = Trainer(epochs=25, hybridize=False),
    num_layers: int = 2,
    num_cells: int = 40,
    cell_type: str = "lstm",
    num_eval_samples: int = 100,
    dropout_rate: float = 0.1,
    use_feat_dynamic_real: bool = False,
    use_feat_static_cat: bool = False,
    cardinality: Optional[List[int]] = None,
    embedding_dimension: Optional[List[int]] = None,
    issm: Optional[ISSM] = None,
    scaling: bool = True,
    time_features: Optional[List[TimeFeature]] = None,
) -> None:
    super().__init__(trainer=trainer)

    assert prediction_length > 0, "The value of `prediction_length` should be > 0"
    assert past_length is None or past_length > 0, "The value of `past_length` should be > 0"
    assert num_layers > 0, "The value of `num_layers` should be > 0"
    assert num_cells > 0, "The value of `num_cells` should be > 0"
    assert num_eval_samples > 0, "The value of `num_eval_samples` should be > 0"
    assert dropout_rate >= 0, "The value of `dropout_rate` should be >= 0"
    assert (cardinality is not None and use_feat_static_cat) or (
        cardinality is None and not use_feat_static_cat
    ), "You should set `cardinality` if and only if `use_feat_static_cat=True`"
    assert cardinality is None or all(c > 0 for c in cardinality), \
        "Elements of `cardinality` should be > 0"
    assert embedding_dimension is None or all(e > 0 for e in embedding_dimension), \
        "Elements of `embedding_dimension` should be > 0"

    self.freq = freq
    self.past_length = (
        past_length if past_length is not None
        else num_periods_to_train * longest_period_from_frequency_str(freq)
    )
    self.prediction_length = prediction_length
    self.add_trend = add_trend
    self.num_layers = num_layers
    self.num_cells = num_cells
    self.cell_type = cell_type
    self.num_sample_paths = num_eval_samples
    self.scaling = scaling
    self.dropout_rate = dropout_rate
    self.use_feat_dynamic_real = use_feat_dynamic_real
    self.use_feat_static_cat = use_feat_static_cat
    self.cardinality = cardinality if cardinality and use_feat_static_cat else [1]
    self.embedding_dimension = (
        embedding_dimension if embedding_dimension is not None
        else [min(50, (cat + 1) // 2) for cat in self.cardinality]
    )
    self.issm = issm if issm is not None else CompositeISSM.get_from_freq(freq, add_trend)
    self.time_features = (
        time_features if time_features is not None
        else time_features_from_frequency_str(self.freq)
    )
import mxnet as mx

from gluonts.dataset.loader import TrainDataLoader
from gluonts.model.deepar import DeepAREstimator
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.trainer import Trainer
from gluonts.dataset.repository.datasets import get_dataset

if __name__ == "__main__":
    dataset = get_dataset(dataset_name="electricity")

    estimator = DeepAREstimator(
        prediction_length=dataset.metadata.prediction_length,
        freq=dataset.metadata.freq,
        trainer=Trainer(learning_rate=1e-3, epochs=50, num_batches_per_epoch=100),
    )

    # instead of calling `train`, we call `train_model`, which returns more things,
    # including the trained model
    train_output = estimator.train_model(dataset.train)

    # we construct a data_entry that contains 500 random windows
    batch_size = 500
    num_samples = 100
    training_data_loader = TrainDataLoader(
        dataset=dataset.train,
        transform=train_output.transformation,
        batch_size=batch_size,
        num_batches_per_epoch=estimator.trainer.num_batches_per_epoch,
        ctx=mx.cpu(),
    )
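# Illustrative continuation, assuming the loader above was constructed successfully: pull one
# transformed batch and list the field names it carries.
batch = next(iter(training_data_loader))
print(list(batch.keys()))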
from gluonts.dataset.artificial import constant_dataset
from gluonts.dataset.field_names import FieldName


def test_appendix_c():
    """
    Test GluonTS paper examples from arxiv paper:
    https://arxiv.org/abs/1906.05264

    Appendix C
    """
    from typing import List
    from mxnet import gluon
    from gluonts.model.estimator import GluonEstimator
    from gluonts.model.predictor import Predictor, RepresentableBlockPredictor
    from gluonts.trainer import Trainer
    from gluonts.transform import (
        InstanceSplitter,
        Transformation,
        ExpectedNumInstanceSampler,
    )
    from gluonts.core.component import validated
    from gluonts.support.util import copy_parameters

    class MyTrainNetwork(gluon.HybridBlock):
        def __init__(self, prediction_length, cells, act_type, **kwargs):
            super().__init__(**kwargs)
            self.prediction_length = prediction_length
            with self.name_scope():
                # Set up a network that predicts the target
                self.nn = gluon.nn.HybridSequential()
                for c in cells:
                    self.nn.add(gluon.nn.Dense(units=c, activation=act_type))
                self.nn.add(gluon.nn.Dense(units=self.prediction_length, activation=act_type))

        def hybrid_forward(self, F, past_target, future_target):
            prediction = self.nn(past_target)
            # calculate L1 loss to learn the median
            return (prediction - future_target).abs().mean(axis=-1)

    class MyPredNetwork(MyTrainNetwork):
        # The prediction network only receives
        # past target and returns predictions
        def hybrid_forward(self, F, past_target):
            prediction = self.nn(past_target)
            return prediction.expand_dims(axis=1)

    class MyEstimator(GluonEstimator):
        @validated()
        def __init__(
            self,
            freq: str,
            prediction_length: int,
            act_type: str = "relu",
            context_length: int = 30,
            cells: List[int] = [40, 40, 40],
            trainer: Trainer = Trainer(epochs=10),
        ) -> None:
            super().__init__(trainer=trainer)
            self.freq = freq
            self.prediction_length = prediction_length
            self.act_type = act_type
            self.context_length = context_length
            self.cells = cells

        def create_training_network(self) -> MyTrainNetwork:
            return MyTrainNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )

        def create_predictor(
            self,
            transformation: Transformation,
            trained_network: gluon.HybridBlock,
        ) -> Predictor:
            prediction_network = MyPredNetwork(
                prediction_length=self.prediction_length,
                cells=self.cells,
                act_type=self.act_type,
            )
            copy_parameters(trained_network, prediction_network)
            return RepresentableBlockPredictor(
                input_transform=transformation,
                prediction_net=prediction_network,
                batch_size=self.trainer.batch_size,
                freq=self.freq,
                prediction_length=self.prediction_length,
                ctx=self.trainer.ctx,
            )

        def create_transformation(self):
            # Model specific input transform
            # Here we use a transformation that randomly
            # selects training samples from all series.
            return InstanceSplitter(
                target_field=FieldName.TARGET,
                is_pad_field=FieldName.IS_PAD,
                start_field=FieldName.START,
                forecast_start_field=FieldName.FORECAST_START,
                train_sampler=ExpectedNumInstanceSampler(num_instances=1),
                past_length=self.context_length,
                future_length=self.prediction_length,
            )

    from gluonts.trainer import Trainer
    from gluonts.evaluation import Evaluator
    from gluonts.evaluation.backtest import backtest_metrics

    dataset_info, train_ds, test_ds = constant_dataset()

    meta = dataset_info.metadata
    estimator = MyEstimator(
        freq=meta.freq,
        prediction_length=1,
        trainer=Trainer(epochs=1, batch_size=32),
    )
    predictor = estimator.train(train_ds)
    evaluator = Evaluator(quantiles=(0.1, 0.5, 0.9))
    agg_metrics, item_metrics = backtest_metrics(
        train_dataset=train_ds,
        test_dataset=test_ds,
        forecaster=predictor,
        evaluator=evaluator,
    )
from gluonts.model.simple_feedforward import SimpleFeedForwardEstimator
from gluonts.trainer import Trainer
from gluonts.dataset.loader import TrainDataLoader
import numpy as np
from gluonts.support.util import get_hybrid_forward_input_names
from gluonts.evaluation.backtest import make_evaluation_predictions
from gluonts.evaluation import Evaluator
from dataset import dataset

estimator = SimpleFeedForwardEstimator(
    num_hidden_dimensions=[10],
    prediction_length=dataset.metadata.prediction_length,
    context_length=100,
    freq=dataset.metadata.freq,
    trainer=Trainer(ctx="cpu",
                    epochs=5,
                    learning_rate=1e-3,
                    num_batches_per_epoch=100))

net = estimator.create_training_network()
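# Illustrative follow-up: the helper imported above can report which fields the training network
# expects as inputs, which is handy when feeding batches to `net` manually. This assumes the older
# gluonts.support.util signature that accepts a network instance.
input_names = get_hybrid_forward_input_names(net)
print(input_names)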