def build_deepar_model():
    """Train DeepAR on the "exchange_rate" dataset and plot one forecast.

    Writes two images to the working directory: ``dataset.png`` (the first
    training series) and ``deepar-model.png`` (the tail of the first test
    series together with its forecast).
    """
    # Fetch the financial dataset "exchange_rate".
    exchange_rate = get_dataset("exchange_rate", regenerate=True)
    first_train_entry = next(iter(exchange_rate.train))
    first_test_entry = next(iter(exchange_rate.test))
    metadata = exchange_rate.metadata

    # Plot the first training series.
    _, train_ax = plt.subplots(1, 1, figsize=(11, 8))
    to_pandas(first_train_entry).plot(ax=train_ax)
    train_ax.grid(which="both")
    train_ax.legend(["train data"], loc="upper left")
    plt.savefig("dataset.png")

    # Print the keys of the sampled entries and the dataset metadata.
    print(first_train_entry.keys())
    print(first_test_entry.keys())
    print(metadata)

    # Wrap both splits in ListDataset objects for GluonTS.
    training_data = common.ListDataset(exchange_rate.train, freq=metadata.freq)
    testing_data = common.ListDataset(exchange_rate.test, freq=metadata.freq)

    # The Trainer object customises how the DeepAR estimator is fitted.
    trainer = Trainer(ctx="cpu", epochs=100, learning_rate=1e-4)
    estimator = deepar.DeepAREstimator(
        freq=metadata.freq,
        prediction_length=metadata.prediction_length,
        trainer=trainer,
    )

    # Training the estimator yields the predictor.
    predictor = estimator.train(training_data=training_data)

    # Produce forecasts alongside the matching ground-truth series.
    forecasts, test_series = make_evaluation_predictions(
        dataset=testing_data,
        predictor=predictor,
        num_samples=10,
    )

    # Legend lists the intervals in reverse order of `prediction_intervals`.
    prediction_intervals = (50.0, 90.0)
    interval_labels = [
        f"{k}% forecast interval" for k in reversed(prediction_intervals)
    ]
    legend = ["actual data", "median forecast"] + interval_labels

    _, forecast_ax = plt.subplots(1, 1, figsize=(11, 8))
    # Last 150 points of the first test series.
    first_series = list(test_series)[0]
    first_series[-150:].plot(ax=forecast_ax)
    # Forecast for that same series.
    first_forecast = list(forecasts)[0]
    first_forecast.plot(prediction_intervals=prediction_intervals, color='r')
    plt.grid(which="both")
    plt.legend(legend, loc="upper left")
    plt.savefig("deepar-model.png")
def test_symbol_and_array(hybridize: bool):
    """Smoke-test DeepAR training with ``ZeroAndOneInflatedBetaOutput``.

    Covers cases like the one presented in issue 1211, in which the Inflated
    Beta outputs used a method only available to arrays and not to symbols.
    A short training run is performed purely to ensure no exception is raised.
    """
    targets = (
        [0, 0.0460043, 0.263906, 0.4103112, 1],
        [1, 0.65815564, 0.44982578, 0.58875054, 0],
    )
    entries = [
        {"target": target, "start": pd.to_datetime("1999-01-04")}
        for target in targets
    ]
    dataset = common.ListDataset(entries, freq="W-MON", one_dim_target=True)

    estimator = deepar.DeepAREstimator(
        freq="W",
        prediction_length=2,
        trainer=Trainer(
            epochs=1,
            num_batches_per_epoch=2,
            hybridize=hybridize,
        ),
        distr_output=ZeroAndOneInflatedBetaOutput(),
        context_length=2,
        batch_size=1,
        scaling=False,
    )

    estimator.train(dataset)
def build_ff_model():
    """Train a simple feed-forward model on a Twitter-volume CSV and plot it.

    Downloads the CSV, trains a ``SimpleFeedForwardEstimator`` on the values
    up to "2015-04-05 00:00:00" and writes the forecast plot to
    ``ff-model.png``.
    """
    csv_url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
    # Load the CSV into a dataframe, indexed by its first column.
    raw_data = pd.read_csv(csv_url, header=0, index_col=0)

    # Describe the series in the form GluonTS expects:
    #   start  - the first index value of the dataframe
    #   target - the time-series values to model
    #   freq   - the frequency at which the data was collected
    series_entry = {
        "start": raw_data.index[0],
        "target": raw_data.value[:"2015-04-05 00:00:00"],
    }
    train_data = common.ListDataset([series_entry], freq="5min")

    # The Trainer object customises how the estimator is fitted.
    trainer = Trainer(ctx="cpu", epochs=100, learning_rate=1e-3)
    estimator = simple_feedforward.SimpleFeedForwardEstimator(
        freq="5min",
        prediction_length=100,
        trainer=trainer,
    )

    # Training the estimator yields the predictor.
    predictor = estimator.train(training_data=train_data)

    # Pair every training entry with its forecast over the full horizon.
    forecast_iter = predictor.predict(train_data)
    for entry, forecast in zip(train_data, forecast_iter):
        # Show only the last 100 timestamps of the training series.
        history = to_pandas(entry)
        history[-100:].plot()
        # Overlay the model forecast and write the figure to disk.
        forecast.plot(output_file='ff-model.png', color='r')
def prepare_data_univariate(index, values, freq):
    """Wrap one univariate series in a ``common.ListDataset``.

    Args:
        index: Indexable collection; its first element is used as "start".
        values: The series values, passed through as "target".
        freq: Frequency string forwarded to ``ListDataset``.

    Returns:
        A ``ListDataset`` holding a single entry.
    """
    entry = {"start": index[0], "target": values}
    return common.ListDataset([entry], freq=freq)
deepar算法实现,依赖包 mxnet mxnet-mkl gluon gluonts,pathlib ''' import matplotlib.pyplot as plt import pandas as pd from pathlib import Path from gluonts.model import deepar from gluonts.dataset import common from gluonts.dataset.util import to_pandas from gluonts.model.predictor import Predictor # 数据加载 df = pd.read_csv('/home/zhouxi/pig.csv', header=0, index_col=0) data = common.ListDataset([{ "start": df.index[100], "target": df.price[:"2018-12-05 00:00:00"] }], freq="D") train_data = data # 模型训练 estimator = deepar.DeepAREstimator(freq="D", prediction_length=10) predictor = estimator.train(training_data=data) # 模型预测和绘图 for test_entry, forecast in zip(train_data, predictor.predict(train_data)): to_pandas(test_entry)[-60:].plot(linewidth=2) forecast.plot(color='g', prediction_intervals=[50.0, 90.0]) plt.grid(which='both') plt.show()
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor
from gluonts.trainer import Trainer

csv_path = '/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/Twitter_volume_AMZN.csv'

# Read the CSV and index it by the parsed "timestamp" column.
df = pd.read_csv(csv_path, header=0, sep=',')
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index(['timestamp'])

# print(df.value[:"2015-04-22 20:47:53"])  # the end timestamp [2015-04-22 20:47:53] is included
# print(df.value[:"2015-04-23 20:47:53"])  # a timestamp beyond the data range simply returns the data that exists
# print("start timestamp", df.index[0])

# "start" is the first timestamp; "target" is the sequence of values for the
# corresponding timestamps. This entry layout is fixed: df.index holds the
# timestamps and df.value the value at each timestamp.
# NOTE(review): the commented-out timestamps above (e.g. 20:47:53) are not on
# hour boundaries - confirm that freq='H' matches the sampling of this CSV.
data = common.ListDataset(
    [dict(start=df.index[0], target=df.value[:"2015-04-22 21:00:00"])],
    freq='H',
)

estimator = deepar.DeepAREstimator(
    freq='H',
    prediction_length=24,
    trainer=Trainer(epochs=50),
)
predictor = estimator.train(training_data=data)

# Persist the trained predictor.
predictor.serialize(Path("/Users/seenli/Documents/workspace/code/pytorch_learn2/time_series_DL/model_save"))

# Plot the last 60 observations of each entry together with its forecast.
for train_entry, predict_result in zip(data, predictor.predict(data)):
    history = to_pandas(train_entry)
    history[-60:].plot(linewidth=2)
    predict_result.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import pandas as pd
from pathlib import Path
from gluonts.model import deepar
from gluonts.dataset import common
from gluonts.dataset.util import to_pandas
from gluonts.model.predictor import Predictor

url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)

# NOTE(review): confirm that the sampling interval of this CSV matches freq="H".
data = common.ListDataset(
    [{"start": df.index[0], "target": df.value[:"2015-04-23 00:00:00"]}],
    freq="H",
)
# Everything below refers to the dataset as `train_data`, which was never
# bound before (NameError); alias it to the dataset built above.
train_data = data

estimator = deepar.DeepAREstimator(freq="H", prediction_length=24)
predictor = estimator.train(training_data=train_data)

# Plot the last 60 observations of each entry together with its forecast.
# The slice must be `[-60:]`: `[-60]` selects a single value, which has no
# `.plot` method.
for test_entry, forecast in zip(train_data, predictor.predict(train_data)):
    to_pandas(test_entry)[-60:].plot(linewidth=2)
    forecast.plot(color='g', prediction_intervals=[50.0, 90.0])
plt.grid(which='both')
plt.show()

# Print the mean of the first forecast and save its plot.
prediction = next(predictor.predict(train_data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

# Persist the trained predictor.
predictor.serialize(Path("/home/root/mxnetTS/GluonTS-Learning-in-Action/chapter-1/model"))
def train_and_predict(code, start_date, end_date, data_path, predict_path):
    """Train DeepAR on one stock's daily close and save a short forecast.

    Reads ``<data_path>/<code>.csv``, fills the calendar days missing strictly
    between the first date in the file and ``end_date`` with the last seen
    closing price, trains a DeepAR model on the filled series, and writes the
    last five rows of that series followed by the forecast to
    ``<predict_path>/red_<code>.csv`` or ``<predict_path>/green_<code>.csv``.

    The "red" file name is used when the smallest forecast sample comes before
    the largest one (by flattened position) and the relative rise from the
    smallest to the largest is at least 0.099; otherwise "green" is used.

    Args:
        code: Stock code, used to build the input and output file names.
        start_date: Not used by this function; kept so callers need no change.
        end_date: End of the fill range, formatted as ``%Y%m%d``.
        data_path: Directory that contains ``<code>.csv``.
        predict_path: Directory the result CSV is written to.

    Returns:
        ``False`` when the CSV has fewer than 360 rows (nothing is trained),
        ``True`` once the result file has been written.
    """
    predict_days = 2
    date_format = '%Y-%m-%d'

    csv = os.path.join(data_path, '{code}.csv'.format(code=code))
    df = pd.read_csv(csv)

    # Skip series with fewer than 360 rows of training data.
    if len(df) < 360:
        return False

    # Rename the columns by assignment: `set_axis(..., inplace=True)` no longer
    # works because pandas 2.0 removed the `inplace` argument.
    df.columns = [
        'DT', 'CODE', 'NAME', 'TCLOSE', 'HIGH', 'LOW', 'TOPEN', 'LCLOSE',
        'CHG', 'PCHG', 'TURNOVER', 'VOTURNOVER', 'VATURNOVER', 'TCAP', 'MCAP',
    ]
    # Keep only the date (as index) and the closing price (the label),
    # ordered by date.
    df = df[['DT', 'TCLOSE']].set_index('DT')
    df = df.iloc[df.index.argsort()]

    # Every calendar day strictly between the first date and `end_date`.
    first_day = datetime.strptime(df.index[0], date_format)
    last_day = datetime.strptime(end_date, "%Y%m%d")
    all_dt = [
        format(first_day + timedelta(days=offset), date_format)
        for offset in range(1, (last_day - first_day).days)
    ]

    # Days absent from the file get the last available closing price.
    miss_data = []
    value = df.TCLOSE[df.index[0]]
    for dt in all_dt:
        if dt in df.index:
            value = df.TCLOSE[dt]
        else:
            miss_data.append([dt, value])
    miss_df = pd.DataFrame(miss_data, columns=['DT', 'TCLOSE']).set_index('DT')

    # Merge original and filled rows, switch to a datetime index named
    # "timestamp", and sort once (ISO date strings and datetimes order alike).
    new_df = pd.concat([df, miss_df], axis=0)
    new_df['timestamp'] = pd.to_datetime(new_df.index)
    train_data = new_df.set_index('timestamp').sort_index()

    # Build the training dataset for DeepAR.
    data = common.ListDataset(
        [{'start': train_data.index[0], 'target': train_data.TCLOSE[:]}],
        freq='1d',
    )

    # Train on GPU when mxnet reports one; otherwise use the Trainer default.
    if len(mxnet.test_utils.list_gpus()):
        trainer = Trainer(ctx='gpu', epochs=100)
    else:
        trainer = Trainer(epochs=100)
    estimator = deepar.DeepAREstimator(
        freq='1d', prediction_length=predict_days, trainer=trainer)
    predictor = estimator.train(training_data=data)

    # Predict the future values (second argument presumably the number of
    # sample paths - TODO confirm against the predictor API).
    forecast = list(predictor.predict(data, 1))[0]
    samples = forecast.samples
    # Named so they do not shadow the `max` / `min` builtins.
    highest, lowest = samples.max(), samples.min()
    highest_id, lowest_id = samples.argmax(), samples.argmin()

    predict_x = [
        format(forecast.start_date + timedelta(days=offset), date_format)
        for offset in range(0, predict_days + 1)
    ]
    predict_y = samples[0]
    # zip() stops at the shorter input, so any surplus date is dropped.
    predict_df = pd.DataFrame(
        list(zip(pd.to_datetime(predict_x), predict_y)),
        columns=['DT', 'TCLOSE'],
    )
    predict_df['timestamp'] = predict_df['DT']
    predict_df.set_index('timestamp', inplace=True)

    # Copy before adding a column so the assignment targets an independent
    # frame rather than a selection of `train_data`.
    train_df = train_data.loc[train_data.index[-5:]].copy()
    train_df['DT'] = pd.to_datetime(train_df.index)
    output_df = pd.concat([train_df, predict_df], axis=0)

    if lowest_id < highest_id and (highest - lowest) / lowest >= 0.099:
        file_name = 'red_{code}.csv'.format(code=code)
    else:
        file_name = 'green_{code}.csv'.format(code=code)
    output_df.to_csv(os.path.join(predict_path, file_name))
    return True
#df.drop(columns=['label', 'high', 'low', 'volume', 'notional', 'numberOfTrades', "marketHigh", "marketLow", "marketAverage", "marketNotional", "marketNumberOfTrades", "open", "close", "marketOpen", "marketClose", "changeOverTime", "marketChangeOverTime"])
#prices = df.filter(['date','average', 'marketClose'])
'''
train_ds = ListDataset([{
    "start": df.index[0],
    "target": df.marketClose[:"2020-06-02 04:29:00"]
}], freq="1min")
test_ds = ListDataset([{
    "start": df.index[-390],
    "target": df.marketClose["2020-06-05 09:30:00": '2020-06-05 15:59:00' ]
}], freq="1min")
'''

# One-entry dataset built from `df`: market close up to 2020-06-08 15:59:00.
data = common.ListDataset(
    [dict(start=df.index[0], target=df.marketClose[:"2020-06-08 15:59:00"])],
    freq="1min",
)

# One-entry dataset built from `new_data`: every close except the last one.
lots_of_data = common.ListDataset(
    [dict(start=new_data.index[0], target=new_data.Close[:-1])],
    freq="1min",
)

trainer = Trainer(
    epochs=10,
    ctx="cpu",
    num_batches_per_epoch=75,
)
estimator = deepar.DeepAREstimator(
    freq="1min",
    prediction_length=390,
    trainer=trainer,
    num_layers=2,
)
#predictor = estimator.train(training_data=data)
from gluonts.dataset import common
from gluonts.model import deepar

import pandas as pd

url = "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_AMZN.csv"
df = pd.read_csv(url, header=0, index_col=0)

# Train on the values up to 2015-04-05. The slice label previously read
# "2015.txt-04-05 00:00:00", which is not a timestamp and therefore could not
# act as the intended date cutoff.
data = common.ListDataset(
    [{
        "start": df.index[0],
        "target": df.value[:"2015-04-05 00:00:00"],
    }],
    freq="5min",
)

estimator = deepar.DeepAREstimator(freq="5min", prediction_length=12)
predictor = estimator.train(training_data=data)

# Print the mean of the first forecast and save its plot.
prediction = next(predictor.predict(data))
print(prediction.mean)
prediction.plot(output_file='graph.png')

if __name__ == "__main__":
    pass