def test_cum_sum(source):
    ts = TimeSeries("test", source)
    cs = ts.cumsum()
    assert len(cs.ts) == len(set(source))
    assert cs.vs[-1] == ts.vs.sum()
def engine_func():
    global feature_set, label_set

    get_metric()
    df_in = pd.read_csv('/home/sandun/Desktop/CPU/RND/280.csv')
    feature_set, label_set = hybrid_data(df_in)

    model = TimeSeries(model=MODEL)
    history = model.train_model(features=feature_set, labels=label_set, epochs=10)
    write_history(history)
    prediction = model.get_prediction(feature_set)
    write_prediction(prediction.tolist())
    model.save_model()

    # Write predictions and scores to disk
    mail_interval = int(time.time())
    train_interval = int(time.time())
    predict_interval = int(time.time())
    get_metric_interval = int(time.time())
    idle_status = False

    while True:
        time_now = int(time.time())

        if time_now - get_metric_interval >= GET_METRIC_INTERVAL:
            get_metric()
            feature_set, label_set = hybrid_data(df_in)
            # reset the timer so the metric refresh fires once per interval
            get_metric_interval = int(time.time())

        if time_now - predict_interval >= PREDICT_INTERVAL:
            idle_status = False
            print("Predicting ...")
            prediction = model.get_prediction(feature_set)
            write_prediction(prediction.tolist())
            predict_interval = int(time.time())
        elif time_now - mail_interval >= MAIL_INTERVAL:
            idle_status = False
            print("Sending Email ...")
            status = mail(TO_ADDRESS, read_prediction())
            print(status)
            mail_interval = int(time.time())
        elif time_now - train_interval >= TRAIN_INTERVAL:
            idle_status = False
            print("Training model ...")
            history = model.train_model(features=feature_set, labels=label_set, epochs=1)
            write_history(history)
            model.save_model()
            train_interval = int(time.time())
        else:
            if not idle_status:
                print("Engine Idle ...")
                idle_status = True
def open(self, *args, **kwds):
    r"""
    Return a time series containing historical opening prices for this
    stock. If no arguments are given, the last acquired historical data
    is returned. Otherwise, the data is fetched from Google Finance.

    INPUT:

    - ``startdate`` -- string, (default: ``'Jan+1,+1900'``)

    - ``enddate`` -- string, (default: current date)

    - ``histperiod`` -- string, (``'daily'`` or ``'weekly'``)

    OUTPUT:

    A time series -- open price data.

    EXAMPLES:

    You can directly obtain open data as follows::

        sage: finance.Stock('vmw').open(startdate='Jan+1,+2008', enddate='Feb+1,+2008')   # optional -- internet
        [83.0500, 85.4900, 84.9000, 82.0000, 81.2500 ... 82.0000, 58.2700, 54.4900, 55.6000, 56.9800]

    Or, you can initialize stock data first and then extract the open
    data::

        sage: c = finance.Stock('vmw')
        sage: c.google(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]   # optional -- internet
        [
         31-Jan-08 55.60 57.35 55.52 56.67   2591100,
          1-Feb-08 56.98 58.14 55.06 57.85   2473000,
          4-Feb-08 58.00 60.47 56.91 58.05   1816500,
          5-Feb-08 57.60 59.30 57.17 59.30   1709000,
          6-Feb-08 60.32 62.00 59.50 61.52   2191100
        ]
        sage: c.open()   # optional -- internet
        [55.6000, 56.9800, 58.0000, 57.6000, 60.3200 ... 56.5500, 59.3000, 60.0000, 59.7900, 59.2600]

    Otherwise, ``self.google()`` will be called with the default
    arguments returning a year's worth of data::

        sage: finance.Stock('vmw').open()   # random; optional -- internet
        [52.1100, 60.9900, 59.0000, 56.0500, 57.2500 ... 83.0500, 85.4900, 84.9000, 82.0000, 81.2500]
    """
    from time_series import TimeSeries

    if len(args) != 0:
        return TimeSeries([x.open for x in self.google(*args, **kwds)])

    try:
        return TimeSeries([x.open for x in self.__historical])
    except AttributeError:
        pass

    return TimeSeries([x.open for x in self.google(*args, **kwds)])
def close(self, *args, **kwds):
    r"""
    Return the time series of all historical closing prices for this
    stock. If no arguments are given, the last acquired historical data
    is returned. Otherwise, the data is fetched from Google Finance.

    INPUT:

    - ``startdate`` -- string, (default: ``'Jan+1,+1900'``)

    - ``enddate`` -- string, (default: current date)

    - ``histperiod`` -- string, (``'daily'`` or ``'weekly'``)

    OUTPUT:

    A time series -- close price data.

    EXAMPLES:

    You can directly obtain close data as follows::

        sage: finance.Stock('vmw').close(startdate='Jan+1,+2008', enddate='Feb+1,+2008')   # optional -- internet
        [84.6000, 83.9500, 80.4900, 72.9900, ... 83.0000, 54.8700, 56.4200, 56.6700, 57.8500]

    Or, you can initialize stock data first and then extract the close
    data::

        sage: c = finance.Stock('vmw')   # optional -- internet
        sage: c.history(startdate='Feb+1,+2008', enddate='Mar+1,+2008')[:5]   # optional -- internet
        [
          1-Feb-08 56.98 58.14 55.06 57.85   2490481,
          4-Feb-08 58.00 60.47 56.91 58.05   1840709,
          5-Feb-08 57.60 59.30 57.17 59.30   1712179,
          6-Feb-08 60.32 62.00 59.50 61.52   2211775,
          7-Feb-08 60.50 62.75 59.56 60.80   1521651
        ]
        sage: c.close()   # optional -- internet
        [57.8500, 58.0500, 59.3000, 61.5200, ... 58.2900, 60.1800, 59.8600, 59.9500, 58.6700]

    Otherwise, :meth:`history` will be called with the default
    arguments returning a year's worth of data::

        sage: finance.Stock('vmw').close()   # random; optional -- internet
        [57.7100, 56.9900, 55.5500, 57.3300, 65.9900 ... 84.9900, 84.6000, 83.9500, 80.4900, 72.9900]
    """
    from time_series import TimeSeries

    if len(args) != 0:
        return TimeSeries([x.close for x in self.history(*args, **kwds)])

    try:
        return TimeSeries([x.close for x in self.__historical])
    except AttributeError:
        pass

    return TimeSeries([x.close for x in self.history(*args, **kwds)])
def test_eq():
    # generate variants
    variants = [[i // 5 for i in range(100 * j, 199 * j)] for j in range(1, 5)]
    for one in variants:
        for two in variants:
            if one == two:
                assert TimeSeries('a', one) == TimeSeries('b', two)
            else:
                assert TimeSeries('c', one) != TimeSeries('d', two)
def _create_monthly_ts(self):
    return TimeSeries.create(
        name=u'Test',
        date_frame=u'Monthly',
        is_snapshot=False,
        dates=[
            datetime(2017, 1, 31),
            datetime(2017, 2, 28),
            datetime(2017, 3, 31),
            datetime(2017, 4, 30),
            datetime(2017, 5, 31),
            datetime(2017, 6, 30),
            datetime(2017, 7, 31),
            datetime(2017, 8, 31),
            datetime(2017, 9, 30),
            datetime(2017, 10, 31),
            datetime(2017, 11, 30),
            datetime(2017, 12, 31),
        ],
        values=[
            100.0, 200.0, 300.0, 400.0, 500.0, 600.0,
            700.0, 800.0, 900.0, 1000.0, 1100.0, 1200.0,
        ])
def get(self, stock_code, metric_name):
    """Get metric values by metric name.

    Args:
        stock_code: A string representing the stock code.
        metric_name: A string representing the metric name.

    Returns:
        A map of TimeSeries. Keys are date frames and values are the
        corresponding TimeSeries.
    """
    output = {}
    literal_metric_name = self.metric_names.get(metric_name)
    statement_ids = self._get_statement_ids_containing(
        stock_code, literal_metric_name)
    for statement_id in statement_ids:
        date_frame = self.financial_statement_store.get_date_frame(
            statement_id)
        is_snapshot = self.financial_statement_store.get_is_snapshot(
            statement_id)
        results = self._get_by_statement_id(stock_code, literal_metric_name,
                                            statement_id)
        dates = [entry.statement_date for entry in results]
        values = [entry.metric_value for entry in results]
        output[date_frame] = TimeSeries.create(name=metric_name,
                                               date_frame=date_frame,
                                               is_snapshot=is_snapshot,
                                               dates=dates,
                                               values=values)
    return output
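# Hedged usage sketch for get() above. The concrete store class and the stock
# code / metric name are illustrative assumptions, not part of the original
# snippet; only the return shape (a date-frame -> TimeSeries map) comes from it.
store = FinancialStatementEntryStore()        # hypothetical concrete store
series_by_frame = store.get('2317', 'NetProfit')
for date_frame, ts in series_by_frame.items():
    data = ts.get()                           # dict with 'date' and 'value' lists
    print(date_frame, list(zip(data['date'], data['value'])))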
def test_dt():
    dt_from_ = datetime.datetime(2019, 12, 1)
    dt_to_ = datetime.datetime(2020, 1, 1)
    from_ = dt_from_.timestamp()
    to_ = dt_to_.timestamp()

    orig_grid = 10
    ts_raw = [
        i * orig_grid + from_
        for i in range(-2 * orig_grid,
                       int((to_ - from_) / orig_grid) + 2 * orig_grid)
    ]
    assert ts_raw

    ts = TimeSeries("test", ts_raw)
    assert ts.ts.min() < from_
    assert ts.ts.max() > to_

    time_interval = datetime.timedelta(days=1)
    assert ts[from_:to_:time_interval.total_seconds()] == ts[dt_from_:dt_to_:time_interval]
def clean_timeseries(self, attr='values', inplace=True, time_index_name='year',
                     time_index=None, lower=0, upper=None,
                     interpolation_method='missing', extrapolation_method='missing'):
    if time_index is None:
        time_index = cfg.cfgfile.get('case', 'years')
    # 'missing' is a sentinel meaning "use the method configured on the object";
    # compare with == rather than `is`, which is unreliable for strings
    interpolation_method = self.interpolation_method if interpolation_method == 'missing' else interpolation_method
    extrapolation_method = self.extrapolation_method if extrapolation_method == 'missing' else extrapolation_method

    data = getattr(self, attr)
    clean_data = TimeSeries.clean(
        data=data,
        newindex=time_index,
        time_index_name=time_index_name,
        interpolation_method=interpolation_method,
        extrapolation_method=extrapolation_method).clip(lower=lower, upper=upper)

    if inplace:
        setattr(self, attr, clean_data)
    else:
        return clean_data
def _collect_price_time_series(self):
    """Parse the requested JSON for daily close and daily average time
    series data.

    Returns:
        tuple: Ordered pair of TimeSeries built from the "daily" and
        "average" dictionaries. Keys are strings representing ms since
        epoch; values are integers representing the signal (amount of gp).
    """
    r = requests.get(self.GRAPH_URL)
    # dictionary of 2 dictionaries, "daily" and "average"
    response = r.json()
    daily_series = TimeSeries.from_dictionary(response["daily"])
    average_series = TimeSeries.from_dictionary(response["average"])
    return (daily_series, average_series)
def _collect_volume_time_series(self):
    """Parse the OSRS GX tradeable item webpage for trade volume data.

    Returns:
        TimeSeries: trade volume time series data. Keys are strings in
        `%Y-%m-%d` format. Values are integers representing the number
        of units moved by close.
    """
    # fetch the item page as text
    page_as_text = requests.get(self.VOLUME_URL).text
    # search the item page for tags that contain volume information
    volume_tags = re.findall(r"trade180.push.*", page_as_text)
    volume_series = {}
    # iterate over all the tags just found
    for match in volume_tags:
        tv_pairs = re.findall(r"Date\(.*\), \d+", match)
        # separate the timestamps from volume data
        for pair in tv_pairs:
            t, v = tuple(pair.split(','))
            # strip the "Date('...')" wrapper so only the Y/M/D piece remains
            t = t.strip("Date('").strip("')'")
            volume_series[t] = int(v)
    volume_series = TimeSeries.from_dictionary(volume_series)
    return volume_series
def _create_one_ts(self):
    return TimeSeries.create(
        name=u'One',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[
            datetime(2016, 3, 31),
            datetime(2016, 6, 30),
            datetime(2016, 9, 30)
        ],
        values=[100.0, 200.0, 300.0])
def _create_other_ts(self):
    return TimeSeries.create(
        name=u'Other',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[
            datetime(2016, 3, 31),
            datetime(2016, 6, 30),
            datetime(2016, 9, 30)
        ],
        values=[400.0, 500.0, 600.0])
def to_future_matrix(X, days_predict=5, days_window=5, train_model=None):
    # Note: X is the dataframe in the format produced when first read from excel
    # initialize TS model
    ts_model = TimeSeries(days_window, train_model)
    all_ctry_new_df = pd.DataFrame(columns=[
        "country_id", "date", "cases", "deaths", "cases_14_100k", "cases_100k"
    ])
    country_id_col = X.loc[:, "country_id"].unique()
    for country in country_id_col:
        X_cur = X[X["country_id"] == country].copy(deep=True)
        ctry_df = process_ts_ctry(ts_model, country, X_cur, days_predict,
                                  days_window, train_model)
        all_ctry_new_df = pd.concat([all_ctry_new_df, ctry_df], axis=0)
    return all_ctry_new_df
def test_annualize(self):
    one = TimeSeries.create(
        name=u'Test',
        date_frame=u'Yearly',
        is_snapshot=False,
        dates=[
            datetime(2013, 12, 31),
            datetime(2014, 12, 31),
            datetime(2015, 12, 31)
        ],
        values=[100.0, 200.0, 300.0])
    other = TimeSeries.create(
        name=u'Test',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[
            datetime(2016, 3, 31),
            datetime(2016, 6, 30),
            datetime(2016, 9, 30)
        ],
        values=[400.0, 500.0, 600.0])

    actual = one.annualize(other).get()
    # Expected 2016 value: the three reported quarters scaled up to a full
    # year, (400.0 + 500.0 + 600.0) / 3 * 4 = 2000.0
    expected = {
        'name': u'Test',
        'date_frame': u'Yearly',
        'is_snapshot': False,
        'date': [
            datetime(2013, 12, 31),
            datetime(2014, 12, 31),
            datetime(2015, 12, 31),
            datetime(2016, 12, 31)
        ],
        'value': [100.0, 200.0, 300.0, 2000.0],
    }
    self.assertEqual(actual, expected)
def clean_timeseries(self, attr='values', inplace=True, time_index_name='year',
                     time_index=None, lower=0, upper=None,
                     interpolation_method='missing', extrapolation_method='missing'):
    if time_index is None:
        time_index = cfg.cfgfile.get('case', 'years')
    # as above, compare the 'missing' sentinel with == rather than `is`
    interpolation_method = self.interpolation_method if interpolation_method == 'missing' else interpolation_method
    extrapolation_method = self.extrapolation_method if extrapolation_method == 'missing' else extrapolation_method

    data = getattr(self, attr)
    clean_data = TimeSeries.clean(
        data=data,
        newindex=time_index,
        time_index_name=time_index_name,
        interpolation_method=interpolation_method,
        extrapolation_method=extrapolation_method).clip(lower=lower, upper=upper)

    if inplace:
        setattr(self, attr, clean_data)
    else:
        return clean_data
def test_to_grid(start, stop, count):
    ts = TimeSeries("test", dates_perc)
    sliced = ts[start:stop]
    assert len(sliced.ts)
    assert sliced.sum()

    period = ((stop or sliced.ts.max()) - (start or sliced.ts.min())) / count
    gridded = ts[start:stop:period]
    if start:
        assert gridded.ts.min() == start
    if count != 1 and stop:
        assert gridded.ts.max() == stop
    assert len(gridded.ts) == count
    assert ts[start:stop].sum() == gridded.sum()
def test_create(self):
    ts = TimeSeries.create(
        name=u'Test',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[datetime(2016, 6, 30), datetime(2016, 3, 31)],
        values=[100, 200])
    expected = {
        'name': u'Test',
        'date_frame': u'Quarterly',
        'is_snapshot': False,
        'date': [datetime(2016, 3, 31), datetime(2016, 6, 30)],
        'value': [200, 100],
    }
    self.assertEqual(ts.get(), expected)
def get_filtered_ts(dt_start, dt_end, in_dir, target_month, target_year):
    filtered_ts_list = []
    date_start = (str(target_month).zfill(2) + "/" +
                  str(dt_start.day).zfill(2) + "/" + str(target_year))
    date_end = (str(target_month).zfill(2) + "/" +
                str(dt_end.day).zfill(2) + "/" + str(target_year))
    for server in os.listdir(in_dir):
        print(server)
        for file_name in os.listdir(in_dir + "/" + server + "/"):
            mac = file_name.split(".")[0]
            csv_path = in_dir + "/" + server + "/" + file_name
            ts = TimeSeries(csv_path, target_month, target_year, metric,
                            dt_start, dt_end)
            if filter_ts(ts):
                filtered_ts_list.append(
                    [mac, server, csv_path, date_start, date_end])
    return filtered_ts_list
def test_accumulate_annually(self):
    ts = TimeSeries.create(
        name=u'Test',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[
            datetime(2015, 3, 31), datetime(2015, 6, 30),
            datetime(2015, 9, 30), datetime(2015, 12, 31),
            datetime(2016, 3, 31), datetime(2016, 6, 30),
            datetime(2016, 9, 30), datetime(2016, 12, 31)
        ],
        values=[100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0])

    actual = ts.accumulate_annually().get()
    expected = {
        'name': u'Test',
        'date_frame': u'Quarterly',
        'is_snapshot': False,
        'date': [
            datetime(2015, 3, 31), datetime(2015, 6, 30),
            datetime(2015, 9, 30), datetime(2015, 12, 31),
            datetime(2016, 3, 31), datetime(2016, 6, 30),
            datetime(2016, 9, 30), datetime(2016, 12, 31)
        ],
        'value': [
            100.0,
            100.0 + 200.0,
            100.0 + 200.0 + 300.0,
            100.0 + 200.0 + 300.0 + 400.0,
            500.0,
            500.0 + 600.0,
            500.0 + 600.0 + 700.0,
            500.0 + 600.0 + 700.0 + 800.0
        ],
    }
    self.assertEqual(actual, expected)
def test_yoy(self):
    ts = TimeSeries.create(
        name=u'Test',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[
            datetime(2015, 3, 31), datetime(2015, 6, 30),
            datetime(2015, 9, 30), datetime(2015, 12, 31),
            datetime(2016, 3, 31), datetime(2016, 6, 30),
            datetime(2016, 9, 30), datetime(2016, 12, 31)
        ],
        values=[100.0, 200.0, 300.0, 400.0, 500.0, 600.0, 700.0, 800.0])

    actual = ts.yoy().get()
    expected = {
        'name': u'Test',
        'date_frame': u'Quarterly',
        'is_snapshot': False,
        'date': [
            datetime(2016, 3, 31), datetime(2016, 6, 30),
            datetime(2016, 9, 30), datetime(2016, 12, 31)
        ],
        'value': [
            (500.0 - 100.0) / 100.0,
            (600.0 - 200.0) / 200.0,
            (700.0 - 300.0) / 300.0,
            (800.0 - 400.0) / 400.0,
        ],
    }
    self.assertEqual(actual, expected)
def correct_signal(self, time, signal, tmin=None, tmax=None, r0=5e-2,
                   correction="bassetbound", window="blackmanharris",
                   impedance=None):
    if tmin is None:
        tmin = time[0]
    if tmax is None:
        tmax = time[-1]
    dt = time[1] - time[0]

    # restrict the signal to the requested time window
    mask = np.logical_and(time > tmin, time < tmax)
    sig = signal[mask]
    t = time[mask]
    signal_length = len(sig)

    # frequency response of the chosen correction (skipping the DC bin)
    freq = rfftfreq(signal_length, dt)[1:]
    amp = getattr(self, f"amplitude_ratio_{correction}")(freq)
    phase = getattr(self, f"phase_{correction}")(freq)
    if impedance is None:
        impedance = np.ones_like(amp)
    else:
        impedance = getattr(self, f"{impedance}_impedance")(freq, r0)
    response = amp * np.exp(1j * phase) / impedance
    response = np.r_[1, response]

    # deconvolve: window the signal, divide its spectrum by the response, invert
    win = get_window(window, signal_length)
    corrected_signal = irfft(rfft(sig * win) / response, n=signal_length)
    return TimeSeries(corrected_signal, t, name="Corrected")
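# Hedged usage sketch for correct_signal above. `ProbeResponse` and the sample
# arrays are illustrative assumptions; only the method signature and the
# returned TimeSeries name come from the snippet itself.
import numpy as np

probe = ProbeResponse()                       # hypothetical owner of correct_signal
t = np.linspace(0.0, 0.02, 2000)              # 20 ms trace, ~100 kHz sampling
raw = np.sin(2 * np.pi * 500 * t)             # placeholder measured signal
corrected = probe.correct_signal(t, raw, tmin=0.0, tmax=0.01,
                                 correction="bassetbound",
                                 window="blackmanharris")
print(corrected.name)                         # "Corrected"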
def test_copy(self):
    original = TimeSeries.create(
        name=u'Test',
        date_frame=u'Quarterly',
        is_snapshot=False,
        dates=[datetime(2016, 3, 31), datetime(2016, 6, 30)],
        values=[100.0, 200.0])
    original_expected = {
        'name': u'Test',
        'date_frame': u'Quarterly',
        'is_snapshot': False,
        'date': [datetime(2016, 3, 31), datetime(2016, 6, 30)],
        'value': [100.0, 200.0],
    }

    # Make a copy.
    copied = original.copy()

    # Original time series won't be changed even if we modify the copied one.
    copied.name = u'Copied'
    copied.date_frame = u'Yearly'
    copied.is_snapshot = True
    copied_expected = {
        'name': u'Copied',
        'date_frame': u'Yearly',
        'is_snapshot': True,
        'date': [datetime(2016, 3, 31), datetime(2016, 6, 30)],
        'value': [100.0, 200.0],
    }

    self.assertEqual(original.get(), original_expected)
    self.assertEqual(copied.get(), copied_expected)
def plot():
    global ax, fig, serverMac, change_points, ts, label_text, serverMac_id

    id_stringvar.set(str(serverMac_id[serverMac] + 1) + "/" + str(len(serverMac_id)))

    server = serverMac.split("_")[0]
    mac = serverMac.split("_")[1]
    in_file_path = "../../input/" + date_dir + "/" + server + "/" + mac + ".csv"
    in_file_path_cp = "./output/" + date_dir + "/" + server + "/" + mac + ".csv"

    dt_axvline = []
    if os.path.exists(in_file_path_cp):
        df = pd.read_csv(in_file_path_cp)
        for idx, row in df.iterrows():
            dt_axvline.append(row["dt"])
            change_points.append(row["dt"])

    fig.clf()
    ax = fig.add_subplot(111)
    ts = TimeSeries(in_file_path, target_month, target_year, metric)
    plot_procedures.plotax_ts(ax, ts, dt_axvline=dt_axvline, ylim=[-0.01, 1.01])
    canvas.show()
    fig.canvas.mpl_connect('button_press_event', handle_mouse_click)
# pHat has a voltage divider using 120k + 820k resistors
# (mapping 25.85V onto the 3.3V max)
VOLT_DIVIDER = 120.0 / (120.0 + 820.0)

# our sampling time in secs
INTERVAL = 1.0

# which ADC channel
ADC_CHANNEL = 0

if 'debug' in sys.argv:
    DEBUG = True
else:
    DEBUG = False

ts = TimeSeries(["voltage"])

if DEBUG:
    print("\nPress CTRL+C to exit.\n")

time.sleep(INTERVAL)  # short pause after ads1015 class creation recommended(??)

try:
    while True:
        t = time.time()
        value = adc.read_adc(ADC_CHANNEL, gain=GAIN, data_rate=DATA_RATE)
        volts = float(value) / MAX_VALUE * GAIN_VOLTAGE / VOLT_DIVIDER
        if DEBUG:
            print("{0:.3f} {1:5d} {2:.6f}".format(t, value, volts))
        ts.store(t, [volts])
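# Sanity check on the divider comment above (an illustration, not from the
# original script): with 820k in series and 120k to ground, the ADC sees
# V_in * 120 / (120 + 820), so a 3.3 V full-scale reading corresponds to
# roughly 25.85 V at the source.
R_BOTTOM, R_TOP = 120.0e3, 820.0e3
divider = R_BOTTOM / (R_BOTTOM + R_TOP)       # ~0.1277, same as VOLT_DIVIDER
assert abs(3.3 / divider - 25.85) < 0.01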
import torch
import torch.nn as nn

from network import Net
from prepare_data import prepare_data
from time_series import TimeSeries
from trainer import Trainer

if __name__ == '__main__':
    time_series_data = prepare_data()
    epoch_num = 100
    batch_size = 4
    dataset = TimeSeries(time_series_data,
                         input_time_interval=365,
                         output_time_interval=7,
                         output_keyword='peak_load')
    net = Net(in_ch=dataset.data_channels,
              out_ch=dataset.output_time_interval)
    optimizer = torch.optim.Adam(net.parameters(), lr=0.0001)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.1, patience=10, verbose=True,
        threshold=0.0001, threshold_mode='rel', cooldown=0, min_lr=0,
from time_series import TimeSeries
# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing

# Imports for data visualization
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld
from pandas.plotting import register_matplotlib_converters

register_matplotlib_converters()

ts = TimeSeries('dataset/average_temp_india.csv', train_size=0.7)

plt.plot(ts.data.iloc[:, 1].index, ts.data.iloc[:, 1])
plt.gcf().autofmt_xdate()
plt.title("Average Temperature of India (2000-2018)")
plt.xlabel("Time")
plt.ylabel("Temperature (°C)")
plt.show()

model = ExponentialSmoothing(ts.train, trend='additive', seasonal='additive').fit()
prediction = model.predict(start=ts.data.iloc[:, 1].index[0],
                           end=ts.data.iloc[:, 1].index[-1])

"""Brutlag Algorithm"""

PERIOD = 12        # The given time series has seasonal_period=12
GAMMA = 0.3684211  # the seasonality component
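# Sketch (an assumption, not taken from the original script): how PERIOD and
# GAMMA typically enter Brutlag's deviation bands around a Holt-Winters fit.
import numpy as np

def brutlag_bands(actual, predicted, gamma=GAMMA, period=PERIOD, scale=3):
    """Return (upper, lower) confidence bands for observed vs. fitted values."""
    n = len(actual)
    deviation = np.zeros(n)
    upper = np.zeros(n)
    lower = np.zeros(n)
    for t in range(n):
        # seasonal absolute deviation, smoothed with the same lag as the season
        prev_dev = deviation[t - period] if t >= period else 0.0
        deviation[t] = gamma * abs(actual[t] - predicted[t]) + (1 - gamma) * prev_dev
        upper[t] = predicted[t] + scale * prev_dev
        lower[t] = predicted[t] - scale * prev_dev
    return upper, lower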
class MockFinancialStatementEntryStore(object):

    mock_data = {
        '2317': {
            # CapitalIncreaseHistoryService
            'CapitalIncreaseByCash': {
                u'Yearly': TimeSeries.create(
                    name=u'CapitalIncreaseByCash',
                    date_frame=u'Yearly',
                    is_snapshot=False,
                    dates=[datetime(2005, 12, 31), datetime(2006, 12, 31),
                           datetime(2007, 12, 31)],
                    values=[26.44, 27.01, 27.01]),
            },
            'CapitalIncreaseByEarnings': {
                u'Yearly': TimeSeries.create(
                    name=u'CapitalIncreaseByEarnings',
                    date_frame=u'Yearly',
                    is_snapshot=False,
                    dates=[datetime(2005, 12, 31), datetime(2006, 12, 31),
                           datetime(2007, 12, 31)],
                    values=[346.52, 435.51, 547.78]),
            },
            'CapitalIncreaseBySurplus': {
                u'Yearly': TimeSeries.create(
                    name=u'CapitalIncreaseBySurplus',
                    date_frame=u'Yearly',
                    is_snapshot=False,
                    dates=[datetime(2005, 12, 31), datetime(2006, 12, 31),
                           datetime(2007, 12, 31)],
                    values=[36.01, 53.83, 53.83]),
            },
            # DuPontService
            'NetProfit': {
                u'Yearly': TimeSeries.create(
                    name=u'NetProfit',
                    date_frame=u'Yearly',
                    is_snapshot=False,
                    dates=[datetime(2014, 12, 31), datetime(2015, 12, 31),
                           datetime(2016, 12, 31)],
                    values=[132482, 150201, 151357]),
                u'Quarterly': TimeSeries.create(
                    name=u'NetProfit',
                    date_frame=u'Quarterly',
                    is_snapshot=False,
                    dates=[datetime(2017, 3, 31), datetime(2017, 6, 30),
                           datetime(2017, 9, 30)],
                    values=[29207, 14919, 19665]),
            },
            'Assets': {
                u'Yearly': TimeSeries.create(
                    name=u'Assets',
                    date_frame=u'Yearly',
                    is_snapshot=True,
                    dates=[datetime(2014, 12, 31), datetime(2015, 12, 31),
                           datetime(2016, 12, 31)],
                    values=[2312461, 2462715, 2308300]),
                u'Quarterly': TimeSeries.create(
                    name=u'Assets',
                    date_frame=u'Quarterly',
                    is_snapshot=True,
                    dates=[datetime(2017, 3, 31), datetime(2017, 6, 30),
                           datetime(2017, 9, 30)],
                    values=[2332342, 2457578, 2762655]),
            },
            'Equity': {
                u'Yearly': TimeSeries.create(
                    name=u'Equity',
                    date_frame=u'Yearly',
                    is_snapshot=True,
                    dates=[datetime(2014, 12, 31), datetime(2015, 12, 31),
                           datetime(2016, 12, 31)],
                    values=[984677, 1060391, 1133789]),
                u'Quarterly': TimeSeries.create(
                    name=u'Equity',
                    date_frame=u'Quarterly',
                    is_snapshot=True,
                    dates=[datetime(2017, 3, 31), datetime(2017, 6, 30),
                           datetime(2017, 9, 30)],
                    values=[1183505, 1132860, 1156638]),
            },
            'Sales': {
                u'Yearly': TimeSeries.create(
                    name=u'Sales',
                    date_frame=u'Yearly',
                    is_snapshot=False,
                    dates=[datetime(2014, 12, 31), datetime(2015, 12, 31),
                           datetime(2016, 12, 31)],
                    values=[4213172, 4482146, 4358733]),
                u'Quarterly': TimeSeries.create(
                    name=u'Sales',
                    date_frame=u'Quarterly',
                    is_snapshot=False,
                    dates=[datetime(2017, 3, 31), datetime(2017, 6, 30),
                           datetime(2017, 9, 30)],
                    values=[975044, 922412, 1078892]),
            }
        }
    }

    def get(self, stock_code, metric_name):
        if stock_code in self.mock_data and metric_name in self.mock_data[stock_code]:
            return self.mock_data[stock_code][metric_name]
        raise ValueError(
            u'Could not get mock data: stock_code={stock_code} metric_name={metric_name}'
            .format(stock_code=stock_code, metric_name=metric_name))
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Bokeh component classes
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

# Categories map of dropdown values, SQL column, and SQL table (and data source for range_categories)
categories = Categories(sources)

# Bokeh table objects
data_tables = DataTables(sources)

# Bokeh objects for each tab layout
planning_data = PlanningData(custom_title, data_tables)
roi_viewer = ROI_Viewer(sources, custom_title)
mlc_analyzer = MLC_Analyzer(sources, custom_title, data_tables)
time_series = TimeSeries(sources, categories.range, custom_title, data_tables)
correlation = Correlation(sources, categories, custom_title)
regression = Regression(sources, time_series, correlation,
                        categories.multi_var_reg_var_names, custom_title, data_tables)
correlation.add_regression_link(regression)
rad_bio = RadBio(sources, time_series, correlation, regression, custom_title, data_tables)
dvhs = DVHs(sources, time_series, correlation, regression, custom_title, data_tables)
query = Query(sources, categories, dvhs, rad_bio, roi_viewer, time_series,
              correlation, regression, mlc_analyzer, custom_title, data_tables)
dvhs.add_query_link(query)

# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# Listen for changes to sources
def main():
    parser = argparse.ArgumentParser()
    default_dataset = 'toy-data.npz'
    parser.add_argument('--data', default=default_dataset, help='data file')
    parser.add_argument('--seed', type=int, default=None,
                        help='random seed. Randomly set if not specified.')

    # training options
    parser.add_argument('--nz', type=int, default=32,
                        help='dimension of latent variable')
    parser.add_argument('--epoch', type=int, default=1000,
                        help='number of training epochs')
    parser.add_argument('--batch-size', type=int, default=128,
                        help='batch size')
    parser.add_argument('--lr', type=float, default=8e-5,
                        help='encoder/decoder learning rate')
    parser.add_argument('--dis-lr', type=float, default=1e-4,
                        help='discriminator learning rate')
    parser.add_argument('--min-lr', type=float, default=5e-5,
                        help='min encoder/decoder learning rate for LR '
                             'scheduler. -1 to disable annealing')
    parser.add_argument('--min-dis-lr', type=float, default=7e-5,
                        help='min discriminator learning rate for LR '
                             'scheduler. -1 to disable annealing')
    parser.add_argument('--wd', type=float, default=0, help='weight decay')
    parser.add_argument('--overlap', type=float, default=.5,
                        help='kernel overlap')
    parser.add_argument('--no-norm-trans', action='store_true',
                        help='if set, use Gaussian posterior without '
                             'transformation')
    parser.add_argument('--plot-interval', type=int, default=1,
                        help='plot interval. 0 to disable plotting.')
    parser.add_argument('--save-interval', type=int, default=0,
                        help='interval to save models. 0 to disable saving.')
    parser.add_argument('--prefix', default='pbigan',
                        help='prefix of output directory')
    parser.add_argument('--comp', type=int, default=7,
                        help='continuous convolution kernel size')
    parser.add_argument('--ae', type=float, default=.2,
                        help='autoencoding regularization strength')
    parser.add_argument('--aeloss', default='smooth_l1',
                        help='autoencoding loss. (options: mse, smooth_l1)')
    parser.add_argument('--ema', dest='ema', type=int, default=-1,
                        help='start epoch of exponential moving average '
                             '(EMA). -1 to disable EMA')
    parser.add_argument('--ema-decay', type=float, default=.9999,
                        help='EMA decay')
    parser.add_argument('--mmd', type=float, default=1,
                        help='MMD strength for latent variable')

    # squash is off when rescale is off
    parser.add_argument('--squash', dest='squash', action='store_const',
                        const=True, default=True,
                        help='bound the generated time series value '
                             'using tanh')
    parser.add_argument('--no-squash', dest='squash', action='store_const',
                        const=False)

    # rescale to [-1, 1]
    parser.add_argument('--rescale', dest='rescale', action='store_const',
                        const=True, default=True,
                        help='if set, rescale time to [-1, 1]')
    parser.add_argument('--no-rescale', dest='rescale', action='store_const',
                        const=False)

    args = parser.parse_args()

    batch_size = args.batch_size
    nz = args.nz
    epochs = args.epoch
    plot_interval = args.plot_interval
    save_interval = args.save_interval

    try:
        npz = np.load(args.data)
        train_data = npz['data']
        train_time = npz['time']
        train_mask = npz['mask']
    except FileNotFoundError:
        if args.data != default_dataset:
            raise
        # Generate the default toy dataset from scratch
        train_data, train_time, train_mask, _, _ = gen_data(
            n_samples=10000, seq_len=200, max_time=1, poisson_rate=50,
            obs_span_rate=.25, save_file=default_dataset)

    _, in_channels, seq_len = train_data.shape
    train_time *= train_mask

    if args.seed is None:
        rnd = np.random.RandomState(None)
        random_seed = rnd.randint(np.iinfo(np.uint32).max)
    else:
        random_seed = args.seed
        rnd = np.random.RandomState(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Scale time
    max_time = 5
    train_time *= max_time

    squash = None
    rescaler = None
    if args.rescale:
        rescaler = Rescaler(train_data)
        train_data = rescaler.rescale(train_data)
    if args.squash:
        squash = torch.tanh

    out_channels = 64
    cconv_ref = 98

    train_dataset = TimeSeries(
        train_data, train_time, train_mask, label=None, max_time=max_time,
        cconv_ref=cconv_ref, overlap_rate=args.overlap, device=device)

    train_loader = DataLoader(train_dataset, batch_size=batch_size,
                              shuffle=True, drop_last=True,
                              collate_fn=train_dataset.collate_fn)
    n_train_batch = len(train_loader)

    time_loader = DataLoader(train_dataset, batch_size=batch_size,
                             shuffle=True, drop_last=True,
                             collate_fn=train_dataset.collate_fn)

    test_loader = DataLoader(train_dataset, batch_size=batch_size,
                             collate_fn=train_dataset.collate_fn)

    grid_decoder = SeqGeneratorDiscrete(in_channels, nz, squash)
    decoder = Decoder(grid_decoder, max_time=max_time).to(device)

    cconv = ContinuousConv1D(in_channels, out_channels, max_time, cconv_ref,
                             overlap_rate=args.overlap, kernel_size=args.comp,
                             norm=True).to(device)
    encoder = Encoder(cconv, nz, not args.no_norm_trans).to(device)

    pbigan = PBiGAN(encoder, decoder, args.aeloss).to(device)

    critic_cconv = ContinuousConv1D(in_channels, out_channels, max_time,
                                    cconv_ref, overlap_rate=args.overlap,
                                    kernel_size=args.comp, norm=True).to(device)
    critic = ConvCritic(critic_cconv, nz).to(device)

    ema = None
    if args.ema >= 0:
        ema = EMA(pbigan, args.ema_decay, args.ema)

    optimizer = optim.Adam(pbigan.parameters(), lr=args.lr,
                           weight_decay=args.wd)
    critic_optimizer = optim.Adam(critic.parameters(), lr=args.dis_lr,
                                  weight_decay=args.wd)

    scheduler = make_scheduler(optimizer, args.lr, args.min_lr, epochs)
    dis_scheduler = make_scheduler(critic_optimizer, args.dis_lr,
                                   args.min_dis_lr, epochs)

    path = '{}_{}'.format(args.prefix,
                          datetime.now().strftime('%m%d.%H%M%S'))
    output_dir = Path('results') / 'toy-pbigan' / path
    print(output_dir)
    log_dir = mkdir(output_dir / 'log')
    model_dir = mkdir(output_dir / 'model')
    start_epoch = 0

    with (log_dir / 'seed.txt').open('w') as f:
        print(random_seed, file=f)
    with (log_dir / 'gpu.txt').open('a') as f:
        print(torch.cuda.device_count(), start_epoch, file=f)
    with (log_dir / 'args.txt').open('w') as f:
        for key, val in sorted(vars(args).items()):
            print(f'{key}: {val}', file=f)

    tracker = Tracker(log_dir, n_train_batch)
    visualizer = Visualizer(encoder, decoder, batch_size, max_time,
                            test_loader, rescaler, output_dir, device)
    start = time.time()
    epoch_start = start

    for epoch in range(start_epoch, epochs):
        loss_breakdown = defaultdict(float)

        for ((val, idx, mask, _, cconv_graph),
             (_, idx_t, mask_t, index, _)) in zip(train_loader, time_loader):

            z_enc, x_recon, z_gen, x_gen, ae_loss = pbigan(
                val, idx, mask, cconv_graph, idx_t, mask_t)

            cconv_graph_gen = train_dataset.make_graph(
                x_gen, idx_t, mask_t, index)

            real = critic(cconv_graph, batch_size, z_enc)
            fake = critic(cconv_graph_gen, batch_size, z_gen)

            D_loss = gan_loss(real, fake, 1, 0)

            critic_optimizer.zero_grad()
            D_loss.backward(retain_graph=True)
            critic_optimizer.step()

            G_loss = gan_loss(real, fake, 0, 1)
            mmd_loss = mmd(z_enc, z_gen)
            loss = G_loss + ae_loss * args.ae + mmd_loss * args.mmd

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if ema:
                ema.update()

            loss_breakdown['D'] += D_loss.item()
            loss_breakdown['G'] += G_loss.item()
            loss_breakdown['AE'] += ae_loss.item()
            loss_breakdown['MMD'] += mmd_loss.item()
            loss_breakdown['total'] += loss.item()

        if scheduler:
            scheduler.step()
        if dis_scheduler:
            dis_scheduler.step()

        cur_time = time.time()
        tracker.log(epoch, loss_breakdown, cur_time - epoch_start,
                    cur_time - start)

        if plot_interval > 0 and (epoch + 1) % plot_interval == 0:
            if ema:
                ema.apply()
                visualizer.plot(epoch)
                ema.restore()
            else:
                visualizer.plot(epoch)

        model_dict = {
            'pbigan': pbigan.state_dict(),
            'critic': critic.state_dict(),
            'ema': ema.state_dict() if ema else None,
            'epoch': epoch + 1,
            'args': args,
        }
        torch.save(model_dict, str(log_dir / 'model.pth'))

        if save_interval > 0 and (epoch + 1) % save_interval == 0:
            torch.save(model_dict, str(model_dir / f'{epoch:04d}.pth'))

    print(output_dir)
# Imports for data visualization
import matplotlib.pyplot as plt
from pandas.plotting import register_matplotlib_converters
from matplotlib.dates import DateFormatter
from matplotlib import dates as mpld
# Seasonal Decompose
from statsmodels.tsa.seasonal import seasonal_decompose
# Holt-Winters or Triple Exponential Smoothing model
from statsmodels.tsa.holtwinters import ExponentialSmoothing
# TimeSeries helper class (assumed to live alongside this script, as in the
# temperature example above)
from time_series import TimeSeries

register_matplotlib_converters()

ts = TimeSeries('dataset/monthly_sales.csv', train_size=0.8)

print("Sales Data\n")
print(ts.data.describe())

print("\nHead and Tail of the time series\n")
print(ts.data.head(5).iloc[:, 1:])
print(ts.data.tail(5).iloc[:, 1:])

# Plot of raw time series data
plt.plot(ts.data.index, ts.data.sales)
plt.gcf().autofmt_xdate()
date_format = mpld.DateFormatter('%Y-%m')
plt.gca().xaxis.set_major_formatter(date_format)
plt.title("Sales Data Analysis (2013-2016)")
plt.xlabel("Time")