def reformat_files(directory=None, data_manager_factory=None):
    """Rename raw Commsec CSV files to ``<ticker>.<start>-<end>.csv``.

    Every ``*.csv`` file in ``directory`` whose name does not already contain
    a ``.YYYYMMDD-YYYYMMDD`` date range is renamed in place. The ticker is the
    file name with ``CSV-`` and ``.csv`` removed; the date range is taken from
    the ``start_date`` and ``end_date`` attributes of the data manager built
    for that ticker.

    Args:
        directory: Folder that holds the CSV files. Defaults to ``COMMSEC``.
        data_manager_factory: Callable invoked as
            ``factory(ticker, [], [], [])`` returning an object that exposes
            ``start_date`` and ``end_date`` (each with ``year``, ``month`` and
            ``day``). Defaults to ``CommsecDataManager``. When ``directory``
            is overridden, pass a factory that reads from the same place.

    Raises:
        OSError: If the directory cannot be listed or a rename fails.
    """
    # Resolve the defaults lazily so the module-level names are only needed
    # when the caller does not supply replacements.
    if directory is None:
        directory = COMMSEC
    if data_manager_factory is None:
        data_manager_factory = CommsecDataManager

    def _yyyymmdd(moment):
        # Zero-padded month/day keep the stamp at a fixed eight digits.
        return "{:d}{:02d}{:02d}".format(moment.year, moment.month, moment.day)

    # Files that already carry a ".YYYYMMDD-YYYYMMDD" range are left alone.
    dated = re.compile(r'\.\d{8}-\d{8}')
    pending = [
        file_name for file_name in os.listdir(directory)
        if file_name.endswith(".csv") and not dated.search(file_name)
    ]

    # Keyed by ticker: if two files reduce to the same ticker, only the last
    # one listed is renamed (same as the dictionary used originally).
    ticker_to_csv = {
        file_name.replace('CSV-', '').replace('.csv', ''): file_name
        for file_name in pending
    }

    for ticker, csv_old_name in ticker_to_csv.items():
        dm = data_manager_factory(ticker, [], [], [])
        start = _yyyymmdd(dm.start_date)
        # The day comes from end_date itself; end_date.year has no ``day``.
        end = _yyyymmdd(dm.end_date)
        csv_new_name = "{ticker}.{start}-{end}.csv".format(
            ticker=ticker, start=start, end=end)
        os.rename(os.path.join(directory, csv_old_name),
                  os.path.join(directory, csv_new_name))
from data.commsec import CommsecColumns as ccs, FeatureTypes as ft, CommsecDataManager from models.lds.kalman import KalmanFilter from matplotlib import pyplot as plt import pandas as pd import numpy as np np.set_printoptions(precision=4, threshold=3, suppress=True) selection = [ccs.high, ccs.low, ccs.volume] types = [ft.median_price] * 2 + [ft.signed_volume] names = ['High', 'Low', '(+/-) x volume'] rmd = CommsecDataManager('RMD', selection, types, names) rmd_allprices = CommsecDataManager('RMD', [ccs.high, ccs.low], [ft.price, ft.price], ['High', 'Low']) rmd_close = CommsecDataManager('RMD', [ccs.close], [ft.price], ['Close']) price_name = 'Median RMD High, RMD Low' # Setup data (dates, obs) = rmd[:] obs = obs.T obs = obs.reshape((obs.shape[0], 1, obs.shape[1])) conds = np.zeros((5, 1, obs.shape[2])) def gen_params(obs_size, state_size): # Setup Kalman Filter A = np.array([[1, 0, 0, 0, 0], [0.01, 1, 1, 0, 0], [0, 0, 1, 1, 0], [0, 0, 0, 1, 1], [0, 0, 0, 0, 1]]) B = np.array([[0, 0, 0, 0, 0],
from data.commsec import CommsecColumns as ccs, FeatureTypes as ft, CommsecDataManager
from models.lds.kalman import KalmanFilter
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
from renderers.renderers import GraphWriter
import os

# Feature specification handed to CommsecDataManager: the high and low
# columns, both tagged with the median_price feature type.
selection = [ccs.high, ccs.low]
types = [ft.median_price] * 2
names = ['High', 'Low']
# Three views of the 'RMD' ticker: the median-price features above, the
# high/low columns as plain prices, and the close price on its own.
rmd = CommsecDataManager('RMD', selection, types, names)
rmd_allprices = CommsecDataManager('RMD', [ccs.high, ccs.low], [ft.price, ft.price], ['High', 'Low'])
rmd_close = CommsecDataManager('RMD', [ccs.close], [ft.price], ['Close'])
# Label text; not used in the visible part of the script
# (presumably a plot label -- TODO confirm).
price_name = 'Median RMD High, RMD Low'

# Setup data
# Slicing the whole manager yields a (dates, observations) pair.
(dates, obs) = rmd[:]
# Transpose, then insert a singleton middle axis so obs becomes 3-D.
# NOTE(review): presumably (features, 1, time steps) -- confirm against
# CommsecDataManager.__getitem__.
obs = obs.T
obs = obs.reshape((obs.shape[0], 1, obs.shape[1]))
# All-zero array with 4 rows and the same last-axis length as obs; the 4
# matches the column count of B and D in gen_params.
# NOTE(review): presumably the control inputs for the filter -- confirm.
conds = np.zeros((4, 1, obs.shape[2]))


def gen_params(obs_size, state_size):
    # NOTE(review): only the start of this function is visible here; the
    # remainder (and its return value) lies outside this chunk. Neither
    # obs_size nor state_size is used in the visible part.
    # Setup Kalman Filter
    # A: 4x4, ones on the diagonal and on the first super-diagonal.
    A = np.array([[1, 1, 0, 0],
                  [0, 1, 1, 0],
                  [0, 0, 1, 1],
                  [0, 0, 0, 1]])
    # B: 4x4 zeros.
    B = np.array([[0, 0, 0, 0],
                  [0, 0, 0, 0],
                  [0, 0, 0, 0],
                  [0, 0, 0, 0]])
    # C: 1x4 row that picks out the first of the four components.
    C = np.array([[1, 0, 0, 0]])
    # D: 1x4 zeros.
    # NOTE(review): A/B/C/D presumably follow the usual linear dynamical
    # system naming (transition, control, observation, feed-through) --
    # confirm against models.lds.kalman.KalmanFilter.
    D = np.array([[0, 0, 0, 0]])
from data.commsec import CommsecColumns as ccs, FeatureTypes as ft, CommsecDataManager from models.lds.kalman import KalmanFilter from matplotlib import pyplot as plt import pandas as pd import numpy as np selection = [ccs.open, ccs.high, ccs.low, ccs.close, ccs.volume] types = [ft.median_price] * 4 + [ft.signed_volume] names = ['Open', 'High', 'Low', 'Close', '(+/-) x volume'] rmd = CommsecDataManager('RMD', selection, types, names) rmd_allprices = CommsecDataManager('RMD', [ccs.high, ccs.low], [ft.price, ft.price], ['High', 'Low']) rmd_close = CommsecDataManager('RMD', [ccs.close], [ft.price], ['Close']) price_name = 'Median RMD High, RMD Low' # Setup data (dates, obs) = rmd[:] obs = obs.T obs = obs.reshape((obs.shape[0], 1, obs.shape[1])) conds = np.zeros((5, 1, obs.shape[2])) # Setup Kalman Filter A = np.array([[1, 0, 0.1, 0, 0], [0.001, 1, 0, 0.8, 0], [0, 0, 1, 0, 0], [0, 0, 0, 1, 0], [0, 0, 0, 0, 1]]) B = np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0], [0, 0, 0, 0, 0],
'SPL', 'SRX', 'STO', 'WOW', 'WPL'] SELECTION = [CommsecColumns.high, CommsecColumns.low] AS_TYPES = [FeatureTypes.median_price, FeatureTypes.median_price] WITH_NAMES = ['High', 'Low'] training_start_dates = KEY_DATES[:-2] test_start_dates = KEY_DATES[1:-1] test_end_dates = KEY_DATES[2:] n_periods = len(test_end_dates) N_LATENT = 3 # Define a period as 6 months. for stock in STOCKS[:7]: data_manager = CommsecDataManager(stock, SELECTION, AS_TYPES, WITH_NAMES) # Split into training sets and test sets for each period for period in range(n_periods): training_data = data_manager.get_data_set(training_start_dates[period], test_start_dates[period]) test_data = data_manager.get_data_set(test_start_dates[period], test_end_dates[period], end_inclusive=True) # Experiment for 1 period training_dates = training_data[CommsecColumns.date.value] test_dates = test_data[CommsecColumns.date.value] training_set = training_data[data_manager.features()].values test_set = test_data[data_manager.features()].values # Reshape data for models to use. training_ys = training_set.T.reshape((training_set.shape[1], 1, training_set.shape[0])) test_ys = test_set.T.reshape((test_set.shape[1], 1, test_set.shape[0]))
from data.commsec import CommsecColumns as ccs, FeatureTypes as ft, CommsecDataManager
from models.ts.ts import ARMA, ARIMA
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np

# Compact NumPy printing: 4 digits of precision, summarise arrays with more
# than 3 elements, and suppress scientific notation for small values.
np.set_printoptions(precision=4, threshold=3, suppress=True)

# Feature specification handed to CommsecDataManager: the high and low
# columns, both tagged with the median_price feature type.
selection = [ccs.high, ccs.low]
types = [ft.median_price] * 2
names = ['High', 'Low']
# Three views of the 'RMD' ticker: the median-price features above, the
# high/low columns as plain prices, and the close price on its own.
rmd = CommsecDataManager('RMD', selection, types, names)
rmd_allprices = CommsecDataManager('RMD', [ccs.high, ccs.low], [ft.price, ft.price], ['High', 'Low'])
rmd_close = CommsecDataManager('RMD', [ccs.close], [ft.price], ['Close'])
# Label text; not used in the visible part of the script
# (presumably a plot label -- TODO confirm).
price_name = 'Median RMD High, RMD Low'

# Setup data
# Slicing the whole manager yields a (dates, observations) pair.
(dates, obs) = rmd[:]
# Transpose, then insert a singleton middle axis so obs becomes 3-D.
# NOTE(review): presumably (features, 1, time steps) -- confirm against
# CommsecDataManager.__getitem__.
obs = obs.T
obs = obs.reshape((obs.shape[0], 1, obs.shape[1]))

# Split along the last axis: the first `test_start` points are the training
# series and the next (up to) `n_data_points` points are the test series.
# r, c and ts are not used in the visible part of the script.
(r, c, ts) = obs.shape
test_start = 252
n_data_points = 2000
test_end = test_start + n_data_points
# Flattened to 1-D of length test_start. The reshape only succeeds when the
# slice holds exactly test_start elements, i.e. the first two axes of obs
# both have length 1 and obs has at least test_start points.
training_ys = obs[:, :, :test_start].reshape(test_start)
# The test slice keeps the 3-D layout; slicing past the end of the array
# simply yields fewer points.
test_ys = obs[:, :, test_start:test_end]