def test_existing_database(self):
    """Loading into a database that already has rows appends only the new file.

    A fresh fixture file is dropped into the source folder; after loading,
    the main table grows past the initial 10030 rows and the updated-files
    log records one extra entry.
    """
    engine = create_engine('sqlite://')
    loader = DataLoader('eoddata', engine=engine, mode='SQL', tablename='eod')
    # Simulate a previously-unseen source file alongside the existing fixtures.
    shutil.copy2('eoddata/INDEX_20180731.txt', 'eoddata/INDEX_20000000.txt')
    loader.load_data()
    eod_rows = pd.read_sql_table('eod', engine)
    updated_log = pd.read_sql_table('updated_eod', engine)
    self.assertEqual(len(eod_rows), 12053)
    self.assertEqual(len(updated_log), 6)
def test_create_hdf_file(self):
    """A fresh HDF store receives all rows plus the updated-files log."""
    with tempfile.NamedTemporaryFile() as store:
        loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                            tablename='eod')
        loader.load_data()
        data = pd.read_hdf(store.name, 'data/eod')
        updated_log = pd.read_hdf(store.name, 'updated/eod')
        self.assertEqual(len(data), 10030)
        self.assertEqual(len(updated_log), 5)
def load_data(self, dest_folder):
    """Run a DataLoader wired with the Mongo-backed row loaders for this domain."""
    form_loader = MongoFormLoader(self.name)
    case_loader = MongoCaseLoader(self.name)
    synclog_loader = MongoSynclogLoader(self.name)
    loader = DataLoader(dest_folder, self.name,
                        form_loader, case_loader, synclog_loader)
    loader.run()
def load_data(self, dest_folder):
    """Load forms, cases and synclogs into Couch via one CouchRowLoader each.

    Cases and synclogs are given an extra list of view names -- presumably
    so those views are queried/re-indexed after posting; TODO confirm
    against CouchRowLoader.
    """
    loader = DataLoader(
        dest_folder,
        self.name,
        # Forms: plain row loading, no views supplied.
        CouchRowLoader(self._get_couch_url(self.dbs.get('forms')), self.auth),
        # NOTE(review): self.dbs.get(...) returns None for a missing key;
        # what _get_couch_url(None) does is not visible here -- verify.
        CouchRowLoader(self._get_couch_url(self.dbs.get('cases')), self.auth, ['cases/by_owner']),
        CouchRowLoader(self._get_couch_url(self.dbs.get('synclogs')), self.auth, ['synclogs/by_previous_log_id']),
    )
    loader.run()
def test_SQL_rename_columns():
    """Columns supplied via ``columns=rename`` end up in the SQL table."""
    engine = create_engine('sqlite://')
    loader = DataLoader('eoddata', engine=engine, mode='SQL', tablename='eod')
    loader.load_data(columns=rename)
    frame = pd.read_sql_table('eod', engine)
    assert len(frame) == 10030
    expected = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
    for actual, wanted in zip(frame.columns, expected):
        assert actual == wanted
def test_HDF_rename_columns():
    """Columns supplied via ``columns=rename`` end up in the HDF store."""
    with tempfile.NamedTemporaryFile() as store:
        loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                            tablename='eod')
        loader.load_data(columns=rename)
        frame = pd.read_hdf(store.name, 'data/eod')
        assert len(frame) == 10030
        assert len(pd.read_hdf(store.name, 'updated/eod')) == 5
        expected = ['symbol', 'date', 'open', 'high', 'low', 'close', 'vol']
        for actual, wanted in zip(frame.columns, expected):
            assert actual == wanted
def test_existing_hdf_file(self):
    """Loading into an existing HDF store appends only the new file's rows."""
    with tempfile.NamedTemporaryFile() as store:
        loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                            tablename='eod')
        # Simulate a previously-unseen source file alongside the fixtures.
        shutil.copy2('eoddata/INDEX_20180731.txt', 'eoddata/INDEX_20000000.txt')
        loader.load_data()
        self.assertEqual(len(pd.read_hdf(store.name, 'data/eod')), 12053)
        self.assertEqual(len(pd.read_hdf(store.name, 'updated/eod')), 6)
def test_wrong_mode(self):
    """Mismatched or unknown modes raise instead of silently loading."""
    with tempfile.NamedTemporaryFile() as store:
        # HDF-style engine (a plain filename) combined with SQL mode must fail
        # at load time.
        loader = DataLoader('eoddata', engine=store.name, mode='SQL',
                            tablename='eod')
        with self.assertRaises(Exception):
            loader.load_data()
        # An unsupported mode is rejected at construction time.
        with self.assertRaises(TypeError):
            DataLoader('eoddata', engine='some_random_mode', mode='CSV',
                       tablename='eod')
def test_collate_data_function():
    """collate_data agrees with what DataLoader writes to SQL for the same files."""
    expected = collate_data('NASDAQ/data', parse_dates=['Date'])
    expected = expected.rename(lambda c: c.lower(), axis='columns')
    expected = expected.sort_values(by=['date', 'symbol'])
    engine = create_engine('sqlite://')
    loader = DataLoader(directory='NASDAQ/data', mode='SQL', engine=engine,
                        tablename='eod')
    loader.load_data()
    loaded = pd.read_sql_table('eod', engine).sort_values(by=['date', 'symbol'])
    assert len(expected) == len(loaded)
    # compare() is invoked repeatedly with the same frames -- presumably it
    # samples rows at random each call; TODO confirm its implementation.
    for _ in range(100):
        assert compare(expected, loaded)
def test_SQL_post_func():
    """A postfunc can add derived columns before rows reach the SQL table."""
    engine = create_engine('sqlite://')
    loader = DataLoader('eoddata', engine=engine, mode='SQL', tablename='eod')

    def annotate(frame, filename, root):
        # postfunc signature: (dataframe, filename, root directory).
        frame['filename'] = filename
        frame['avgprice'] = (frame['open'] + frame['close']) / 2
        return frame

    loader.load_data(columns=rename, postfunc=annotate)
    result = pd.read_sql_table('eod', engine)
    assert result.dtypes['date'] == dtype('<M8[ns]')
    # 7 renamed columns + the 2 added by the postfunc.
    assert result.shape[1] == 9
    assert 'filename' in result.columns
    assert 'avgprice' in result.columns
def test_HDF_post_func():
    """A postfunc can add derived columns before rows reach the HDF store."""
    with tempfile.NamedTemporaryFile() as store:
        loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                            tablename='eod')

        def annotate(frame, filename, root):
            # postfunc signature: (dataframe, filename, root directory).
            frame['filename'] = filename
            frame['avgprice'] = (frame['open'] + frame['close']) / 2
            return frame

        loader.load_data(columns=rename, postfunc=annotate)
        result = pd.read_hdf(store.name, 'data/eod')
        assert result.dtypes['date'] == dtype('<M8[ns]')
        # 7 renamed columns + the 2 added by the postfunc.
        assert result.shape[1] == 9
        assert 'filename' in result.columns
        assert 'avgprice' in result.columns
def test_apply_split_HDF_dataloader():
    """After apply_splits, OHLCV values in the HDF store match the reference CSV.

    Fixes: the inner row-index loop reused ``i``, shadowing the index of the
    outer ``splits.iterrows()`` loop (renamed to ``idx``); the debug ``print``
    inside the assertion loop is removed.
    """
    with tempfile.NamedTemporaryFile() as fp:
        engine = fp.name
        dl = DataLoader(directory='NASDAQ/data', mode='HDF', engine=engine,
                        tablename='eod')
        dl.load_data()
        dl.apply_splits(directory='NASDAQ/adjustments/')
        df = pd.read_hdf(engine, 'data/eod')
        result = pd.read_csv('NASDAQ/nasdaq_results.csv', parse_dates=['date'])
        splits = pd.read_csv('NASDAQ/adjustments/splits.csv',
                             parse_dates=['date'])
        price_cols = ('open', 'high', 'low', 'close', 'volume')
        for _, row in splits.iterrows():
            sym = row.at['symbol']
            cond = 'symbol == "{}"'.format(sym)
            frame1 = df.query(cond).sort_values(by='date').reset_index(
                drop=True)
            frame2 = result.query(cond).sort_values(by='date').reset_index(
                drop=True)
            for idx in range(len(frame1)):
                for col in frame1.columns:
                    if col in price_cols:
                        # Split-adjusted numerics: allow small rounding error.
                        a = frame1.loc[idx, col]
                        b = frame2.loc[idx, col]
                        assert isclose(a, b, abs_tol=0.015)
                    else:
                        assert frame1.loc[idx, col] == frame2.loc[idx, col]
def test_run_loader_multiple_times(self):
    """Repeated load_data calls are idempotent for both HDF and SQL backends."""
    with tempfile.NamedTemporaryFile() as store:
        hdf_loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                                tablename='eod')
        for _ in range(5):
            hdf_loader.load_data()
        engine = create_engine('sqlite://')
        sql_loader = DataLoader('eoddata', engine=engine, mode='SQL',
                                tablename='eod')
        for _ in range(5):
            sql_loader.load_data()
        shape_hdf = len(pd.read_hdf(store.name, 'data/eod'))
        shape_sql = len(pd.read_sql_table('eod', engine))
        self.assertEqual(shape_hdf, shape_sql)
        self.assertEqual(shape_hdf, 12053)
def run():
    """Run semantic segmentation over a video stream and overlay the result.

    Loads an FCN8s checkpoint trained on CamVid, crops a fixed window out of
    every incoming frame, segments it, overlays the decoded mask, pastes the
    annotated crop back into the frame, and saves/shows each result until the
    stream ends or 'q' is pressed.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Per-channel mean subtracted during preprocessing -- presumably BGR
    # Caffe/VGG-style means; TODO confirm against preproc_img.
    mean = np.array([104.00699, 116.66877, 122.67892])
    dataloader = DataLoader('E:/Autopilot/input/vc', 'E:/Autopilot/output/vc')
    model_path = "E:/Autopilot/pytorch-semseg-master/runs/39060/fcn8s_camvid_best_model.pkl"
    # The architecture name is the checkpoint filename's prefix up to '_'.
    model_file_name = os.path.split(model_path)[1]
    model_name = model_file_name[:model_file_name.find("_")]
    model_dict = {"arch": model_name}
    model = get_model(model_dict, 2, version='camvid')  # 2 output classes
    state = convert_state_dict(torch.load(model_path)["model_state"])
    model.load_state_dict(state)
    model.eval()
    model.to(device)
    buffer = []  # rolling state carried between frames by foo()
    for img0, _, _, _, frame in dataloader:
        if frame == 1:
            # First frame of a clip: drop state from the previous clip.
            buffer = []
        # x = 520
        # y = 770
        # Top-left corner of the fixed 1085x304 crop fed to the network.
        x = 550
        y = 680
        crop = img0[y:y + 304, x:x + 1085]
        img = preproc_img(crop, mean)
        img = img.to(device)
        # NOTE(review): inference is done without torch.no_grad(); gradients
        # are not used, so wrapping this call would save memory -- verify.
        outputs = model(img)
        # Per-pixel class map: argmax over channel dim, batch dim squeezed.
        pred = np.squeeze(outputs.data.max(1)[1].cpu().numpy(), axis=0)
        decoded = decode_segmap(pred)
        res = overlay_mask(crop, decoded)
        res, buffer = foo(crop, res, decoded, buffer, x_l=455, y_l=180)
        # Paste the annotated crop back into the full-resolution frame.
        img0[y:y + 304, x:x + 1085] = res
        dataloader.save_results(img0)
        cv2.imshow('123', res)
        if cv2.waitKey(1) == ord('q'):
            dataloader.release()
            break
def load_data(self, dest_folder):
    """Run a DataLoader that only materializes cases; forms and synclogs use mocks."""
    form_loader = MockRowLoader()
    case_loader = RawCaseLoaderSQL(self.psql)
    synclog_loader = MockRowLoader()
    loader = DataLoader(dest_folder, self.name,
                        form_loader, case_loader, synclog_loader)
    loader.run()
def test_SQL_parse_dates_auto():
    """The date column is parsed to datetime64 without explicit parse_dates."""
    engine = create_engine('sqlite://')
    loader = DataLoader('eoddata', engine=engine, mode='SQL', tablename='eod')
    loader.load_data(columns=rename)
    frame = pd.read_sql_table('eod', engine)
    assert frame.dtypes['date'] == dtype('<M8[ns]')
def test_HDF_parse_dates_auto():
    """The date column is parsed to datetime64 without explicit parse_dates."""
    with tempfile.NamedTemporaryFile() as store:
        loader = DataLoader('eoddata', engine=store.name, mode='HDF',
                            tablename='eod')
        loader.load_data(columns=rename)
        frame = pd.read_hdf(store.name, 'data/eod')
        assert frame.dtypes['date'] == dtype('<M8[ns]')
def load_data(self, dest_folder):
    """Load forms, cases and synclogs through one shared CouchRowLoader.

    The same loader instance is passed for all three streams, so every
    stream posts to the same Couch URL with the same credentials.
    """
    row_loader = CouchRowLoader(self.couch_url, self.auth)
    loader = DataLoader(dest_folder, self.name, row_loader, row_loader, row_loader)
    loader.run()
def load_data(self, dest_folder):
    """Run a DataLoader wired with the SQL-backed form/case/synclog loaders."""
    form_loader = FormLoaderSQL(self.psql)
    case_loader = FullCaseLoaderSQL(self.psql)
    synclog_loader = SynclogLoaderSQL(self.psql)
    loader = DataLoader(dest_folder, self.name,
                        form_loader, case_loader, synclog_loader)
    loader.run()
help='Retrain the model(s) on the dataset(s)') parser.add_argument('--model', type=str, default='all', help='Model to use. CNN, CNN_GRU, EEG_Net, AE or All') parser.add_argument('--run_id', type=int, default=3, help='run ID to use if retraining. Default 3') parser.add_argument('--epochs', type=int, default=100, help='Epochs to use if retraining. Default 100') args = parser.parse_args() os.environ["CUDA_VISIBLE_DEVICES"] = "0" loader = DataLoader() datasets_dict = { # 'ERN': loader.get_ern, # 'SMR': lambda validation=False, subject=2: loader.get_smr(subject, validation), # noqa # 'BMNIST': loader.get_bmnist11, # 'BMNIST_2': loader.get_bmnist2, # 'ThoughtViz': loader.get_thoughtviz, 'ThoughtViz_char': loader.get_thoughtviz_char, 'ThoughtViz_digit': loader.get_thoughtviz_digit, # 'SEED': loader.get_seed, } models_dict = { 'CNN': CNN_Only_Model, 'CNN_GRU': CNN_GRU_Model, 'EEG_Net': EEGNet_model,
from loaders import DataLoader
import matplotlib.pyplot as plt

# Generate one topoplot figure per dataset, labelled (A), (B), ...
loader = DataLoader()
datasets_dict = {
    'ERN': loader.get_ern,
    'SMR': lambda validation=False, subject=2: loader.get_smr(
        subject, validation),  # noqa
    'BMNIST': loader.get_bmnist11,
    # 'BMNIST_2': loader.get_bmnist2,
    'SEED': loader.get_seed,
    # 'ThoughtViz': loader.get_thoughtviz,
}
datasets = [[k, datasets_dict[k]] for k in datasets_dict]
k = 'A'
for dname, ldr in datasets:
    fig, axes = plt.subplots(1, 1)
    p = ldr(validation=True)
    # One channel-vector: 10th-from-last trial, all channels, first feature.
    data = p['data'][0][-10, :, 0].reshape(-1, )
    loader.topoplot(p['name'], data)
    axes.set_title('({}) Topoplot for {}'.format(k, p['name']))
    fig.savefig('./topoplot_new/{}_topoplot_new.png'.format(dname))
    # Fix: close the figure once saved; previously every figure stayed open,
    # accumulating memory across datasets (matplotlib keeps live references).
    plt.close(fig)
    k = chr(ord(k) + 1)
    print('Done {}, {}'.format(dname, p['name']))
main.py: Driver for all other tasks """ # import argparse # import models # import loaders import os import numpy as np from models import CNN_GRU_Model, CNN_Only_Model, EEGNet_model, AutoEncoder_Model from loaders import DataLoader from sklearn.ensemble import RandomForestClassifier as RFC from sklearn.neighbors import KNeighborsClassifier as KNNC if __name__ == '__main__': os.environ["CUDA_VISIBLE_DEVICES"] = "1" loader = DataLoader() datasets_dict = { 'ERN': loader.get_ern, 'SMR': lambda validation=False, subject=2: loader.get_smr( subject, validation), # noqa 'BMNIST': loader.get_bmnist11, 'BMNIST_2': loader.get_bmnist2, 'SEED': loader.get_seed, 'ThoughtViz': loader.get_thoughtviz, 'ThoughtViz_char':
'has2D': [False], 'pool2D': [False], 'poolAvg': [True]}, {'num_gru': [2**i for i in range(5,7)], 'pool1D': [True], 'has2D': [False], 'pool2D': [False], 'poolAvg': [True, False]}, {'num_gru': [32,150],#2**i for i in range(5,7)], 'pool1D': [True, False], 'has2D': [True], 'pool2D': [True,False], 'poolAvg': [True, False]} ] loader = DataLoader() list_splits = [] X = [] X_t = [] Y = [] Y_t = [] train_size = 0 for i in range(1): d = loader.get_smr(subject=i, return_idx=True) data = d['data'] X.append(data[0]) X_t.append(data[2]) Y.append(data[1]) Y_t.append(data[3]) list_splits.append(d['data_idx']) train_size+=data[0].shape[0]
# import argparse # import models # import loaders import os import numpy as np from models import CNN_GRU_Model, CNN_Only_Model, EEGNet_model, AutoEncoder_Model from loaders import DataLoader from sklearn.ensemble import RandomForestClassifier as RFC from sklearn.neighbors import KNeighborsClassifier as KNNC if __name__ == '__main__': os.environ["CUDA_VISIBLE_DEVICES"]="2" loader = DataLoader() datasets_dict = { 'ERN': loader.get_ern, 'SMR': lambda validation=False, subject=2: loader.get_smr(subject, validation), # noqa 'BMNIST': loader.get_bmnist11, 'BMNIST_2': loader.get_bmnist2, 'ThoughtViz': loader.get_thoughtviz, 'SEED': loader.get_seed, } models_dict = { 'CNN': CNN_Only_Model, 'CNN_GRU': CNN_GRU_Model, 'EEG_Net': EEGNet_model, 'AE_rf': lambda: AutoEncoder_Model(RFC()), 'AE_knn':lambda: AutoEncoder_Model(KNNC()),
tsne_plots.py: Creates TSNE plots for various embeddings """ # import argparse # import models # import loaders import os import numpy as np from models import CNN_GRU_Model, CNN_Only_Model, EEGNet_model, AutoEncoder_Model from loaders import DataLoader from sklearn.ensemble import RandomForestClassifier as RFC from sklearn.neighbors import KNeighborsClassifier as KNNC if __name__ == '__main__': os.environ["CUDA_VISIBLE_DEVICES"] = "0" loader = DataLoader() datasets_dict = { 'ERN': loader.get_ern, 'SMR': lambda validation=False, subject=2: loader.get_smr( subject, validation), # noqa 'BMNIST': loader.get_bmnist11, 'BMNIST_2': loader.get_bmnist2, 'ThoughtViz': loader.get_thoughtviz, 'SEED': loader.get_seed, }
def test_create_database(self):
    """A fresh SQL database receives all rows plus the updated-files log."""
    engine = create_engine('sqlite://')
    loader = DataLoader('eoddata', engine=engine, mode='SQL', tablename='eod')
    loader.load_data()
    eod_rows = pd.read_sql_table('eod', engine)
    updated_log = pd.read_sql_table('updated_eod', engine)
    self.assertEqual(len(eod_rows), 10030)
    self.assertEqual(len(updated_log), 5)