def test_single_price(self):
    df1 = pd.DataFrame(
        {
            'Open': [10.2, 12, 32.1, 9.32],
            'Close': [2.3, 3.6, 4.5, 11.11]
        },
        index=pd.to_datetime(
            ['2020-01-01', '2020-02-01', '2020-03-01', '2020-04-01']))
    expected1 = pd.DataFrame({'Fake_1': [2.3, 3.6, 4.5, 11.11]},
                             index=pd.to_datetime([
                                 '2020-01-01', '2020-02-01', '2020-03-01',
                                 '2020-04-01'
                             ]))
    df2 = pd.DataFrame({'FakeSPY': [10.2, 12, 32.1, 9.32]},
                       index=pd.to_datetime([
                           '2020-01-01', '2020-02-01', '2020-03-01',
                           '2020-04-01'
                       ]))
    expected2 = pd.DataFrame({'FakeSPY': [10.2, 12, 32.1, 9.32]},
                             index=pd.to_datetime([
                                 '2020-01-01', '2020-02-01', '2020-03-01',
                                 '2020-04-01'
                             ]))

    new_df1 = DataPreprocessing.single_price(df1, 'Fake_1')
    new_df2 = DataPreprocessing.single_price(df2, 'FakeSPY')

    assert_frame_equal(new_df1, expected1)
    assert_frame_equal(new_df2, expected2)
def run_ESRNN():
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_daily = r'C:\Users\xxxli\Desktop\Daily'
    dic_daily = preprocess.read_file(path_daily)

    series_list = []
    for k, v in dic_daily.items():
        ticker_name = k
        df, cat = v
        df = preprocess.single_price(df, ticker_name)  # column = [ticker]
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('universe daily', series_list)

    train_dc, test_dc = collect.split(numTest=24)

    m = ModelESRNN(max_epochs=15,
                   batch_size=32,
                   dilations=[[1, 3], [7, 14]],
                   input_size=12,
                   output_size=24,
                   device=device)
    m.train(train_dc)

    y_test = m.predict(test_dc)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('hyper_ESRNN_1.csv')
def dc_generator(path: str, frequency: str):
    dic, recover_list, ticker_list = DataPreprocessing.read_file(path)
    series_list = []
    for k, v in dic.items():
        df, cat = v
        df = DataPreprocessing.single_price(df, k)
        series_list.append(DataSeries(cat, frequency, df))
    collect = DataCollection(frequency + ' Collection', series_list)
    return collect, recover_list, ticker_list
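# Illustrative usage of dc_generator (a sketch, not part of the original scripts):
# the data path below is a placeholder taken from the test fixtures, and
# DataCollection.split(numTest=...) is assumed to behave as in the tests elsewhere
# in this file.
monthly_dc, recover_list, ticker_list = dc_generator(
    os.path.join('test', 'Data', 'Monthly'), 'monthly')
train_dc, test_dc = monthly_dc.split(numTest=12)  # hold out the last 12 observations per series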
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # An example of how to use the Telescope model
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    self.collect = DataCollection('test1', series_list)
def test_read_single_file(self):
    single_csv = os.path.join('test', 'Data', 'Daily', 'ETF', 'AGG.csv')
    # single_csv = r'test\Data\Daily\ETF\AGG.csv'
    single_excel = os.path.join('test', 'Data', 'Daily', 'ETF',
                                'SP MidCap 400.xls')
    # single_excel = r'test\Data\Daily\ETF\SP MidCap 400.xls'
    df_csv = DataPreprocessing.read_single_file('AGG.csv', single_csv)
    df_excel = DataPreprocessing.read_single_file('SP MidCap 400.xls',
                                                  single_excel)

    assert isinstance(df_csv, pd.DataFrame)
    assert isinstance(df_csv.index, pd.DatetimeIndex)
    assert isinstance(df_excel, pd.DataFrame)
    assert isinstance(df_excel.index, pd.DatetimeIndex)
def test_Naive2(self):
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = preprocess.read_file(path_monthly)
    series_list = []
    for k, v in dic_monthly.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'monthly', df))
    collect = DataCollection('test1', series_list)

    train_dc, test_dc = collect.split(numTest=12)
    m = ModelNaive2(12, train_dc, test_dc)
    y_hat_Naive2_dc = m.fit_and_generate_prediction(12, freq='MS')
    y_hat_Naive2_dc.to_df().to_csv('test_Naive2_result.csv')
def test_excel_to_pd(self):
    single_excel = os.path.join('test', 'Data', 'Daily', 'ETF',
                                'SP MidCap 400.xls')
    # single_excel = r'test\Data\Daily\ETF\SP MidCap 400.xls'
    df = DataPreprocessing.excel_to_pd(single_excel)
    assert isinstance(df, pd.DataFrame)
    assert isinstance(df.index, pd.DatetimeIndex)
def test_MP_class(self):
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DP.read_file(path_monthly)
    n_assets = 1
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc = DataCollection('test1', time_series_group)

    m = ModelESRNN(seasonality=[12], input_size=4, output_size=12, device=device)
    train_dc, test_dc = input_dc.split(numTest=12)
    m.train(train_dc)
    forecast_dc = m.predict(test_dc)
    # train_dc.to_df().to_csv('insample.csv')
    test_dc.to_df().to_csv('test.csv')
    # forecast_dc.to_df().to_csv('forecast.csv')

    mn = MN.ModelNaive2(2, train_dc)
    naive2_dc = mn.fit_and_generate_prediction(12, 'MS')
    naive2_dc.to_df().to_csv('naive.csv')

    mp = MP.ModelPerformance("test model performance", 2, test_dc, forecast_dc,
                             train_dc, naive2_dc)

    mase = MP.MASE(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(), 2)
    smape = MP.sMAPE(test_dc.to_df(), forecast_dc.to_df())
    mape = MP.MAPE(mp.y_df, mp.y_hat_df)
    r2 = MP.R2(test_dc.to_df(), forecast_dc.to_df())
    rmse = MP.RMSE(test_dc.to_df(), forecast_dc.to_df())
    owa = MP.OWA(test_dc.to_df(), forecast_dc.to_df(), train_dc.to_df(),
                 naive2_dc.to_df(), 2)
    u1 = MP.Theil_U1(test_dc.to_df(), forecast_dc.to_df())
    u2 = MP.Theil_U2(test_dc.to_df(), forecast_dc.to_df())

    mp.MASE()
    mp.sMAPE()
    mp.MAPE()
    mp.R2()
    mp.RMSE()
    mp.OWA()
    mp.Theil_U1()
    mp.Theil_U2()

    self.assertAlmostEqual(mp.metrics['sMAPE'], smape)
    self.assertAlmostEqual(mp.metrics['MAPE'], mape)
    self.assertAlmostEqual(mp.metrics['R2'], r2)
    self.assertAlmostEqual(mp.metrics['RMSE'], rmse)
    self.assertAlmostEqual(mp.metrics['MASE'], mase)
    self.assertAlmostEqual(mp.metrics['OWA'], owa)
    self.assertAlmostEqual(mp.metrics['Theil_U1'], u1)
    self.assertAlmostEqual(mp.metrics['Theil_U2'], u2)
def test_simple_imputation(self):
    df = pd.DataFrame([10.2, np.NaN, 32.1, np.NaN],
                      columns=['fakeSPY'],
                      index=pd.to_datetime([
                          '2020-01-01', '2020-02-01', '2020-03-01',
                          '2020-04-01'
                      ]))
    self.assertEqual(df.isnull().sum().values[0], 2)
    new_df = DataPreprocessing.simple_imputation(df)
    self.assertEqual(new_df.isnull().sum().values[0], 0)
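# For reference, a minimal imputation along the lines exercised by the test above could
# look like the sketch below. This is an illustrative stand-in, NOT the project's actual
# DataPreprocessing.simple_imputation implementation.
import pandas as pd


def simple_imputation_sketch(df: pd.DataFrame) -> pd.DataFrame:
    # interpolate interior gaps linearly, then fill any leading/trailing NaNs
    filled = df.interpolate(method='linear', axis=0)
    return filled.ffill().bfill()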
def test_ESRNN(self):
    # An example of how to use ESRNN
    import torch
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    path_daily = os.path.join('test', 'Data', 'daily')
    dic_daily = preprocess.read_file(path_daily)
    series_list = []
    for k, v in dic_daily.items():
        df, cat = v
        df = preprocess.single_price(df, k)
        series_list.append(DataSeries(cat, 'daily', df))
    collect = DataCollection('test1', series_list)

    m = ModelESRNN(max_epochs=5,
                   seasonality=[],
                   batch_size=64,
                   input_size=12,
                   output_size=12,
                   device=device)
    train_dc, test_dc = collect.split(numTest=12)
    m.train(train_dc)
    y_test = m.predict(test_dc)
    assert isinstance(y_test, DataCollection)
    y_test_df = y_test.to_df()
    y_test_df.to_csv('predict_result.csv')
def test_read_file(self):
    path_daily = os.path.join('test', 'Data', 'Daily')
    # path_daily = r'test\Data\Daily'
    dic_daily = DataPreprocessing.read_file(path_daily)
    # not the whole dataset: only 3 files (AGG.csv, AA.csv, SP MidCap 400.xls)
    self.assertTrue(isinstance(dic_daily, dict))
    assert isinstance(dic_daily['AGG'][0], pd.DataFrame)
    self.assertTrue(dic_daily['AGG'][1] == 'ETF')
    self.assertTrue(dic_daily['AA'][1] == 'Stock')
    self.assertEqual(len(dic_daily), 3)

    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    self.assertTrue(isinstance(dic_monthly, dict))
    assert isinstance(dic_monthly['AGG'][0], pd.DataFrame)
    self.assertTrue(dic_monthly['AGG'][1] == 'ETF')
    self.assertTrue(dic_monthly['AA'][1] == 'Stock')
    self.assertNotEqual(len(dic_monthly), 0)
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # fake data by ZZ
    # Daily
    self.a_series = DataSeries(
        'ETF', 'daily',
        pd.DataFrame([10.0, 15.0, 20.0, 30.0],
                     columns=['ABC'],
                     index=pd.to_datetime([
                         '2020-01-01', '2020-01-02', '2020-01-03',
                         '2020-01-04'
                     ])))
    self.b_series = DataSeries(
        'Bond', 'daily',
        pd.DataFrame([1.0, 3.5, 4.5],
                     columns=['KKK'],
                     index=pd.to_datetime([
                         '2020-01-01', '2020-01-02', '2020-01-03',
                     ])))
    self.collect = DataCollection('trial', [self.a_series, self.b_series])
    d = {'Initial weights': [0.6, 0.4]}
    self.weights = pd.DataFrame(data=d).T
    self.weights = self.weights.rename(columns={0: 'ABC', 1: 'KKK'})
    self.p = port.EqualPort("test equal port")
    self.p.calculate_initial_weight(self.collect)

    # Monthly
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    n_assets = 4
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries(df[1], 'monthly', df[0])
        time_series_group.append(ds)
    input_dc_test = DataCollection(label='Test Collection',
                                   time_series_group=time_series_group)
    self.input_dc = input_dc_test
    self.input_freq = input_dc_test.get_freq()
    self.input_df = self.input_dc.to_df()
    self.n_asset = len(self.input_df.columns)
    input_weights = [[1 / self.n_asset] * self.n_asset]
    input_weights_df = pd.DataFrame(input_weights,
                                    columns=self.input_df.columns,
                                    index=['Initial weights'])
    self.input_weights_df = input_weights_df
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    path_monthly = os.path.join('test', 'Data', 'Monthly')
    dic_monthly = DataPreprocessing.read_file(path_monthly)
    n_assets = 4
    time_series_group = []
    for i in range(n_assets):
        df = dic_monthly[list(dic_monthly.keys())[i]]
        ds = DataSeries('ETF', 'monthly', df[0])
        time_series_group.append(ds)
    input_dc_test = DataCollection(label='Test Collection',
                                   time_series_group=time_series_group)
    self.input_dc = input_dc_test
    self.input_freq = input_dc_test.get_freq()
    self.input_df = self.input_dc.to_df().dropna()

    self.a = pd.DataFrame([10, 12, 32, 9, 11, 9],
                          columns=['fakeSPY'],
                          index=pd.to_datetime([
                              '2020-01-01', '2020-02-01', '2020-03-01',
                              '2020-04-01', '2020-05-01', '2020-06-01'
                          ]))
    self.a_series = DataSeries('ETF', self.input_freq, self.a)
    self.b = pd.DataFrame([1, 1.2, 3.2, 0.9],
                          columns=['fakeTreasury'],
                          index=pd.to_datetime([
                              '2019-12-01', '2020-02-01', '2020-03-01',
                              '2020-04-01'
                          ]))
    self.b_series = DataSeries('Bond', self.input_freq, self.b)
    self.c_collection = DataCollection('trial', [self.a_series, self.b_series])
    self.c_df = self.c_collection.to_df().interpolate(method='linear', axis=0)
def fTrainArtDetection():  # training for artifact detection
    # check if the file already exists -> skip patching
    if glob.glob(sOutPath + os.sep + sFSname + ''.join(map(str, patchSize)).replace(" ", "") + '*_input.mat'):
        # deprecated
        sDatafile = sOutPath + os.sep + sFSname + ''.join(map(str, patchSize)).replace(" ", "") + '_input.mat'
        try:
            conten = sio.loadmat(sDatafile)
        except:
            f = h5py.File(sDatafile, 'r')
            conten = {}
            conten['X_train'] = np.transpose(np.array(f['X_train']), (3, 2, 0, 1))
            conten['X_test'] = np.transpose(np.array(f['X_test']), (3, 2, 0, 1))
            conten['y_train'] = np.transpose(np.array(f['y_train']))
            conten['y_test'] = np.transpose(np.array(f['y_test']))
            conten['patchSize'] = np.transpose(np.array(f['patchSize']))

        X_train = conten['X_train']
        X_test = conten['X_test']
        y_train = conten['y_train']
        y_test = conten['y_test']

    elif glob.glob(sDatafile):
        with h5py.File(sDatafile, 'r') as hf:
            X_train = hf['X_train'][:]
            X_test = hf['X_test'][:]
            y_train = hf['y_train'][:]
            y_test = hf['y_test'][:]
            patchSize = hf['patchSize'][:]
            if sTrainingMethod == "MultiScaleSeparated":
                X_train_p2 = hf['X_train_p2'][:]
                X_test_p2 = hf['X_test_p2'][:]
                y_train_p2 = hf['y_train_p2'][:]
                y_test_p2 = hf['y_test_p2'][:]
                patchSize_down = hf['patchSize_down'][:]

    else:  # perform patching
        X_train = []
        scpatchSize = [0 for i in range(len(patchSize))]
        if sTrainingMethod == "None" or sTrainingMethod == "ScaleJittering":
            lScaleFactor = [1]
        if sTrainingMethod == "MultiScaleSeparated":
            lScaleFactor = lScaleFactor[:-1]
        # images will be split into patches of size scpatchSize and then scaled to patchSize
        for iscalefactor in lScaleFactor:
            # calculate the patch size according to the scale factor and training method
            scpatchSize = patchSize
            if iscalefactor != 1:
                if sTrainingMethod == "MultiScaleSeparated":
                    scpatchSize = fcalculateInputOfPath2(patchSize, iscalefactor, cfg['network'])
                elif sTrainingMethod == "MultiScaleTogether":
                    scpatchSize = [int(psi / iscalefactor) for psi in patchSize]

            if len(scpatchSize) == 3:
                dAllPatches = np.zeros((0, scpatchSize[0], scpatchSize[1], scpatchSize[2]))
            else:
                dAllPatches = np.zeros((0, scpatchSize[0], scpatchSize[1]))
            dAllLabels = np.zeros(0)
            dAllPats = np.zeros((0, 1))

            lDatasets = cfg['selectedDatabase']['dataref'] + cfg['selectedDatabase']['dataart']
            iLabels = cfg['selectedDatabase']['labelref'] + cfg['selectedDatabase']['labelart']

            for ipat, pat in enumerate(dbinfo.lPats):
                if os.path.exists(dbinfo.sPathIn + os.sep + pat + os.sep + dbinfo.sSubDirs[1]):
                    for iseq, seq in enumerate(lDatasets):
                        # patches and labels of reference/artifact
                        tmpPatches, tmpLabels = datapre.fPreprocessData(
                            os.path.join(dbinfo.sPathIn, pat, dbinfo.sSubDirs[1], seq),
                            scpatchSize, cfg['patchOverlap'], 1, cfg['sLabeling'],
                            sTrainingMethod=sTrainingMethod, range_norm=cfg['range'])
                        dAllPatches = np.concatenate((dAllPatches, tmpPatches), axis=0)
                        dAllLabels = np.concatenate((dAllLabels, iLabels[iseq] * tmpLabels), axis=0)
                        dAllPats = np.concatenate(
                            (dAllPats, ipat * np.ones((tmpLabels.shape[0], 1), dtype=np.int)), axis=0)
                else:
                    pass

            print('Start splitting')
            # perform splitting: sp for split
            if cfg['sSplitting'] == 'crossvalidation_data':
                spX_train, spy_train, spX_test, spy_test = ttsplit.fSplitDataset(
                    dAllPatches, dAllLabels, dAllPats, cfg['sSplitting'], scpatchSize,
                    cfg['patchOverlap'], cfg['dSplitval'], '', nfolds=nFolds)
            else:
                spX_train, spy_train, spX_test, spy_test = ttsplit.fSplitDataset(
                    dAllPatches, dAllLabels, dAllPats, cfg['sSplitting'], scpatchSize,
                    cfg['patchOverlap'], cfg['dSplitval'], '')

            print('Start scaling')
            # perform scaling: sc for scale
            scX_train, scX_test, scedpatchSize = scaling.fscaling(
                spX_train, spX_test, scpatchSize, iscalefactor)
            if sTrainingMethod == "MultiScaleSeparated":
                X_train_p2 = scX_train
                X_test_p2 = scX_test
                y_train_p2 = spy_train
                y_test_p2 = spy_test
                patchSize_down = scedpatchSize
                X_train_cut, X_test_cut = scaling.fcutMiddelPartOfPatch(
                    spX_train, spX_test, scpatchSize, patchSize)
                X_train = X_train_cut
                X_test = X_test_cut
                y_train = spy_train
                y_test = spy_test
            else:
                if len(X_train) == 0:
                    X_train = scX_train
                    X_test = scX_test
                    y_train = spy_train
                    y_test = spy_test
                else:
                    X_train = np.concatenate((X_train, scX_train), axis=1)
                    X_test = np.concatenate((X_test, scX_test), axis=1)
                    y_train = np.concatenate((y_train, spy_train), axis=1)
                    y_test = np.concatenate((y_test, spy_test), axis=1)

        print('Start saving')
        # save to file (deprecated)
        if lSave:
            # sio.savemat(sOutPath + os.sep + sFSname + str(patchSize[0]) + str(patchSize[1]) + '_input.mat',
            #             {'X_train': X_train, 'y_train': y_train, 'X_test': X_test, 'y_test': y_test,
            #              'patchSize': cfg['patchSize']})
            with h5py.File(sDatafile, 'w') as hf:
                hf.create_dataset('X_train', data=X_train)
                hf.create_dataset('X_test', data=X_test)
                hf.create_dataset('y_train', data=y_train)
                hf.create_dataset('y_test', data=y_test)
                hf.create_dataset('patchSize', data=patchSize)
                hf.create_dataset('patchOverlap', data=cfg['patchOverlap'])
                if sTrainingMethod == "MultiScaleSeparated":
                    hf.create_dataset('X_train_p2', data=X_train_p2)
                    hf.create_dataset('X_test_p2', data=X_test_p2)
                    hf.create_dataset('y_train_p2', data=y_train_p2)
                    hf.create_dataset('y_test_p2', data=y_test_p2)
                    hf.create_dataset('patchSize_down', data=patchSize_down)

    # perform training
    for iFold in range(0, len(X_train)):
        if len(X_train) != 1:
            CV_Patient = iFold + 1
        else:
            CV_Patient = 0
        if 'MultiPath' in cfg['network']:
            frunCNN_MS(
                {'X_train': X_train[iFold], 'y_train': y_train[iFold],
                 'X_test': X_test[iFold], 'y_test': y_test[iFold],
                 'patchSize': patchSize,
                 'X_train_p2': X_train_p2[iFold], 'y_train_p2': y_train_p2[iFold],
                 'X_test_p2': X_test_p2[iFold], 'y_test_p2': y_test_p2[iFold],
                 'patchSize_down': patchSize_down, 'ScaleFactor': lScaleFactor[0]},
                cfg['network'], lTrain, sOutPath, cfg['batchSize'], cfg['lr'],
                cfg['epochs'], CV_Patient)
        elif 'MS' in cfg['network']:
            frunCNN_MS(
                {'X_train': X_train[iFold], 'y_train': y_train[iFold],
                 'X_test': X_test[iFold], 'y_test': y_test[iFold],
                 'patchSize': patchSize},
                cfg['network'], lTrain, sOutPath, cfg['batchSize'], cfg['lr'],
                cfg['epochs'], CV_Patient)
        else:
            fRunCNN(
                {'X_train': X_train[iFold], 'y_train': y_train[iFold],
                 'X_test': X_test[iFold], 'y_test': y_test[iFold],
                 'patchSize': patchSize},
                cfg['network'], lTrain, cfg['sOpti'], sOutPath, cfg['batchSize'],
                cfg['lr'], cfg['epochs'], CV_Patient)
numTest = output_size = 30
input_size = 30
max_epochs = 15
batch_size = 64
learning_rate = 1e-2
lr_scheduler_step_size = 9
lr_decay = 0.9
noise_std = 0.001
level_variability_penalty = 80
state_hsize = 40
dilation = [[1]]
add_nl_layer = False
seasonality = [5]

# action
path = os.path.join('test', 'Data', 'Daily')
dic = preprocess.read_file(path)
series_list = []
for k, v in dic.items():
    df, cat = v
    df = preprocess.single_price(df, k)
    series_list.append(DataSeries(cat, 'daily', df))
collect = DataCollection('RollingValidation', series_list)

input_dc, _ = collect.split(numTest=2 * numTest)

score, _ = validation_simple(
    input_dc,
    numTest=numTest,
    max_epochs=max_epochs,
    batch_size=batch_size,
    learning_rate=learning_rate,
    lr_scheduler_step_size=lr_scheduler_step_size,
            y_train = hf['y_train'][:]
            y_test = hf['y_test'][:]
            patchSize = hf['patchSize'][:]
    else:  # perform patching
        dAllPatches = np.zeros((patchSize[0], patchSize[1], 0))
        dAllLabels = np.zeros(0)
        dAllPats = np.zeros((0, 1))
        # 'selectedDatabase' resolves to *id001, i.e. the motion_head entry is used directly:
        # dataref: t1...0002 and dataart: t1...0003
        lDatasets = cfg['selectedDatabase']['dataref'] + cfg['selectedDatabase']['dataart']
        iLabels = cfg['selectedDatabase']['labelref'] + cfg['selectedDatabase']['labelart']  # [0, 1]
        for ipat, pat in enumerate(dbinfo.lPats):
            for iseq, seq in enumerate(lDatasets):
                # patches and labels of reference/artifact
                # datapre is utils.DataPreprocessing; the function itself is fine and only
                # fails here when a patient with incomplete data is selected
                tmpPatches, tmpLabels = datapre.fPreprocessData(
                    os.path.join(dbinfo.sPathIn, pat, dbinfo.sSubDirs[1], seq),
                    cfg['patchSize'], cfg['patchOverlap'], 1)
                dAllPatches = np.concatenate((dAllPatches, tmpPatches), axis=2)
                dAllLabels = np.concatenate((dAllLabels, iLabels[iseq] * tmpLabels), axis=0)
                dAllPats = np.concatenate(
                    (dAllPats, ipat * np.ones((tmpLabels.shape[0], 1), dtype=np.int)), axis=0)

        # perform splitting
        X_train, y_train, X_test, y_test = ttsplit.fSplitDataset(
            dAllPatches, dAllLabels, dAllPats, cfg['sSplitting'], patchSize,
            cfg['patchOverlap'], cfg['dSplitval'], '')

        # save to file (deprecated)
        # sio.savemat(sOutPath + os.sep + sFSname + str(patchSize[0]) + str(patchSize[1]) + '_input.mat',
        #             {'X_train': X_train, 'y_train': y_train, 'X_test': X_test, 'y_test': y_test,
        #              'patchSize': cfg['patchSize']})
        with h5py.File(sDatafile, 'w') as hf:
            hf.create_dataset('X_train', data=X_train)
            hf.create_dataset('X_test', data=X_test)
            hf.create_dataset('y_train', data=y_train)
            hf.create_dataset('y_test', data=y_test)
            hf.create_dataset('patchSize', data=patchSize)
            else:
                dAllPatches = np.zeros((0, scpatchSize[0], scpatchSize[1]))
            dAllLabels = np.zeros(0)
            dAllPats = np.zeros((0, 1))

            lDatasets = cfg['selectedDatabase']['dataref'] + cfg['selectedDatabase']['dataart']
            iLabels = cfg['selectedDatabase']['labelref'] + cfg['selectedDatabase']['labelart']

            for ipat, pat in enumerate(dbinfo.lPats):
                if os.path.exists(dbinfo.sPathIn + os.sep + pat + os.sep + dbinfo.sSubDirs[1]):
                    for iseq, seq in enumerate(lDatasets):
                        # patches and labels of reference/artifact
                        tmpPatches, tmpLabels = datapre.fPreprocessData(
                            os.path.join(dbinfo.sPathIn, pat, dbinfo.sSubDirs[1], seq),
                            scpatchSize, cfg['patchOverlap'], 1, cfg['sLabeling'],
                            sTrainingMethod, cfg['range'])
                        dAllPatches = np.concatenate((dAllPatches, tmpPatches), axis=0)
                        dAllLabels = np.concatenate((dAllLabels, iLabels[iseq] * tmpLabels), axis=0)
                        dAllPats = np.concatenate(
                            (dAllPats, ipat * np.ones((tmpLabels.shape[0], 1), dtype=np.int)), axis=0)
                else:
                    pass

            print('Start splitting')
            # perform splitting: sp for split
            if cfg['sSplitting'] == 'crossvalidation_data':
                spX_train, spy_train, spX_test, spy_test = ttsplit.fSplitDataset(
def run(cfg, dbinfo):
    """
    The main interface of the correction program
    @param cfg: the configuration file loaded from config/param.yml
    @param dbinfo: database related info
    """
    # load parameters from the config file and define the corresponding output path
    patchSize = cfg['patchSize']
    sOutsubdir = cfg['subdirs'][3]
    sOutPath = cfg['selectedDatabase']['pathout'] + os.sep \
               + ''.join(map(str, patchSize)).replace(" ", "") + os.sep + sOutsubdir

    if cfg['sSplitting'] == 'normal':
        sFSname = 'normal'
        sDatafile = sOutPath + os.sep + sFSname + ''.join(map(str, patchSize)).replace(" ", "") + '.h5'
    elif cfg['sSplitting'] == 'crossvalidation_data':
        sFSname = 'crossVal_data'
        sDatafile = sOutPath + os.sep + sFSname + ''.join(map(str, patchSize)).replace(" ", "") + '.h5'
    elif cfg['sSplitting'] == 'crossvalidation_patient':
        sFSname = 'crossVal'
        sDatafile = sOutPath + os.sep + sFSname + ''.join(map(str, patchSize)).replace(" ", "") + '_' + \
                    cfg['correction']['test_patient'] + '.h5'

    # if the h5 file exists then load the dataset
    if glob.glob(sDatafile):
        with h5py.File(sDatafile, 'r') as hf:
            train_ref = hf['train_ref'][:]
            train_art = hf['train_art'][:]
            test_ref = hf['test_ref'][:]
            test_art = hf['test_art'][:]
            patchSize = hf['patchSize'][:]
    else:
        # perform patching and splitting
        train_ref, test_ref, train_art, test_art = datapre.fPreprocessDataCorrection(cfg, dbinfo)
        # save to h5 file
        if cfg['lSave']:
            with h5py.File(sDatafile, 'w') as hf:
                hf.create_dataset('train_ref', data=train_ref)
                hf.create_dataset('test_ref', data=test_ref)
                hf.create_dataset('train_art', data=train_art)
                hf.create_dataset('test_art', data=test_art)
                hf.create_dataset('patchSize', data=patchSize)
                hf.create_dataset('patchOverlap', data=cfg['patchOverlap'])

    dHyper = cfg['correction']
    dParam = {'batchSize': cfg['batchSize'],
              'patchSize': patchSize,
              'patchOverlap': cfg['patchOverlap'],
              'learningRate': cfg['lr'],
              'epochs': cfg['epochs'],
              'lTrain': cfg['lTrain'],
              'lSave': cfg['lSave'],
              'sOutPath': sOutPath,
              'lSaveIndividual': cfg['lSaveIndividual']}

    if len(train_ref) == 1:
        dData = {'train_ref': train_ref[0], 'test_ref': test_ref[0],
                 'train_art': train_art[0], 'test_art': test_art[0]}
        cnn_main.fRunCNNCorrection(dData, dHyper, dParam)
    else:
        for patient_index in range(len(train_ref)):
            dData = {'train_ref': train_ref[patient_index], 'test_ref': test_ref[patient_index],
                     'train_art': train_art[patient_index], 'test_art': test_art[patient_index]}
            cnn_main.fRunCNNCorrection(dData, dHyper, dParam)
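# For orientation, run(cfg, dbinfo) above only reads the cfg keys listed below. This dict is
# an illustrative sketch: the values are placeholders chosen for the example, not the
# project's shipped defaults from config/param.yml.
example_cfg = {
    'patchSize': [48, 48],
    'patchOverlap': 0.5,
    'sSplitting': 'normal',  # or 'crossvalidation_data' / 'crossvalidation_patient'
    'subdirs': ['imgs', 'labels', 'masks', 'correction_out'],  # sOutsubdir = cfg['subdirs'][3]
    'selectedDatabase': {'pathout': '/path/to/output'},
    'correction': {'test_patient': 'patient01'},  # passed on as dHyper
    'batchSize': 32,
    'lr': 1e-4,
    'epochs': 100,
    'lTrain': True,
    'lSave': True,
    'lSaveIndividual': False,
}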
generator_train = datagen.flow_from_directory(directory='./input/train',
                                              target_size=(img_size[0], img_size[1]),
                                              batch_size=batch_size,
                                              class_mode='categorical',
                                              shuffle=False)
generator_test = datagen.flow_from_directory(directory='./input/test',
                                             target_size=(img_size[0], img_size[1]),
                                             batch_size=batch_size,
                                             class_mode=None,
                                             shuffle=False)

if glob.glob('./input/train/*.jpg'):
    datapre.sortImg(cfg)

if cfg['lExtractor']:
    feature_extractor.run(cfg, generator_train, generator_test)
elif cfg['lTrain']:
    nTrain = len(generator_train.filenames)
    nClass = len(generator_train.class_indices)
    if cfg['lAssemble']:
        train_data_InceptionV3 = np.load('./feature/InceptionV3_train.npy')
        train_data_InceptionResNetV2 = np.load('./feature/InceptionResNetV2_train.npy')
        train_data_original = np.concatenate(
            (train_data_InceptionV3, train_data_InceptionResNetV2), axis=1)
def fPredictArtDetection():  # prediction
    sNetworktype = cfg['network'].split("_")
    if len(sPredictModel) == 0:
        sPredictModel = cfg['selectedDatabase']['bestmodel'][sNetworktype[2]]

    if sTrainingMethod == "MultiScaleSeparated":
        patchSize = fcalculateInputOfPath2(cfg['patchSize'], cfg['lScaleFactor'][0], cfg['network'])

    if len(patchSize) == 3:
        X_test = np.zeros((0, patchSize[0], patchSize[1], patchSize[2]))
        y_test = np.zeros((0))
        allImg = np.zeros((len(cfg['lPredictImg']),
                           cfg['correction']['actualSize'][0],
                           cfg['correction']['actualSize'][1],
                           cfg['correction']['actualSize'][2]))
    else:
        X_test = np.zeros((0, patchSize[0], patchSize[1]))
        y_test = np.zeros(0)

    for iImg in range(0, len(cfg['lPredictImg'])):
        # patches and labels of reference/artifact
        tmpPatches, tmpLabels = datapre.fPreprocessData(
            cfg['lPredictImg'][iImg], patchSize, cfg['patchOverlap'], 1,
            cfg['sLabeling'], sTrainingMethod=sTrainingMethod)
        X_test = np.concatenate((X_test, tmpPatches), axis=0)
        y_test = np.concatenate((y_test, cfg['lLabelPredictImg'][iImg] * tmpLabels), axis=0)
        allImg[iImg] = datapre.fReadData(cfg['lPredictImg'][iImg])

    if sTrainingMethod == "MultiScaleSeparated":
        X_test_p1 = scaling.fcutMiddelPartOfPatch(X_test, X_test, patchSize, cfg['patchSize'])
        X_train_p2, X_test_p2, scedpatchSize = scaling.fscaling([X_test], [X_test], patchSize,
                                                                cfg['lScaleFactor'][0])
        frunCNN_MS(
            {'X_test': X_test_p1, 'y_test': y_test, 'patchSize': patchSize,
             'X_test_p2': X_test_p2[0], 'model_name': sPredictModel,
             'patchOverlap': cfg['patchOverlap'],
             'actualSize': cfg['correction']['actualSize']},
            cfg['network'], lTrain, sOutPath, cfg['batchSize'], cfg['lr'],
            cfg['epochs'], predictImg=allImg)
    elif 'MS' in cfg['network']:
        frunCNN_MS(
            {'X_test': X_test, 'y_test': y_test, 'patchSize': cfg['patchSize'],
             'model_name': sPredictModel, 'patchOverlap': cfg['patchOverlap'],
             'actualSize': cfg['correction']['actualSize']},
            cfg['network'], lTrain, sOutPath, cfg['batchSize'], cfg['lr'],
            cfg['epochs'], predictImg=allImg)
    else:
        fRunCNN(
            {'X_train': [], 'y_train': [], 'X_test': X_test, 'y_test': y_test,
             'patchSize': patchSize, 'model_name': sPredictModel,
             'patchOverlap': cfg['patchOverlap'],
             'actualSize': cfg['correction']['actualSize']},
            cfg['network'], lTrain, cfg['sOpti'], sOutPath, cfg['batchSize'],
            cfg['lr'], cfg['epochs'])
def test_csv_to_pd(self):
    single_csv = os.path.join('test', 'Data', 'Daily', 'ETF', 'AGG.csv')
    # single_csv = r'test\Data\Daily\ETF\AGG.csv'
    df = DataPreprocessing.csv_to_pd(single_csv)
    assert isinstance(df, pd.DataFrame)
    assert isinstance(df.index, pd.DatetimeIndex)