# -*- coding: utf-8 -*-
"""
Manual smoke run for the data generators.

Builds one triplet input for a fixed image ID, prints its size and plots it,
then pulls batch 1 from a "test" DataGenerator and prints the batch shapes.

Created on Sat Apr 6 08:30:38 2019

@author: zouco
"""

from data_generator import DataGenerator, triplet_generation

# --- one triplet for a single, hard-coded image ID -------------------------
triplet_builder = triplet_generation()
sample_id = "00cfd9bbf55a241e"
triplet = triplet_builder.get_one_input_tensor(sample_id)
print(len(triplet))
print(triplet[0].shape)

# Imported only once the triplet exists, exactly where it is needed.
from tools.plot_image import plot_imgs

plot_imgs(triplet)

# --- one batch from the test generator --------------------------------------
test_batches = DataGenerator("test", None)
batch_inputs, batch_targets = test_batches[1]
print(len(batch_inputs))
print(batch_inputs[0].shape)
print(batch_targets.shape)
class Test0(unittest.TestCase):
    """Batching checks for DataGenerator with shuffling and rebalancing off.

    The generator is built on two names from ./test_data/42nights.csv and
    selected batches are compared with pickled reference arrays stored in
    ./test_data. The reference files are only ever read by these tests.
    """

    def setUp(self):
        """Load the dataset and build the generator shared by all tests."""
        columns = np.array(['bpm', 'spo2'])
        self.dataset_reduced_std, _ = get_dataset_pulsi(
            columns, filename='./test_data/42nights.csv')

        self.names = np.array(['h_17-04-27', 'p_17-04-27'])
        self.batch_size = 3
        self.number_of_predictions = 4
        self.window_size = 12
        self.generator = self._make_generator(self.names)

    def _make_generator(self, names):
        """Return a deterministic DataGenerator restricted to `names`."""
        return DataGenerator(
            self.dataset_reduced_std,
            names,
            "spo2",
            batch_size=self.batch_size,
            number_of_predictions=self.number_of_predictions,
            window_size=self.window_size,
            step_prediction_dates=1,
            shuffle=False,
            rebalance_data=False,
            debug=False)

    def _assert_matches_pickle(self, actual, filename):
        """Fail unless `actual` equals the reference array pickled in `filename`."""
        with open(filename, 'rb') as f:
            reference = pd.read_pickle(f)
        # array_equal also reports a shape mismatch as a plain failure instead
        # of letting the element-wise comparison broadcast.
        self.assertTrue(
            np.array_equal(reference, actual),
            'batch differs from reference file %s' % filename)

    def _check_batch(self, index, n_rows, x_filename, y_filename):
        """Check shapes and reference contents of batch number `index`."""
        X, y = self.generator[index]
        # Last axis of X: the 2 dataset columns ('bpm', 'spo2').
        self.assertEqual(X.shape, (n_rows, self.window_size, 2))
        self.assertEqual(y.shape, (n_rows, self.number_of_predictions))
        self._assert_matches_pickle(X, x_filename)
        self._assert_matches_pickle(y, y_filename)

    def test_generator(self):
        """Check the number of batches and the first, second-last and last batch."""
        filtered_by_names = self.dataset_reduced_std[
            self.dataset_reduced_std["name"].isin(self.names)]
        # Per name, (window_size + number_of_predictions - 1) rows are
        # subtracted from the row count to get the expected number of samples.
        expected_lines = len(filtered_by_names) - len(self.names) * (
            self.window_size + self.number_of_predictions - 1)
        expected = np.ceil(expected_lines / self.batch_size)
        self.assertEqual(len(self.generator), expected)

        n_batches = len(self.generator)
        self._check_batch(
            0, self.batch_size,
            './test_data/X0.pkl', './test_data/y0.pkl')
        self._check_batch(
            n_batches - 2, self.batch_size,
            './test_data/X_second_last_dupl.pkl',
            './test_data/y_second_last_dupl.pkl')
        # The row count of the last batch is hard-coded: per the original
        # author's note it could be 2 or 1 instead of 3.
        # The reference files are deliberately NOT regenerated here: dumping
        # the batch and reading it straight back would make this comparison
        # pass unconditionally and silently rewrite the fixtures on every run.
        self._check_batch(
            n_batches - 1, 3,
            './test_data/X_last_dupl.pkl',
            './test_data/y_last_dupl.pkl')

    def test_all_batches(self):
        """get_all_batches must agree with get_all_batches_debug."""
        X, y = self.generator.get_all_batches()
        X_b, y_b = self.generator.get_all_batches_debug()
        self.assertTrue((X == X_b).all())
        self.assertTrue((y == y_b).all())

    def test_merge(self):
        """Merging two single-name generators yields the two-name generator's X.

        The merge is checked in both orders; only the inputs X are compared.
        """
        X, _ = self.generator.get_all_batches()

        generator_1 = self._make_generator(np.array(['h_17-04-27']))
        generator_2 = self._make_generator(np.array(['p_17-04-27']))

        generator_all = generator_1.get_merged_generator(generator_2)
        X_b, _ = generator_all.get_all_batches()
        self.assertTrue((X == X_b).all())

        generator_all_bis = generator_2.get_merged_generator(generator_1)
        X_c, _ = generator_all_bis.get_all_batches()
        self.assertTrue((X == X_c).all())
return data_ids if __name__ == '__main__': train_ids = get_data_ids("Train") valid_ids = get_data_ids("Validation") train_dir = os.path.join(FLAGS.data_dir, "Train") valid_dir = os.path.join(FLAGS.data_dir, "Validation") train_gen = DataGenerator(train_ids, train_dir, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size) print("***************", train_gen.__getitem__(0)[1].shape) valid_gen = DataGenerator(valid_ids, valid_dir, image_size=FLAGS.image_size, batch_size=FLAGS.batch_size) train_steps = len(train_ids) // FLAGS.batch_size valid_steps = len(valid_ids) // FLAGS.batch_size epochs = FLAGS.epochs model = build_ResUNet(FLAGS.image_size, FLAGS.num_class) history = model.fit(train_gen, validation_data=valid_gen, steps_per_epoch=train_steps,
def OutputFromTraining(self,data,path_output,output_name=None,crossval_use_training=False):
    """
    Apply the trained model(s) and save the inputs together with the outputs.

    The data used here is meant to be separated from the training.
    Two modes are selected by self.generator :
        - falsy : `data` is evaluated in memory and saved with a single
          SaveToRoot call
        - truthy : batches are read from a DataGenerator built in 'output'
          state and each batch is saved separately (suffix '_slice<i>' or
          '_model<m>_slice<i>'). `data` and `crossval_use_training` are not
          used in this mode.

    If self.model holds exactly one model it is applied to everything
    (classic training), otherwise each model is applied to its own slice
    (cross validation).

    Arguments :
        data : DataFrame containing the self.list_inputs columns and, for
            cross validation, a 'mask' column handed to GenerateSliceMask
        path_output : forwarded to SaveToRoot
        output_name : forwarded to SaveToRoot. If specified, the whole data
            is written in 'output_name'.root, if not, the samples in the
            dataframe are used to split into files named 'sample'.root
        crossval_use_training : in-memory cross validation only. If True each
            model is applied to one of its training slices instead of its
            apply slice.
    Returns :
        None
    """
    # One 'output_<name>' column per entry of parameters.outputs, '$' removed
    output_columns = [('output_%s'%o).replace('$','') for o in parameters.outputs]
    single_model = len(self.model) == 1

    if not self.generator:
        inputs = data[self.list_inputs]
        if single_model: # classic training
            instance = HyperModel(self.model[0])
            output = instance.HyperRestore(inputs,verbose=1)
            output_df = pd.DataFrame(output,columns=output_columns,index=data.index)
        else: # cross validation
            # Start from zeros, each model then fills the rows of its slice
            output_df = pd.DataFrame(np.zeros((data.shape[0],len(output_columns))),columns=output_columns,index=data.index)
            used_train_idx = [] # training slices already given to a model
            for model_idx,model in enumerate(self.model):
                instance = HyperModel(model)
                apply_idx,_,train_idx = GenerateSliceIndices(model_idx)
                if crossval_use_training:
                    # Walk the training slices cyclically, starting at position
                    # model_idx, and keep the first one no other model has
                    # taken yet : necessary so that each model is applied once.
                    # If all are taken, train_idx is left untouched.
                    n_train = len(train_idx)
                    for offset in range(n_train):
                        candidate = train_idx[(model_idx+offset)%n_train]
                        if candidate not in used_train_idx:
                            used_train_idx.append(candidate)
                            train_idx = [candidate]
                            break
                    apply_mask = GenerateSliceMask(train_idx,data['mask'])
                else:
                    apply_mask = GenerateSliceMask(apply_idx,data['mask'])
                model_out = instance.HyperRestore(inputs[apply_mask])
                output_df[apply_mask] = model_out
            # A row whose largest output is still 0 is taken as never filled
            assert not (output_df.max(1)==0).any(), 'Some rows did not receive any model output'
        full_df = pd.concat([data,output_df],axis=1)
        self.SaveToRoot(full_df,path_output,output_name)
        return

    # Generator mode : same generator settings for every model
    generator_options = dict(path = parameters.config,
                             inputs = parameters.inputs,
                             inputsLBN = parameters.LBN_inputs,
                             outputs = parameters.outputs,
                             other = parameters.other_variables,
                             cut = parameters.cut,
                             weight = parameters.weight,
                             batch_size = parameters.output_batch_size,
                             state_set = 'output')

    def save_batches(instance,output_generator,suffix_format):
        """ Evaluate every batch of output_generator and save it under suffix_format%batch_number """
        for i in range(len(output_generator)):
            # Explicit __getitem__ call : the extra positional argument cannot
            # be passed through the [] syntax
            batch = output_generator.__getitem__(i,True)
            output = instance.HyperRestore(batch[self.list_inputs])
            output_df = pd.DataFrame(output,columns=output_columns,index=batch.index)
            full_df = pd.concat([batch,output_df],axis=1)
            self.SaveToRoot(full_df,path_output,output_name,out_idx=suffix_format%i)

    if single_model: # classic training
        output_generator = DataGenerator(**generator_options)
        instance = HyperModel(self.model[0])
        save_batches(instance,output_generator,'_slice%d')
    else: # cross validation
        for model_idx,model in enumerate(self.model):
            logging.info('Starting generator for model %d'%model_idx)
            instance = HyperModel(model)
            output_generator = DataGenerator(model_idx = model_idx,**generator_options)
            # Model number is formatted now, the slice number is left for save_batches
            save_batches(instance,output_generator,'_model%d'%model_idx+'_slice%d')
class Test0(unittest.TestCase):
    """Batching checks for DataGenerator with rebalancing switched on.

    The generator is built on three names from
    ./test_data/42nights_reduced.csv and selected batches are compared with
    pickled reference arrays stored in ./test_data.
    """

    def __init__(self, *args, **kwargs):
        super(Test0, self).__init__(*args, **kwargs)

    def setUp(self):
        """Load the dataset, seed `random` and build the rebalancing generator."""
        wanted_columns = np.array(['bpm', 'spo2'])
        loaded = get_dataset_pulsi(
            wanted_columns, filename='./test_data/42nights_reduced.csv')
        self.dataset_reduced_std = loaded[0]

        self.names = np.array(['p_17-01-19', 'p_17-01-20', 'p_17-01-25'])
        self.batch_size = 4
        self.number_of_predictions = 2
        self.window_size = 4
        self.rebalance_threshold = 0.8

        # Seeded before the generator is created so that runs are repeatable.
        random.seed(1234)
        generator_options = dict(
            batch_size=self.batch_size,
            number_of_predictions=self.number_of_predictions,
            window_size=self.window_size,
            step_prediction_dates=1,
            shuffle=False,
            rebalance_data=True,
            rebalance_threshold=self.rebalance_threshold,
            debug=False)
        self.generator = DataGenerator(
            self.dataset_reduced_std, self.names, "spo2", **generator_options)

    def _assert_balanced_batch(self, batch_index, x_reference_path,
                               y_reference_path):
        """Check shapes, balance and reference contents of one batch.

        The balance check requires that rebalance_select_row, applied to each
        row of the targets, sums to half the batch size.
        """
        features, targets = self.generator[batch_index]

        # Last axis of the features: the 2 dataset columns.
        self.assertEqual(features.shape,
                         (self.batch_size, self.window_size, 2))
        self.assertEqual(targets.shape,
                         (self.batch_size, self.number_of_predictions))

        row_flags = np.apply_along_axis(
            self.generator.rebalance_select_row, 1, targets)
        self.assertEqual(sum(row_flags), self.batch_size / 2)

        with open(x_reference_path, 'rb') as handle:
            reference_features = pd.read_pickle(handle)
            self.assertTrue((reference_features == features).all())
        with open(y_reference_path, 'rb') as handle:
            reference_targets = pd.read_pickle(handle)
            self.assertTrue((reference_targets == targets).all())

    def test_generator(self):
        """Check the number of batches and the first, second-last and last batch."""
        name_mask = self.dataset_reduced_std["name"].isin(self.names)
        rows_for_names = self.dataset_reduced_std[name_mask]
        rows_lost_per_name = self.window_size + self.number_of_predictions - 1
        expected_lines = (
            len(rows_for_names) - len(self.names) * rows_lost_per_name)
        expected = np.ceil(expected_lines / self.batch_size)
        self.assertEqual(len(self.generator), expected)

        self._assert_balanced_batch(
            0,
            './test_data/X0_balanced.pkl',
            './test_data/y0_balanced.pkl')
        self._assert_balanced_batch(
            len(self.generator) - 2,
            './test_data/X_second_last_balanced.pkl',
            './test_data/y_second_last_balanced.pkl')
        self._assert_balanced_batch(
            len(self.generator) - 1,
            './test_data/X_last_balanced.pkl',
            './test_data/y_last_balanced.pkl')