Python DataGenerator.__getitem__示例

# -*- coding: utf-8 -*-
"""
Created on Sat Apr  6 08:30:38 2019

@author: zouco

"""

from data_generator import triplet_generation
from data_generator import DataGenerator

# --- Part 1: build the input tensors for a single image ID and inspect them.
tg = triplet_generation()

ID = "00cfd9bbf55a241e"
img3 = tg.get_one_input_tensor(ID)
first_image = img3[0]
print(len(img3))
print(first_image.shape)

from tools.plot_image import plot_imgs

plot_imgs(img3)

# --- Part 2: pull batch index 1 out of a DataGenerator and report its sizes.
dg = DataGenerator("test", None)
batch = dg.__getitem__(1)
X, y = batch
first_input = X[0]
print(len(X))
print(first_input.shape)
print(y.shape)

示例#2

显示文件

class Test0(unittest.TestCase):
    """Tests for DataGenerator with shuffling and rebalancing disabled."""

    def __init__(self, *args, **kwargs):
        super(Test0, self).__init__(*args, **kwargs)

    def setUp(self):
        """Load the dataset and build the generator shared by all tests."""
        # read dataset
        columns = np.array(['bpm', 'spo2'])
        self.dataset_reduced_std, _ = get_dataset_pulsi(
            columns, filename='./test_data/42nights.csv')

        # instantiate DataGenerator
        self.names = np.array(['h_17-04-27', 'p_17-04-27'])
        self.batch_size = 3
        self.number_of_predictions = 4
        self.window_size = 12
        self.generator = self._make_generator(self.names)

    def _make_generator(self, names):
        """Return a DataGenerator over `names` using the setUp settings."""
        return DataGenerator(
            self.dataset_reduced_std,
            names,
            "spo2",
            batch_size=self.batch_size,
            number_of_predictions=self.number_of_predictions,
            window_size=self.window_size,
            step_prediction_dates=1,
            shuffle=False,
            rebalance_data=False,
            debug=False)

    @staticmethod
    def _load_reference(path):
        """Read one pickled reference array from `path`."""
        with open(path, 'rb') as f:
            return pd.read_pickle(f)

    def _check_batch(self, index, rows, x_path, y_path):
        """Assert the shapes and contents of batch `index`.

        `rows` is the expected first dimension of both arrays; `x_path` and
        `y_path` are the pickled reference arrays to compare against.
        """
        X, y = self.generator[index]
        self.assertEqual(X.shape, (rows, self.window_size, 2))  # 2 columns
        self.assertEqual(y.shape, (rows, self.number_of_predictions))
        self.assertTrue((self._load_reference(x_path) == X).all())
        self.assertTrue((self._load_reference(y_path) == y).all())

    def test_generator(self):
        """Check the batch count and the first, second-last and last batch."""
        filtered_by_names = self.dataset_reduced_std[
            self.dataset_reduced_std["name"].isin(self.names)]
        expected_lines = len(filtered_by_names) - len(
            self.names) * (self.window_size + self.number_of_predictions - 1)
        expected = np.ceil(expected_lines / self.batch_size)
        # checks
        self.assertEqual(len(self.generator), expected)

        batch_count = len(self.generator)
        self._check_batch(0, self.batch_size,
                          './test_data/X0.pkl',
                          './test_data/y0.pkl')
        self._check_batch(batch_count - 2, self.batch_size,
                          './test_data/X_second_last_dupl.pkl',
                          './test_data/y_second_last_dupl.pkl')
        # The reference pickles are read-only fixtures. Writing the computed
        # batch to them right before reading them back (as this test used to
        # do for the last batch) makes the comparison trivially true and
        # overwrites the fixture on every run.
        self._check_batch(batch_count - 1, 3,  # could be 2 or 1 instead of 3
                          './test_data/X_last_dupl.pkl',
                          './test_data/y_last_dupl.pkl')

    def test_all_batches(self):
        """get_all_batches and get_all_batches_debug must agree."""
        X, y = self.generator.get_all_batches()
        X_b, y_b = self.generator.get_all_batches_debug()
        self.assertTrue((X == X_b).all())
        self.assertTrue((y == y_b).all())

    def test_merge(self):
        """Merging two single-name generators must match the combined one."""
        X, y = self.generator.get_all_batches()

        generator_1 = self._make_generator(np.array(['h_17-04-27']))
        generator_2 = self._make_generator(np.array(['p_17-04-27']))

        generator_all = generator_1.get_merged_generator(generator_2)
        X_b, y_b = generator_all.get_all_batches()
        self.assertTrue((X == X_b).all())
        self.assertTrue((y == y_b).all())

        # Same check with the merge performed in the opposite direction.
        generator_all_bis = generator_2.get_merged_generator(generator_1)
        X_c, y_c = generator_all_bis.get_all_batches()
        self.assertTrue((X == X_c).all())
        self.assertTrue((y == y_c).all())

示例#3

显示文件

    return data_ids


if __name__ == '__main__':
    train_ids = get_data_ids("Train")
    valid_ids = get_data_ids("Validation")

    train_dir = os.path.join(FLAGS.data_dir, "Train")
    valid_dir = os.path.join(FLAGS.data_dir, "Validation")

    train_gen = DataGenerator(train_ids,
                              train_dir,
                              image_size=FLAGS.image_size,
                              batch_size=FLAGS.batch_size)
    print("***************", train_gen.__getitem__(0)[1].shape)
    valid_gen = DataGenerator(valid_ids,
                              valid_dir,
                              image_size=FLAGS.image_size,
                              batch_size=FLAGS.batch_size)

    train_steps = len(train_ids) // FLAGS.batch_size
    valid_steps = len(valid_ids) // FLAGS.batch_size

    epochs = FLAGS.epochs

    model = build_ResUNet(FLAGS.image_size, FLAGS.num_class)

    history = model.fit(train_gen,
                        validation_data=valid_gen,
                        steps_per_epoch=train_steps,

示例#4

显示文件

文件： produce_output.py 项目： gsaha009/HHbbWWAnalysis

 def OutputFromTraining(self,data,path_output,output_name=None,crossval_use_training=False):
     """
         Get the output of the model from the test set
         This is data separated from the training
         If output_name is specified, the whole data will be written in 'output_name'.root
             if not, the samples in the dataframe are used to split into different files with names 'sample'.root

         Args:
             data: frame indexed with self.list_inputs (and data['mask'] in cross validation).
                 Only read when self.generator is falsy; otherwise every batch is taken
                 from a DataGenerator and this argument is not used.
             path_output: forwarded unchanged to self.SaveToRoot.
             output_name: forwarded unchanged to self.SaveToRoot.
             crossval_use_training: only consulted in cross validation without generator.
                 When true, each model is applied to one of its training slices that no
                 earlier model has used, instead of its apply slice.

         Results are written through self.SaveToRoot; nothing is returned.
     """
     # One 'output_<name>' column per entry of parameters.outputs, with any '$' removed.
     # Built once here because every branch below needs the same list.
     output_columns = [('output_%s'%o).replace('$','') for o in parameters.outputs]

     if not self.generator:
         inputs = data[self.list_inputs]
         if len(self.model) == 1: # classic training
             instance = HyperModel(self.model[0])
             output = instance.HyperRestore(inputs,verbose=1)
             output_df = pd.DataFrame(output,columns=output_columns,index=data.index)
         else:   # cross validation
             # Start from zeros; each model fills the rows selected by its mask
             output_df = pd.DataFrame(np.zeros((data.shape[0],len(parameters.outputs))),columns=output_columns,index=data.index)
             used_train_idx = [] # for train output
             for model_idx,model in enumerate(self.model):
                 instance = HyperModel(model)
                 apply_idx,eval_idx,train_idx = GenerateSliceIndices(model_idx)
                 if crossval_use_training:
                     for i in range(model_idx,model_idx+len(train_idx)):
                         if train_idx[i%len(train_idx)] not in used_train_idx:
                             train_idx = [train_idx[i%len(train_idx)]]
                             used_train_idx.extend(train_idx)
                             break
                             # logic necessary so that each model is applied once
                     apply_mask = GenerateSliceMask(train_idx,data['mask'])
                 else:
                     apply_mask = GenerateSliceMask(apply_idx,data['mask'])
                 model_out = instance.HyperRestore(inputs[apply_mask])
                 output_df[apply_mask] = model_out
         # A row whose maximum is still exactly 0 is flagged as an error here
         assert not (output_df.max(1)==0).any()
         full_df = pd.concat([data,output_df],axis=1)
         self.SaveToRoot(full_df,path_output,output_name)
     else:
         # Keyword arguments common to every output DataGenerator built below
         generator_kwargs = dict(path = parameters.config,
                                 inputs = parameters.inputs,
                                 inputsLBN = parameters.LBN_inputs,
                                 outputs = parameters.outputs,
                                 other = parameters.other_variables,
                                 cut = parameters.cut,
                                 weight  = parameters.weight,
                                 batch_size = parameters.output_batch_size,
                                 state_set = 'output')

         def save_slices(instance,output_generator,prefix):
             # Apply `instance` to each batch of `output_generator` and save it as its own slice
             for i in range(len(output_generator)):
                 batch = output_generator.__getitem__(i,True)
                 batch_output = instance.HyperRestore(batch[self.list_inputs])
                 batch_output_df = pd.DataFrame(batch_output,columns=output_columns,index=batch.index)
                 full_df = pd.concat([batch,batch_output_df],axis=1)
                 self.SaveToRoot(full_df,path_output,output_name,out_idx=prefix+'_slice%d'%i)

         if len(self.model) == 1: # classic training
             output_generator = DataGenerator(**generator_kwargs)
             instance = HyperModel(self.model[0])
             save_slices(instance,output_generator,'')
         else:   # cross validation
             for model_idx,model in enumerate(self.model):
                 logging.info('Starting generator for model %d'%model_idx)
                 instance = HyperModel(model)
                 output_generator = DataGenerator(model_idx = model_idx,**generator_kwargs)
                 save_slices(instance,output_generator,'_model%d'%model_idx)

示例#5

显示文件

class Test0(unittest.TestCase):
    """Tests for DataGenerator with rebalance_data switched on."""

    def __init__(self, *args, **kwargs):
        super(Test0, self).__init__(*args, **kwargs)

    def setUp(self):
        """Load the reduced dataset and build the rebalancing generator."""
        # read dataset
        columns = np.array(['bpm', 'spo2'])
        self.dataset_reduced_std, _ = get_dataset_pulsi(
            columns, filename='./test_data/42nights_reduced.csv')

        # generator settings
        self.names = np.array(['p_17-01-19', 'p_17-01-20', 'p_17-01-25'])
        self.batch_size = 4
        self.number_of_predictions = 2
        self.window_size = 4
        self.rebalance_threshold = 0.8

        # seed the random module before the generator is built
        random.seed(1234)
        generator_options = dict(
            batch_size=self.batch_size,
            number_of_predictions=self.number_of_predictions,
            window_size=self.window_size,
            step_prediction_dates=1,
            shuffle=False,
            rebalance_data=True,
            rebalance_threshold=self.rebalance_threshold,
            debug=False,
        )
        self.generator = DataGenerator(self.dataset_reduced_std, self.names,
                                       "spo2", **generator_options)

    def _check_balanced_batch(self, index, x_path, y_path):
        """Run every per-batch assertion on batch `index`.

        Checks, in order: the shapes of X and y, that summing
        rebalance_select_row over the rows of y gives half the batch size,
        and that X and y equal the pickled references at `x_path`/`y_path`.
        """
        X, y = self.generator.__getitem__(index)

        expected_x_shape = (self.batch_size, self.window_size, 2)  # 2 columns
        expected_y_shape = (self.batch_size, self.number_of_predictions)
        self.assertEqual(X.shape, expected_x_shape)
        self.assertEqual(y.shape, expected_y_shape)

        selected_rows = np.apply_along_axis(
            self.generator.rebalance_select_row, 1, y)
        self.assertEqual(sum(selected_rows), self.batch_size / 2)

        with open(x_path, 'rb') as f:
            X_ref = pd.read_pickle(f)
        self.assertTrue((X_ref == X).all())
        with open(y_path, 'rb') as f:
            y_ref = pd.read_pickle(f)
        self.assertTrue((y_ref == y).all())

    def test_generator(self):
        """Check the batch count, then the first and the last two batches."""
        name_mask = self.dataset_reduced_std["name"].isin(self.names)
        filtered_by_names = self.dataset_reduced_std[name_mask]
        lines_dropped_per_name = (
            self.window_size + self.number_of_predictions - 1)
        expected_lines = (len(filtered_by_names)
                          - len(self.names) * lines_dropped_per_name)
        expected = np.ceil(expected_lines / self.batch_size)
        # checks
        self.assertEqual(len(self.generator), expected)

        batch_count = len(self.generator)
        cases = (
            (0,
             './test_data/X0_balanced.pkl',
             './test_data/y0_balanced.pkl'),
            (batch_count - 2,
             './test_data/X_second_last_balanced.pkl',
             './test_data/y_second_last_balanced.pkl'),
            (batch_count - 1,
             './test_data/X_last_balanced.pkl',
             './test_data/y_last_balanced.pkl'),
        )
        for index, x_path, y_path in cases:
            self._check_balanced_batch(index, x_path, y_path)

Python DataGenerator.__getitem__示例

Python DataGenerator.__getitem__示例