Example #1
    def reset_val_batches(self):

        self.val_batches = int(
            np.floor(self.val_input_data.shape[0] / self.batch_size))

        if self.val_batches > 0:
            # Keep only as many validation samples as fill complete batches
            self.val_input_batches = self.val_input_data[:self.batch_size * self.val_batches]
            self.val_output_batches = self.val_output_data[:self.batch_size * self.val_batches]

        else:
            # Fewer samples than one batch: zero-pad up to a full batch,
            # preserving the per-sample shape
            self.val_input_batches = np.zeros(
                (self.batch_size,) + self.val_input_data.shape[1:])
            self.val_output_batches = np.zeros(
                (self.batch_size,) + self.val_output_data.shape[1:])

            self.val_input_batches = data.reshape_1D_input(
                self.val_input_batches)

            self.val_input_batches[:self.val_input_data.shape[0]] = self.val_input_data
            self.val_output_batches[:self.val_output_data.shape[0]] = self.val_output_data

        if self.frame_work == 'Tensorflow':
            self.val_input_batches, self.val_output_batches = data.convert_to_tensorflow_minbatch(
                self.val_input_batches, self.val_output_batches,
                self.batch_size)
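The project's data.convert_to_tensorflow_minbatch helper is not shown in this listing. Below is a minimal sketch of what it presumably does, assuming it simply splits the flat (samples, ...) arrays into one array per mini-batch; this is an illustration, not the project's actual implementation:

import numpy as np

def convert_to_tensorflow_minbatch(inputs, outputs, batch_size):
    # Hypothetical sketch: split flat sample arrays into num_batches
    # arrays of shape (batch_size, ...), dropping any samples beyond
    # the last complete batch.
    num_batches = inputs.shape[0] // batch_size
    cut = num_batches * batch_size
    input_batches = np.array(np.split(inputs[:cut], num_batches))
    output_batches = np.array(np.split(outputs[:cut], num_batches))
    return input_batches, output_batches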
Example #2
    def reset_train_batches(self, batch_size=None, num_batches=None):
        ''' Reset the training batches.

        Example:    Training data set = 1000 samples,
                    Batch size = 300

        Framework:  Keras
            - The output is a single array holding as many training samples as fit into complete batches.
                    train_input_batches.shape = (900, data_size)

        Framework:  Tensorflow
            - The Tensorflow implementation requires each mini-batch to be set explicitly.
                    train_input_batches.shape = (# batches,) - each entry is a numpy array of shape (batch_size, data_size)

        :param batch_size: new batch size (optional; overrides the current value)
        :param num_batches: explicit number of batches (optional)
        :return: None
        '''

        # Update batch size if passed
        if batch_size is not None:
            self.batch_size = batch_size

        # Calc number of batches
        if num_batches is not None:
            self.num_train_batches = int(num_batches)
        else:
            self.num_train_batches = int(
                np.floor(self.train_input_data.shape[0] / self.batch_size))

        # Copy all training data
        self.train_input_batches = self.train_input_data
        self.train_output_batches = self.train_output_data

        # Shuffle Training data
        self.train_input_batches, self.train_output_batches = data.shuffle_input_output(
            self.train_input_batches, self.train_output_batches)

        # Keep only as many training samples as fill complete batches
        self.train_input_batches = self.train_input_batches[:self.batch_size * self.num_train_batches]
        self.train_output_batches = self.train_output_batches[:self.batch_size * self.num_train_batches]

        if self.frame_work == 'Keras':
            return

        if self.frame_work == 'Tensorflow':
            self.train_input_batches, self.train_output_batches = data.convert_to_tensorflow_minbatch(
                self.train_input_batches, self.train_output_batches,
                self.batch_size)
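A short usage sketch of the shapes described in the docstring, using the docstring's own numbers (center is a hypothetical instance holding 1000 training samples):

center.frame_work = 'Keras'
center.reset_train_batches(batch_size=300)
# center.train_input_batches.shape == (900, data_size): 3 complete batches of 300

center.frame_work = 'Tensorflow'
center.reset_train_batches(batch_size=300)
# center.train_input_batches now holds 3 mini-batches, each of shape (300, data_size)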
Example #3
def shuffle_data_samples(GraphData):
    ''' Shuffle the training, validation, and evaluation data sets '''

    GraphData.train_input_data, GraphData.train_output_data = data.shuffle_input_output(
        GraphData.train_input_data, GraphData.train_output_data)

    GraphData.val_input_data, GraphData.val_output_data = data.shuffle_input_output(
        GraphData.val_input_data, GraphData.val_output_data)

    GraphData.eval_input_data, GraphData.eval_output_data = data.shuffle_input_output(
        GraphData.eval_input_data, GraphData.eval_output_data)
Example #4
def combined_1D_onehot(DataCenter, folder_paths, samples=4):
    folder_paths = np.array(folder_paths)

    channels = folder_paths.shape[0]

    plot = research.SubPlot(samples, channels)

    update_plot = 1

    # Iterate over all samples
    for sample in range(samples):

        for channel in range(channels):

            one_hot_labels = data.load_data(folder_paths[channel] + 'one_hot_labels.csv')
            max_curves = data.load_data(folder_paths[channel] + 'max_array_record.csv')
            min_curves = data.load_data(folder_paths[channel] + 'min_array_record.csv')

            participants = max_curves[:, 0]

            max_curves = max_curves[:, 1:]
            min_curves = min_curves[:, 1:]

            # Randomly Select Input Data
            rand_idx = np.random.randint(DataCenter.all_input_data.shape[0])

            # Get Output idx
            output_idx = np.argmax(DataCenter.all_output_data[rand_idx])

            # Get Participant ID from One Hot Labels
            participant = one_hot_labels[output_idx]

            curve_id = np.array(np.where(participants == participant))[0][0]

            max_curve = np.divide(max_curves[curve_id], DataCenter.input_scale[channel])
            min_curve = np.divide(min_curves[curve_id], DataCenter.input_scale[channel])

            # Plot Max/Min Curves
            plot.current_plot = update_plot
            plot.add_subplot_data(max_curve, add_data_to=update_plot, color='black')
            plot.add_subplot_data(min_curve, add_data_to=update_plot, color='black')

            # Plot Input Data
            input_data = DataCenter.all_input_data[rand_idx][:, channel]
            plot.add_subplot_data(input_data, add_data_to=update_plot, color='blue')

            update_plot += 1

    plot.show_plow()
Example #5
    def load_all_data_multiple(self, data_folder, data_files):
        print('Loading Data from multiple CSV files')
        print('Loading from {}. {} Left'.format(data_folder + data_files[0],
                                                len(data_files)))
        self.data_location = data_folder
        self.all_data = data.load_data(data_folder + data_files[0])
        print('Current Samples = {}'.format(self.all_data.shape[0]))

        for i in range(1, len(data_files)):
            print('Loading from {}. {} Left'.format(
                data_folder + data_files[i],
                len(data_files) - i))
            new_data = data.load_data(data_folder + data_files[i])
            self.all_data = np.concatenate([self.all_data, new_data], axis=0)
            print('Current Samples = {}'.format(self.all_data.shape[0]))
Example #6
 def augment_1D_squeeze_stretch(self, squeeze=0.98, stretch=1.02, steps=3):
     print(
         'Augmenting - Squeeze & Stretch \n Inefficient Implementation, perform before other Augmentation'
     )
     self.train_input_data, self.train_output_data = data.augment_1D_squeeze_stretch(
         self.train_input_data, self.train_output_data, squeeze, stretch,
         steps)
     self.print_num_samples()
Example #7
    def balance_batch_for_dual_sided_one_hot(self):
        print('Balancing Batches for Dual Sided One Hot Array')
        if self.one_hot_balance_rate is None:
            self.one_hot_balance_rate = 1

        self.load_data()
        self.train_input_data, self.train_output_data = data.balance_batch_for_dual_sided_one_hot(
            self.train_input_data, self.train_output_data)
Example #8
 def calc_eval_siamese_batches(self):
     self.unique_ids = data.calc_unique_ids(self.all_output_data)
     self.siamese_eval_input_left, self.siamese_eval_input_right, self.siamese_eval_output_batches, self.siamese_eval_left_idx, self.siamese_eval_right_idx = data.calc_siamese_batches(
         self.eval_input_data,
         self.eval_output_data,
         self.unique_ids,
         self.num_eval_batches,
         self.batch_size,
         reshape=False)
Example #9
 def augment_1D_squash_pull(self,
                            squash=0.98,
                            pull=1.02,
                            steps=10,
                            type='multiply'):
     print('Augmenting - Squash & Pull')
     self.train_input_data, self.train_output_data = data.augment_1D_squash_pull(
         self.train_input_data, self.train_output_data, squash, pull, steps,
         type)
     self.print_num_samples()
Example #10
def reshape_channel(data):
    channels = data.shape[-1]

    if channels == 1:
        # Drop the trailing singleton channel axis: (N, L, 1) -> (N, L)
        if data.shape[channels + 1] == 1:
            data = data.reshape(data.shape[0], data.shape[1])

    else:
        raise NotImplementedError('To do: code needed for more than 1 channel')

    return data
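For illustration, a quick check of the reshape on a hypothetical 3-D array with a singleton channel axis:

import numpy as np

x = np.zeros((32, 128, 1))       # (samples, length, channels)
print(reshape_channel(x).shape)  # (32, 128)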
Example #11
 def contin_one_hot_output(self):
     self.all_output_data, self.one_hot_range = data.create_continuous_one_hot_array(
         self.all_output_data, self.one_hot_val_min, self.one_hot_val_max,
         self.one_hot_length)
     # Save one_hot range
     np.savetxt(self.data_location + self.file_prefix +
                'one_hot_labels.csv',
                self.one_hot_range,
                delimiter=',')
     np.savetxt(self.folder_path + self.file_prefix + 'one_hot_labels.csv',
                self.one_hot_range,
                delimiter=',')
Example #12
    def one_hot_output(self, column, concat=False):
        self.all_output_data, self.one_hot_labels = data.one_hot_output(
            self.all_output_data, column, concat=concat)

        # Save one_hot_labels
        np.savetxt(self.data_location + self.file_prefix +
                   'one_hot_labels.csv',
                   self.one_hot_labels,
                   delimiter=',')
        np.savetxt(self.folder_path + self.file_prefix + 'one_hot_labels.csv',
                   self.one_hot_labels,
                   delimiter=',')
        print(self.folder_path + self.file_prefix + 'one_hot_labels.csv')
Example #13
    def split_train_val_eval(self, val_split=0.3, eval_split=0, shuffle=False):

        if shuffle:
            self.all_input_data, self.all_output_data = data.shuffle_input_output(
                self.all_input_data, self.all_output_data)

        total_samples = int(self.all_input_data.shape[0])

        # Split into training/validation/evaluation data
        self.eval_samples = int(total_samples * eval_split)
        self.val_samples = int(total_samples * val_split)
        self.train_samples = int(total_samples - self.eval_samples - self.val_samples)
        print('Train Samples = {} ({}%), Val Samples = {} ({}%), Eval Samples = {} ({}%)'.format(
            self.train_samples,
            np.round(self.train_samples / total_samples * 100, 2),
            self.val_samples,
            np.round(self.val_samples / total_samples * 100, 2),
            self.eval_samples,
            np.round(self.eval_samples / total_samples * 100, 2)))

        # Evaluation data comes from the end of the data set
        self.eval_input_data = self.all_input_data[total_samples - self.eval_samples:]
        self.eval_output_data = self.all_output_data[total_samples - self.eval_samples:]

        # Validation data sits between the training and evaluation splits
        val_start = total_samples - self.eval_samples - self.val_samples
        val_end = total_samples - self.eval_samples
        self.val_input_data = self.all_input_data[val_start:val_end]
        self.val_output_data = self.all_output_data[val_start:val_end]

        # Training data is everything before the validation split
        self.train_input_data = self.all_input_data[:val_start]
        self.train_output_data = self.all_output_data[:val_start]
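A worked example of the split arithmetic, assuming a hypothetical center instance with 1000 samples loaded:

center.split_train_val_eval(val_split=0.3, eval_split=0.1, shuffle=True)
# eval  = int(1000 * 0.1) = 100
# val   = int(1000 * 0.3) = 300
# train = 1000 - 100 - 300 = 600
# prints: Train Samples = 600 (60.0%), Val Samples = 300 (30.0%), Eval Samples = 100 (10.0%)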
Example #14
    def load_data(self):
        path = self.folder_path + self.file_prefix

        print('Loading data from {}'.format(path))
        self.train_input_data = np.load(path + 'training_input_data.npy')
        self.val_input_data = np.load(path + 'validation_input_data.npy')
        self.eval_input_data = np.load(path + 'evaluation_input_data.npy')

        self.train_output_data = np.load(path + 'training_output_data.npy')
        self.val_output_data = np.load(path + 'validation_output_data.npy')
        self.eval_output_data = np.load(path + 'evaluation_output_data.npy')

        try:
            self.one_hot_labels = data.load_data(self.folder_path +
                                                 self.file_prefix +
                                                 'one_hot_labels.csv')
        except (IOError, OSError):
            print('No one-hot labels')

        self.print_num_samples()
Example #15
 def shuffle_training_only(self):
     print('Shuffling Training Data')
     self.train_input_data, self.train_output_data = data.shuffle_input_output(
         self.train_input_data, self.train_output_data)
Example #16
 def augment_add_noise(self, std_dev=0.01):
     print('Augmenting - Adding Gaussian Noise')
     self.train_input_data = data.augment_add_noise(self.train_input_data,
                                                    std_dev)
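data.augment_add_noise is not shown in this listing; below is a minimal sketch of the usual technique, assuming it adds zero-mean Gaussian noise to every training input:

import numpy as np

def augment_add_noise(inputs, std_dev=0.01):
    # Hypothetical sketch: perturb each input with zero-mean Gaussian
    # noise so the network sees slightly different samples each epoch.
    noise = np.random.normal(loc=0.0, scale=std_dev, size=inputs.shape)
    return inputs + noise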
Example #17
 def load_all_data_single(self, data_folder, data_file):
     print('Loading Data from CSV file')
     self.data_location = data_folder
     self.all_data = data.load_data(data_folder + data_file)
     self.all_data = np.nan_to_num(self.all_data)
Example #18
 def augment_1D_left_right(self, left=6, right=6, step=1):
     print('Augmenting Data Left and Right. New Samples =')
     self.train_input_data, self.train_output_data = data.augment_1D_left_right(
         self.train_input_data, self.train_output_data, left, right, step)
     self.print_num_samples()
Example #19
 def scale_multi_chan_input(self, scale=None):
     self.all_input_data, self.input_scale = data.scale_multi_chan_input(
         self.all_input_data, scale=scale)
Example #20
 def calc_train_siamese_batches(self):
     self.unique_ids = data.calc_unique_ids(self.all_output_data)
     self.siamese_train_input_batches_left, self.siamese_train_input_batches_right, self.siamese_train_output_batches, self.siamese_train_left_idx, self.siamese_train_right_idx = data.calc_siamese_batches(
         self.train_input_data, self.train_output_data, self.unique_ids,
         self.num_train_batches, self.batch_size)
Example #21
 def cut_input_data_seq_length(self, out_length=None):
     self.all_input_data = data.cut_input_data_seq_length(
         self.all_input_data, out_length)
Example #22
def export_val_one_hot_predictions(DataCenter, model):
    val_predictions = predict(DataCenter, model, DataCenter.val_input_batches)
    val_true = DataProcess.combine_batches(DataCenter.val_output_batches)
    val_true_arg_max = np.argmax(val_true, axis=1)
    return np.concatenate([val_true_arg_max.reshape(-1, 1), val_predictions],
                          axis=1)
Example #23
 def reshape_1D_input(self):
     self.all_input_data = data.reshape_1D_input(self.all_input_data)
Example #24
 def integrate_input_curve(self, col_start=None, col_end=None):
     self.all_output_data = data.integrate_input_curve(self.all_input_data,
                                                       col_start=col_start,
                                                       col_end=col_end)
Example #25
 def dynamic_updating_continuous_mse_loss(self):
     self.all_output_data, self.dyn_mse_shift = data.continuous_mse_loss(
         self.all_output_data, self.dyn_mse_base_width, self.dyn_mse_power,
         self.dyn_mse_top_width, self.dyn_mse_offset)
Example #26
    def split_input_output_data(self, num_outputs, output_first=True):
        # Split into input and output data
        self.all_input_data, self.all_output_data = data.split_input_output_data(
            self.all_data, num_outputs, output_first)

        assert self.all_input_data.shape[0] == self.all_output_data.shape[0]
Example #27
 def padd_one_hot_output(self, pad_reduce):
     self.all_output_data = data.padd_one_hot_array(self.all_output_data,
                                                    pad_reduce)
Example #28
 def restrict_to_ids(self, ids, column=0):
     print(self.all_output_data.shape)
     self.all_input_data, self.all_output_data = data.restrict_to_ids(
         self.all_input_data, self.all_output_data, ids, column)
     print(self.all_output_data.shape)
Example #29
 def scale_outputs(self, scale_type='max'):
     self.all_output_data, self.output_scale = data.scale_outputs(
         self.all_output_data, scale_type)
Example #30
def export_val_mse_predictions(DataCenter, model):
    print('Exporting Val Predictions')
    val_predictions = predict(DataCenter, model, DataCenter.val_input_batches)
    val_true = DataProcess.combine_batches(DataCenter.val_output_batches)
    return val_predictions, val_true
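DataProcess.combine_batches is likewise not shown; presumably it flattens the per-batch arrays produced for the Tensorflow code path back into a single (samples, ...) array. A minimal sketch under that assumption:

import numpy as np

def combine_batches(batches):
    # Hypothetical sketch: stack per-batch arrays of shape
    # (batch_size, ...) back into one flat (num_samples, ...) array.
    return np.concatenate(list(batches), axis=0)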